Merge tag 'powerpc-5.4-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 28 Sep 2019 20:43:00 +0000 (13:43 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 28 Sep 2019 20:43:00 +0000 (13:43 -0700)
Pull powerpc fixes from Michael Ellerman:
 "An assortment of fixes that were either missed by me, or didn't arrive
  quite in time for the first v5.4 pull.

   - Most notable is a fix for an issue with tlbie (broadcast TLB
     invalidation) on Power9, when using the Radix MMU. The tlbie can
     race with an mtpid (move to PID register, essentially MMU context
     switch) on another thread of the core, which can cause stores to
     continue to go to a page after it's unmapped.

   - A fix in our KVM code to add a missing barrier, the lack of which
     has been observed to cause missed IPIs and subsequently stuck CPUs
     in the host.

   - A change to the way we initialise PCR (Processor Compatibility
     Register) to make it forward compatible with future CPUs.

   - On some older PowerVM systems our H_BLOCK_REMOVE support could
     oops; fix it to detect such systems and fall back to the old
     invalidation method.

   - A fix for an oops seen on some machines when using KASAN on 32-bit.

   - A handful of other minor fixes, and two new selftests.

  Thanks to: Alistair Popple, Aneesh Kumar K.V, Christophe Leroy,
  Gustavo Romero, Joel Stanley, Jordan Niethe, Laurent Dufour, Michael
  Roth, Oliver O'Halloran"

* tag 'powerpc-5.4-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/eeh: Fix eeh eeh_debugfs_break_device() with SRIOV devices
  powerpc/nvdimm: use H_SCM_QUERY hcall on H_OVERLAP error
  powerpc/nvdimm: Use HCALL error as the return value
  selftests/powerpc: Add test case for tlbie vs mtpidr ordering issue
  powerpc/mm: Fixup tlbie vs mtpidr/mtlpidr ordering issue on POWER9
  powerpc/book3s64/radix: Rename CPU_FTR_P9_TLBIE_BUG feature flag
  powerpc/book3s64/mm: Don't do tlbie fixup for some hardware revisions
  powerpc/pseries: Call H_BLOCK_REMOVE when supported
  powerpc/pseries: Read TLB Block Invalidate Characteristics
  KVM: PPC: Book3S HV: use smp_mb() when setting/clearing host_ipi flag
  powerpc/mm: Fix an Oops in kasan_mmu_init()
  powerpc/mm: Add a helper to select PAGE_KERNEL_RO or PAGE_READONLY
  powerpc/64s: Set reserved PCR bits
  powerpc: Fix definition of PCR bits to work with old binutils
  powerpc/book3s64/radix: Remove WARN_ON in destroy_context()
  powerpc/tm: Add tm-poison test

2368 files changed:
.gitignore
Documentation/ABI/testing/dev-kmsg
Documentation/ABI/testing/ima_policy
Documentation/ABI/testing/sysfs-class-backlight [new file with mode: 0644]
Documentation/ABI/testing/sysfs-class-remoteproc
Documentation/ABI/testing/sysfs-class-watchdog
Documentation/ABI/testing/sysfs-fs-f2fs
Documentation/ABI/testing/sysfs-kernel-slab
Documentation/admin-guide/cgroup-v1/memory.rst
Documentation/admin-guide/device-mapper/dm-clone.rst [new file with mode: 0644]
Documentation/admin-guide/device-mapper/verity.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/core-api/kernel-api.rst
Documentation/devicetree/bindings/arm/l2c2x0.yaml
Documentation/devicetree/bindings/arm/marvell/ap806-system-controller.txt
Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt
Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt
Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt
Documentation/devicetree/bindings/arm/mediatek/mediatek,camsys.txt
Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt
Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt
Documentation/devicetree/bindings/arm/mediatek/mediatek,ipesys.txt [new file with mode: 0644]
Documentation/devicetree/bindings/arm/mediatek/mediatek,mfgcfg.txt
Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt
Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt
Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt
Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt
Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt
Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml
Documentation/devicetree/bindings/clock/brcm,bcm2835-cprman.txt
Documentation/devicetree/bindings/clock/qcom,gcc.txt
Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt
Documentation/devicetree/bindings/clock/renesas,emev2-smu.txt [moved from Documentation/devicetree/bindings/clock/emev2-clock.txt with 100% similarity]
Documentation/devicetree/bindings/clock/rockchip,rk3308-cru.txt [new file with mode: 0644]
Documentation/devicetree/bindings/clock/ti,cdce925.txt
Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.txt
Documentation/devicetree/bindings/i2c/renesas,i2c.txt [moved from Documentation/devicetree/bindings/i2c/i2c-rcar.txt with 100% similarity]
Documentation/devicetree/bindings/i2c/renesas,iic-emev2.txt [moved from Documentation/devicetree/bindings/i2c/i2c-emev2.txt with 100% similarity]
Documentation/devicetree/bindings/i2c/renesas,iic.txt [moved from Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt with 100% similarity]
Documentation/devicetree/bindings/i2c/renesas,riic.txt [moved from Documentation/devicetree/bindings/i2c/i2c-riic.txt with 100% similarity]
Documentation/devicetree/bindings/mfd/mt6397.txt
Documentation/devicetree/bindings/mfd/rn5t618.txt
Documentation/devicetree/bindings/mtd/mxic-nand.txt [new file with mode: 0644]
Documentation/devicetree/bindings/pci/designware-pcie.txt
Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
Documentation/devicetree/bindings/pci/mediatek-pcie.txt
Documentation/devicetree/bindings/pci/nvidia,tegra194-pcie.txt [new file with mode: 0644]
Documentation/devicetree/bindings/pci/pci-armada8k.txt
Documentation/devicetree/bindings/pci/pci.txt
Documentation/devicetree/bindings/pci/pcie-al.txt [new file with mode: 0644]
Documentation/devicetree/bindings/phy/phy-tegra194-p2u.txt [new file with mode: 0644]
Documentation/devicetree/bindings/power/reset/mt6323-poweroff.txt [new file with mode: 0644]
Documentation/devicetree/bindings/pwm/ingenic,jz47xx-pwm.txt [deleted file]
Documentation/devicetree/bindings/pwm/pwm-mediatek.txt
Documentation/devicetree/bindings/pwm/pwm-sprd.txt [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml
Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt
Documentation/devicetree/bindings/rtc/pcf8563.txt
Documentation/devicetree/bindings/rtc/rtc-ds1307.txt
Documentation/devicetree/bindings/rtc/rtc-fsl-ftm-alarm.txt [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/rtc-meson-vrtc.txt [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/trivial-rtc.yaml
Documentation/devicetree/bindings/soundwire/soundwire-controller.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/thermal/qoriq-thermal.txt
Documentation/devicetree/bindings/timer/ingenic,tcu.txt [new file with mode: 0644]
Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt
Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.txt [new file with mode: 0644]
Documentation/devicetree/bindings/watchdog/ingenic,jz4740-wdt.txt [deleted file]
Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt [deleted file]
Documentation/devicetree/bindings/watchdog/watchdog.yaml [new file with mode: 0644]
Documentation/filesystems/ceph.txt
Documentation/filesystems/ext4/bigalloc.rst
Documentation/filesystems/ext4/blockgroup.rst
Documentation/filesystems/ext4/blocks.rst
Documentation/filesystems/ext4/directory.rst
Documentation/filesystems/ext4/group_descr.rst
Documentation/filesystems/ext4/inodes.rst
Documentation/filesystems/ext4/super.rst
Documentation/filesystems/f2fs.txt
Documentation/filesystems/index.rst
Documentation/filesystems/virtiofs.rst [new file with mode: 0644]
Documentation/index.rst
Documentation/infiniband/core_locking.rst
Documentation/kbuild/modules.rst
Documentation/kbuild/namespaces.rst [new file with mode: 0644]
Documentation/kernel-hacking/hacking.rst
Documentation/mips/index.rst
Documentation/mips/ingenic-tcu.rst [new file with mode: 0644]
Documentation/security/IMA-templates.rst
Documentation/virt/kvm/api.txt
Documentation/vm/hmm.rst
Documentation/vm/split_page_table_lock.rst
Documentation/watchdog/watchdog-parameters.rst
MAINTAINERS
Makefile
arch/Kconfig
arch/alpha/include/asm/pgalloc.h
arch/alpha/include/asm/pgtable.h
arch/alpha/include/uapi/asm/mman.h
arch/arc/include/asm/pgalloc.h
arch/arc/include/asm/pgtable.h
arch/arm/Kconfig
arch/arm/Kconfig.debug
arch/arm/Makefile
arch/arm/boot/compressed/head.S
arch/arm/boot/dts/Makefile
arch/arm/boot/dts/am33xx-l4.dtsi
arch/arm/boot/dts/am3517.dtsi
arch/arm/boot/dts/am437x-l4.dtsi
arch/arm/boot/dts/aspeed-ast2600-evb.dts [new file with mode: 0644]
arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi [new file with mode: 0644]
arch/arm/boot/dts/aspeed-g6.dtsi [new file with mode: 0644]
arch/arm/boot/dts/dra7-l4.dtsi
arch/arm/boot/dts/gemini-dlink-dir-685.dts
arch/arm/boot/dts/ls1021a.dtsi
arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts [new file with mode: 0644]
arch/arm/boot/dts/mmp2.dtsi
arch/arm/boot/dts/omap34xx.dtsi
arch/arm/boot/dts/omap36xx.dtsi
arch/arm/boot/dts/omap4-l4-abe.dtsi
arch/arm/boot/dts/omap4-l4.dtsi
arch/arm/boot/dts/omap4.dtsi
arch/arm/boot/dts/omap5.dtsi
arch/arm/boot/dts/omap54xx-clocks.dtsi
arch/arm/include/asm/hardware/cache-aurora-l2.h [moved from arch/arm/mm/cache-aurora-l2.h with 50% similarity]
arch/arm/include/asm/pgalloc.h
arch/arm/include/asm/pgtable-nommu.h
arch/arm/include/asm/pgtable.h
arch/arm/include/asm/processor.h
arch/arm/include/asm/tlb.h
arch/arm/kernel/perf_event_v7.c
arch/arm/kernel/process.c
arch/arm/kernel/vdso.c
arch/arm/lib/Makefile
arch/arm/lib/backtrace-clang.S [new file with mode: 0644]
arch/arm/mach-exynos/Kconfig
arch/arm/mach-omap2/omap_hwmod_33xx_43xx_common_data.h
arch/arm/mach-omap2/omap_hwmod_33xx_43xx_interconnect_data.c
arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c
arch/arm/mach-omap2/omap_hwmod_33xx_data.c
arch/arm/mach-omap2/omap_hwmod_43xx_data.c
arch/arm/mach-omap2/omap_hwmod_44xx_data.c
arch/arm/mach-omap2/omap_hwmod_7xx_data.c
arch/arm/mm/cache-l2x0.c
arch/arm/mm/fault.c
arch/arm/mm/fault.h
arch/arm/mm/flush.c
arch/arm/mm/mmap.c
arch/arm/mm/mmu.c
arch/arm/plat-samsung/watchdog-reset.c
arch/arm64/Kconfig
arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi
arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
arch/arm64/boot/dts/marvell/Makefile
arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts [new file with mode: 0644]
arch/arm64/boot/dts/marvell/armada-37xx.dtsi
arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts
arch/arm64/boot/dts/nvidia/tegra194.dtsi
arch/arm64/boot/dts/qcom/sdm845-db845c.dts
arch/arm64/boot/dts/qcom/sdm845-mtp.dts
arch/arm64/boot/dts/ti/k3-am65-main.dtsi
arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
arch/arm64/boot/dts/ti/k3-am65.dtsi
arch/arm64/boot/dts/ti/k3-am654-base-board.dts
arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts
arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
arch/arm64/boot/dts/ti/k3-j721e.dtsi
arch/arm64/include/asm/pgalloc.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/processor.h
arch/arm64/include/asm/tlb.h
arch/arm64/kernel/process.c
arch/arm64/mm/flush.c
arch/arm64/mm/mmap.c
arch/arm64/mm/mmu.c
arch/arm64/mm/pgd.c
arch/c6x/include/asm/pgtable.h
arch/csky/include/asm/pgalloc.h
arch/csky/include/asm/pgtable.h
arch/csky/include/asm/tlb.h
arch/h8300/include/asm/pgtable.h
arch/hexagon/include/asm/pgalloc.h
arch/hexagon/include/asm/pgtable.h
arch/hexagon/mm/Makefile
arch/hexagon/mm/init.c
arch/hexagon/mm/pgalloc.c [deleted file]
arch/ia64/Kconfig
arch/ia64/include/asm/pgalloc.h
arch/ia64/include/asm/pgtable.h
arch/ia64/kernel/irq_ia64.c
arch/ia64/mm/contig.c
arch/ia64/mm/discontig.c
arch/ia64/mm/init.c
arch/m68k/include/asm/export.h
arch/m68k/include/asm/mcf_pgalloc.h
arch/m68k/include/asm/motorola_pgalloc.h
arch/m68k/include/asm/pgtable_mm.h
arch/m68k/include/asm/pgtable_no.h
arch/m68k/include/asm/sun3_pgalloc.h
arch/microblaze/Kconfig
arch/microblaze/boot/dts/system.dts
arch/microblaze/configs/mmu_defconfig
arch/microblaze/configs/nommu_defconfig
arch/microblaze/include/asm/io.h
arch/microblaze/include/asm/pci.h
arch/microblaze/include/asm/pgalloc.h
arch/microblaze/include/asm/pgtable.h
arch/microblaze/include/asm/uaccess.h
arch/microblaze/kernel/reset.c
arch/microblaze/mm/consistent.c
arch/microblaze/mm/pgtable.c
arch/mips/Kconfig
arch/mips/Makefile
arch/mips/bcm47xx/board.c
arch/mips/bcm47xx/buttons.c
arch/mips/boot/dts/brcm/bcm3368.dtsi
arch/mips/boot/dts/brcm/bcm63268.dtsi
arch/mips/boot/dts/brcm/bcm6328.dtsi
arch/mips/boot/dts/brcm/bcm6358.dtsi
arch/mips/boot/dts/brcm/bcm6362.dtsi
arch/mips/boot/dts/brcm/bcm6368.dtsi
arch/mips/boot/dts/ingenic/ci20.dts
arch/mips/boot/dts/ingenic/gcw0.dts
arch/mips/boot/dts/ingenic/jz4740.dtsi
arch/mips/boot/dts/ingenic/jz4770.dtsi
arch/mips/boot/dts/ingenic/jz4780.dtsi
arch/mips/boot/dts/ingenic/qi_lb60.dts
arch/mips/boot/dts/mscc/ocelot.dtsi
arch/mips/cavium-octeon/dma-octeon.c
arch/mips/cavium-octeon/setup.c
arch/mips/configs/qi_lb60_defconfig
arch/mips/fw/arc/memory.c
arch/mips/include/asm/addrspace.h
arch/mips/include/asm/atomic.h
arch/mips/include/asm/barrier.h
arch/mips/include/asm/bitops.h
arch/mips/include/asm/bootinfo.h
arch/mips/include/asm/cmpxchg.h
arch/mips/include/asm/cpu-features.h
arch/mips/include/asm/cpu-type.h
arch/mips/include/asm/cpu.h
arch/mips/include/asm/io.h
arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h
arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
arch/mips/include/asm/mach-cavium-octeon/war.h
arch/mips/include/asm/mach-dec/cpu-feature-overrides.h
arch/mips/include/asm/mach-generic/war.h
arch/mips/include/asm/mach-ip22/war.h
arch/mips/include/asm/mach-ip27/war.h
arch/mips/include/asm/mach-ip28/war.h
arch/mips/include/asm/mach-ip32/war.h
arch/mips/include/asm/mach-jz4740/gpio.h [deleted file]
arch/mips/include/asm/mach-jz4740/jz4740_fb.h [deleted file]
arch/mips/include/asm/mach-jz4740/jz4740_mmc.h [deleted file]
arch/mips/include/asm/mach-jz4740/platform.h [deleted file]
arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
arch/mips/include/asm/mach-malta/war.h
arch/mips/include/asm/mach-pmcs-msp71xx/war.h
arch/mips/include/asm/mach-rc32434/war.h
arch/mips/include/asm/mach-rm/war.h
arch/mips/include/asm/mach-sibyte/war.h
arch/mips/include/asm/mach-tx49xx/war.h
arch/mips/include/asm/mipsregs.h
arch/mips/include/asm/module.h
arch/mips/include/asm/octeon/octeon.h
arch/mips/include/asm/pci.h
arch/mips/include/asm/pgalloc.h
arch/mips/include/asm/pgtable-32.h
arch/mips/include/asm/pgtable-bits.h
arch/mips/include/asm/pgtable.h
arch/mips/include/asm/processor.h
arch/mips/include/asm/syscall.h
arch/mips/include/asm/vdso.h
arch/mips/include/asm/vdso/gettimeofday.h [new file with mode: 0644]
arch/mips/include/asm/vdso/vdso.h [moved from arch/mips/vdso/vdso.h with 76% similarity]
arch/mips/include/asm/vdso/vsyscall.h [new file with mode: 0644]
arch/mips/include/asm/war.h
arch/mips/include/uapi/asm/mman.h
arch/mips/jz4740/Makefile
arch/mips/jz4740/board-qi_lb60.c [deleted file]
arch/mips/jz4740/platform.c [deleted file]
arch/mips/jz4740/prom.c
arch/mips/jz4740/setup.c
arch/mips/jz4740/time.c
arch/mips/kernel/branch.c
arch/mips/kernel/cpu-probe.c
arch/mips/kernel/genex.S
arch/mips/kernel/idle.c
arch/mips/kernel/proc.c
arch/mips/kernel/scall32-o32.S
arch/mips/kernel/scall64-n32.S
arch/mips/kernel/scall64-n64.S
arch/mips/kernel/scall64-o32.S
arch/mips/kernel/setup.c
arch/mips/kernel/syscall.c
arch/mips/kernel/syscalls/syscalltbl.sh
arch/mips/kernel/vdso.c
arch/mips/lantiq/xway/sysctrl.c
arch/mips/mm/Makefile
arch/mips/mm/c-r4k.c
arch/mips/mm/init.c
arch/mips/mm/mmap.c
arch/mips/mm/pgtable-32.c
arch/mips/mm/sc-mips.c
arch/mips/mm/tlb-r8k.c [deleted file]
arch/mips/mm/tlbex.c
arch/mips/mti-malta/malta-memory.c
arch/mips/netlogic/xlp/setup.c
arch/mips/pci/pci-xtalk-bridge.c
arch/mips/pmcs-msp71xx/msp_prom.c
arch/mips/ralink/Kconfig
arch/mips/ralink/timer.c
arch/mips/sgi-ip22/ip28-berr.c
arch/mips/vdso/Makefile
arch/mips/vdso/config-n32-o32-env.c [new file with mode: 0644]
arch/mips/vdso/elf.S
arch/mips/vdso/sigreturn.S
arch/mips/vdso/vdso.lds.S
arch/mips/vdso/vgettimeofday.c [new file with mode: 0644]
arch/nds32/include/asm/pgalloc.h
arch/nds32/include/asm/pgtable.h
arch/nios2/include/asm/pgalloc.h
arch/nios2/include/asm/pgtable.h
arch/nios2/kernel/setup.c
arch/openrisc/include/asm/pgalloc.h
arch/openrisc/include/asm/pgtable.h
arch/openrisc/kernel/dma.c
arch/parisc/include/asm/pgalloc.h
arch/parisc/include/asm/pgtable.h
arch/parisc/include/uapi/asm/mman.h
arch/powerpc/include/asm/pci.h
arch/powerpc/include/asm/pgalloc.h
arch/powerpc/include/asm/pgtable.h
arch/powerpc/mm/book3s64/hash_utils.c
arch/powerpc/mm/book3s64/iommu_api.c
arch/powerpc/mm/book3s64/subpage_prot.c
arch/powerpc/mm/hugetlbpage.c
arch/powerpc/mm/pgtable-frag.c
arch/powerpc/platforms/cell/spufs/inode.c
arch/riscv/Kconfig
arch/riscv/boot/dts/sifive/fu540-c000.dtsi
arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
arch/riscv/configs/defconfig
arch/riscv/configs/rv32_defconfig
arch/riscv/include/asm/pgalloc.h
arch/riscv/include/asm/pgtable.h
arch/riscv/kernel/entry.S
arch/riscv/kernel/head.S
arch/riscv/kernel/smp.c
arch/riscv/kernel/time.c
arch/s390/Kconfig
arch/s390/hypfs/inode.c
arch/s390/include/asm/cpu_mf.h
arch/s390/include/asm/perf_event.h
arch/s390/include/asm/pgtable.h
arch/s390/include/uapi/asm/zcrypt.h
arch/s390/kernel/kexec_elf.c
arch/s390/kernel/kexec_image.c
arch/s390/kernel/machine_kexec_file.c
arch/s390/kernel/perf_cpum_sf.c
arch/s390/kernel/topology.c
arch/s390/mm/gmap.c
arch/s390/mm/pgalloc.c
arch/sh/include/asm/pgalloc.h
arch/sh/include/asm/pgtable.h
arch/sh/mm/Kconfig
arch/sh/mm/nommu.c
arch/sparc/include/asm/pci.h
arch/sparc/include/asm/pgalloc_32.h
arch/sparc/include/asm/pgalloc_64.h
arch/sparc/include/asm/pgtable_32.h
arch/sparc/include/asm/pgtable_64.h
arch/sparc/mm/init_32.c
arch/sparc/mm/init_64.c
arch/sparc/mm/srmmu.c
arch/um/drivers/Kconfig
arch/um/drivers/Makefile
arch/um/drivers/chan.h
arch/um/drivers/chan_kern.c
arch/um/drivers/chan_user.c
arch/um/drivers/chan_user.h
arch/um/drivers/cow_user.c
arch/um/drivers/daemon.h
arch/um/drivers/daemon_kern.c
arch/um/drivers/daemon_user.c
arch/um/drivers/fd.c
arch/um/drivers/harddog_user.c
arch/um/drivers/hostaudio_kern.c
arch/um/drivers/line.c
arch/um/drivers/line.h
arch/um/drivers/mconsole.h
arch/um/drivers/mconsole_kern.c
arch/um/drivers/mconsole_kern.h
arch/um/drivers/mconsole_user.c
arch/um/drivers/net_kern.c
arch/um/drivers/net_user.c
arch/um/drivers/null.c
arch/um/drivers/pcap_kern.c
arch/um/drivers/pcap_user.c
arch/um/drivers/pcap_user.h
arch/um/drivers/port.h
arch/um/drivers/port_kern.c
arch/um/drivers/port_user.c
arch/um/drivers/pty.c
arch/um/drivers/slip_kern.c
arch/um/drivers/slip_user.c
arch/um/drivers/slirp_kern.c
arch/um/drivers/slirp_user.c
arch/um/drivers/ssl.c
arch/um/drivers/stdio_console.c
arch/um/drivers/stdio_console.h
arch/um/drivers/tty.c
arch/um/drivers/ubd.h
arch/um/drivers/ubd_kern.c
arch/um/drivers/ubd_user.c
arch/um/drivers/umcast.h
arch/um/drivers/umcast_kern.c
arch/um/drivers/umcast_user.c
arch/um/drivers/vde.h
arch/um/drivers/vde_kern.c
arch/um/drivers/vde_user.c
arch/um/drivers/vector_kern.c
arch/um/drivers/vector_kern.h
arch/um/drivers/vector_transports.c
arch/um/drivers/vector_user.c
arch/um/drivers/vector_user.h
arch/um/drivers/vhost_user.h [new file with mode: 0644]
arch/um/drivers/virtio_uml.c [new file with mode: 0644]
arch/um/drivers/xterm.c
arch/um/drivers/xterm.h
arch/um/drivers/xterm_kern.c
arch/um/include/asm/Kbuild
arch/um/include/asm/common.lds.S
arch/um/include/asm/irq.h
arch/um/include/asm/irqflags.h
arch/um/include/asm/kmap_types.h
arch/um/include/asm/mmu.h
arch/um/include/asm/mmu_context.h
arch/um/include/asm/page.h
arch/um/include/asm/pgalloc.h
arch/um/include/asm/pgtable-2level.h
arch/um/include/asm/pgtable-3level.h
arch/um/include/asm/pgtable.h
arch/um/include/asm/processor-generic.h
arch/um/include/asm/ptrace-generic.h
arch/um/include/asm/thread_info.h
arch/um/include/asm/tlbflush.h
arch/um/include/asm/uaccess.h
arch/um/include/shared/arch.h
arch/um/include/shared/as-layout.h
arch/um/include/shared/elf_user.h
arch/um/include/shared/frame_kern.h
arch/um/include/shared/irq_kern.h
arch/um/include/shared/irq_user.h
arch/um/include/shared/kern.h
arch/um/include/shared/kern_util.h
arch/um/include/shared/longjmp.h
arch/um/include/shared/mem.h
arch/um/include/shared/net_kern.h
arch/um/include/shared/net_user.h
arch/um/include/shared/os.h
arch/um/include/shared/ptrace_user.h
arch/um/include/shared/registers.h
arch/um/include/shared/sigio.h
arch/um/include/shared/skas/mm_id.h
arch/um/include/shared/skas/skas.h
arch/um/include/shared/skas/stub-data.h
arch/um/include/shared/timer-internal.h
arch/um/include/shared/um_malloc.h
arch/um/include/shared/user.h
arch/um/kernel/Makefile
arch/um/kernel/config.c.in
arch/um/kernel/dyn.lds.S
arch/um/kernel/exec.c
arch/um/kernel/exitcode.c
arch/um/kernel/gmon_syms.c
arch/um/kernel/gprof_syms.c
arch/um/kernel/initrd.c
arch/um/kernel/irq.c
arch/um/kernel/ksyms.c
arch/um/kernel/mem.c
arch/um/kernel/physmem.c
arch/um/kernel/process.c
arch/um/kernel/ptrace.c
arch/um/kernel/reboot.c
arch/um/kernel/sigio.c
arch/um/kernel/signal.c
arch/um/kernel/skas/Makefile
arch/um/kernel/skas/clone.c
arch/um/kernel/skas/mmu.c
arch/um/kernel/skas/process.c
arch/um/kernel/skas/syscall.c
arch/um/kernel/skas/uaccess.c
arch/um/kernel/syscall.c
arch/um/kernel/time.c
arch/um/kernel/tlb.c
arch/um/kernel/trap.c
arch/um/kernel/um_arch.c
arch/um/kernel/umid.c
arch/um/kernel/uml.lds.S
arch/um/os-Linux/Makefile
arch/um/os-Linux/drivers/Makefile
arch/um/os-Linux/drivers/etap.h
arch/um/os-Linux/drivers/ethertap_kern.c
arch/um/os-Linux/drivers/ethertap_user.c
arch/um/os-Linux/drivers/tuntap.h
arch/um/os-Linux/drivers/tuntap_kern.c
arch/um/os-Linux/drivers/tuntap_user.c
arch/um/os-Linux/file.c
arch/um/os-Linux/helper.c
arch/um/os-Linux/irq.c
arch/um/os-Linux/main.c
arch/um/os-Linux/mem.c
arch/um/os-Linux/process.c
arch/um/os-Linux/registers.c
arch/um/os-Linux/sigio.c
arch/um/os-Linux/signal.c
arch/um/os-Linux/skas/Makefile
arch/um/os-Linux/skas/mem.c
arch/um/os-Linux/skas/process.c
arch/um/os-Linux/start_up.c
arch/um/os-Linux/time.c
arch/um/os-Linux/tty.c
arch/um/os-Linux/umid.c
arch/um/os-Linux/util.c
arch/unicore32/include/asm/pgalloc.h
arch/unicore32/include/asm/pgtable.h
arch/unicore32/include/asm/tlb.h
arch/x86/Kconfig
arch/x86/boot/compressed/acpi.c
arch/x86/include/asm/acpi.h
arch/x86/include/asm/alternative.h
arch/x86/include/asm/bug.h
arch/x86/include/asm/hyperv-tlfs.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/pgtable_32.h
arch/x86/include/asm/pgtable_64.h
arch/x86/include/asm/svm.h
arch/x86/include/asm/vmx.h
arch/x86/include/asm/x86_init.h
arch/x86/include/uapi/asm/svm.h
arch/x86/include/uapi/asm/vmx.h
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/cpu/umwait.c
arch/x86/kernel/ima_arch.c
arch/x86/kernel/ioport.c
arch/x86/kernel/kexec-bzimage64.c
arch/x86/kernel/msr.c
arch/x86/kernel/x86_init.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/lapic.c
arch/x86/kvm/lapic.h
arch/x86/kvm/mmu.c
arch/x86/kvm/mmutrace.h
arch/x86/kvm/svm.c
arch/x86/kvm/vmx/capabilities.h
arch/x86/kvm/vmx/evmcs.h
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/ops.h
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
arch/x86/mm/pat_rbtree.c
arch/x86/mm/pgtable.c
arch/x86/mm/testmmiotrace.c
arch/x86/purgatory/Makefile
arch/x86/um/asm/barrier.h
arch/x86/um/vdso/um_vdso.c
arch/xtensa/include/asm/pgalloc.h
arch/xtensa/include/asm/pgtable.h
arch/xtensa/include/asm/tlbflush.h
arch/xtensa/include/uapi/asm/mman.h
block/bfq-iosched.c
block/blk-core.c
block/blk-flush.c
block/blk-integrity.c
block/blk-iocost.c
block/blk-mq-sched.c
block/blk-mq.c
block/blk-sysfs.c
block/blk.h
block/bsg-lib.c
block/elevator.c
block/t10-pi.c
certs/system_keyring.c
crypto/Kconfig
crypto/Makefile
crypto/asymmetric_keys/pkcs7_verify.c
crypto/asymmetric_keys/verify_pefile.c
crypto/essiv.c [new file with mode: 0644]
drivers/acpi/acpi_apd.c
drivers/acpi/custom_method.c
drivers/acpi/nfit/intel.c
drivers/acpi/osl.c
drivers/acpi/pci_root.c
drivers/acpi/tables.c
drivers/amba/bus.c
drivers/ata/libahci_platform.c
drivers/base/memory.c
drivers/base/node.c
drivers/block/drbd/drbd_interval.c
drivers/block/nbd.c
drivers/block/pktcdvd.c
drivers/block/rbd.c
drivers/bus/ti-sysc.c
drivers/char/hw_random/core.c
drivers/char/mem.c
drivers/char/tpm/tpm-interface.c
drivers/char/xillybus/xillybus_pcie.c
drivers/clk/Kconfig
drivers/clk/Makefile
drivers/clk/actions/owl-common.c
drivers/clk/actions/owl-factor.c
drivers/clk/at91/clk-main.c
drivers/clk/at91/sama5d2.c
drivers/clk/bcm/clk-bcm2835.c
drivers/clk/bcm/clk-bcm63xx-gate.c
drivers/clk/clk-aspeed.c
drivers/clk/clk-aspeed.h [new file with mode: 0644]
drivers/clk/clk-ast2600.c [new file with mode: 0644]
drivers/clk/clk-bulk.c
drivers/clk/clk-cdce925.c
drivers/clk/clk-composite.c
drivers/clk/clk-lochnagar.c
drivers/clk/clk-milbeaut.c
drivers/clk/clk-qoriq.c
drivers/clk/clk-si5341.c
drivers/clk/clk.c
drivers/clk/davinci/pll.c
drivers/clk/imx/clk-imx7ulp.c
drivers/clk/imx/clk-imx8mm.c
drivers/clk/imx/clk-imx8mn.c
drivers/clk/imx/clk-imx8mq.c
drivers/clk/imx/clk-pll14xx.c
drivers/clk/imx/clk.h
drivers/clk/ingenic/Kconfig
drivers/clk/ingenic/Makefile
drivers/clk/ingenic/jz4725b-cgu.c
drivers/clk/ingenic/jz4740-cgu.c
drivers/clk/ingenic/jz4770-cgu.c
drivers/clk/ingenic/jz4780-cgu.c
drivers/clk/ingenic/tcu.c [new file with mode: 0644]
drivers/clk/mediatek/Kconfig
drivers/clk/mediatek/Makefile
drivers/clk/mediatek/clk-gate.c
drivers/clk/mediatek/clk-gate.h
drivers/clk/mediatek/clk-mt6779-aud.c [new file with mode: 0644]
drivers/clk/mediatek/clk-mt6779-cam.c [new file with mode: 0644]
drivers/clk/mediatek/clk-mt6779-img.c [new file with mode: 0644]
drivers/clk/mediatek/clk-mt6779-ipe.c [new file with mode: 0644]
drivers/clk/mediatek/clk-mt6779-mfg.c [new file with mode: 0644]
drivers/clk/mediatek/clk-mt6779-mm.c [new file with mode: 0644]
drivers/clk/mediatek/clk-mt6779-vdec.c [new file with mode: 0644]
drivers/clk/mediatek/clk-mt6779-venc.c [new file with mode: 0644]
drivers/clk/mediatek/clk-mt6779.c [new file with mode: 0644]
drivers/clk/mediatek/clk-mt8183-mfgcfg.c
drivers/clk/mediatek/clk-mt8183.c
drivers/clk/mediatek/clk-mtk.c
drivers/clk/mediatek/clk-mtk.h
drivers/clk/mediatek/reset.c
drivers/clk/meson/axg-audio.c
drivers/clk/meson/axg-audio.h
drivers/clk/meson/g12a.c
drivers/clk/meson/g12a.h
drivers/clk/mvebu/Kconfig
drivers/clk/mvebu/Makefile
drivers/clk/mvebu/ap-cpu-clk.c [new file with mode: 0644]
drivers/clk/mvebu/ap806-system-controller.c
drivers/clk/mvebu/armada_ap_cp_helper.c [new file with mode: 0644]
drivers/clk/mvebu/armada_ap_cp_helper.h [new file with mode: 0644]
drivers/clk/mvebu/cp110-system-controller.c
drivers/clk/qcom/Kconfig
drivers/clk/qcom/Makefile
drivers/clk/qcom/clk-alpha-pll.c
drivers/clk/qcom/clk-alpha-pll.h
drivers/clk/qcom/clk-rcg2.c
drivers/clk/qcom/clk-rpmh.c
drivers/clk/qcom/common.c
drivers/clk/qcom/common.h
drivers/clk/qcom/gcc-ipq8074.c
drivers/clk/qcom/gcc-msm8998.c
drivers/clk/qcom/gcc-qcs404.c
drivers/clk/qcom/gcc-sdm660.c
drivers/clk/qcom/gcc-sdm845.c
drivers/clk/qcom/gcc-sm8150.c [new file with mode: 0644]
drivers/clk/qcom/lpasscc-sdm845.c
drivers/clk/qcom/turingcc-qcs404.c
drivers/clk/renesas/clk-mstp.c
drivers/clk/renesas/r9a06g032-clocks.c
drivers/clk/renesas/rcar-usb2-clock-sel.c
drivers/clk/renesas/renesas-cpg-mssr.c
drivers/clk/rockchip/Makefile
drivers/clk/rockchip/clk-rk3308.c [new file with mode: 0644]
drivers/clk/rockchip/clk-rv1108.c
drivers/clk/rockchip/clk.h
drivers/clk/sirf/clk-common.c
drivers/clk/socfpga/clk-gate.c
drivers/clk/socfpga/clk-periph-a10.c
drivers/clk/spear/spear1340_clock.c
drivers/clk/sprd/common.c
drivers/clk/sprd/pll.c
drivers/clk/st/clk-flexgen.c
drivers/clk/st/clkgen-fsyn.c
drivers/clk/st/clkgen-pll.c
drivers/clk/sunxi-ng/ccu-sun50i-h6.c
drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
drivers/clk/sunxi-ng/ccu-sun8i-v3s.h
drivers/clk/sunxi-ng/ccu_common.c
drivers/clk/ti/apll.c
drivers/clk/ti/clk-54xx.c
drivers/clk/ti/clk-814x.c
drivers/clk/ti/dpll.c
drivers/clk/versatile/clk-versatile.c
drivers/clk/zte/clk-zx296718.c
drivers/clocksource/Kconfig
drivers/clocksource/Makefile
drivers/clocksource/ingenic-timer.c [new file with mode: 0644]
drivers/crypto/chelsio/chtls/chtls_io.c
drivers/crypto/hisilicon/sec/sec_algs.c
drivers/crypto/hisilicon/zip/zip_crypto.c
drivers/crypto/hisilicon/zip/zip_main.c
drivers/crypto/inside-secure/safexcel.c
drivers/crypto/talitos.c
drivers/dma/Kconfig
drivers/dma/Makefile
drivers/dma/dma-jz4740.c [deleted file]
drivers/edac/Kconfig
drivers/edac/Makefile
drivers/edac/armada_xp_edac.c [new file with mode: 0644]
drivers/edac/debugfs.c
drivers/edac/edac_module.h
drivers/firmware/broadcom/Kconfig
drivers/firmware/broadcom/bcm47xx_nvram.c
drivers/firmware/efi/efi.c
drivers/gpio/gpio-mvebu.c
drivers/gpu/drm/amd/amdgpu/Kconfig
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c
drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.c
drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c
drivers/gpu/drm/amd/display/dc/inc/resource.h
drivers/gpu/drm/amd/display/include/dal_asic_id.h
drivers/gpu/drm/amd/include/renoir_ip_offset.h
drivers/gpu/drm/amd/powerplay/amd_powerplay.c
drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
drivers/gpu/drm/amd/powerplay/renoir_ppt.c
drivers/gpu/drm/amd/powerplay/renoir_ppt.h
drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
drivers/gpu/drm/drm_atomic_helper.c
drivers/gpu/drm/drm_atomic_state_helper.c
drivers/gpu/drm/drm_atomic_uapi.c
drivers/gpu/drm/drm_drv.c
drivers/gpu/drm/drm_ioctl.c
drivers/gpu/drm/drm_mode_object.c
drivers/gpu/drm/drm_self_refresh_helper.c
drivers/gpu/drm/nouveau/Kconfig
drivers/gpu/drm/nouveau/dispnv50/wndw.c
drivers/gpu/drm/nouveau/nouveau_dmem.c
drivers/gpu/drm/nouveau/nouveau_dmem.h
drivers/gpu/drm/nouveau/nouveau_drm.c
drivers/gpu/drm/nouveau/nouveau_svm.c
drivers/gpu/drm/panfrost/panfrost_devfreq.c
drivers/gpu/drm/panfrost/panfrost_device.c
drivers/gpu/drm/panfrost/panfrost_mmu.c
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_drv.c
drivers/gpu/drm/radeon/radeon_gem.c
drivers/gpu/drm/radeon/radeon_kms.c
drivers/gpu/drm/radeon/radeon_mn.c
drivers/gpu/drm/rockchip/rockchip_drm_vop.c
drivers/gpu/drm/via/via_dmablit.c
drivers/hid/Kconfig
drivers/hid/Makefile
drivers/hid/hid-apple.c
drivers/hid/hid-core.c
drivers/hid/hid-cougar.c
drivers/hid/hid-creative-sb0540.c [new file with mode: 0644]
drivers/hid/hid-gfrm.c
drivers/hid/hid-hyperv.c
drivers/hid/hid-ids.h
drivers/hid/hid-lenovo.c
drivers/hid/hid-lg.c
drivers/hid/hid-lg4ff.c
drivers/hid/hid-logitech-dj.c
drivers/hid/hid-multitouch.c
drivers/hid/hid-picolcd_core.c
drivers/hid/hid-prodikeys.c
drivers/hid/hid-quirks.c
drivers/hid/hid-sensor-hub.c
drivers/hid/hid-sony.c
drivers/hid/hidraw.c
drivers/hid/i2c-hid/i2c-hid-core.c
drivers/hid/intel-ish-hid/ipc/hw-ish.h
drivers/hid/intel-ish-hid/ipc/ipc.c
drivers/hid/intel-ish-hid/ipc/pci-ish.c
drivers/hid/usbhid/hiddev.c
drivers/hid/wacom_sys.c
drivers/hid/wacom_wac.c
drivers/hsi/clients/ssi_protocol.c
drivers/hsi/controllers/omap_ssi_core.c
drivers/hsi/controllers/omap_ssi_port.c
drivers/hv/channel_mgmt.c
drivers/hv/connection.c
drivers/hv/hv.c
drivers/hv/hv_balloon.c
drivers/hv/hyperv_vmbus.h
drivers/hv/vmbus_drv.c
drivers/hwmon/Kconfig
drivers/hwmon/Makefile
drivers/hwmon/jz4740-hwmon.c [deleted file]
drivers/i2c/busses/Kconfig
drivers/i2c/busses/Makefile
drivers/i2c/busses/i2c-axxia.c
drivers/i2c/busses/i2c-bcm-iproc.c
drivers/i2c/busses/i2c-bcm2835.c
drivers/i2c/busses/i2c-cht-wc.c
drivers/i2c/busses/i2c-designware-master.c
drivers/i2c/busses/i2c-designware-pcidrv.c
drivers/i2c/busses/i2c-designware-platdrv.c
drivers/i2c/busses/i2c-exynos5.c
drivers/i2c/busses/i2c-fsi.c
drivers/i2c/busses/i2c-hix5hd2.c
drivers/i2c/busses/i2c-i801.c
drivers/i2c/busses/i2c-icy.c [new file with mode: 0644]
drivers/i2c/busses/i2c-imx-lpi2c.c
drivers/i2c/busses/i2c-imx.c
drivers/i2c/busses/i2c-ismt.c
drivers/i2c/busses/i2c-mxs.c
drivers/i2c/busses/i2c-ocores.c
drivers/i2c/busses/i2c-piix4.c
drivers/i2c/busses/i2c-sprd.c
drivers/i2c/busses/i2c-stm32f7.c
drivers/i2c/busses/i2c-synquacer.c
drivers/i2c/busses/i2c-taos-evm.c
drivers/i2c/busses/i2c-tegra.c
drivers/i2c/busses/i2c-uniphier-f.c
drivers/i2c/busses/i2c-uniphier.c
drivers/i2c/i2c-core-base.c
drivers/i2c/i2c-slave-eeprom.c
drivers/infiniband/Kconfig
drivers/infiniband/core/addr.c
drivers/infiniband/core/cache.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/cma_configfs.c
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/counters.c
drivers/infiniband/core/cq.c
drivers/infiniband/core/device.c
drivers/infiniband/core/fmr_pool.c
drivers/infiniband/core/iwpm_msg.c
drivers/infiniband/core/iwpm_util.c
drivers/infiniband/core/netlink.c
drivers/infiniband/core/nldev.c
drivers/infiniband/core/rw.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/sysfs.c
drivers/infiniband/core/umem.c
drivers/infiniband/core/umem_odp.c
drivers/infiniband/core/user_mad.c
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/bnxt_re/hw_counters.c
drivers/infiniband/hw/bnxt_re/ib_verbs.c
drivers/infiniband/hw/bnxt_re/main.c
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/cxgb4/provider.c
drivers/infiniband/hw/efa/efa.h
drivers/infiniband/hw/efa/efa_com.c
drivers/infiniband/hw/efa/efa_com_cmd.c
drivers/infiniband/hw/efa/efa_com_cmd.h
drivers/infiniband/hw/efa/efa_main.c
drivers/infiniband/hw/efa/efa_verbs.c
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/hfi1/chip.h
drivers/infiniband/hw/hfi1/mad.c
drivers/infiniband/hw/hfi1/rc.c
drivers/infiniband/hw/hfi1/tid_rdma.c
drivers/infiniband/hw/hfi1/trace_tid.h
drivers/infiniband/hw/hfi1/user_pages.c
drivers/infiniband/hw/hfi1/user_sdma.h
drivers/infiniband/hw/hfi1/verbs.c
drivers/infiniband/hw/hns/Kconfig
drivers/infiniband/hw/hns/hns_roce_ah.c
drivers/infiniband/hw/hns/hns_roce_cmd.c
drivers/infiniband/hw/hns/hns_roce_cq.c
drivers/infiniband/hw/hns/hns_roce_device.h
drivers/infiniband/hw/hns/hns_roce_hem.c
drivers/infiniband/hw/hns/hns_roce_hem.h
drivers/infiniband/hw/hns/hns_roce_hw_v1.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.h
drivers/infiniband/hw/hns/hns_roce_main.c
drivers/infiniband/hw/hns/hns_roce_mr.c
drivers/infiniband/hw/hns/hns_roce_qp.c
drivers/infiniband/hw/hns/hns_roce_srq.c
drivers/infiniband/hw/i40iw/i40iw_verbs.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mr.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx5/devx.c
drivers/infiniband/hw/mlx5/flow.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mem.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/odp.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/hw/qedr/main.c
drivers/infiniband/hw/qedr/qedr.h
drivers/infiniband/hw/qedr/verbs.c
drivers/infiniband/hw/qib/qib_file_ops.c
drivers/infiniband/hw/qib/qib_rc.c
drivers/infiniband/hw/qib/qib_sysfs.c
drivers/infiniband/hw/qib/qib_user_pages.c
drivers/infiniband/hw/usnic/usnic_ib_main.c
drivers/infiniband/hw/usnic/usnic_ib_verbs.c
drivers/infiniband/hw/usnic/usnic_uiom.c
drivers/infiniband/sw/rxe/rxe.h
drivers/infiniband/sw/rxe/rxe_param.h
drivers/infiniband/sw/rxe/rxe_verbs.c
drivers/infiniband/sw/siw/siw_mem.c
drivers/infiniband/sw/siw/siw_qp_tx.c
drivers/infiniband/sw/siw/siw_verbs.c
drivers/infiniband/ulp/iser/iscsi_iser.h
drivers/infiniband/ulp/srpt/ib_srpt.c
drivers/irqchip/Kconfig
drivers/irqchip/Makefile
drivers/irqchip/irq-ingenic-tcu.c [new file with mode: 0644]
drivers/md/Kconfig
drivers/md/Makefile
drivers/md/dm-bufio.c
drivers/md/dm-clone-metadata.c [new file with mode: 0644]
drivers/md/dm-clone-metadata.h [new file with mode: 0644]
drivers/md/dm-clone-target.c [new file with mode: 0644]
drivers/md/dm-crypt.c
drivers/md/dm-integrity.c
drivers/md/dm-ioctl.c
drivers/md/dm-raid.c
drivers/md/dm-raid1.c
drivers/md/dm-stats.c
drivers/md/dm-table.c
drivers/md/dm-verity-target.c
drivers/md/dm-verity-verify-sig.c [new file with mode: 0644]
drivers/md/dm-verity-verify-sig.h [new file with mode: 0644]
drivers/md/dm-verity.h
drivers/md/dm-writecache.c
drivers/md/dm-zoned-target.c
drivers/md/dm.c
drivers/md/dm.h
drivers/md/persistent-data/dm-space-map-common.c
drivers/media/i2c/adv748x/adv748x-core.c
drivers/media/i2c/adv7604.c
drivers/media/v4l2-core/videobuf-dma-contig.c
drivers/mfd/88pm800.c
drivers/mfd/88pm860x-core.c
drivers/mfd/Kconfig
drivers/mfd/Makefile
drivers/mfd/ab3100-core.c
drivers/mfd/ab8500-debugfs.c
drivers/mfd/asic3.c
drivers/mfd/bcm590xx.c
drivers/mfd/da9150-core.c
drivers/mfd/davinci_voicecodec.c
drivers/mfd/db8500-prcmu.c
drivers/mfd/ezx-pcap.c
drivers/mfd/fsl-imx25-tsadc.c
drivers/mfd/htc-i2cpld.c
drivers/mfd/intel-lpss-acpi.c
drivers/mfd/intel-lpss-pci.c
drivers/mfd/intel-lpss.c
drivers/mfd/intel_soc_pmic_bxtwc.c
drivers/mfd/intel_soc_pmic_mrfld.c [new file with mode: 0644]
drivers/mfd/jz4740-adc.c [deleted file]
drivers/mfd/max14577.c
drivers/mfd/max77620.c
drivers/mfd/max77693.c
drivers/mfd/max77843.c
drivers/mfd/max8907.c
drivers/mfd/max8925-i2c.c
drivers/mfd/max8997.c
drivers/mfd/max8998.c
drivers/mfd/mt6397-core.c
drivers/mfd/mt6397-irq.c [new file with mode: 0644]
drivers/mfd/palmas.c
drivers/mfd/qcom_rpm.c
drivers/mfd/sm501.c
drivers/mfd/syscon.c
drivers/mfd/timberdale.c
drivers/mfd/tps80031.c
drivers/mfd/twl-core.c
drivers/misc/eeprom/at24.c
drivers/misc/sgi-gru/grufile.c
drivers/misc/sgi-gru/grutables.h
drivers/misc/sgi-gru/grutlbpurge.c
drivers/mtd/Kconfig
drivers/mtd/Makefile
drivers/mtd/chips/cfi_cmdset_0002.c
drivers/mtd/chips/gen_probe.c
drivers/mtd/devices/Kconfig
drivers/mtd/devices/Makefile
drivers/mtd/devices/m25p80.c [deleted file]
drivers/mtd/devices/phram.c
drivers/mtd/devices/pmc551.c
drivers/mtd/maps/pismo.c
drivers/mtd/maps/pxa2xx-flash.c
drivers/mtd/mtdcore.c
drivers/mtd/nand/onenand/onenand_base.c
drivers/mtd/nand/raw/Kconfig
drivers/mtd/nand/raw/Makefile
drivers/mtd/nand/raw/brcmnand/brcmnand.c
drivers/mtd/nand/raw/ingenic/Kconfig
drivers/mtd/nand/raw/ingenic/Makefile
drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c
drivers/mtd/nand/raw/ingenic/jz4740_nand.c [deleted file]
drivers/mtd/nand/raw/meson_nand.c
drivers/mtd/nand/raw/mxic_nand.c [new file with mode: 0644]
drivers/mtd/nand/raw/nand_base.c
drivers/mtd/nand/raw/nand_bbt.c
drivers/mtd/nand/raw/nuc900_nand.c [deleted file]
drivers/mtd/nand/raw/omap2.c
drivers/mtd/nand/raw/oxnas_nand.c
drivers/mtd/nand/raw/r852.c
drivers/mtd/nand/raw/stm32_fmc2_nand.c
drivers/mtd/nand/raw/tango_nand.c
drivers/mtd/nand/raw/vf610_nfc.c
drivers/mtd/parsers/Kconfig
drivers/mtd/parsers/Makefile
drivers/mtd/parsers/ar7part.c [moved from drivers/mtd/ar7part.c with 100% similarity]
drivers/mtd/parsers/bcm47xxpart.c [moved from drivers/mtd/bcm47xxpart.c with 100% similarity]
drivers/mtd/parsers/bcm63xxpart.c [moved from drivers/mtd/bcm63xxpart.c with 100% similarity]
drivers/mtd/parsers/cmdlinepart.c [moved from drivers/mtd/cmdlinepart.c with 100% similarity]
drivers/mtd/parsers/ofpart.c [moved from drivers/mtd/ofpart.c with 100% similarity]
drivers/mtd/sm_ftl.c
drivers/mtd/spi-nor/Kconfig
drivers/mtd/spi-nor/aspeed-smc.c
drivers/mtd/spi-nor/cadence-quadspi.c
drivers/mtd/spi-nor/hisi-sfc.c
drivers/mtd/spi-nor/intel-spi-pci.c
drivers/mtd/spi-nor/intel-spi.c
drivers/mtd/spi-nor/spi-nor.c
drivers/mtd/ubi/block.c
drivers/mtd/ubi/fastmap-wl.c
drivers/mtd/ubi/wl.c
drivers/net/ethernet/intel/e1000e/e1000.h
drivers/net/ethernet/jme.c
drivers/net/ethernet/mellanox/mlx5/core/rl.c
drivers/net/ethernet/qlogic/qed/qed_rdma.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/wimax/i2400m/tx.c
drivers/net/wireless/ath/ath5k/pci.c
drivers/net/wireless/intel/iwlegacy/3945-mac.c
drivers/net/wireless/intel/iwlegacy/4965-mac.c
drivers/net/wireless/intel/iwlwifi/pcie/trans.c
drivers/ntb/hw/amd/ntb_hw_amd.c
drivers/ntb/hw/amd/ntb_hw_amd.h
drivers/ntb/hw/idt/Kconfig
drivers/ntb/hw/mscc/ntb_hw_switchtec.c
drivers/ntb/ntb_transport.c
drivers/ntb/test/ntb_perf.c
drivers/nvdimm/Kconfig
drivers/nvdimm/Makefile
drivers/nvdimm/bus.c
drivers/nvdimm/dimm_devs.c
drivers/nvdimm/label.c
drivers/nvdimm/namespace_devs.c
drivers/nvdimm/nd-core.h
drivers/nvdimm/nd.h
drivers/nvdimm/of_pmem.c
drivers/nvdimm/pfn.h
drivers/nvdimm/pfn_devs.c
drivers/nvdimm/pmem.c
drivers/nvdimm/region_devs.c
drivers/nvdimm/security.c
drivers/nvme/host/core.c
drivers/nvme/host/pci.c
drivers/pci/Kconfig
drivers/pci/access.c
drivers/pci/bus.c
drivers/pci/controller/dwc/Kconfig
drivers/pci/controller/dwc/Makefile
drivers/pci/controller/dwc/pci-exynos.c
drivers/pci/controller/dwc/pci-imx6.c
drivers/pci/controller/dwc/pci-layerscape-ep.c
drivers/pci/controller/dwc/pcie-al.c
drivers/pci/controller/dwc/pcie-armada8k.c
drivers/pci/controller/dwc/pcie-designware-ep.c
drivers/pci/controller/dwc/pcie-designware-host.c
drivers/pci/controller/dwc/pcie-designware.c
drivers/pci/controller/dwc/pcie-designware.h
drivers/pci/controller/dwc/pcie-histb.c
drivers/pci/controller/dwc/pcie-kirin.c
drivers/pci/controller/dwc/pcie-tegra194.c [new file with mode: 0644]
drivers/pci/controller/pci-host-common.c
drivers/pci/controller/pci-hyperv.c
drivers/pci/controller/pci-tegra.c
drivers/pci/controller/pcie-iproc-platform.c
drivers/pci/controller/pcie-mediatek.c
drivers/pci/controller/pcie-mobiveil.c
drivers/pci/controller/pcie-rockchip-host.c
drivers/pci/controller/vmd.c
drivers/pci/hotplug/cpci_hotplug_core.c
drivers/pci/hotplug/cpqphp_core.c
drivers/pci/hotplug/cpqphp_ctrl.c
drivers/pci/hotplug/cpqphp_nvram.h
drivers/pci/hotplug/ibmphp_res.c
drivers/pci/hotplug/pciehp.h
drivers/pci/hotplug/pciehp_core.c
drivers/pci/hotplug/pciehp_ctrl.c
drivers/pci/hotplug/pciehp_hpc.c
drivers/pci/hotplug/rpadlpar_core.c
drivers/pci/hotplug/rpaphp_core.c
drivers/pci/iov.c
drivers/pci/of.c
drivers/pci/p2pdma.c
drivers/pci/pci-acpi.c
drivers/pci/pci-bridge-emul.c
drivers/pci/pci-sysfs.c
drivers/pci/pci.c
drivers/pci/pci.h
drivers/pci/pcie/aspm.c
drivers/pci/pcie/err.c
drivers/pci/probe.c
drivers/pci/proc.c
drivers/pci/quirks.c
drivers/pci/search.c
drivers/pci/setup-bus.c
drivers/pci/syscall.c
drivers/pci/vc.c
drivers/pci/vpd.c
drivers/pcmcia/cistpl.c
drivers/phy/motorola/phy-cpcap-usb.c
drivers/phy/tegra/Kconfig
drivers/phy/tegra/Makefile
drivers/phy/tegra/phy-tegra194-p2u.c [new file with mode: 0644]
drivers/phy/ti/phy-am654-serdes.c
drivers/platform/x86/Kconfig
drivers/platform/x86/i2c-multi-instantiate.c
drivers/platform/x86/pmc_atom.c
drivers/power/reset/gpio-restart.c
drivers/power/reset/reboot-mode.c
drivers/power/supply/Kconfig
drivers/power/supply/Makefile
drivers/power/supply/ab8500_charger.c
drivers/power/supply/axp288_fuel_gauge.c
drivers/power/supply/bq25890_charger.c
drivers/power/supply/cpcap-charger.c
drivers/power/supply/isp1704_charger.c
drivers/power/supply/jz4740-battery.c [deleted file]
drivers/power/supply/max17042_battery.c
drivers/power/supply/max77650-charger.c
drivers/power/supply/power_supply_core.c
drivers/power/supply/power_supply_hwmon.c
drivers/power/supply/sbs-battery.c
drivers/power/supply/sc27xx_fuel_gauge.c
drivers/pwm/Kconfig
drivers/pwm/Makefile
drivers/pwm/core.c
drivers/pwm/pwm-atmel-hlcdc.c
drivers/pwm/pwm-atmel.c
drivers/pwm/pwm-bcm-iproc.c
drivers/pwm/pwm-bcm2835.c
drivers/pwm/pwm-cros-ec.c
drivers/pwm/pwm-fsl-ftm.c
drivers/pwm/pwm-hibvt.c
drivers/pwm/pwm-imx-tpm.c
drivers/pwm/pwm-imx27.c
drivers/pwm/pwm-jz4740.c
drivers/pwm/pwm-lpss.c
drivers/pwm/pwm-mediatek.c
drivers/pwm/pwm-meson.c
drivers/pwm/pwm-mxs.c
drivers/pwm/pwm-rcar.c
drivers/pwm/pwm-rockchip.c
drivers/pwm/pwm-sifive.c
drivers/pwm/pwm-sprd.c [new file with mode: 0644]
drivers/pwm/pwm-sti.c
drivers/pwm/pwm-stm32-lp.c
drivers/pwm/pwm-stm32.c
drivers/pwm/pwm-sun4i.c
drivers/pwm/pwm-zx.c
drivers/remoteproc/da8xx_remoteproc.c
drivers/remoteproc/keystone_remoteproc.c
drivers/remoteproc/qcom_common.c
drivers/remoteproc/qcom_q6v5.c
drivers/remoteproc/qcom_q6v5_mss.c
drivers/remoteproc/remoteproc_sysfs.c
drivers/remoteproc/stm32_rproc.c
drivers/rpmsg/qcom_glink_native.c
drivers/rpmsg/qcom_glink_smem.c
drivers/rpmsg/rpmsg_core.c
drivers/rpmsg/rpmsg_internal.h
drivers/rpmsg/virtio_rpmsg_bus.c
drivers/rtc/Kconfig
drivers/rtc/Makefile
drivers/rtc/class.c
drivers/rtc/rtc-88pm80x.c
drivers/rtc/rtc-88pm860x.c
drivers/rtc/rtc-ab-eoz9.c
drivers/rtc/rtc-ac100.c
drivers/rtc/rtc-armada38x.c
drivers/rtc/rtc-asm9260.c
drivers/rtc/rtc-aspeed.c
drivers/rtc/rtc-at91rm9200.c
drivers/rtc/rtc-at91sam9.c
drivers/rtc/rtc-bd70528.c
drivers/rtc/rtc-brcmstb-waketimer.c
drivers/rtc/rtc-cadence.c
drivers/rtc/rtc-davinci.c
drivers/rtc/rtc-ds1305.c
drivers/rtc/rtc-ds1672.c
drivers/rtc/rtc-fsl-ftm-alarm.c [new file with mode: 0644]
drivers/rtc/rtc-imx-sc.c
drivers/rtc/rtc-imxdi.c
drivers/rtc/rtc-isl12026.c
drivers/rtc/rtc-jz4740.c
drivers/rtc/rtc-max77686.c
drivers/rtc/rtc-meson-vrtc.c [new file with mode: 0644]
drivers/rtc/rtc-mt6397.c
drivers/rtc/rtc-mt7622.c
drivers/rtc/rtc-mxc.c
drivers/rtc/rtc-mxc_v2.c
drivers/rtc/rtc-nuc900.c [deleted file]
drivers/rtc/rtc-pcf2123.c
drivers/rtc/rtc-pcf2127.c
drivers/rtc/rtc-pcf85363.c
drivers/rtc/rtc-pcf8563.c
drivers/rtc/rtc-pic32.c
drivers/rtc/rtc-pm8xxx.c
drivers/rtc/rtc-puv3.c
drivers/rtc/rtc-pxa.c
drivers/rtc/rtc-rk808.c
drivers/rtc/rtc-rv3028.c
drivers/rtc/rtc-rv3029c2.c
drivers/rtc/rtc-rv8803.c
drivers/rtc/rtc-s35390a.c
drivers/rtc/rtc-s3c.c
drivers/rtc/rtc-s5m.c
drivers/rtc/rtc-sc27xx.c
drivers/rtc/rtc-sd3078.c
drivers/rtc/rtc-snvs.c
drivers/rtc/rtc-spear.c
drivers/rtc/rtc-stm32.c
drivers/rtc/rtc-sun6i.c
drivers/rtc/rtc-sunxi.c
drivers/rtc/rtc-tegra.c
drivers/rtc/rtc-tps6586x.c
drivers/rtc/rtc-tps65910.c
drivers/rtc/rtc-vt8500.c
drivers/rtc/rtc-xgene.c
drivers/rtc/rtc-zynqmp.c
drivers/s390/cio/ccwgroup.c
drivers/s390/cio/css.c
drivers/s390/cio/device_ops.c
drivers/s390/crypto/ap_bus.c
drivers/s390/crypto/ap_bus.h
drivers/s390/crypto/pkey_api.c
drivers/s390/crypto/vfio_ap_drv.c
drivers/s390/crypto/zcrypt_api.h
drivers/s390/crypto/zcrypt_cex4.c
drivers/scsi/aacraid/linit.c
drivers/scsi/aic94xx/aic94xx_init.c
drivers/scsi/bfa/bfad_im.c
drivers/scsi/bnx2fc/bnx2fc_fcoe.c
drivers/scsi/bnx2fc/bnx2fc_hwi.c
drivers/scsi/bnx2fc/bnx2fc_io.c
drivers/scsi/csiostor/csio_wr.c
drivers/scsi/cxlflash/main.c
drivers/scsi/device_handler/scsi_dh_rdac.c
drivers/scsi/esas2r/esas2r_init.c
drivers/scsi/esas2r/esas2r_ioctl.c
drivers/scsi/fcoe/fcoe.c
drivers/scsi/fdomain.c
drivers/scsi/fdomain_isa.c
drivers/scsi/fnic/fnic_debugfs.c
drivers/scsi/fnic/fnic_fcs.c
drivers/scsi/fnic/fnic_isr.c
drivers/scsi/fnic/fnic_trace.c
drivers/scsi/hisi_sas/hisi_sas.h
drivers/scsi/hisi_sas/hisi_sas_main.c
drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
drivers/scsi/hpsa.c
drivers/scsi/ibmvscsi/ibmvfc.c
drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
drivers/scsi/lpfc/lpfc.h
drivers/scsi/lpfc/lpfc_attr.c
drivers/scsi/lpfc/lpfc_bsg.c
drivers/scsi/lpfc/lpfc_crtn.h
drivers/scsi/lpfc/lpfc_ct.c
drivers/scsi/lpfc/lpfc_debugfs.c
drivers/scsi/lpfc/lpfc_debugfs.h
drivers/scsi/lpfc/lpfc_disc.h
drivers/scsi/lpfc/lpfc_els.c
drivers/scsi/lpfc/lpfc_hbadisc.c
drivers/scsi/lpfc/lpfc_hw.h
drivers/scsi/lpfc/lpfc_hw4.h
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/lpfc/lpfc_mem.c
drivers/scsi/lpfc/lpfc_nportdisc.c
drivers/scsi/lpfc/lpfc_nvme.c
drivers/scsi/lpfc/lpfc_nvmet.c
drivers/scsi/lpfc/lpfc_scsi.c
drivers/scsi/lpfc/lpfc_sli.c
drivers/scsi/lpfc/lpfc_sli.h
drivers/scsi/lpfc/lpfc_sli4.h
drivers/scsi/lpfc/lpfc_version.h
drivers/scsi/lpfc/lpfc_vport.c
drivers/scsi/megaraid/megaraid_sas.h
drivers/scsi/megaraid/megaraid_sas_base.c
drivers/scsi/megaraid/megaraid_sas_fusion.c
drivers/scsi/mpt3sas/mpi/mpi2.h
drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h
drivers/scsi/mpt3sas/mpi/mpi2_image.h
drivers/scsi/mpt3sas/mpi/mpi2_pci.h
drivers/scsi/mpt3sas/mpi/mpi2_tool.h
drivers/scsi/mpt3sas/mpt3sas_base.c
drivers/scsi/mpt3sas/mpt3sas_base.h
drivers/scsi/mpt3sas/mpt3sas_ctl.c
drivers/scsi/mpt3sas/mpt3sas_scsih.c
drivers/scsi/ncr53c8xx.c
drivers/scsi/pm8001/pm8001_sas.c
drivers/scsi/pmcraid.c
drivers/scsi/qedf/qedf.h
drivers/scsi/qedf/qedf_debugfs.c
drivers/scsi/qedf/qedf_els.c
drivers/scsi/qedf/qedf_fip.c
drivers/scsi/qedf/qedf_io.c
drivers/scsi/qedf/qedf_main.c
drivers/scsi/qedf/qedf_version.h
drivers/scsi/qla2xxx/qla_attr.c
drivers/scsi/qla2xxx/qla_bsg.c
drivers/scsi/qla2xxx/qla_dbg.c
drivers/scsi/qla2xxx/qla_def.h
drivers/scsi/qla2xxx/qla_dfs.c
drivers/scsi/qla2xxx/qla_dsd.h
drivers/scsi/qla2xxx/qla_fw.h
drivers/scsi/qla2xxx/qla_gbl.h
drivers/scsi/qla2xxx/qla_gs.c
drivers/scsi/qla2xxx/qla_init.c
drivers/scsi/qla2xxx/qla_inline.h
drivers/scsi/qla2xxx/qla_iocb.c
drivers/scsi/qla2xxx/qla_isr.c
drivers/scsi/qla2xxx/qla_mbx.c
drivers/scsi/qla2xxx/qla_mid.c
drivers/scsi/qla2xxx/qla_mr.c
drivers/scsi/qla2xxx/qla_nvme.c
drivers/scsi/qla2xxx/qla_nvme.h
drivers/scsi/qla2xxx/qla_nx.c
drivers/scsi/qla2xxx/qla_nx.h
drivers/scsi/qla2xxx/qla_nx2.c
drivers/scsi/qla2xxx/qla_os.c
drivers/scsi/qla2xxx/qla_sup.c
drivers/scsi/qla2xxx/qla_target.c
drivers/scsi/qla2xxx/qla_target.h
drivers/scsi/qla2xxx/qla_tmpl.c
drivers/scsi/qla2xxx/qla_version.h
drivers/scsi/qla2xxx/tcm_qla2xxx.c
drivers/scsi/qlogicpti.c
drivers/scsi/scsi_debugfs.c
drivers/scsi/scsi_lib.c
drivers/scsi/scsi_logging.c
drivers/scsi/sd.c
drivers/scsi/smartpqi/Kconfig
drivers/scsi/smartpqi/smartpqi.h
drivers/scsi/smartpqi/smartpqi_init.c
drivers/scsi/smartpqi/smartpqi_sas_transport.c
drivers/scsi/sun3_scsi.c
drivers/scsi/sym53c8xx_2/sym_nvram.c
drivers/scsi/ufs/cdns-pltfrm.c
drivers/scsi/ufs/ufs-hisi.c
drivers/scsi/ufs/ufs-qcom.c
drivers/scsi/ufs/ufs-qcom.h
drivers/scsi/ufs/ufs-sysfs.c
drivers/scsi/ufs/ufs.h
drivers/scsi/ufs/ufshcd-pltfrm.c
drivers/scsi/ufs/ufshcd.c
drivers/scsi/ufs/ufshcd.h
drivers/scsi/virtio_scsi.c
drivers/scsi/wd33c93.c
drivers/soundwire/Makefile
drivers/soundwire/bus.c
drivers/soundwire/bus.h
drivers/soundwire/bus_type.c
drivers/soundwire/cadence_master.c
drivers/soundwire/cadence_master.h
drivers/soundwire/debugfs.c [new file with mode: 0644]
drivers/soundwire/intel.c
drivers/soundwire/intel_init.c
drivers/soundwire/mipi_disco.c
drivers/soundwire/slave.c
drivers/soundwire/stream.c
drivers/staging/android/ion/ion_system_heap.c
drivers/target/target_core_user.c
drivers/target/tcm_fc/tfc_io.c
drivers/tee/tee_shm.c
drivers/thermal/armada_thermal.c
drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
drivers/thermal/intel/int340x_thermal/int3403_thermal.c
drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
drivers/thermal/intel/intel_pch_thermal.c
drivers/thermal/qcom/tsens-8960.c
drivers/thermal/qcom/tsens-v0_1.c
drivers/thermal/qcom/tsens-v1.c
drivers/thermal/qcom/tsens.h
drivers/thermal/qoriq_thermal.c
drivers/thermal/rcar_gen3_thermal.c
drivers/thermal/tegra/soctherm.c
drivers/thermal/thermal_core.c
drivers/thermal/thermal_hwmon.c
drivers/tty/serial/serial_core.c
drivers/usb/gadget/function/f_fs.c
drivers/usb/storage/Makefile
drivers/usb/storage/alauda.c
drivers/usb/storage/cypress_atacb.c
drivers/usb/storage/datafab.c
drivers/usb/storage/ene_ub6250.c
drivers/usb/storage/freecom.c
drivers/usb/storage/isd200.c
drivers/usb/storage/jumpshot.c
drivers/usb/storage/karma.c
drivers/usb/storage/onetouch.c
drivers/usb/storage/realtek_cr.c
drivers/usb/storage/sddr09.c
drivers/usb/storage/sddr55.c
drivers/usb/storage/shuttle_usbat.c
drivers/usb/storage/uas.c
drivers/vfio/pci/vfio_pci.c
drivers/vfio/vfio_iommu_spapr_tce.c
drivers/vfio/vfio_iommu_type1.c
drivers/video/backlight/Kconfig
drivers/video/backlight/backlight.c
drivers/video/backlight/gpio_backlight.c
drivers/video/backlight/lm3630a_bl.c
drivers/video/backlight/lms283gf05.c
drivers/video/backlight/pwm_bl.c
drivers/video/backlight/rave-sp-backlight.c
drivers/video/backlight/tosa_lcd.c
drivers/video/fbdev/Kconfig
drivers/video/fbdev/Makefile
drivers/video/fbdev/jz4740_fb.c [deleted file]
drivers/watchdog/Kconfig
drivers/watchdog/Makefile
drivers/watchdog/aspeed_wdt.c
drivers/watchdog/ath79_wdt.c
drivers/watchdog/cpwd.c
drivers/watchdog/diag288_wdt.c
drivers/watchdog/f71808e_wdt.c
drivers/watchdog/iTCO_wdt.c
drivers/watchdog/imx2_wdt.c
drivers/watchdog/imx7ulp_wdt.c [new file with mode: 0644]
drivers/watchdog/imx_sc_wdt.c
drivers/watchdog/jz4740_wdt.c
drivers/watchdog/ks8695_wdt.c [deleted file]
drivers/watchdog/nuc900_wdt.c [deleted file]
drivers/watchdog/orion_wdt.c
drivers/watchdog/qcom-wdt.c
drivers/watchdog/sprd_wdt.c
drivers/watchdog/ziirave_wdt.c
drivers/xen/events/events_base.c
drivers/xen/pci.c
drivers/xen/swiotlb-xen.c
fs/9p/cache.c
fs/9p/vfs_file.c
fs/9p/vfs_super.c
fs/binfmt_elf.c
fs/ceph/Makefile
fs/ceph/addr.c
fs/ceph/cache.c
fs/ceph/caps.c
fs/ceph/debugfs.c
fs/ceph/export.c
fs/ceph/file.c
fs/ceph/inode.c
fs/ceph/io.c [new file with mode: 0644]
fs/ceph/io.h [new file with mode: 0644]
fs/ceph/locks.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/ceph/super.c
fs/ceph/super.h
fs/ceph/xattr.c
fs/cifs/smbdirect.c
fs/debugfs/file.c
fs/debugfs/inode.c
fs/exec.c
fs/ext2/balloc.c
fs/ext2/super.c
fs/ext2/xattr.c
fs/ext4/block_validity.c
fs/ext4/dir.c
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/extents_status.c
fs/ext4/extents_status.h
fs/ext4/file.c
fs/ext4/hash.c
fs/ext4/inline.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/namei.c
fs/ext4/super.c
fs/f2fs/Kconfig
fs/f2fs/data.c
fs/f2fs/debug.c
fs/f2fs/dir.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/gc.c
fs/f2fs/hash.c
fs/f2fs/inline.c
fs/f2fs/inode.c
fs/f2fs/namei.c
fs/f2fs/node.c
fs/f2fs/segment.c
fs/f2fs/segment.h
fs/f2fs/super.c
fs/f2fs/sysfs.c
fs/f2fs/xattr.c
fs/fat/dir.c
fs/fat/fatent.c
fs/file_table.c
fs/fs_context.c
fs/fuse/Kconfig
fs/fuse/Makefile
fs/fuse/cuse.c
fs/fuse/dev.c
fs/fuse/dir.c
fs/fuse/file.c
fs/fuse/fuse_i.h
fs/fuse/inode.c
fs/fuse/readdir.c
fs/fuse/virtio_fs.c [new file with mode: 0644]
fs/fuse/xattr.c
fs/gfs2/aops.c
fs/gfs2/bmap.c
fs/gfs2/dentry.c
fs/gfs2/dir.c
fs/gfs2/dir.h
fs/gfs2/file.c
fs/gfs2/glock.c
fs/gfs2/glock.h
fs/gfs2/incore.h
fs/gfs2/inode.c
fs/gfs2/lock_dlm.c
fs/gfs2/ops_fstype.c
fs/gfs2/quota.c
fs/gfs2/rgrp.c
fs/gfs2/super.c
fs/gfs2/super.h
fs/gfs2/util.c
fs/inode.c
fs/io_uring.c
fs/iomap/direct-io.c
fs/jbd2/journal.c
fs/jbd2/revoke.c
fs/jbd2/transaction.c
fs/jffs2/fs.c
fs/jffs2/gc.c
fs/jffs2/nodelist.c
fs/jffs2/os-linux.h
fs/jffs2/scan.c
fs/jffs2/super.c
fs/locks.c
fs/namespace.c
fs/nfs/dir.c
fs/nfs/filelayout/filelayout.c
fs/nfs/internal.h
fs/nfs/nfs3proc.c
fs/nfs/nfs4_fs.h
fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c
fs/nfs/nfs4xdr.c
fs/nfs/pnfs.c
fs/nfs/pnfs.h
fs/nfs/super.c
fs/nfsd/Kconfig
fs/nfsd/Makefile
fs/nfsd/acl.h
fs/nfsd/blocklayout.c
fs/nfsd/export.c
fs/nfsd/filecache.c [new file with mode: 0644]
fs/nfsd/filecache.h [new file with mode: 0644]
fs/nfsd/netns.h
fs/nfsd/nfs3proc.c
fs/nfsd/nfs3xdr.c
fs/nfsd/nfs4callback.c
fs/nfsd/nfs4layouts.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4recover.c
fs/nfsd/nfs4state.c
fs/nfsd/nfs4xdr.c
fs/nfsd/nfsctl.c
fs/nfsd/nfsproc.c
fs/nfsd/nfssvc.c
fs/nfsd/state.h
fs/nfsd/trace.h
fs/nfsd/vfs.c
fs/nfsd/vfs.h
fs/nfsd/xdr3.h
fs/nfsd/xdr4.h
fs/notify/dnotify/dnotify.c
fs/notify/fanotify/fanotify_user.c
fs/notify/fsnotify.h
fs/notify/group.c
fs/notify/inotify/inotify_user.c
fs/notify/mark.c
fs/ntfs/mft.c
fs/ntfs/namei.c
fs/ntfs/runlist.c
fs/ntfs/super.c
fs/ocfs2/alloc.c
fs/ocfs2/aops.c
fs/ocfs2/blockcheck.c
fs/ocfs2/cluster/heartbeat.c
fs/ocfs2/dir.c
fs/ocfs2/dlm/dlmcommon.h
fs/ocfs2/dlm/dlmdebug.c
fs/ocfs2/dlm/dlmdebug.h
fs/ocfs2/dlm/dlmdomain.c
fs/ocfs2/dlm/dlmunlock.c
fs/ocfs2/dlmglue.c
fs/ocfs2/extent_map.c
fs/ocfs2/file.c
fs/ocfs2/inode.c
fs/ocfs2/journal.h
fs/ocfs2/namei.c
fs/ocfs2/ocfs2.h
fs/ocfs2/super.c
fs/open.c
fs/proc/kcore.c
fs/proc/meminfo.c
fs/proc/task_mmu.c
fs/proc_namespace.c
fs/quota/dquot.c
fs/reiserfs/do_balan.c
fs/reiserfs/fix_node.c
fs/reiserfs/journal.c
fs/reiserfs/lbalance.c
fs/reiserfs/objectid.c
fs/reiserfs/prints.c
fs/reiserfs/stree.c
fs/super.c
fs/tracefs/inode.c
fs/ubifs/auth.c
fs/ubifs/debug.c
fs/ubifs/super.c
fs/ubifs/tnc_misc.c
fs/udf/balloc.c
fs/udf/ecma_167.h
fs/udf/file.c
fs/udf/ialloc.c
fs/udf/inode.c
fs/udf/super.c
fs/udf/udf_i.h
fs/udf/udfdecl.h
fs/unicode/utf8-core.c
fs/unicode/utf8-selftest.c
fs/userfaultfd.c
fs/xfs/libxfs/xfs_alloc.h
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_sb.c
fs/xfs/scrub/alloc.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_file.c
fs/xfs/xfs_sysfs.c
include/Kbuild
include/asm-generic/bug.h
include/asm-generic/export.h
include/asm-generic/pgalloc.h
include/asm-generic/pgtable.h
include/asm-generic/vmlinux.lds.h
include/crypto/pkcs7.h
include/drm/drm_crtc.h
include/drm/drm_self_refresh_helper.h
include/dt-bindings/bus/ti-sysc.h
include/dt-bindings/clock/ast2600-clock.h [new file with mode: 0644]
include/dt-bindings/clock/bcm2835.h
include/dt-bindings/clock/imx8mn-clock.h
include/dt-bindings/clock/ingenic,tcu.h [new file with mode: 0644]
include/dt-bindings/clock/jz4740-cgu.h
include/dt-bindings/clock/mt6779-clk.h [new file with mode: 0644]
include/dt-bindings/clock/mt8183-clk.h
include/dt-bindings/clock/omap5.h
include/dt-bindings/clock/qcom,gcc-qcs404.h
include/dt-bindings/clock/qcom,gcc-sm8150.h [new file with mode: 0644]
include/dt-bindings/clock/rk3308-cru.h [new file with mode: 0644]
include/dt-bindings/clock/sun8i-v3s-ccu.h
include/dt-bindings/pinctrl/k3.h
include/dt-bindings/reset-controller/mt8183-resets.h [new file with mode: 0644]
include/dt-bindings/reset/sun8i-v3s-ccu.h
include/linux/acpi.h
include/linux/backlight.h
include/linux/blkdev.h
include/linux/ceph/libceph.h
include/linux/ceph/messenger.h
include/linux/ceph/mon_client.h
include/linux/ceph/osd_client.h
include/linux/clk-provider.h
include/linux/clk.h
include/linux/clk/clk-conf.h
include/linux/compaction.h
include/linux/compiler_types.h
include/linux/cpu.h
include/linux/cpumask.h
include/linux/cred.h
include/linux/export.h
include/linux/f2fs_fs.h
include/linux/fs.h
include/linux/fs_context.h
include/linux/fsnotify_backend.h
include/linux/hid.h
include/linux/hmm.h
include/linux/huge_mm.h
include/linux/hugetlb.h
include/linux/hyperv.h
include/linux/i2c.h
include/linux/ima.h
include/linux/interval_tree_generic.h
include/linux/iomap.h
include/linux/ioport.h
include/linux/jbd2.h
include/linux/kernel.h
include/linux/kexec.h
include/linux/kgdb.h
include/linux/khugepaged.h
include/linux/libnvdimm.h
include/linux/lsm_hooks.h
include/linux/memcontrol.h
include/linux/memory.h
include/linux/memremap.h
include/linux/mfd/da9063/pdata.h [deleted file]
include/linux/mfd/intel_soc_pmic_mrfld.h [new file with mode: 0644]
include/linux/mfd/mt6397/core.h
include/linux/mfd/syscon.h
include/linux/migrate.h
include/linux/mlx5/device.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/mm_types_task.h
include/linux/mmu_notifier.h
include/linux/mmzone.h
include/linux/module.h
include/linux/module_signature.h [new file with mode: 0644]
include/linux/mtd/mtd.h
include/linux/mtd/nand.h
include/linux/mtd/sharpsl.h
include/linux/mtd/spi-nor.h
include/linux/nfs_fs.h
include/linux/nvme-fc-driver.h
include/linux/page_ext.h
include/linux/pagemap.h
include/linux/pagewalk.h [new file with mode: 0644]
include/linux/pci-aspm.h [deleted file]
include/linux/pci-p2pdma.h
include/linux/pci.h
include/linux/pci_hotplug.h
include/linux/pci_ids.h
include/linux/platform_data/cros_ec_commands.h
include/linux/platform_data/ti-sysc.h
include/linux/printk.h
include/linux/pwm.h
include/linux/qed/qed_rdma_if.h
include/linux/quicklist.h [deleted file]
include/linux/quotaops.h
include/linux/rbtree_augmented.h
include/linux/rcuwait.h
include/linux/sched.h
include/linux/sched/mm.h
include/linux/sched/task.h
include/linux/security.h
include/linux/shrinker.h
include/linux/slab.h
include/linux/soundwire/sdw.h
include/linux/soundwire/sdw_intel.h
include/linux/string.h
include/linux/sunrpc/cache.h
include/linux/sunrpc/sched.h
include/linux/sunrpc/svc_rdma.h
include/linux/sunrpc/xdr.h
include/linux/sunrpc/xprt.h
include/linux/sunrpc/xprtrdma.h
include/linux/swap.h
include/linux/t10-pi.h
include/linux/thread_info.h
include/linux/uaccess.h
include/linux/verification.h
include/linux/vmalloc.h
include/linux/zpool.h
include/rdma/ib.h
include/rdma/ib_umem.h
include/rdma/ib_umem_odp.h
include/rdma/ib_verbs.h
include/rdma/iw_portmap.h
include/rdma/opa_port_info.h
include/rdma/rdma_netlink.h
include/rdma/rdma_vt.h
include/rdma/rdmavt_cq.h
include/rdma/rdmavt_qp.h
include/rdma/signature.h
include/scsi/scsi_cmnd.h
include/scsi/scsi_dbg.h
include/scsi/scsi_host.h
include/trace/events/rpcrdma.h
include/trace/events/writeback.h
include/uapi/asm-generic/mman-common.h
include/uapi/linux/coff.h
include/uapi/linux/dm-ioctl.h
include/uapi/linux/fs.h
include/uapi/linux/fuse.h
include/uapi/linux/io_uring.h
include/uapi/linux/kvm.h
include/uapi/linux/nfsd/cld.h
include/uapi/linux/pci_regs.h
include/uapi/linux/vfio.h
include/uapi/linux/virtio_fs.h [new file with mode: 0644]
include/uapi/linux/virtio_ids.h
include/uapi/rdma/mlx5_user_ioctl_verbs.h
include/uapi/scsi/scsi_bsg_fc.h
include/uapi/scsi/scsi_netlink.h
include/uapi/scsi/scsi_netlink_fc.h
init/Kconfig
init/main.c
ipc/mqueue.c
ipc/sem.c
kernel/Makefile
kernel/bpf/inode.c
kernel/cpu.c
kernel/debug/debug_core.c
kernel/elfcore.c
kernel/events/core.c
kernel/events/uprobes.c
kernel/exit.c
kernel/fork.c
kernel/gcov/Kconfig
kernel/kexec.c
kernel/kexec_core.c
kernel/kexec_file.c
kernel/livepatch/core.c
kernel/locking/qspinlock_paravirt.h
kernel/module.c
kernel/module_signature.c [new file with mode: 0644]
kernel/module_signing.c
kernel/panic.c
kernel/params.c
kernel/power/hibernate.c
kernel/printk/braille.c
kernel/printk/printk.c
kernel/resource.c
kernel/sched/core.c
kernel/sched/fair.c
kernel/sched/idle.c
kernel/sched/membarrier.c
kernel/sched/sched.h
kernel/sysctl.c
kernel/time/timer.c
kernel/trace/bpf_trace.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_uprobe.c
lib/Kconfig.debug
lib/Kconfig.kasan
lib/bug.c
lib/extable.c
lib/generic-radix-tree.c
lib/hexdump.c
lib/iov_iter.c
lib/lzo/lzo1x_compress.c
lib/rbtree_test.c
lib/show_mem.c
lib/string.c
lib/strncpy_from_user.c
lib/strnlen_user.c
lib/test_kasan.c
lib/test_printf.c
lib/vsprintf.c
lib/zstd/mem.h
mm/Kconfig
mm/Kconfig.debug
mm/Makefile
mm/compaction.c
mm/filemap.c
mm/frame_vector.c
mm/gup.c
mm/hmm.c
mm/huge_memory.c
mm/hugetlb.c
mm/hugetlb_cgroup.c
mm/init-mm.c
mm/internal.h
mm/kasan/common.c
mm/kasan/kasan.h
mm/kasan/report.c
mm/kasan/tags_report.c
mm/khugepaged.c
mm/kmemleak.c
mm/ksm.c
mm/madvise.c
mm/memcontrol.c
mm/memfd.c
mm/memory.c
mm/memory_hotplug.c
mm/mempolicy.c
mm/memremap.c
mm/migrate.c
mm/mincore.c
mm/mlock.c
mm/mmap.c
mm/mmu_gather.c
mm/mmu_notifier.c
mm/mprotect.c
mm/mremap.c
mm/msync.c
mm/nommu.c
mm/oom_kill.c
mm/page_alloc.c
mm/page_owner.c
mm/page_poison.c
mm/page_vma_mapped.c
mm/pagewalk.c
mm/quicklist.c [deleted file]
mm/rmap.c
mm/shmem.c
mm/slab.h
mm/slab_common.c
mm/slob.c
mm/slub.c
mm/sparse.c
mm/swap.c
mm/swap_state.c
mm/usercopy.c
mm/util.c
mm/vmalloc.c
mm/vmscan.c
mm/vmstat.c
mm/z3fold.c
mm/zpool.c
mm/zsmalloc.c
mm/zswap.c
net/9p/client.c
net/9p/trans_rdma.c
net/ceph/ceph_common.c
net/ceph/messenger.c
net/ceph/mon_client.c
net/ceph/osd_client.c
net/ceph/osdmap.c
net/sunrpc/auth_gss/auth_gss.c
net/sunrpc/cache.c
net/sunrpc/clnt.c
net/sunrpc/sched.c
net/sunrpc/svc.c
net/sunrpc/xdr.c
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/backchannel.c
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/svc_rdma.c
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
net/sunrpc/xprtrdma/svc_rdma_transport.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h
net/sunrpc/xprtsock.c
net/xdp/xdp_umem.c
net/xdp/xsk.c
samples/rpmsg/rpmsg_client_sample.c
samples/vfio-mdev/mtty.c
scripts/Makefile
scripts/Makefile.modpost
scripts/checkpatch.pl
scripts/coccinelle/misc/add_namespace.cocci [new file with mode: 0644]
scripts/export_report.pl
scripts/gcc-plugins/randomize_layout_plugin.c
scripts/gdb/linux/symbols.py
scripts/mod/modpost.c
scripts/mod/modpost.h
scripts/nsdeps [new file with mode: 0644]
security/Kconfig
security/Makefile
security/integrity/Kconfig
security/integrity/digsig.c
security/integrity/ima/Kconfig
security/integrity/ima/Makefile
security/integrity/ima/ima.h
security/integrity/ima/ima_api.c
security/integrity/ima/ima_appraise.c
security/integrity/ima/ima_crypto.c
security/integrity/ima/ima_main.c
security/integrity/ima/ima_modsig.c [new file with mode: 0644]
security/integrity/ima/ima_policy.c
security/integrity/ima/ima_template.c
security/integrity/ima/ima_template_lib.c
security/integrity/ima/ima_template_lib.h
security/integrity/integrity.h
security/keys/trusted.c
security/lockdown/Kconfig [new file with mode: 0644]
security/lockdown/Makefile [new file with mode: 0644]
security/lockdown/lockdown.c [new file with mode: 0644]
security/safesetid/securityfs.c
security/security.c
security/selinux/hooks.c
security/selinux/include/classmap.h
security/selinux/include/objsec.h
security/selinux/netif.c
security/selinux/netnode.c
security/selinux/netport.c
security/selinux/ss/policydb.c
security/selinux/ss/policydb.h
security/selinux/ss/services.c
security/selinux/ss/sidtab.c
security/selinux/ss/sidtab.h
security/smack/smack_access.c
security/smack/smack_lsm.c
sound/firewire/dice/dice-alesis.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_analog.c
sound/pci/hda/patch_realtek.c
sound/soc/atmel/atmel_ssc_dai.c
sound/soc/atmel/atmel_ssc_dai.h
sound/soc/codecs/pcm3168a.c
sound/soc/fsl/fsl_sai.c
sound/soc/fsl/fsl_sai.h
sound/soc/jz4740/Kconfig
sound/soc/jz4740/Makefile
sound/soc/jz4740/qi_lb60.c [deleted file]
sound/soc/sh/rcar/ssi.c
sound/soc/soc-core.c
sound/soc/ti/Kconfig
sound/usb/quirks.c
tools/arch/x86/include/asm/cpufeatures.h
tools/arch/x86/include/uapi/asm/unistd.h
tools/hv/Build [new file with mode: 0644]
tools/hv/Makefile
tools/include/asm/bug.h
tools/include/linux/rbtree.h
tools/include/linux/rbtree_augmented.h
tools/include/uapi/asm-generic/unistd.h
tools/include/uapi/linux/fs.h
tools/include/uapi/linux/prctl.h
tools/lib/rbtree.c
tools/lib/traceevent/Build
tools/lib/traceevent/Documentation/libtraceevent-event_print.txt [new file with mode: 0644]
tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt
tools/lib/traceevent/Documentation/libtraceevent-handle.txt
tools/lib/traceevent/Documentation/libtraceevent-plugins.txt [new file with mode: 0644]
tools/lib/traceevent/Documentation/libtraceevent.txt
tools/lib/traceevent/Makefile
tools/lib/traceevent/event-parse.c
tools/lib/traceevent/event-parse.h
tools/lib/traceevent/plugins/Build [new file with mode: 0644]
tools/lib/traceevent/plugins/Makefile [new file with mode: 0644]
tools/lib/traceevent/plugins/plugin_cfg80211.c [moved from tools/lib/traceevent/plugin_cfg80211.c with 100% similarity]
tools/lib/traceevent/plugins/plugin_function.c [moved from tools/lib/traceevent/plugin_function.c with 100% similarity]
tools/lib/traceevent/plugins/plugin_hrtimer.c [moved from tools/lib/traceevent/plugin_hrtimer.c with 100% similarity]
tools/lib/traceevent/plugins/plugin_jbd2.c [moved from tools/lib/traceevent/plugin_jbd2.c with 100% similarity]
tools/lib/traceevent/plugins/plugin_kmem.c [moved from tools/lib/traceevent/plugin_kmem.c with 100% similarity]
tools/lib/traceevent/plugins/plugin_kvm.c [moved from tools/lib/traceevent/plugin_kvm.c with 100% similarity]
tools/lib/traceevent/plugins/plugin_mac80211.c [moved from tools/lib/traceevent/plugin_mac80211.c with 100% similarity]
tools/lib/traceevent/plugins/plugin_sched_switch.c [moved from tools/lib/traceevent/plugin_sched_switch.c with 100% similarity]
tools/lib/traceevent/plugins/plugin_scsi.c [moved from tools/lib/traceevent/plugin_scsi.c with 100% similarity]
tools/lib/traceevent/plugins/plugin_xen.c [moved from tools/lib/traceevent/plugin_xen.c with 100% similarity]
tools/objtool/check.c
tools/perf/Makefile.config
tools/perf/Makefile.perf
tools/perf/arch/arm/util/cs-etm.c
tools/perf/arch/arm64/util/arm-spe.c
tools/perf/arch/arm64/util/dwarf-regs.c
tools/perf/arch/arm64/util/header.c
tools/perf/arch/arm64/util/unwind-libunwind.c
tools/perf/arch/powerpc/util/dwarf-regs.c
tools/perf/arch/powerpc/util/header.c
tools/perf/arch/powerpc/util/kvm-stat.c
tools/perf/arch/powerpc/util/skip-callchain-idx.c
tools/perf/arch/powerpc/util/sym-handling.c
tools/perf/arch/s390/Makefile
tools/perf/arch/s390/util/auxtrace.c
tools/perf/arch/s390/util/machine.c
tools/perf/arch/x86/tests/intel-cqm.c
tools/perf/arch/x86/tests/perf-time-to-tsc.c
tools/perf/arch/x86/tests/rdpmc.c
tools/perf/arch/x86/util/archinsn.c
tools/perf/arch/x86/util/event.c
tools/perf/arch/x86/util/intel-bts.c
tools/perf/arch/x86/util/intel-pt.c
tools/perf/arch/x86/util/machine.c
tools/perf/arch/x86/util/tsc.c
tools/perf/arch/x86/util/unwind-libunwind.c
tools/perf/bench/epoll-ctl.c
tools/perf/bench/epoll-wait.c
tools/perf/bench/futex-hash.c
tools/perf/bench/futex-lock-pi.c
tools/perf/bench/futex-requeue.c
tools/perf/bench/futex-wake-parallel.c
tools/perf/bench/futex-wake.c
tools/perf/bench/numa.c
tools/perf/bench/sched-messaging.c
tools/perf/bench/sched-pipe.c
tools/perf/builtin-annotate.c
tools/perf/builtin-buildid-cache.c
tools/perf/builtin-buildid-list.c
tools/perf/builtin-c2c.c
tools/perf/builtin-config.c
tools/perf/builtin-diff.c
tools/perf/builtin-evlist.c
tools/perf/builtin-inject.c
tools/perf/builtin-kmem.c
tools/perf/builtin-kvm.c
tools/perf/builtin-list.c
tools/perf/builtin-lock.c
tools/perf/builtin-mem.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-sched.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/builtin-timechart.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/jvmti/Build
tools/perf/lib/Makefile
tools/perf/lib/core.c
tools/perf/lib/cpumap.c
tools/perf/lib/evlist.c
tools/perf/lib/evsel.c
tools/perf/lib/include/internal/evlist.h
tools/perf/lib/include/internal/evsel.h
tools/perf/lib/include/internal/lib.h
tools/perf/lib/include/internal/mmap.h [new file with mode: 0644]
tools/perf/lib/include/perf/core.h
tools/perf/lib/include/perf/cpumap.h
tools/perf/lib/include/perf/evlist.h
tools/perf/lib/lib.c
tools/perf/lib/libperf.map
tools/perf/lib/tests/test-cpumap.c
tools/perf/lib/tests/test-evlist.c
tools/perf/lib/tests/test-evsel.c
tools/perf/lib/tests/test-threadmap.c
tools/perf/perf.c
tools/perf/pmu-events/README
tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/mapfile.csv
tools/perf/pmu-events/arch/powerpc/power8/other.json
tools/perf/pmu-events/arch/x86/amdfam17h/cache.json
tools/perf/pmu-events/arch/x86/amdfam17h/core.json
tools/perf/pmu-events/jevents.c
tools/perf/tests/backward-ring-buffer.c
tools/perf/tests/bitmap.c
tools/perf/tests/bpf.c
tools/perf/tests/clang.c
tools/perf/tests/code-reading.c
tools/perf/tests/cpumap.c
tools/perf/tests/dso-data.c
tools/perf/tests/dwarf-unwind.c
tools/perf/tests/event-times.c
tools/perf/tests/event_update.c
tools/perf/tests/evsel-roundtrip-name.c
tools/perf/tests/hists_common.c
tools/perf/tests/hists_cumulate.c
tools/perf/tests/hists_link.c
tools/perf/tests/hists_output.c
tools/perf/tests/keep-tracking.c
tools/perf/tests/llvm.c
tools/perf/tests/make
tools/perf/tests/mem2node.c
tools/perf/tests/mmap-basic.c
tools/perf/tests/mmap-thread-lookup.c
tools/perf/tests/openat-syscall-all-cpus.c
tools/perf/tests/openat-syscall-tp-fields.c
tools/perf/tests/parse-events.c
tools/perf/tests/parse-no-sample-id-all.c
tools/perf/tests/perf-hooks.c
tools/perf/tests/perf-record.c
tools/perf/tests/pmu.c
tools/perf/tests/sample-parsing.c
tools/perf/tests/sdt.c
tools/perf/tests/stat.c
tools/perf/tests/sw-clock.c
tools/perf/tests/switch-tracking.c
tools/perf/tests/task-exit.c
tools/perf/tests/thread-map.c
tools/perf/tests/topology.c
tools/perf/tests/vmlinux-kallsyms.c
tools/perf/ui/browser.c
tools/perf/ui/browsers/annotate.c
tools/perf/ui/browsers/header.c
tools/perf/ui/browsers/hists.c
tools/perf/ui/browsers/map.c
tools/perf/ui/browsers/res_sample.c
tools/perf/ui/browsers/scripts.c
tools/perf/ui/gtk/helpline.c
tools/perf/ui/gtk/hists.c
tools/perf/ui/gtk/progress.c
tools/perf/ui/gtk/setup.c
tools/perf/ui/gtk/util.c
tools/perf/ui/helpline.c
tools/perf/ui/hist.c
tools/perf/ui/setup.c
tools/perf/ui/stdio/hist.c
tools/perf/ui/tui/helpline.c
tools/perf/ui/tui/setup.c
tools/perf/ui/tui/util.c
tools/perf/util/Build
tools/perf/util/annotate.c
tools/perf/util/arm-spe.c
tools/perf/util/auxtrace.c
tools/perf/util/auxtrace.h
tools/perf/util/bpf-event.c
tools/perf/util/bpf-event.h
tools/perf/util/bpf-loader.c
tools/perf/util/branch.c
tools/perf/util/branch.h
tools/perf/util/build-id.c
tools/perf/util/callchain.c
tools/perf/util/callchain.h
tools/perf/util/cloexec.c
tools/perf/util/copyfile.c [new file with mode: 0644]
tools/perf/util/copyfile.h [new file with mode: 0644]
tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
tools/perf/util/cs-etm.c
tools/perf/util/data-convert-bt.c
tools/perf/util/data.c
tools/perf/util/debug.c
tools/perf/util/debug.h
tools/perf/util/demangle-java.c
tools/perf/util/demangle-rust.c
tools/perf/util/dwarf-regs.c
tools/perf/util/env.h
tools/perf/util/event.c
tools/perf/util/event.h
tools/perf/util/evlist.c
tools/perf/util/evlist.h
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/evsel_config.h [new file with mode: 0644]
tools/perf/util/evsel_fprintf.c
tools/perf/util/evsel_fprintf.h [new file with mode: 0644]
tools/perf/util/genelf.h
tools/perf/util/header.c
tools/perf/util/header.h
tools/perf/util/hist.h
tools/perf/util/intel-bts.c
tools/perf/util/intel-pt.c
tools/perf/util/jitdump.c
tools/perf/util/kvm-stat.h
tools/perf/util/libunwind/arm64.c
tools/perf/util/libunwind/x86_32.c
tools/perf/util/llvm-utils.c
tools/perf/util/lzma.c
tools/perf/util/machine.c
tools/perf/util/machine.h
tools/perf/util/memswap.h
tools/perf/util/mmap.c
tools/perf/util/mmap.h
tools/perf/util/namespaces.c
tools/perf/util/namespaces.h
tools/perf/util/parse-events.c
tools/perf/util/parse-events.y
tools/perf/util/perf-hooks.c
tools/perf/util/perf_event_attr_fprintf.c [new file with mode: 0644]
tools/perf/util/pmu.c
tools/perf/util/probe-event.c
tools/perf/util/probe-file.c
tools/perf/util/probe-finder.c
tools/perf/util/python-ext-sources
tools/perf/util/python.c
tools/perf/util/record.c
tools/perf/util/rwsem.c
tools/perf/util/s390-cpumsf.c
tools/perf/util/s390-sample-raw.c
tools/perf/util/scripting-engines/trace-event-python.c
tools/perf/util/session.c
tools/perf/util/session.h
tools/perf/util/sort.c
tools/perf/util/srccode.c
tools/perf/util/stat-shadow.c
tools/perf/util/stat.c
tools/perf/util/stat.h
tools/perf/util/svghelper.c
tools/perf/util/symbol-elf.c
tools/perf/util/symbol-minimal.c
tools/perf/util/symbol.c
tools/perf/util/synthetic-events.c [new file with mode: 0644]
tools/perf/util/synthetic-events.h [new file with mode: 0644]
tools/perf/util/target.c
tools/perf/util/top.c
tools/perf/util/trace-event-info.c
tools/perf/util/trace-event-read.c
tools/perf/util/trace-event.c
tools/perf/util/tsc.h
tools/perf/util/unwind-libdw.c
tools/perf/util/unwind-libunwind-local.c
tools/perf/util/usage.c
tools/perf/util/util.c
tools/perf/util/util.h
tools/perf/util/vdso.c
tools/perf/util/zlib.c
tools/power/x86/intel-speed-select/isst-config.c
tools/power/x86/intel-speed-select/isst-core.c
tools/power/x86/intel-speed-select/isst-display.c
tools/power/x86/intel-speed-select/isst.h
tools/testing/nvdimm/dimm_devs.c
tools/testing/nvdimm/test/iomap.c
tools/testing/nvdimm/test/nfit.c
tools/testing/selftests/.gitignore
tools/testing/selftests/Makefile
tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
tools/testing/selftests/kexec/test_kexec_file_load.sh
tools/testing/selftests/kvm/dirty_log_test.c
tools/testing/selftests/kvm/include/kvm_util.h
tools/testing/selftests/kvm/include/x86_64/processor.h
tools/testing/selftests/kvm/lib/aarch64/processor.c
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/lib/x86_64/processor.c
tools/testing/selftests/kvm/lib/x86_64/ucall.c
tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
tools/testing/selftests/livepatch/config
tools/testing/selftests/membarrier/.gitignore
tools/testing/selftests/membarrier/Makefile
tools/testing/selftests/membarrier/membarrier_test_impl.h [moved from tools/testing/selftests/membarrier/membarrier_test.c with 95% similarity]
tools/testing/selftests/membarrier/membarrier_test_multi_thread.c [new file with mode: 0644]
tools/testing/selftests/membarrier/membarrier_test_single_thread.c [new file with mode: 0644]
tools/testing/selftests/seccomp/seccomp_bpf.c
tools/testing/selftests/tpm2/Makefile
tools/testing/selftests/watchdog/watchdog-test.c
usr/Makefile

index ce2c634..70580bd 100644 (file)
@@ -32,6 +32,7 @@
 *.lzo
 *.mod
 *.mod.c
+*.ns_deps
 *.o
 *.o.*
 *.patch
index fff817e..f307506 100644 (file)
@@ -12,7 +12,7 @@ Description:  The /dev/kmsg character device node provides userspace access
                The logged line can be prefixed with a <N> syslog prefix, which
                carries the syslog priority and facility. The single decimal
                prefix number is composed of the 3 lowest bits being the syslog
-               priority and the higher bits the syslog facility number.
+               priority and the next 8 bits the syslog facility number.
 
                If no prefix is given, the priority number is the default kernel
                log priority and the facility number is set to LOG_USER (1). It
@@ -90,13 +90,12 @@ Description:        The /dev/kmsg character device node provides userspace access
                  +sound:card0 - subsystem:devname
 
                The flags field carries '-' by default. A 'c' indicates a
-               fragment of a line. All following fragments are flagged with
-               '+'. Note, that these hints about continuation lines are not
-               necessarily correct, and the stream could be interleaved with
-               unrelated messages, but merging the lines in the output
-               usually produces better human readable results. A similar
-               logic is used internally when messages are printed to the
-               console, /proc/kmsg or the syslog() syscall.
+               fragment of a line. Note, that these hints about continuation
+               lines are not necessarily correct, and the stream could be
+               interleaved with unrelated messages, but merging the lines in
+               the output usually produces better human readable results. A
+               similar logic is used internally when messages are printed to
+               the console, /proc/kmsg or the syslog() syscall.
 
                By default, kernel tries to avoid fragments by concatenating
                when it can and fragments are rare; however, when extended
index fc376a3..29ebe9a 100644 (file)
@@ -37,7 +37,7 @@ Description:
                        euid:= decimal value
                        fowner:= decimal value
                lsm:    are LSM specific
-               option: appraise_type:= [imasig]
+               option: appraise_type:= [imasig] [imasig|modsig]
                        template:= name of a defined IMA template type
                        (eg, ima-ng). Only valid when action is "measure".
                        pcr:= decimal value
@@ -105,3 +105,7 @@ Description:
 
                        measure func=KEXEC_KERNEL_CHECK pcr=4
                        measure func=KEXEC_INITRAMFS_CHECK pcr=5
+
+               Example of appraise rule allowing modsig appended signatures:
+
+                       appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig
diff --git a/Documentation/ABI/testing/sysfs-class-backlight b/Documentation/ABI/testing/sysfs-class-backlight
new file mode 100644 (file)
index 0000000..3ab175a
--- /dev/null
@@ -0,0 +1,26 @@
+What:          /sys/class/backlight/<backlight>/scale
+Date:          July 2019
+KernelVersion: 5.4
+Contact:       Daniel Thompson <daniel.thompson@linaro.org>
+Description:
+               Description of the scale of the brightness curve.
+
+               The human eye senses brightness approximately logarithmically,
+               hence linear changes in brightness are perceived as being
+               non-linear. To achieve a linear perception of brightness changes
+               controls like sliders need to apply a logarithmic mapping for
+               backlights with a linear brightness curve.
+
+               Possible values of the attribute are:
+
+               unknown
+                 The scale of the brightness curve is unknown.
+
+               linear
+                 The brightness changes linearly with each step. Brightness
+                 controls should apply a logarithmic mapping for a linear
+                 perception.
+
+               non-linear
+                 The brightness changes non-linearly with each step. Brightness
+                 controls should use a linear mapping for a linear perception.
index c3afe9f..36094fb 100644 (file)
@@ -48,3 +48,13 @@ Description: Remote processor state
 
                Writing "stop" will attempt to halt the remote processor and
                return it to the "offline" state.
+
+What:          /sys/class/remoteproc/.../name
+Date:          August 2019
+KernelVersion: 5.4
+Contact:       Suman Anna <s-anna@ti.com>
+Description:   Remote processor name
+
+               Reports the name of the remote processor. Userspace can use
+               this to identify a remote processor exactly, which eases
+               modifying the 'firmware' or 'state' files.
index 6317ade..675f9b5 100644 (file)
@@ -72,3 +72,37 @@ Description:
                It is a read/write file. When read, the currently assigned
                pretimeout governor is returned.  When written, it sets
                the pretimeout governor.
+
+What:          /sys/class/watchdog/watchdog1/access_cs0
+Date:          August 2019
+Contact:       Ivan Mikhaylov <i.mikhaylov@yadro.com>,
+               Alexander Amelkin <a.amelkin@yadro.com>
+Description:
+               It is a read/write file. This attribute exists only if the
+               system has booted from the alternate flash chip due to
+               expiration of a watchdog timer of AST2400/AST2500 when
+               alternate boot function was enabled with 'aspeed,alt-boot'
+               devicetree option for that watchdog or with an appropriate
+               h/w strapping (for WDT2 only).
+
+               At alternate flash the 'access_cs0' sysfs node provides:
+                       ast2400: a way to get access to the primary SPI flash
+                               chip at CS0 after booting from the alternate
+                               chip at CS1.
+                       ast2500: a way to restore the normal address mapping
+                               from (CS0->CS1, CS1->CS0) to (CS0->CS0,
+                               CS1->CS1).
+
+               Clearing the boot code selection and timeout counter also
+               resets the chip select line mapping to its initial state. When
+               the SoC is in normal mapping state (i.e. booted from CS0),
+               clearing those bits does nothing for both versions of the SoC.
+               For alternate boot mode (booted from CS1 due to wdt2
+               expiration) the behavior differs as described above.
+
+               This option can be used with wdt2 (watchdog1) only.
+
+               When read, the current status of the boot code selection is
+               shown. When written with any non-zero value, it clears
+               the boot code selection and the timeout counter, which results
+               in chipselect reset for AST2400/AST2500.
index dca326e..7ab2b1b 100644 (file)
@@ -251,3 +251,10 @@ Description:
                If checkpoint=disable, it displays the number of blocks that are unusable.
                 If checkpoint=enable it displays the number of blocks that would be unusable
                 if checkpoint=disable were to be set.
+
+What:          /sys/fs/f2fs/<disk>/encoding
+Date:          July 2019
+Contact:       "Daniel Rosenberg" <drosen@google.com>
+Description:
+               Displays name and version of the encoding set for the filesystem.
+                If no encoding is set, displays (none)
index 29601d9..ed35833 100644 (file)
@@ -429,10 +429,15 @@ KernelVersion:    2.6.22
 Contact:       Pekka Enberg <penberg@cs.helsinki.fi>,
                Christoph Lameter <cl@linux-foundation.org>
 Description:
-               The shrink file is written when memory should be reclaimed from
-               a cache.  Empty partial slabs are freed and the partial list is
-               sorted so the slabs with the fewest available objects are used
-               first.
+               The shrink file is used to reclaim unused slab cache
+               memory from a cache.  Empty per-cpu or partial slabs
+               are freed and the partial list is sorted so the slabs
+               with the fewest available objects are used first.
+               It only accepts a value of "1" on write for shrinking
+               the cache. Other input values are considered invalid.
+               Shrinking slab caches might be expensive and can
+               adversely impact other running applications.  So it
+               should be used with care.
 
 What:          /sys/kernel/slab/cache/slab_size
 Date:          May 2007
index 41bdc03..0ae4f56 100644 (file)
@@ -85,8 +85,10 @@ Brief summary of control files.
  memory.oom_control                 set/show oom controls.
  memory.numa_stat                   show the number of memory usage per numa
                                     node
-
  memory.kmem.limit_in_bytes          set/show hard limit for kernel memory
+                                     This knob is deprecated and shouldn't be
+                                     used. It is planned that this be removed in
+                                     the foreseeable future.
  memory.kmem.usage_in_bytes          show current kernel memory allocation
  memory.kmem.failcnt                 show the number of kernel memory usage
                                     hits limits
diff --git a/Documentation/admin-guide/device-mapper/dm-clone.rst b/Documentation/admin-guide/device-mapper/dm-clone.rst
new file mode 100644 (file)
index 0000000..b43a34c
--- /dev/null
@@ -0,0 +1,333 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+========
+dm-clone
+========
+
+Introduction
+============
+
+dm-clone is a device mapper target which produces a one-to-one copy of an
+existing, read-only source device into a writable destination device: It
+presents a virtual block device which makes all data appear immediately, and
+redirects reads and writes accordingly.
+
+The main use case of dm-clone is to clone a potentially remote, high-latency,
+read-only, archival-type block device into a writable, fast, primary-type device
+for fast, low-latency I/O. The cloned device is visible/mountable immediately
+and the copy of the source device to the destination device happens in the
+background, in parallel with user I/O.
+
+For example, one could restore an application backup from a read-only copy,
+accessible through a network storage protocol (NBD, Fibre Channel, iSCSI, AoE,
+etc.), into a local SSD or NVMe device, and start using the device immediately,
+without waiting for the restore to complete.
+
+When the cloning completes, the dm-clone table can be removed altogether and be
+replaced, e.g., by a linear table, mapping directly to the destination device.
+
+The dm-clone target reuses the metadata library used by the thin-provisioning
+target.
+
+Glossary
+========
+
+   Hydration
+     The process of filling a region of the destination device with data from
+     the same region of the source device, i.e., copying the region from the
+     source to the destination device.
+
+Once a region gets hydrated we redirect all I/O regarding it to the destination
+device.
+
+Design
+======
+
+Sub-devices
+-----------
+
+The target is constructed by passing three devices to it (along with other
+parameters detailed later):
+
+1. A source device - the read-only device that gets cloned and source of the
+   hydration.
+
+2. A destination device - the destination of the hydration, which will become a
+   clone of the source device.
+
+3. A small metadata device - it records which regions are already valid in the
+   destination device, i.e., which regions have already been hydrated, or have
+   been written to directly, via user I/O.
+
+The size of the destination device must be at least equal to the size of the
+source device.
+
+Regions
+-------
+
+dm-clone divides the source and destination devices into fixed-size regions.
+Regions are the unit of hydration, i.e., the minimum amount of data copied from
+the source to the destination device.
+
+The region size is configurable when you first create the dm-clone device. The
+recommended region size is the same as the file system block size, which usually
+is 4KB. The region size must be between 8 sectors (4KB) and 2097152 sectors
+(1GB) and a power of two.
+
+Reads and writes from/to hydrated regions are serviced from the destination
+device.
+
+A read to a not yet hydrated region is serviced directly from the source device.
+
+A write to a not yet hydrated region will be delayed until the corresponding
+region has been hydrated; the hydration of the region starts immediately.
+
+Note that a write request with size equal to region size will skip copying of
+the corresponding region from the source device and overwrite the region of the
+destination device directly.
+
+Discards
+--------
+
+dm-clone interprets a discard request to a range that hasn't been hydrated yet
+as a hint to skip hydration of the regions covered by the request, i.e., it
+skips copying the region's data from the source to the destination device, and
+only updates its metadata.
+
+If the destination device supports discards, then by default dm-clone will pass
+down discard requests to it.
+
+Background Hydration
+--------------------
+
+dm-clone copies continuously from the source to the destination device, until
+all of the device has been copied.
+
+Copying data from the source to the destination device uses bandwidth. The user
+can set a throttle to prevent more than a certain amount of copying occurring at
+any one time. Moreover, dm-clone takes into account user I/O traffic going to
+the devices and pauses the background hydration when there is I/O in-flight.
+
+A message `hydration_threshold <#regions>` can be used to set the maximum number
+of regions being copied, the default being 1 region.
+
+dm-clone employs dm-kcopyd for copying portions of the source device to the
+destination device. By default, we issue copy requests of size equal to the
+region size. A message `hydration_batch_size <#regions>` can be used to tune the
+size of these copy requests. Increasing the hydration batch size results in
+dm-clone trying to batch together contiguous regions, so we copy the data in
+batches of this many regions.
+
+When the hydration of the destination device finishes, a dm event will be sent
+to user space.
+
+Updating on-disk metadata
+-------------------------
+
+On-disk metadata is committed every time a FLUSH or FUA bio is written. If no
+such requests are made then commits will occur every second. This means the
+dm-clone device behaves like a physical disk that has a volatile write cache. If
+power is lost you may lose some recent writes. The metadata should always be
+consistent in spite of any crash.
+
+Target Interface
+================
+
+Constructor
+-----------
+
+  ::
+
+   clone <metadata dev> <destination dev> <source dev> <region size>
+         [<#feature args> [<feature arg>]* [<#core args> [<core arg>]*]]
+
+ ================ ==============================================================
+ metadata dev     Fast device holding the persistent metadata
+ destination dev  The destination device, where the source will be cloned
+ source dev       Read only device containing the data that gets cloned
+ region size      The size of a region in sectors
+
+ #feature args    Number of feature arguments passed
+ feature args     no_hydration or no_discard_passdown
+
+ #core args       An even number of arguments corresponding to key/value pairs
+                  passed to dm-clone
+ core args        Key/value pairs passed to dm-clone, e.g. `hydration_threshold
+                  256`
+ ================ ==============================================================
+
+Optional feature arguments are:
+
+ ==================== =========================================================
+ no_hydration         Create a dm-clone instance with background hydration
+                      disabled
+ no_discard_passdown  Disable passing down discards to the destination device
+ ==================== =========================================================
+
+Optional core arguments are:
+
+ ================================ ==============================================
+ hydration_threshold <#regions>   Maximum number of regions being copied from
+                                  the source to the destination device at any
+                                  one time, during background hydration.
+ hydration_batch_size <#regions>  During background hydration, try to batch
+                                  together contiguous regions, so we copy data
+                                  from the source to the destination device in
+                                  batches of this many regions.
+ ================================ ==============================================
+
+Status
+------
+
+  ::
+
+   <metadata block size> <#used metadata blocks>/<#total metadata blocks>
+   <region size> <#hydrated regions>/<#total regions> <#hydrating regions>
+   <#feature args> <feature args>* <#core args> <core args>*
+   <clone metadata mode>
+
+ ======================= =======================================================
+ metadata block size     Fixed block size for each metadata block in sectors
+ #used metadata blocks   Number of metadata blocks used
+ #total metadata blocks  Total number of metadata blocks
+ region size             Configurable region size for the device in sectors
+ #hydrated regions       Number of regions that have finished hydrating
+ #total regions          Total number of regions to hydrate
+ #hydrating regions      Number of regions currently hydrating
+ #feature args           Number of feature arguments to follow
+ feature args            Feature arguments, e.g. `no_hydration`
+ #core args              Even number of core arguments to follow
+ core args               Key/value pairs for tuning the core, e.g.
+                         `hydration_threshold 256`
+ clone metadata mode     ro if read-only, rw if read-write
+
+                         In serious cases where even a read-only mode is deemed
+                         unsafe no further I/O will be permitted and the status
+                         will just contain the string 'Fail'. If the metadata
+                         mode changes, a dm event will be sent to user space.
+ ======================= =======================================================
+
+Messages
+--------
+
+  `disable_hydration`
+      Disable the background hydration of the destination device.
+
+  `enable_hydration`
+      Enable the background hydration of the destination device.
+
+  `hydration_threshold <#regions>`
+      Set background hydration threshold.
+
+  `hydration_batch_size <#regions>`
+      Set background hydration batch size.
+
+Examples
+========
+
+Clone a device containing a file system
+---------------------------------------
+
+1. Create the dm-clone device.
+
+   ::
+
+    dmsetup create clone --table "0 1048576000 clone $metadata_dev $dest_dev \
+      $source_dev 8 1 no_hydration"
+
+2. Mount the device and trim the file system. dm-clone interprets the discards
+   sent by the file system and it will not hydrate the unused space.
+
+   ::
+
+    mount /dev/mapper/clone /mnt/cloned-fs
+    fstrim /mnt/cloned-fs
+
+3. Enable background hydration of the destination device.
+
+   ::
+
+    dmsetup message clone 0 enable_hydration
+
+4. When the hydration finishes, we can replace the dm-clone table with a linear
+   table.
+
+   ::
+
+    dmsetup suspend clone
+    dmsetup load clone --table "0 1048576000 linear $dest_dev 0"
+    dmsetup resume clone
+
+   The metadata device is no longer needed and can be safely discarded or reused
+   for other purposes.
+
+Known issues
+============
+
+1. Reads of not-yet-hydrated regions are redirected to the source device. If
+   reading the source device has high latency and the user repeatedly reads from
+   the same regions, this behaviour could degrade performance. We should use
+   these reads as hints to hydrate the relevant regions sooner. Currently, we
+   rely on the page cache to cache these regions, so we hopefully don't end up
+   reading them multiple times from the source device.
+
+2. We should release in-core resources, i.e., the bitmaps tracking which
+   regions are hydrated, after the hydration has finished.
+
+3. During background hydration, if we fail to read the source or write to the
+   destination device, we print an error message, but the hydration process
+   continues indefinitely, until it succeeds. We should stop the background
+   hydration after a number of failures and emit a dm event for user space to
+   notice.
+
+Why not...?
+===========
+
+We explored the following alternatives before implementing dm-clone:
+
+1. Use dm-cache with cache size equal to the source device and implement a new
+   cloning policy:
+
+   * The resulting cache device is not a one-to-one mirror of the source device
+     and thus we cannot remove the cache device once cloning completes.
+
+   * dm-cache writes to the source device, which violates our requirement that
+     the source device must be treated as read-only.
+
+   * Caching is semantically different from cloning.
+
+2. Use dm-snapshot with a COW device equal to the source device:
+
+   * dm-snapshot stores its metadata in the COW device, so the resulting device
+     is not a one-to-one mirror of the source device.
+
+   * No background copying mechanism.
+
+   * dm-snapshot needs to commit its metadata whenever a pending exception
+     completes, to ensure snapshot consistency. In the case of cloning, we don't
+     need to be so strict and can rely on committing metadata every time a FLUSH
+     or FUA bio is written, or periodically, like dm-thin and dm-cache do. This
+     improves the performance significantly.
+
+3. Use dm-mirror: The mirror target has a background copying/mirroring
+   mechanism, but it writes to all mirrors, thus violating our requirement that
+   the source device must be treated as read-only.
+
+4. Use dm-thin's external snapshot functionality. This approach is the most
+   promising among all alternatives, as the thinly-provisioned volume is a
+   one-to-one mirror of the source device and handles reads and writes to
+   un-provisioned/not-yet-cloned areas the same way as dm-clone does.
+
+   Still:
+
+   * There is no background copying mechanism, though one could be implemented.
+
+   * Most importantly, we want to support arbitrary block devices as the
+     destination of the cloning process and not restrict ourselves to
+     thinly-provisioned volumes. Thin-provisioning has an inherent metadata
+     overhead, for maintaining the thin volume mappings, which significantly
+     degrades performance.
+
+   Moreover, cloning a device shouldn't force the use of thin-provisioning. On
+   the other hand, if we wish to use thin provisioning, we can just use a thin
+   LV as dm-clone's destination device.
index a4d1c14..bb02caa 100644 (file)
@@ -125,6 +125,13 @@ check_at_most_once
     blocks, and a hash block will not be verified any more after all the data
     blocks it covers have been verified anyway.
 
+root_hash_sig_key_desc <key_description>
+    This is the description of the USER_KEY that the kernel will look up to get
+    the pkcs7 signature of the roothash. The pkcs7 signature is used to validate
+    the root hash during the creation of the device mapper block device.
+    Verification of roothash depends on the config DM_VERITY_VERIFY_ROOTHASH_SIG
+    being set in the kernel.
+
 Theory of operation
 ===================
 
index d381478..c7ac2f3 100644 (file)
                        enables the feature at boot time. By default, it is
                        disabled and the system will work mostly the same as a
                        kernel built without CONFIG_DEBUG_PAGEALLOC.
+                       Note: to get most of debug_pagealloc error reports, it's
+                       useful to also enable the page_owner functionality.
                        on: enable the feature
 
        debugpat        [X86] Enable PAT debugging
        lockd.nlm_udpport=M     [NFS] Assign UDP port.
                        Format: <integer>
 
+       lockdown=       [SECURITY]
+                       { integrity | confidentiality }
+                       Enable the kernel lockdown feature. If set to
+                       integrity, kernel features that allow userland to
+                       modify the running kernel are disabled. If set to
+                       confidentiality, kernel features that allow userland
+                       to extract confidential information from the kernel
+                       are also disabled.
+
        locktorture.nreaders_stress= [KNL]
                        Set the number of locking read-acquisition kthreads.
                        Defaults to being automatically set based on the
                                specify the device is described above.
                                If <order of align> is not specified,
                                PAGE_SIZE is used as alignment.
-                               PCI-PCI bridge can be specified, if resource
+                               A PCI-PCI bridge can be specified if resource
                                windows need to be expanded.
                                To specify the alignment for several
                                instances of a device, the PCI vendor,
                                device, subvendor, and subdevice may be
-                               specified, e.g., 4096@pci:8086:9c22:103c:198f
+                               specified, e.g., 12@pci:8086:9c22:103c:198f
+                               for 4096-byte alignment.
                ecrc=           Enable/disable PCIe ECRC (transaction layer
                                end-to-end CRC checking).
                                bios: Use BIOS/firmware settings. This is the
index 08af5ca..f77de49 100644 (file)
@@ -42,6 +42,9 @@ String Manipulation
 .. kernel-doc:: lib/string.c
    :export:
 
+.. kernel-doc:: include/linux/string.h
+   :internal:
+
 .. kernel-doc:: mm/util.c
    :functions: kstrdup kstrdup_const kstrndup kmemdup kmemdup_nul memdup_user
                vmemdup_user strndup_user memdup_user_nul
index bfc5c18..913a8cd 100644 (file)
@@ -176,6 +176,10 @@ properties:
     description: disable parity checking on the L2 cache (L220 or PL310).
     type: boolean
 
+  marvell,ecc-enable:
+    description: enable ECC protection on the L2 cache
+    type: boolean
+
   arm,outer-sync-disable:
     description: disable the outer sync operation on the L2 cache.
       Some core tiles, especially ARM PB11MPCore have a faulty L220 cache that
index 7b8b8eb..26410fb 100644 (file)
@@ -18,17 +18,19 @@ Clocks:
 -------
 
 
-The Device Tree node representing the AP806 system controller provides
-a number of clocks:
+The Device Tree node representing the AP806/AP807 system controller
+provides a number of clocks:
 
- - 0: clock of CPU cluster 0
- - 1: clock of CPU cluster 1
+ - 0: reference clock of CPU cluster 0
+ - 1: reference clock of CPU cluster 1
  - 2: fixed PLL at 1200 MHz
  - 3: MSS clock, derived from the fixed PLL
 
 Required properties:
 
- - compatible: must be: "marvell,ap806-clock"
+ - compatible: must be one of:
+   * "marvell,ap806-clock"
+   * "marvell,ap807-clock"
  - #clock-cells: must be set to 1
 
 Pinctrl:
@@ -143,3 +145,33 @@ ap_syscon1: system-controller@6f8000 {
                #thermal-sensor-cells = <1>;
        };
 };
+
+Cluster clocks:
+---------------
+
+Device Tree Clock bindings for cluster clock of Marvell
+AP806/AP807. Each cluster contains up to 2 CPUs running at the same
+frequency.
+
+Required properties:
+ - compatible: must be one of:
+   * "marvell,ap806-cpu-clock"
+   * "marvell,ap807-cpu-clock"
+ - #clock-cells : should be set to 1.
+
+ - clocks : shall be the input parent clock(s) phandle for the clock
+            (one per cluster)
+
+ - reg: register range associated with the cluster clocks
+
+ap_syscon1: system-controller@6f8000 {
+       compatible = "marvell,armada-ap806-syscon1", "syscon", "simple-mfd";
+       reg = <0x6f8000 0x1000>;
+
+       cpu_clk: clock-cpu@278 {
+               compatible = "marvell,ap806-cpu-clock";
+               clocks = <&ap_clk 0>, <&ap_clk 1>;
+               #clock-cells = <1>;
+               reg = <0x278 0xa30>;
+       };
+};
index eddde4f..f6d6642 100644 (file)
@@ -48,3 +48,11 @@ avs: avs@11500 {
        compatible = "marvell,armada-3700-avs", "syscon";
        reg = <0x11500 0x40>;
 }
+
+
+CZ.NIC's Turris Mox SOHO router Device Tree Bindings
+----------------------------------------------------
+
+Required root node property:
+
+ - compatible: must contain "cznic,turris-mox"
index 161e63a..ff000cc 100644 (file)
@@ -8,6 +8,7 @@ Required Properties:
 - compatible: Should be one of:
        - "mediatek,mt2701-apmixedsys"
        - "mediatek,mt2712-apmixedsys", "syscon"
+       - "mediatek,mt6779-apmixedsys", "syscon"
        - "mediatek,mt6797-apmixedsys"
        - "mediatek,mt7622-apmixedsys"
        - "mediatek,mt7623-apmixedsys", "mediatek,mt2701-apmixedsys"
index 07c9d81..e4ca7b7 100644 (file)
@@ -7,6 +7,7 @@ Required Properties:
 
 - compatible: Should be one of:
        - "mediatek,mt2701-audsys", "syscon"
+       - "mediatek,mt6779-audio", "syscon"
        - "mediatek,mt7622-audsys", "syscon"
        - "mediatek,mt7623-audsys", "mediatek,mt2701-audsys", "syscon"
        - "mediatek,mt8183-audiosys", "syscon"
index d8930f6..1f4aaa1 100644 (file)
@@ -6,6 +6,7 @@ The MediaTek camsys controller provides various clocks to the system.
 Required Properties:
 
 - compatible: Should be one of:
+       - "mediatek,mt6779-camsys", "syscon"
        - "mediatek,mt8183-camsys", "syscon"
 - #clock-cells: Must be 1
 
index e3bc4a1..2b693e3 100644 (file)
@@ -8,6 +8,7 @@ Required Properties:
 - compatible: Should be one of:
        - "mediatek,mt2701-imgsys", "syscon"
        - "mediatek,mt2712-imgsys", "syscon"
+       - "mediatek,mt6779-imgsys", "syscon"
        - "mediatek,mt6797-imgsys", "syscon"
        - "mediatek,mt7623-imgsys", "mediatek,mt2701-imgsys", "syscon"
        - "mediatek,mt8173-imgsys", "syscon"
index a909139..db2f4fd 100644 (file)
@@ -9,6 +9,7 @@ Required Properties:
 - compatible: Should be one of:
        - "mediatek,mt2701-infracfg", "syscon"
        - "mediatek,mt2712-infracfg", "syscon"
+       - "mediatek,mt6779-infracfg_ao", "syscon"
        - "mediatek,mt6797-infracfg", "syscon"
        - "mediatek,mt7622-infracfg", "syscon"
        - "mediatek,mt7623-infracfg", "mediatek,mt2701-infracfg", "syscon"
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,ipesys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,ipesys.txt
new file mode 100644 (file)
index 0000000..2ce889b
--- /dev/null
@@ -0,0 +1,22 @@
+MediaTek ipesys controller
+============================
+
+The MediaTek ipesys controller provides various clocks to the system.
+
+Required Properties:
+
+- compatible: Should be one of:
+       - "mediatek,mt6779-ipesys", "syscon"
+- #clock-cells: Must be 1
+
+The ipesys controller uses the common clk binding from
+Documentation/devicetree/bindings/clock/clock-bindings.txt
+The available clocks are defined in dt-bindings/clock/mt*-clk.h.
+
+Example:
+
+ipesys: clock-controller@1b000000 {
+       compatible = "mediatek,mt6779-ipesys", "syscon";
+       reg = <0 0x1b000000 0 0x1000>;
+       #clock-cells = <1>;
+};
index 72787e7..ad5f9d2 100644 (file)
@@ -7,6 +7,7 @@ Required Properties:
 
 - compatible: Should be one of:
        - "mediatek,mt2712-mfgcfg", "syscon"
+       - "mediatek,mt6779-mfgcfg", "syscon"
        - "mediatek,mt8183-mfgcfg", "syscon"
 - #clock-cells: Must be 1
 
index 545eab7..301eefb 100644 (file)
@@ -8,6 +8,7 @@ Required Properties:
 - compatible: Should be one of:
        - "mediatek,mt2701-mmsys", "syscon"
        - "mediatek,mt2712-mmsys", "syscon"
+       - "mediatek,mt6779-mmsys", "syscon"
        - "mediatek,mt6797-mmsys", "syscon"
        - "mediatek,mt7623-mmsys", "mediatek,mt2701-mmsys", "syscon"
        - "mediatek,mt8173-mmsys", "syscon"
index 4c7e478..ecf027a 100644 (file)
@@ -14,6 +14,7 @@ Required Properties:
        - "mediatek,mt7629-pericfg", "syscon"
        - "mediatek,mt8135-pericfg", "syscon"
        - "mediatek,mt8173-pericfg", "syscon"
+       - "mediatek,mt8183-pericfg", "syscon"
 - #clock-cells: Must be 1
 - #reset-cells: Must be 1
 
index a023b83..0293d69 100644 (file)
@@ -8,6 +8,7 @@ Required Properties:
 - compatible: Should be one of:
        - "mediatek,mt2701-topckgen"
        - "mediatek,mt2712-topckgen", "syscon"
+       - "mediatek,mt6779-topckgen", "syscon"
        - "mediatek,mt6797-topckgen"
        - "mediatek,mt7622-topckgen"
        - "mediatek,mt7623-topckgen", "mediatek,mt2701-topckgen"
index 57176bb..7894558 100644 (file)
@@ -8,6 +8,7 @@ Required Properties:
 - compatible: Should be one of:
        - "mediatek,mt2701-vdecsys", "syscon"
        - "mediatek,mt2712-vdecsys", "syscon"
+       - "mediatek,mt6779-vdecsys", "syscon"
        - "mediatek,mt6797-vdecsys", "syscon"
        - "mediatek,mt7623-vdecsys", "mediatek,mt2701-vdecsys", "syscon"
        - "mediatek,mt8173-vdecsys", "syscon"
index c9faa62..6a6a14e 100644 (file)
@@ -7,6 +7,7 @@ Required Properties:
 
 - compatible: Should be one of:
        - "mediatek,mt2712-vencsys", "syscon"
+       - "mediatek,mt6779-vencsys", "syscon"
        - "mediatek,mt6797-vencsys", "syscon"
        - "mediatek,mt8173-vencsys", "syscon"
        - "mediatek,mt8183-vencsys", "syscon"
index fa4d143..64938fd 100644 (file)
@@ -31,6 +31,7 @@ properties:
       - allwinner,sun8i-h3-ccu
       - allwinner,sun8i-h3-r-ccu
       - allwinner,sun8i-r40-ccu
+      - allwinner,sun8i-v3-ccu
       - allwinner,sun8i-v3s-ccu
       - allwinner,sun9i-a80-ccu
       - allwinner,sun50i-a64-ccu
index dd906db..9e0b03a 100644 (file)
@@ -12,7 +12,9 @@ clock generators, but a few (like the ARM or HDMI) will source from
 the PLL dividers directly.
 
 Required properties:
-- compatible:  Should be "brcm,bcm2835-cprman"
+- compatible:  should be one of the following,
+       "brcm,bcm2711-cprman"
+       "brcm,bcm2835-cprman"
 - #clock-cells:        Should be <1>. The permitted clock-specifier values can be
                  found in include/dt-bindings/clock/bcm2835.h
 - reg:         Specifies base physical address and size of the registers
index 8661c3c..d14362a 100644 (file)
@@ -23,6 +23,7 @@ Required properties :
                        "qcom,gcc-sdm630"
                        "qcom,gcc-sdm660"
                        "qcom,gcc-sdm845"
+                       "qcom,gcc-sm8150"
 
 - reg : shall contain base register location and length
 - #clock-cells : shall contain 1
@@ -38,6 +39,13 @@ Documentation/devicetree/bindings/thermal/qcom-tsens.txt
 - protected-clocks : Protected clock specifier list as per common clock
  binding.
 
+For SM8150 only:
+       - clocks: a list of phandles and clock-specifier pairs,
+                 one for each entry in clock-names.
+       - clock-names: "bi_tcxo" (required)
+                      "sleep_clk" (optional)
+                      "aud_ref_clock" (optional)
+
 Example:
        clock-controller@900000 {
                compatible = "qcom,gcc-msm8960";
@@ -71,3 +79,16 @@ Example of GCC with protected-clocks properties:
                                   <GCC_LPASS_Q6_AXI_CLK>,
                                   <GCC_LPASS_SWAY_CLK>;
        };
+
+Example of GCC with clocks:
+       gcc: clock-controller@100000 {
+               compatible = "qcom,gcc-sm8150";
+               reg = <0x00100000 0x1f0000>;
+               #clock-cells = <1>;
+               #reset-cells = <1>;
+               #power-domain-cells = <1>;
+               clock-names = "bi_tcxo",
+                             "sleep_clk";
+               clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+                        <&sleep_clk>;
+       };
index 3c00765..365bbde 100644 (file)
@@ -6,9 +6,14 @@ some Qualcomm Technologies Inc. SoCs. It accepts clock requests from
 other hardware subsystems via RSC to control clocks.
 
 Required properties :
-- compatible : shall contain "qcom,sdm845-rpmh-clk"
+- compatible : must be one of:
+              "qcom,sdm845-rpmh-clk"
+              "qcom,sm8150-rpmh-clk"
 
 - #clock-cells : must contain 1
+- clocks: a list of phandles and clock-specifier pairs,
+         one for each entry in clock-names.
+- clock-names: Parent board clock: "xo".
 
 Example :
 
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3308-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk3308-cru.txt
new file mode 100644 (file)
index 0000000..9b151c5
--- /dev/null
@@ -0,0 +1,60 @@
+* Rockchip RK3308 Clock and Reset Unit
+
+The RK3308 clock controller generates and supplies clock to various
+controllers within the SoC and also implements a reset controller for SoC
+peripherals.
+
+Required Properties:
+
+- compatible: CRU should be "rockchip,rk3308-cru"
+- reg: physical base address of the controller and length of memory mapped
+  region.
+- #clock-cells: should be 1.
+- #reset-cells: should be 1.
+
+Optional Properties:
+
+- rockchip,grf: phandle to the syscon managing the "general register files"
+  If missing, pll rates are not changeable, due to the missing pll lock status.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume. All available clocks are defined as
+preprocessor macros in the dt-bindings/clock/rk3308-cru.h headers and can be
+used in device tree sources. Similar macros exist for the reset sources in
+these files.
+
+External clocks:
+
+There are several clocks that are generated outside the SoC. It is expected
+that they are defined using standard clock bindings with the following
+clock-output-names:
+ - "xin24m" - crystal input - required,
+ - "xin32k" - rtc clock - optional,
+ - "mclk_i2s0_8ch_in", "mclk_i2s1_8ch_in", "mclk_i2s2_8ch_in",
+   "mclk_i2s3_8ch_in", "mclk_i2s0_2ch_in",
+   "mclk_i2s1_2ch_in" - external I2S or SPDIF clock - optional,
+ - "mac_clkin" - external MAC clock - optional
+
+Example: Clock controller node:
+
+       cru: clock-controller@ff500000 {
+               compatible = "rockchip,rk3308-cru";
+               reg = <0x0 0xff500000 0x0 0x1000>;
+               rockchip,grf = <&grf>;
+               #clock-cells = <1>;
+               #reset-cells = <1>;
+       };
+
+Example: UART controller node that consumes the clock generated by the clock
+  controller:
+
+       uart0: serial@ff0a0000 {
+               compatible = "rockchip,rk3308-uart", "snps,dw-apb-uart";
+               reg = <0x0 0xff0a0000 0x0 0x100>;
+               interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&cru SCLK_UART0>, <&cru PCLK_UART0>;
+               clock-names = "baudclk", "apb_pclk";
+               reg-shift = <2>;
+               reg-io-width = <4>;
+               status = "disabled";
+       };
index 0d01f2d..26544c8 100644 (file)
@@ -24,6 +24,8 @@ Required properties:
 Optional properties:
  - xtal-load-pf: Crystal load-capacitor value to fine-tune performance on a
                  board, or to compensate for external influences.
+ - vdd-supply: A regulator node for Vdd
+ - vddout-supply: A regulator node for Vddout
 
 For all PLL1, PLL2, ... an optional child node can be used to specify spread
 spectrum clocking parameters for a board.
@@ -41,6 +43,8 @@ Example:
                clocks = <&xtal_27Mhz>;
                #clock-cells = <1>;
                xtal-load-pf = <5>;
+               vdd-supply = <&reg_1v8>;
+               vddout-supply = <&reg_3v3>;
                /* PLL options to get SSC 1% centered */
                PLL2 {
                        spread-spectrum = <4>;
index e9de375..c9a6587 100644 (file)
@@ -1,7 +1,9 @@
 Broadcom BCM2835 I2C controller
 
 Required properties:
-- compatible : Should be "brcm,bcm2835-i2c".
+- compatible : Should be one of:
+       "brcm,bcm2711-i2c"
+       "brcm,bcm2835-i2c"
 - reg: Should contain register location and length.
 - interrupts: Should contain interrupt.
 - clocks : The clock feeding the I2C controller.
index 0ebd08a..a9b105a 100644 (file)
@@ -8,11 +8,12 @@ MT6397/MT6323 is a multifunction device with the following sub modules:
 - Clock
 - LED
 - Keys
+- Power controller
 
 It is interfaced to host controller using SPI interface by a proprietary hardware
 called PMIC wrapper or pwrap. MT6397/MT6323 MFD is a child device of pwrap.
 See the following for pwrap node definitions:
-Documentation/devicetree/bindings/soc/mediatek/pwrap.txt
+../soc/mediatek/pwrap.txt
 
 This document describes the binding for MFD device and its sub module.
 
@@ -22,14 +23,16 @@ compatible: "mediatek,mt6397" or "mediatek,mt6323"
 Optional subnodes:
 
 - rtc
-       Required properties:
+       Required properties: Should be one of the following
+               - compatible: "mediatek,mt6323-rtc"
                - compatible: "mediatek,mt6397-rtc"
+       For details, see ../rtc/rtc-mt6397.txt
 - regulators
        Required properties:
                - compatible: "mediatek,mt6397-regulator"
-       see Documentation/devicetree/bindings/regulator/mt6397-regulator.txt
+       see ../regulator/mt6397-regulator.txt
                - compatible: "mediatek,mt6323-regulator"
-       see Documentation/devicetree/bindings/regulator/mt6323-regulator.txt
+       see ../regulator/mt6323-regulator.txt
 - codec
        Required properties:
                - compatible: "mediatek,mt6397-codec"
@@ -39,12 +42,17 @@ Optional subnodes:
 - led
        Required properties:
                - compatible: "mediatek,mt6323-led"
-       see Documentation/devicetree/bindings/leds/leds-mt6323.txt
+       see ../leds/leds-mt6323.txt
 
 - keys
        Required properties:
                - compatible: "mediatek,mt6397-keys" or "mediatek,mt6323-keys"
-       see Documentation/devicetree/bindings/input/mtk-pmic-keys.txt
+       see ../input/mtk-pmic-keys.txt
+
+- power-controller
+       Required properties:
+               - compatible: "mediatek,mt6323-pwrc"
+       For details, see ../power/reset/mt6323-poweroff.txt
 
 Example:
        pwrap: pwrap@1000f000 {
index 65c2326..b74e5e9 100644 (file)
@@ -14,6 +14,10 @@ Required properties:
                "ricoh,rc5t619"
  - reg: the I2C slave address of the device
 
+Optional properties:
+ - system-power-controller:
+   See Documentation/devicetree/bindings/power/power-controller.txt
+
 Sub-nodes:
  - regulators: the node is required if the regulator functionality is
    needed. The valid regulator names are: DCDC1, DCDC2, DCDC3, DCDC4
@@ -28,6 +32,7 @@ Example:
        pmic@32 {
                compatible = "ricoh,rn5t618";
                reg = <0x32>;
+               system-power-controller;
 
                regulators {
                        DCDC1 {
diff --git a/Documentation/devicetree/bindings/mtd/mxic-nand.txt b/Documentation/devicetree/bindings/mtd/mxic-nand.txt
new file mode 100644 (file)
index 0000000..46c5529
--- /dev/null
@@ -0,0 +1,36 @@
+Macronix Raw NAND Controller Device Tree Bindings
+-------------------------------------------------
+
+Required properties:
+- compatible: should be "mxic,multi-itfc-v009-nand-controller"
+- reg: should contain 1 entry for the registers
+- #address-cells: should be set to 1
+- #size-cells: should be set to 0
+- interrupts: interrupt line connected to this raw NAND controller
+- clock-names: should contain "ps", "send" and "send_dly"
+- clocks: should contain 3 phandles for the "ps", "send" and
+        "send_dly" clocks
+
+Children nodes:
+- children nodes represent the available NAND chips.
+
+See Documentation/devicetree/bindings/mtd/nand-controller.yaml
+for more details on generic bindings.
+
+Example:
+
+       nand: nand-controller@43c30000 {
+               compatible = "mxic,multi-itfc-v009-nand-controller";
+               reg = <0x43c30000 0x10000>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               interrupts = <GIC_SPI 0x1d IRQ_TYPE_EDGE_RISING>;
+               clocks = <&clkwizard 0>, <&clkwizard 1>, <&clkc 15>;
+               clock-names = "send", "send_dly", "ps";
+
+               nand@0 {
+                       reg = <0>;
+                       nand-ecc-mode = "soft";
+                       nand-ecc-algo = "bch";
+               };
+       };
index 5561a1c..78494c4 100644 (file)
@@ -11,7 +11,6 @@ Required properties:
             the ATU address space.
     (The old way of getting the configuration address space from "ranges"
     is deprecated and should be avoided.)
-- num-lanes: number of lanes to use
 RC mode:
 - #address-cells: set to <3>
 - #size-cells: set to <2>
@@ -34,6 +33,11 @@ Optional properties:
 - clock-names: Must include the following entries:
        - "pcie"
        - "pcie_bus"
+- snps,enable-cdm-check: This is a boolean property and if present enables
+   automatic checking of CDM (Configuration Dependent Module) registers
+   for data corruption. CDM registers include standard PCIe configuration
+   space registers, Port Logic registers, DMA and iATU (internal Address
+   Translation Unit) registers.
 RC mode:
 - num-viewport: number of view ports configured in hardware. If a platform
   does not specify it, the driver assumes 2.
index a7f5f5a..de4b2ba 100644 (file)
@@ -50,7 +50,7 @@ Additional required properties for imx7d-pcie and imx8mq-pcie:
 - power-domains: Must be set to a phandle pointing to PCIE_PHY power domain
 - resets: Must contain phandles to PCIe-related reset lines exposed by SRC
   IP block
-- reset-names: Must contain the following entires:
+- reset-names: Must contain the following entries:
               - "pciephy"
               - "apps"
               - "turnoff"
index 92437a3..7468d66 100644 (file)
@@ -6,6 +6,7 @@ Required properties:
        "mediatek,mt2712-pcie"
        "mediatek,mt7622-pcie"
        "mediatek,mt7623-pcie"
+       "mediatek,mt7629-pcie"
 - device_type: Must be "pci"
 - reg: Base addresses and lengths of the PCIe subsys and root ports.
 - reg-names: Names of the above areas to use during resource lookup.
diff --git a/Documentation/devicetree/bindings/pci/nvidia,tegra194-pcie.txt b/Documentation/devicetree/bindings/pci/nvidia,tegra194-pcie.txt
new file mode 100644 (file)
index 0000000..b739f92
--- /dev/null
@@ -0,0 +1,171 @@
+NVIDIA Tegra PCIe controller (Synopsys DesignWare Core based)
+
+This PCIe host controller is based on the Synopsys DesignWare PCIe IP
+and thus inherits all the common properties defined in designware-pcie.txt.
+
+Required properties:
+- compatible: For Tegra19x, must contain "nvidia,tegra194-pcie".
+- device_type: Must be "pci"
+- power-domains: A phandle to the node that controls power to the respective
+  PCIe controller and a specifier name for the PCIe controller. Following are
+  the specifiers for the different PCIe controllers
+    TEGRA194_POWER_DOMAIN_PCIEX8B: C0
+    TEGRA194_POWER_DOMAIN_PCIEX1A: C1
+    TEGRA194_POWER_DOMAIN_PCIEX1A: C2
+    TEGRA194_POWER_DOMAIN_PCIEX1A: C3
+    TEGRA194_POWER_DOMAIN_PCIEX4A: C4
+    TEGRA194_POWER_DOMAIN_PCIEX8A: C5
+  these specifiers are defined in
+  "include/dt-bindings/power/tegra194-powergate.h" file.
+- reg: A list of physical base address and length pairs for each set of
+  controller registers. Must contain an entry for each entry in the reg-names
+  property.
+- reg-names: Must include the following entries:
+  "appl": Controller's application logic registers
+  "config": As per the definition in designware-pcie.txt
+  "atu_dma": iATU and DMA registers. This is where the iATU (internal Address
+             Translation Unit) registers of the PCIe core are made available
+             for SW access.
+  "dbi": The aperture where root port's own configuration registers are
+         available
+- interrupts: A list of interrupt outputs of the controller. Must contain an
+  entry for each entry in the interrupt-names property.
+- interrupt-names: Must include the following entries:
+  "intr": The Tegra interrupt that is asserted for controller interrupts
+  "msi": The Tegra interrupt that is asserted when an MSI is received
+- bus-range: Range of bus numbers associated with this controller
+- #address-cells: Address representation for root ports (must be 3)
+  - cell 0 specifies the bus and device numbers of the root port:
+    [23:16]: bus number
+    [15:11]: device number
+  - cell 1 denotes the upper 32 address bits and should be 0
+  - cell 2 contains the lower 32 address bits and is used to translate to the
+    CPU address space
+- #size-cells: Size representation for root ports (must be 2)
+- ranges: Describes the translation of addresses for root ports and standard
+  PCI regions. The entries must be 7 cells each, where the first three cells
+  correspond to the address as described for the #address-cells property
+  above, the fourth and fifth cells are for the physical CPU address to
+  translate to and the sixth and seventh cells are as described for the
+  #size-cells property above.
+  - Entries set up the mapping for the standard I/O, memory and
+    prefetchable PCI regions. The first cell determines the type of region
+    that is set up:
+    - 0x81000000: I/O memory region
+    - 0x82000000: non-prefetchable memory region
+    - 0xc2000000: prefetchable memory region
+  Please refer to the standard PCI bus binding document for a more detailed
+  explanation.
+- #interrupt-cells: Size representation for interrupts (must be 1)
+- interrupt-map-mask and interrupt-map: Standard PCI IRQ mapping properties
+  Please refer to the standard PCI bus binding document for a more detailed
+  explanation.
+- clocks: Must contain an entry for each entry in clock-names.
+  See ../clocks/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+  - core
+- resets: Must contain an entry for each entry in reset-names.
+  See ../reset/reset.txt for details.
+- reset-names: Must include the following entries:
+  - apb
+  - core
+- phys: Must contain a phandle to P2U PHY for each entry in phy-names.
+- phy-names: Must include an entry for each active lane.
+  "p2u-N": where N ranges from 0 to one less than the total number of lanes
+- nvidia,bpmp: Must contain a pair of phandle to BPMP controller node followed
+  by controller-id. Following are the controller ids for each controller.
+    0: C0
+    1: C1
+    2: C2
+    3: C3
+    4: C4
+    5: C5
+- vddio-pex-ctl-supply: Regulator supply for PCIe side band signals
+
+Optional properties:
+- pinctrl-names: A list of pinctrl state names.
+  It is mandatory for C5 controller and optional for other controllers.
+  - "default": Configures PCIe I/O for proper operation.
+- pinctrl-0: phandle for the 'default' state of pin configuration.
+  It is mandatory for C5 controller and optional for other controllers.
+- supports-clkreq: Refer to Documentation/devicetree/bindings/pci/pci.txt
+- nvidia,update-fc-fixup: This is a boolean property and needs to be present to
+    improve performance when a platform is designed in such a way that it
+    satisfies at least one of the following conditions thereby enabling root
+    port to exchange optimum number of FC (Flow Control) credits with
+    downstream devices
+    1. If C0/C4/C5 run at x1/x2 link widths (irrespective of speed and MPS)
+    2. If C0/C1/C2/C3/C4/C5 operate at their respective max link widths and
+       a) speed is Gen-2 and MPS is 256B
+       b) speed is >= Gen-3 with any MPS
+- nvidia,aspm-cmrt-us: Common Mode Restore Time for proper operation of ASPM
+   to be specified in microseconds
+- nvidia,aspm-pwr-on-t-us: Power On time for proper operation of ASPM to be
+   specified in microseconds
+- nvidia,aspm-l0s-entrance-latency-us: ASPM L0s entrance latency to be
+   specified in microseconds
+- vpcie3v3-supply: A phandle to the regulator node that supplies 3.3V to the slot
+  if the platform has one such slot. (Ex:- x16 slot owned by C5 controller
+  in p2972-0000 platform).
+- vpcie12v-supply: A phandle to the regulator node that supplies 12V to the slot
+  if the platform has one such slot. (Ex:- x16 slot owned by C5 controller
+  in p2972-0000 platform).
+
+Examples:
+=========
+
+Tegra194:
+--------
+
+       pcie@14180000 {
+               compatible = "nvidia,tegra194-pcie", "snps,dw-pcie";
+               power-domains = <&bpmp TEGRA194_POWER_DOMAIN_PCIEX8B>;
+               reg = <0x00 0x14180000 0x0 0x00020000   /* appl registers (128K)      */
+                      0x00 0x38000000 0x0 0x00040000   /* configuration space (256K) */
+                      0x00 0x38040000 0x0 0x00040000>; /* iATU_DMA reg space (256K)  */
+               reg-names = "appl", "config", "atu_dma";
+
+               #address-cells = <3>;
+               #size-cells = <2>;
+               device_type = "pci";
+               num-lanes = <8>;
+               linux,pci-domain = <0>;
+
+               pinctrl-names = "default";
+               pinctrl-0 = <&pex_rst_c5_out_state>, <&clkreq_c5_bi_dir_state>;
+
+               clocks = <&bpmp TEGRA194_CLK_PEX0_CORE_0>;
+               clock-names = "core";
+
+               resets = <&bpmp TEGRA194_RESET_PEX0_CORE_0_APB>,
+                        <&bpmp TEGRA194_RESET_PEX0_CORE_0>;
+               reset-names = "apb", "core";
+
+               interrupts = <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>,  /* controller interrupt */
+                            <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;  /* MSI interrupt */
+               interrupt-names = "intr", "msi";
+
+               #interrupt-cells = <1>;
+               interrupt-map-mask = <0 0 0 0>;
+               interrupt-map = <0 0 0 0 &gic GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>;
+
+               nvidia,bpmp = <&bpmp 0>;
+
+               supports-clkreq;
+               nvidia,aspm-cmrt-us = <60>;
+               nvidia,aspm-pwr-on-t-us = <20>;
+               nvidia,aspm-l0s-entrance-latency-us = <3>;
+
+               bus-range = <0x0 0xff>;
+               ranges = <0x81000000 0x0  0x38100000 0x0  0x38100000 0x0 0x00100000    /* downstream I/O (1MB) */
+                         0x82000000 0x0  0x38200000 0x0  0x38200000 0x0 0x01E00000    /* non-prefetchable memory (30MB) */
+                         0xc2000000 0x18 0x00000000 0x18 0x00000000 0x4 0x00000000>;  /* prefetchable memory (16GB) */
+
+               vddio-pex-ctl-supply = <&vdd_1v8ao>;
+               vpcie3v3-supply = <&vdd_3v3_pcie>;
+               vpcie12v-supply = <&vdd_12v_pcie>;
+
+               phys = <&p2u_hsio_2>, <&p2u_hsio_3>, <&p2u_hsio_4>,
+                      <&p2u_hsio_5>;
+               phy-names = "p2u-0", "p2u-1", "p2u-2", "p2u-3";
+       };
index 8324a4e..7a813d0 100644 (file)
@@ -11,7 +11,7 @@ Required properties:
 - reg-names:
    - "ctrl" for the control register region
    - "config" for the config space region
-- interrupts: Interrupt specifier for the PCIe controler
+- interrupts: Interrupt specifier for the PCIe controller
 - clocks: reference to the PCIe controller clocks
 - clock-names: mandatory if there is a second clock, in this case the
    name must be "core" for the first clock and "reg" for the second
index 2a5d910..29bcbd8 100644 (file)
@@ -27,6 +27,11 @@ driver implementation may support the following properties:
 - reset-gpios:
    If present this property specifies PERST# GPIO. Host drivers can parse the
    GPIO and apply fundamental reset to endpoints.
+- supports-clkreq:
+   If present this property specifies that CLKREQ signal routing exists from
+   root port to downstream device and host bridge drivers can do programming
+   which depends on CLKREQ signal existence. For example, programming root port
+   not to advertise ASPM L1 Sub-States support if there is no CLKREQ signal.
 
 PCI-PCI Bridge properties
 -------------------------
diff --git a/Documentation/devicetree/bindings/pci/pcie-al.txt b/Documentation/devicetree/bindings/pci/pcie-al.txt
new file mode 100644 (file)
index 0000000..557a508
--- /dev/null
@@ -0,0 +1,46 @@
+* Amazon Annapurna Labs PCIe host bridge
+
+Amazon's Annapurna Labs PCIe Host Controller is based on the Synopsys DesignWare
+PCI core. It inherits common properties defined in
+Documentation/devicetree/bindings/pci/designware-pcie.txt.
+
+Properties of the host controller node that differ from it are:
+
+- compatible:
+       Usage: required
+       Value type: <stringlist>
+       Definition: Value should contain
+                       - "amazon,al-alpine-v2-pcie" for alpine_v2
+                       - "amazon,al-alpine-v3-pcie" for alpine_v3
+
+- reg:
+       Usage: required
+       Value type: <prop-encoded-array>
+       Definition: Register ranges as listed in the reg-names property
+
+- reg-names:
+       Usage: required
+       Value type: <stringlist>
+       Definition: Must include the following entries
+                       - "config"      PCIe ECAM space
+                       - "controller"  AL proprietary registers
+                       - "dbi"         Designware PCIe registers
+
+Example:
+
+       pcie-external0: pcie@fb600000 {
+               compatible = "amazon,al-alpine-v3-pcie";
+               reg = <0x0 0xfb600000 0x0 0x00100000
+                      0x0 0xfd800000 0x0 0x00010000
+                      0x0 0xfd810000 0x0 0x00001000>;
+               reg-names = "config", "controller", "dbi";
+               bus-range = <0 255>;
+               device_type = "pci";
+               #address-cells = <3>;
+               #size-cells = <2>;
+               #interrupt-cells = <1>;
+               interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-map-mask = <0x00 0 0 7>;
+               interrupt-map = <0x0000 0 0 1 &gic GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>; /* INTa */
+               ranges = <0x02000000 0x0 0xc0010000 0x0 0xc0010000 0x0 0x07ff0000>;
+       };
diff --git a/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.txt b/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.txt
new file mode 100644 (file)
index 0000000..d23ff90
--- /dev/null
@@ -0,0 +1,28 @@
+NVIDIA Tegra194 P2U binding
+
+Tegra194 has two PHY bricks namely HSIO (High Speed IO) and NVHS (NVIDIA High
+Speed) each interfacing with 12 and 8 P2U instances respectively.
+A P2U instance is a glue logic between Synopsys DesignWare Core PCIe IP's PIPE
+interface and PHY of HSIO/NVHS bricks. Each P2U instance represents one PCIe
+lane.
+
+Required properties:
+- compatible: For Tegra19x, must contain "nvidia,tegra194-p2u".
+- reg: Should be the physical address space and length of each respective P2U
+       instance.
+- reg-names: Must include the entry "ctl".
+
+Required properties for PHY port node:
+- #phy-cells: Defined by generic PHY bindings.  Must be 0.
+
+Refer to phy/phy-bindings.txt for the generic PHY binding properties.
+
+Example:
+
+p2u_hsio_0: phy@3e10000 {
+       compatible = "nvidia,tegra194-p2u";
+       reg = <0x03e10000 0x10000>;
+       reg-names = "ctl";
+
+       #phy-cells = <0>;
+};
diff --git a/Documentation/devicetree/bindings/power/reset/mt6323-poweroff.txt b/Documentation/devicetree/bindings/power/reset/mt6323-poweroff.txt
new file mode 100644 (file)
index 0000000..933f0c4
--- /dev/null
@@ -0,0 +1,20 @@
+Device Tree Bindings for Power Controller on MediaTek PMIC
+
+The power controller which could be found on PMIC is responsible for externally
+powering off or on the remote MediaTek SoC through the circuit BBPU.
+
+Required properties:
+- compatible: Should be one of the following
+       "mediatek,mt6323-pwrc": for MT6323 PMIC
+
+Example:
+
+       pmic {
+               compatible = "mediatek,mt6323";
+
+               ...
+
+               power-controller {
+                       compatible = "mediatek,mt6323-pwrc";
+               };
+       };
diff --git a/Documentation/devicetree/bindings/pwm/ingenic,jz47xx-pwm.txt b/Documentation/devicetree/bindings/pwm/ingenic,jz47xx-pwm.txt
deleted file mode 100644 (file)
index 493bec8..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-Ingenic JZ47xx PWM Controller
-=============================
-
-Required properties:
-- compatible: Should be "ingenic,jz4740-pwm"
-- #pwm-cells: Should be 3. See pwm.txt in this directory for a description
-  of the cells format.
-- clocks : phandle to the external clock.
-- clock-names : Should be "ext".
-
-
-Example:
-
-       pwm: pwm@10002000 {
-               compatible = "ingenic,jz4740-pwm";
-               reg = <0x10002000 0x1000>;
-
-               #pwm-cells = <3>;
-
-               clocks = <&ext>;
-               clock-names = "ext";
-       };
index 991728c..c850153 100644 (file)
@@ -6,6 +6,8 @@ Required properties:
    - "mediatek,mt7622-pwm": found on mt7622 SoC.
    - "mediatek,mt7623-pwm": found on mt7623 SoC.
    - "mediatek,mt7628-pwm": found on mt7628 SoC.
+   - "mediatek,mt7629-pwm", "mediatek,mt7622-pwm": found on mt7629 SoC.
+   - "mediatek,mt8516-pwm": found on mt8516 SoC.
  - reg: physical base address and length of the controller's registers.
  - #pwm-cells: must be 2. See pwm.txt in this directory for a description of
    the cell format.
diff --git a/Documentation/devicetree/bindings/pwm/pwm-sprd.txt b/Documentation/devicetree/bindings/pwm/pwm-sprd.txt
new file mode 100644 (file)
index 0000000..16fa5a0
--- /dev/null
@@ -0,0 +1,40 @@
+Spreadtrum PWM controller
+
+Spreadtrum SoCs PWM controller provides 4 PWM channels.
+
+Required properties:
+- compatible : Should be "sprd,ums512-pwm".
+- reg: Physical base address and length of the controller's registers.
+- clocks: The phandle and specifier referencing the controller's clocks.
+- clock-names: Should contain the following entries:
+  "pwmn": used to derive the functional clock for PWM channel n (n range: 0 ~ 3).
+  "enablen": for PWM channel n enable clock (n range: 0 ~ 3).
+- #pwm-cells: Should be 2. See pwm.txt in this directory for a description of
+  the cells format.
+
+Optional properties:
+- assigned-clocks: Reference to the PWM clock entries.
+- assigned-clock-parents: The phandle of the parent clock of PWM clock.
+
+Example:
+       pwms: pwm@32260000 {
+               compatible = "sprd,ums512-pwm";
+               reg = <0 0x32260000 0 0x10000>;
+               clock-names = "pwm0", "enable0",
+                       "pwm1", "enable1",
+                       "pwm2", "enable2",
+                       "pwm3", "enable3";
+               clocks = <&aon_clk CLK_PWM0>, <&aonapb_gate CLK_PWM0_EB>,
+                      <&aon_clk CLK_PWM1>, <&aonapb_gate CLK_PWM1_EB>,
+                      <&aon_clk CLK_PWM2>, <&aonapb_gate CLK_PWM2_EB>,
+                      <&aon_clk CLK_PWM3>, <&aonapb_gate CLK_PWM3_EB>;
+               assigned-clocks = <&aon_clk CLK_PWM0>,
+                       <&aon_clk CLK_PWM1>,
+                       <&aon_clk CLK_PWM2>,
+                       <&aon_clk CLK_PWM3>;
+               assigned-clock-parents = <&ext_26m>,
+                       <&ext_26m>,
+                       <&ext_26m>,
+                       <&ext_26m>;
+               #pwm-cells = <2>;
+       };
index 924622f..d7a57ec 100644 (file)
@@ -25,6 +25,7 @@ properties:
       - items:
           - const: allwinner,sun50i-a64-rtc
           - const: allwinner,sun8i-h3-rtc
+      - const: allwinner,sun50i-h6-rtc
 
   reg:
     maxItems: 1
@@ -96,6 +97,18 @@ allOf:
       properties:
         compatible:
           contains:
+            const: allwinner,sun50i-h6-rtc
+
+    then:
+      properties:
+        clock-output-names:
+          minItems: 3
+          maxItems: 3
+
+  - if:
+      properties:
+        compatible:
+          contains:
             const: allwinner,sun8i-r40-rtc
 
     then:
index 1994f60..7371f52 100644 (file)
@@ -1,7 +1,7 @@
 NXP PCF2123 SPI Real Time Clock
 
 Required properties:
-- compatible: should be: "nxp,rtc-pcf2123"
+- compatible: should be: "nxp,pcf2123"
                       or "microcrystal,rv2123"
 - reg: should be the SPI slave chipselect address
 
@@ -11,7 +11,7 @@ Optional properties:
 Example:
 
 pcf2123: rtc@3 {
-       compatible = "nxp,rtc-pcf2123"
+       compatible = "nxp,pcf2123";
        reg = <3>;
        spi-cs-high;
 };
index 36984ac..6076fe7 100644 (file)
@@ -3,7 +3,9 @@
 Philips PCF8563/Epson RTC8564 Real Time Clock
 
 Required properties:
-- compatible: Should contain "nxp,pcf8563".
+- compatible: Should contain "nxp,pcf8563",
+       "epson,rtc8564" or
+       "microcrystal,rv8564"
 - reg: I2C address for chip.
 
 Optional property:
index eaee19b..66f0a31 100644 (file)
@@ -19,6 +19,7 @@ Required properties:
        "pericom,pt7c4338",
        "epson,rx8025",
        "isil,isl12057"
+       "epson,rx8130"
 - reg: I2C bus address of the device
 
 Optional properties:
diff --git a/Documentation/devicetree/bindings/rtc/rtc-fsl-ftm-alarm.txt b/Documentation/devicetree/bindings/rtc/rtc-fsl-ftm-alarm.txt
new file mode 100644 (file)
index 0000000..fffac74
--- /dev/null
@@ -0,0 +1,36 @@
+Freescale FlexTimer Module (FTM) Alarm
+
+Required properties:
+- compatible : Should be "fsl,<chip>-ftm-alarm", the
+              supported chips include
+              "fsl,ls1012a-ftm-alarm"
+              "fsl,ls1021a-ftm-alarm"
+              "fsl,ls1028a-ftm-alarm"
+              "fsl,ls1043a-ftm-alarm"
+              "fsl,ls1046a-ftm-alarm"
+              "fsl,ls1088a-ftm-alarm"
+              "fsl,ls208xa-ftm-alarm"
+              "fsl,lx2160a-ftm-alarm"
+- reg : Specifies base physical address and size of the register sets for the
+  FlexTimer Module.
+- interrupts : Should be the FlexTimer Module interrupt.
+- fsl,rcpm-wakeup property and rcpm node : Please refer to
+       Documentation/devicetree/bindings/soc/fsl/rcpm.txt
+
+Optional properties:
+- big-endian: If the host controller is in big-endian mode, specify this property.
+  The default endian mode is little-endian.
+
+Example:
+rcpm: rcpm@1e34040 {
+       compatible = "fsl,ls1088a-rcpm", "fsl,qoriq-rcpm-2.1+";
+       reg = <0x0 0x1e34040 0x0 0x18>;
+       #fsl,rcpm-wakeup-cells = <6>;
+};
+
+ftm_alarm0: timer@2800000 {
+       compatible = "fsl,ls1088a-ftm-alarm";
+       reg = <0x0 0x2800000 0x0 0x10000>;
+       fsl,rcpm-wakeup = <&rcpm 0x0 0x0 0x0 0x0 0x4000 0x0>;
+       interrupts = <0 44 4>;
+};
diff --git a/Documentation/devicetree/bindings/rtc/rtc-meson-vrtc.txt b/Documentation/devicetree/bindings/rtc/rtc-meson-vrtc.txt
new file mode 100644 (file)
index 0000000..c014f54
--- /dev/null
@@ -0,0 +1,22 @@
+* Amlogic Virtual RTC (VRTC)
+
+This is a Linux interface to an RTC managed by firmware, hence it's
+virtual from a Linux perspective.  The interface is 1 register where
+an alarm time (in seconds) is to be written.
+
+Required properties:
+- compatible: should be "amlogic,meson-vrtc"
+- reg: physical address for the alarm register
+
+The alarm register is a simple scratch register shared between the
+application processors (AP) and the secure co-processor (SCP).  When
+the AP suspends, the SCP will use the value of this register to
+program an always-on timer before going to sleep. When the timer expires,
+the SCP will wake up and will then wake the AP.
+
+Example:
+
+       vrtc: rtc@0a8 {
+               compatible = "amlogic,meson-vrtc";
+               reg = <0x0 0x000a8 0x0 0x4>;
+       };
index 0c12ce9..18cb456 100644 (file)
@@ -52,8 +52,6 @@ properties:
       - nxp,pcf2127
       # Real-time clock
       - nxp,pcf2129
-      # Real-time clock/calendar
-      - nxp,pcf8563
       # Real-time Clock Module
       - pericom,pt7c4338
       # I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
diff --git a/Documentation/devicetree/bindings/soundwire/soundwire-controller.yaml b/Documentation/devicetree/bindings/soundwire/soundwire-controller.yaml
new file mode 100644 (file)
index 0000000..1b43993
--- /dev/null
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/soundwire/soundwire-controller.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SoundWire Controller Generic Binding
+
+maintainers:
+  - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+  - Vinod Koul <vkoul@kernel.org>
+
+description: |
+  SoundWire busses can be described with a node for the SoundWire controller
+  device and a set of child nodes for each SoundWire slave on the bus.
+
+properties:
+  $nodename:
+    pattern: "^soundwire(@.*)?$"
+
+  "#address-cells":
+    const: 2
+
+  "#size-cells":
+    const: 0
+
+patternProperties:
+  "^.*@[0-9a-f],[0-9a-f]$":
+    type: object
+
+    properties:
+      compatible:
+        pattern: "^sdw[0-9a-f]{1}[0-9a-f]{4}[0-9a-f]{4}[0-9a-f]{2}$"
+        description: Is the textual representation of SoundWire Enumeration
+          address. compatible string should contain SoundWire Version ID,
+          Manufacturer ID, Part ID and Class ID in order and shall be in
+          lower-case hexadecimal with leading zeroes.
+          Valid sizes of these fields are
+          Version ID is 1 nibble, number '0x1' represents SoundWire 1.0
+          and '0x2' represents SoundWire 1.1 and so on.
+          MFD is 4 nibbles
+          PID is 4 nibbles
+          CID is 2 nibbles
+          More information on the encoding of these fields can be
+          found in the MIPI Alliance DisCo & SoundWire 1.0 Specifications.
+
+      reg:
+        maxItems: 1
+        description:
+          Link ID followed by Instance ID of SoundWire Device Address.
+
+    required:
+      - compatible
+      - reg
+
+required:
+  - "#address-cells"
+  - "#size-cells"
+
+examples:
+  - |
+    soundwire@c2d0000 {
+        #address-cells = <2>;
+        #size-cells = <0>;
+        reg = <0x0c2d0000 0x2000>;
+
+        speaker@0,1 {
+            compatible = "sdw10217201000";
+            reg = <0 1>;
+            powerdown-gpios = <&wcdpinctrl 2 0>;
+            #thermal-sensor-cells = <0>;
+        };
+
+        speaker@0,2 {
+            compatible = "sdw10217201000";
+            reg = <0 2>;
+            powerdown-gpios = <&wcdpinctrl 2 0>;
+            #thermal-sensor-cells = <0>;
+        };
+    };
+
+...
index 04cbb90..28f2cba 100644 (file)
@@ -23,6 +23,7 @@ Required properties:
 Optional property:
 - little-endian : If present, the TMU registers are little endian. If absent,
        the default is big endian.
+- clocks : the clock for clocking the TMU silicon.
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/timer/ingenic,tcu.txt b/Documentation/devicetree/bindings/timer/ingenic,tcu.txt
new file mode 100644 (file)
index 0000000..5a4b9dd
--- /dev/null
@@ -0,0 +1,137 @@
+Ingenic JZ47xx SoCs Timer/Counter Unit devicetree bindings
+==========================================================
+
+For a description of the TCU hardware and drivers, have a look at
+Documentation/mips/ingenic-tcu.txt.
+
+Required properties:
+
+- compatible: Must be one of:
+  * ingenic,jz4740-tcu
+  * ingenic,jz4725b-tcu
+  * ingenic,jz4770-tcu
+  followed by "simple-mfd".
+- reg: Should be the offset/length value corresponding to the TCU registers
+- clocks: List of phandle & clock specifiers for clocks external to the TCU.
+  The "pclk", "rtc" and "ext" clocks should be provided. The "tcu" clock
+  should be provided if the SoC has it.
+- clock-names: List of name strings for the external clocks.
+- #clock-cells: Should be <1>;
+  Clock consumers specify this argument to identify a clock. The valid values
+  may be found in <dt-bindings/clock/ingenic,tcu.h>.
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+  interrupt source. The value should be 1.
+- interrupts : Specifies the interrupt the controller is connected to.
+
+Optional properties:
+
+- ingenic,pwm-channels-mask: Bitmask of TCU channels reserved for PWM use.
+  Default value is 0xfc.
+
+
+Children nodes
+==========================================================
+
+
+PWM node:
+---------
+
+Required properties:
+
+- compatible: Must be one of:
+  * ingenic,jz4740-pwm
+  * ingenic,jz4725b-pwm
+- #pwm-cells: Should be 3. See ../pwm/pwm.txt for a description of the cell
+  format.
+- clocks: List of phandle & clock specifiers for the TCU clocks.
+- clock-names: List of name strings for the TCU clocks.
+
+
+Watchdog node:
+--------------
+
+Required properties:
+
+- compatible: Must be "ingenic,jz4740-watchdog"
+- clocks: phandle to the WDT clock
+- clock-names: should be "wdt"
+
+
+OS Timer node:
+--------------
+
+Required properties:
+
+- compatible: Must be one of:
+  * ingenic,jz4725b-ost
+  * ingenic,jz4770-ost
+- clocks: phandle to the OST clock
+- clock-names: should be "ost"
+- interrupts : Specifies the interrupt the OST is connected to.
+
+
+Example
+==========================================================
+
+#include <dt-bindings/clock/jz4770-cgu.h>
+#include <dt-bindings/clock/ingenic,tcu.h>
+
+/ {
+       tcu: timer@10002000 {
+               compatible = "ingenic,jz4770-tcu", "simple-mfd";
+               reg = <0x10002000 0x1000>;
+               #address-cells = <1>;
+               #size-cells = <1>;
+               ranges = <0x0 0x10002000 0x1000>;
+
+               #clock-cells = <1>;
+
+               clocks = <&cgu JZ4770_CLK_RTC
+                         &cgu JZ4770_CLK_EXT
+                         &cgu JZ4770_CLK_PCLK>;
+               clock-names = "rtc", "ext", "pclk";
+
+               interrupt-controller;
+               #interrupt-cells = <1>;
+
+               interrupt-parent = <&intc>;
+               interrupts = <27 26 25>;
+
+               watchdog: watchdog@0 {
+                       compatible = "ingenic,jz4740-watchdog";
+                       reg = <0x0 0xc>;
+
+                       clocks = <&tcu TCU_CLK_WDT>;
+                       clock-names = "wdt";
+               };
+
+               pwm: pwm@40 {
+                       compatible = "ingenic,jz4740-pwm";
+                       reg = <0x40 0x80>;
+
+                       #pwm-cells = <3>;
+
+                       clocks = <&tcu TCU_CLK_TIMER0
+                                 &tcu TCU_CLK_TIMER1
+                                 &tcu TCU_CLK_TIMER2
+                                 &tcu TCU_CLK_TIMER3
+                                 &tcu TCU_CLK_TIMER4
+                                 &tcu TCU_CLK_TIMER5
+                                 &tcu TCU_CLK_TIMER6
+                                 &tcu TCU_CLK_TIMER7>;
+                       clock-names = "timer0", "timer1", "timer2", "timer3",
+                                     "timer4", "timer5", "timer6", "timer7";
+               };
+
+               ost: timer@e0 {
+                       compatible = "ingenic,jz4770-ost";
+                       reg = <0xe0 0x20>;
+
+                       clocks = <&tcu TCU_CLK_OST>;
+                       clock-names = "ost";
+
+                       interrupts = <15>;
+               };
+       };
+};
index a747204..d78ef63 100644 (file)
@@ -54,6 +54,8 @@ Optional properties:
                          PHY reset from the UFS controller.
 - resets            : reset node register
 - reset-names       : describe reset node register, the "rst" corresponds to reset the whole UFS IP.
+- reset-gpios       : A phandle and gpio specifier denoting the GPIO connected
+                     to the RESET pin of the UFS memory device.
 
 Note: If above properties are not defined it can be assumed that the supply
 regulators or clocks are always on.
diff --git a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml
new file mode 100644 (file)
index 0000000..3a54f58
--- /dev/null
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/allwinner,sun4i-a10-wdt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Watchdog Device Tree Bindings
+
+allOf:
+  - $ref: "watchdog.yaml#"
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  compatible:
+    oneOf:
+      - const: allwinner,sun4i-a10-wdt
+      - const: allwinner,sun6i-a31-wdt
+      - items:
+          - const: allwinner,sun50i-a64-wdt
+          - const: allwinner,sun6i-a31-wdt
+      - items:
+          - const: allwinner,sun50i-h6-wdt
+          - const: allwinner,sun6i-a31-wdt
+      - items:
+          - const: allwinner,suniv-f1c100s-wdt
+          - const: allwinner,sun4i-a10-wdt
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - interrupts
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    wdt: watchdog@1c20c90 {
+        compatible = "allwinner,sun4i-a10-wdt";
+        reg = <0x01c20c90 0x10>;
+        interrupts = <24>;
+        clocks = <&osc24M>;
+        timeout-sec = <10>;
+    };
+
+...
index c5077a1..d78d4a8 100644 (file)
@@ -4,6 +4,7 @@ Required properties:
  - compatible: must be one of:
        - "aspeed,ast2400-wdt"
        - "aspeed,ast2500-wdt"
+       - "aspeed,ast2600-wdt"
 
  - reg: physical base address of the controller and length of memory mapped
    region
diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.txt b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.txt
new file mode 100644 (file)
index 0000000..f902508
--- /dev/null
@@ -0,0 +1,22 @@
+* Freescale i.MX7ULP Watchdog Timer (WDT) Controller
+
+Required properties:
+- compatible : Should be "fsl,imx7ulp-wdt"
+- reg : Should contain WDT registers location and length
+- interrupts : Should contain WDT interrupt
+- clocks: Should contain a phandle pointing to the gated peripheral clock.
+
+Optional properties:
+- timeout-sec : Contains the watchdog timeout in seconds
+
+Examples:
+
+wdog1: watchdog@403d0000 {
+       compatible = "fsl,imx7ulp-wdt";
+       reg = <0x403d0000 0x10000>;
+       interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
+       clocks = <&pcc2 IMX7ULP_CLK_WDG1>;
+       assigned-clocks = <&pcc2 IMX7ULP_CLK_WDG1>;
+       assigned-clock-parents = <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>;
+       timeout-sec = <40>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/ingenic,jz4740-wdt.txt b/Documentation/devicetree/bindings/watchdog/ingenic,jz4740-wdt.txt
deleted file mode 100644 (file)
index ce1cb72..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-Ingenic Watchdog Timer (WDT) Controller for JZ4740 & JZ4780
-
-Required properties:
-compatible: "ingenic,jz4740-watchdog" or "ingenic,jz4780-watchdog"
-reg: Register address and length for watchdog registers
-clocks: phandle to the RTC clock
-clock-names: should be "rtc"
-
-Example:
-
-watchdog: jz4740-watchdog@10002000 {
-       compatible = "ingenic,jz4740-watchdog";
-       reg = <0x10002000 0x10>;
-
-       clocks = <&cgu JZ4740_CLK_RTC>;
-       clock-names = "rtc";
-};
diff --git a/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt b/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt
deleted file mode 100644 (file)
index e65198d..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-Allwinner SoCs Watchdog timer
-
-Required properties:
-
-- compatible : should be one of
-       "allwinner,sun4i-a10-wdt"
-       "allwinner,sun6i-a31-wdt"
-       "allwinner,sun50i-a64-wdt","allwinner,sun6i-a31-wdt"
-       "allwinner,sun50i-h6-wdt","allwinner,sun6i-a31-wdt"
-       "allwinner,suniv-f1c100s-wdt", "allwinner,sun4i-a10-wdt"
-- reg : Specifies base physical address and size of the registers.
-
-Optional properties:
-- timeout-sec : Contains the watchdog timeout in seconds
-
-Example:
-
-wdt: watchdog@1c20c90 {
-       compatible = "allwinner,sun4i-a10-wdt";
-       reg = <0x01c20c90 0x10>;
-       timeout-sec = <10>;
-};
diff --git a/Documentation/devicetree/bindings/watchdog/watchdog.yaml b/Documentation/devicetree/bindings/watchdog/watchdog.yaml
new file mode 100644 (file)
index 0000000..187bf6c
--- /dev/null
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/watchdog.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Watchdog Generic Bindings
+
+maintainers:
+  - Guenter Roeck <linux@roeck-us.net>
+  - Wim Van Sebroeck <wim@linux-watchdog.org>
+
+description: |
+  This document describes generic bindings which can be used to
+  describe watchdog devices in a device tree.
+
+properties:
+  $nodename:
+    pattern: "^watchdog(@.*|-[0-9a-f])?$"
+
+  timeout-sec:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Contains the watchdog timeout in seconds.
+
+...
index d2c6a5c..b19b6a0 100644 (file)
@@ -158,6 +158,20 @@ Mount Options
         copies.  Currently, it's only used in copy_file_range, which will revert
         to the default VFS implementation if this option is used.
 
+  recover_session=<no|clean>
+       Set auto reconnect mode in the case where the client is blacklisted. The
+       available modes are "no" and "clean". The default is "no".
+
+       * no: never attempt to reconnect when client detects that it has been
+       blacklisted. Operations will generally fail after being blacklisted.
+
+       * clean: client reconnects to the ceph cluster automatically when it
+       detects that it has been blacklisted. During reconnect, client drops
+       dirty data/metadata, invalidates page caches and writable file handles.
+       After reconnect, file locks become stale because the MDS loses track
+       of them. If an inode contains any stale file locks, read/write on the
+       inode is not allowed until applications release all stale file locks.
+
 More Information
 ================
 
index c6d8855..72075aa 100644 (file)
@@ -9,14 +9,26 @@ ext4 code is not prepared to handle the case where the block size
 exceeds the page size. However, for a filesystem of mostly huge files,
 it is desirable to be able to allocate disk blocks in units of multiple
 blocks to reduce both fragmentation and metadata overhead. The
-`bigalloc <Bigalloc>`__ feature provides exactly this ability. The
-administrator can set a block cluster size at mkfs time (which is stored
-in the s\_log\_cluster\_size field in the superblock); from then on, the
-block bitmaps track clusters, not individual blocks. This means that
-block groups can be several gigabytes in size (instead of just 128MiB);
-however, the minimum allocation unit becomes a cluster, not a block,
-even for directories. TaoBao had a patchset to extend the “use units of
-clusters instead of blocks” to the extent tree, though it is not clear
-where those patches went-- they eventually morphed into “extent tree v2”
-but that code has not landed as of May 2015.
+bigalloc feature provides exactly this ability.
+
+The bigalloc feature (EXT4_FEATURE_RO_COMPAT_BIGALLOC) changes ext4 to
+use clustered allocation, so that each bit in the ext4 block allocation
+bitmap addresses a power of two number of blocks. For example, if the
+file system is mainly going to be storing large files in the 4-32
+megabyte range, it might make sense to set a cluster size of 1 megabyte.
+This means that each bit in the block allocation bitmap now addresses
+256 4k blocks. This shrinks the total size of the block allocation
+bitmaps for a 2T file system from 64 megabytes to 256 kilobytes. It also
+means that a block group addresses 32 gigabytes instead of 128 megabytes,
+also shrinking the amount of file system overhead for metadata.
+
+The administrator can set a block cluster size at mkfs time (which is
+stored in the s\_log\_cluster\_size field in the superblock); from then
+on, the block bitmaps track clusters, not individual blocks. This means
+that block groups can be several gigabytes in size (instead of just
+128MiB); however, the minimum allocation unit becomes a cluster, not a
+block, even for directories. TaoBao had a patchset to extend the “use
+units of clusters instead of blocks” to the extent tree, though it is
+not clear where those patches went-- they eventually morphed into
+“extent tree v2” but that code has not landed as of May 2015.
 
index baf888e..3da1566 100644 (file)
@@ -71,11 +71,11 @@ if the flex\_bg size is 4, then group 0 will contain (in order) the
 superblock, group descriptors, data block bitmaps for groups 0-3, inode
 bitmaps for groups 0-3, inode tables for groups 0-3, and the remaining
 space in group 0 is for file data. The effect of this is to group the
-block metadata close together for faster loading, and to enable large
-files to be continuous on disk. Backup copies of the superblock and
-group descriptors are always at the beginning of block groups, even if
-flex\_bg is enabled. The number of block groups that make up a flex\_bg
-is given by 2 ^ ``sb.s_log_groups_per_flex``.
+block group metadata close together for faster loading, and to enable
+large files to be continuous on disk. Backup copies of the superblock
+and group descriptors are always at the beginning of block groups, even
+if flex\_bg is enabled. The number of block groups that make up a
+flex\_bg is given by 2 ^ ``sb.s_log_groups_per_flex``.
 
 Meta Block Groups
 -----------------
index 73d4dc0..bd722ec 100644 (file)
@@ -10,7 +10,9 @@ block groups. Block size is specified at mkfs time and typically is
 4KiB. You may experience mounting problems if block size is greater than
 page size (i.e. 64KiB blocks on an i386 which only has 4KiB memory
 pages). By default a filesystem can contain 2^32 blocks; if the '64bit'
-feature is enabled, then a filesystem can have 2^64 blocks.
+feature is enabled, then a filesystem can have 2^64 blocks. The location
+of structures is stored in terms of the block number the structure lives
+in and not the absolute offset on disk.
 
 For 32-bit filesystems, limits are as follows:
 
index 614034e..073940c 100644 (file)
@@ -59,7 +59,7 @@ is at most 263 bytes long, though on disk you'll need to reference
      - File name.
 
 Since file names cannot be longer than 255 bytes, the new directory
-entry format shortens the rec\_len field and uses the space for a file
+entry format shortens the name\_len field and uses the space for a file
 type flag, probably to avoid having to load every inode during directory
 tree traversal. This format is ``ext4_dir_entry_2``, which is at most
 263 bytes long, though on disk you'll need to reference
index 0f783ed..7ba6114 100644 (file)
@@ -99,9 +99,12 @@ The block group descriptor is laid out in ``struct ext4_group_desc``.
    * - 0x1E
      - \_\_le16
      - bg\_checksum
-     - Group descriptor checksum; crc16(sb\_uuid+group+desc) if the
-       RO\_COMPAT\_GDT\_CSUM feature is set, or crc32c(sb\_uuid+group\_desc) &
-       0xFFFF if the RO\_COMPAT\_METADATA\_CSUM feature is set.
+     - Group descriptor checksum; crc16(sb\_uuid+group\_num+bg\_desc) if the
+       RO\_COMPAT\_GDT\_CSUM feature is set, or
+       crc32c(sb\_uuid+group\_num+bg\_desc) & 0xFFFF if the
+       RO\_COMPAT\_METADATA\_CSUM feature is set.  The bg\_checksum
+       field in bg\_desc is skipped when calculating crc16 checksum,
+       and set to zero if crc32c checksum is used.
    * -
      -
      -
index e851e6c..a65baff 100644 (file)
@@ -472,8 +472,8 @@ inode, which allows struct ext4\_inode to grow for a new kernel without
 having to upgrade all of the on-disk inodes. Access to fields beyond
 EXT2\_GOOD\_OLD\_INODE\_SIZE should be verified to be within
 ``i_extra_isize``. By default, ext4 inode records are 256 bytes, and (as
-of October 2013) the inode structure is 156 bytes
-(``i_extra_isize = 28``). The extra space between the end of the inode
+of August 2019) the inode structure is 160 bytes
+(``i_extra_isize = 32``). The extra space between the end of the inode
 structure and the end of the inode record can be used to store extended
 attributes. Each inode record can be as large as the filesystem block
 size, though this is not terribly efficient.
index 6eae920..93e55d7 100644 (file)
@@ -58,7 +58,7 @@ The ext4 superblock is laid out as follows in
    * - 0x1C
      - \_\_le32
      - s\_log\_cluster\_size
-     - Cluster size is (2 ^ s\_log\_cluster\_size) blocks if bigalloc is
+     - Cluster size is 2 ^ (10 + s\_log\_cluster\_size) blocks if bigalloc is
        enabled. Otherwise s\_log\_cluster\_size must equal s\_log\_block\_size.
    * - 0x20
      - \_\_le32
@@ -447,7 +447,7 @@ The ext4 superblock is laid out as follows in
      - Upper 8 bits of the s_wtime field.
    * - 0x275
      - \_\_u8
-     - s\_wtime_hi
+     - s\_mtime_hi
      - Upper 8 bits of the s_mtime field.
    * - 0x276
      - \_\_u8
@@ -466,12 +466,20 @@ The ext4 superblock is laid out as follows in
      - s\_last_error_time_hi
     - Upper 8 bits of the s_last_error_time field.
    * - 0x27A
-     - \_\_u8[2]
-     - s\_pad
+     - \_\_u8
+     - s\_pad[2]
      - Zero padding.
    * - 0x27C
+     - \_\_le16
+     - s\_encoding
+     - Filename charset encoding.
+   * - 0x27E
+     - \_\_le16
+     - s\_encoding_flags
+     - Filename charset encoding flags.
+   * - 0x280
      - \_\_le32
-     - s\_reserved[96]
+     - s\_reserved[95]
      - Padding to the end of the block.
    * - 0x3FC
      - \_\_le32
@@ -617,7 +625,7 @@ following:
    * - 0x80
      - Enable a filesystem size of 2^64 blocks (INCOMPAT\_64BIT).
    * - 0x100
-     - Multiple mount protection. Not implemented (INCOMPAT\_MMP).
+     - Multiple mount protection (INCOMPAT\_MMP).
    * - 0x200
      - Flexible block groups. See the earlier discussion of this feature
        (INCOMPAT\_FLEX\_BG).
index 496fa28..7e19913 100644 (file)
@@ -157,6 +157,11 @@ noinline_data          Disable the inline data feature, inline data feature is
                        enabled by default.
 data_flush             Enable data flushing before checkpoint in order to
                        persist data of regular and symlink.
+reserve_root=%d        Support configuring reserved space which is used for
+                       allocation from a privileged user with specified uid or
+                       gid, unit: 4KB, the default limit is 0.2% of user blocks.
+resuid=%d              The user ID which may use the reserved blocks.
+resgid=%d              The group ID which may use the reserved blocks.
 fault_injection=%d     Enable fault injection in all supported types with
                        specified injection rate.
 fault_type=%d          Support configuring fault injection type, should be
@@ -413,6 +418,9 @@ Files in /sys/fs/f2fs/<devname>
                               that would be unusable if checkpoint=disable were
                               to be set.
 
+encoding                     This shows the encoding used for casefolding.
+                              If casefolding is not enabled, returns (none)
+
 ================================================================================
 USAGE
 ================================================================================
index fd2bcf9..2c3a9f7 100644 (file)
@@ -37,3 +37,13 @@ filesystem implementations.
    journalling
    fscrypt
    fsverity
+
+Filesystems
+===========
+
+Documentation for filesystem implementations.
+
+.. toctree::
+   :maxdepth: 2
+
+   virtiofs
diff --git a/Documentation/filesystems/virtiofs.rst b/Documentation/filesystems/virtiofs.rst
new file mode 100644 (file)
index 0000000..4f338e3
--- /dev/null
@@ -0,0 +1,60 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================================
+virtiofs: virtio-fs host<->guest shared file system
+===================================================
+
+- Copyright (C) 2019 Red Hat, Inc.
+
+Introduction
+============
+The virtiofs file system for Linux implements a driver for the paravirtualized
+VIRTIO "virtio-fs" device for guest<->host file system sharing.  It allows a
+guest to mount a directory that has been exported on the host.
+
+Guests often require access to files residing on the host or remote systems.
+Use cases include making files available to new guests during installation,
+booting from a root file system located on the host, persistent storage for
+stateless or ephemeral guests, and sharing a directory between guests.
+
+Although it is possible to use existing network file systems for some of these
+tasks, they require configuration steps that are hard to automate and they
+expose the storage network to the guest.  The virtio-fs device was designed to
+solve these problems by providing file system access without networking.
+
+Furthermore the virtio-fs device takes advantage of the co-location of the
+guest and host to increase performance and provide semantics that are not
+possible with network file systems.
+
+Usage
+=====
+Mount file system with tag ``myfs`` on ``/mnt``:
+
+.. code-block:: sh
+
+  guest# mount -t virtiofs myfs /mnt
+
+Please see https://virtio-fs.gitlab.io/ for details on how to configure QEMU
+and the virtiofsd daemon.
+
+Internals
+=========
+Since the virtio-fs device uses the FUSE protocol for file system requests, the
+virtiofs file system for Linux is integrated closely with the FUSE file system
+client.  The guest acts as the FUSE client while the host acts as the FUSE
+server.  The /dev/fuse interface between the kernel and userspace is replaced
+with the virtio-fs device interface.
+
+FUSE requests are placed into a virtqueue and processed by the host.  The
+response portion of the buffer is filled in by the host and the guest handles
+the request completion.
+
+Mapping /dev/fuse to virtqueues requires solving differences in semantics
+between /dev/fuse and virtqueues.  Each time the /dev/fuse device is read, the
+FUSE client may choose which request to transfer, making it possible to
+prioritize certain requests over others.  Virtqueues have queue semantics and
+it is not possible to change the order of requests that have been enqueued.
+This is especially important if the virtqueue becomes full since it is then
+impossible to add high priority requests.  In order to address this difference,
+the virtio-fs device uses a "hiprio" virtqueue specifically for requests that
+have priority over normal requests.
index b5fd87e..b843e31 100644 (file)
@@ -144,16 +144,15 @@ implementation.
 .. toctree::
    :maxdepth: 2
 
-   sh/index
    arm/index
    arm64/index
    ia64/index
    m68k/index
-   powerpc/index
    mips/index
    nios2/nios2
    openrisc/index
    parisc/index
+   powerpc/index
    riscv/index
    s390/index
    sh/index
index f34669b..8f76a8a 100644 (file)
@@ -29,10 +29,10 @@ Sleeping and interrupt context
   The corresponding functions exported to upper level protocol
   consumers:
 
-    - ib_create_ah
-    - ib_modify_ah
-    - ib_query_ah
-    - ib_destroy_ah
+    - rdma_create_ah
+    - rdma_modify_ah
+    - rdma_query_ah
+    - rdma_destroy_ah
     - ib_post_send
     - ib_post_recv
     - ib_req_notify_cq
index 24e7634..d2ae799 100644 (file)
@@ -470,9 +470,12 @@ build.
 
        The syntax of the Module.symvers file is::
 
-               <CRC>       <Symbol>           <module>
+       <CRC>       <Symbol>          <Namespace>  <Module>                         <Export Type>
 
-               0x2d036834  scsi_remove_host   drivers/scsi/scsi_mod
+       0xe1cc2a05  usb_stor_suspend  USB_STORAGE  drivers/usb/storage/usb-storage  EXPORT_SYMBOL_GPL
+
+       The fields are separated by tabs and values may be empty (e.g.
+       if no namespace is defined for an exported symbol).
 
        For a kernel build without CONFIG_MODVERSIONS enabled, the CRC
        would read 0x00000000.
diff --git a/Documentation/kbuild/namespaces.rst b/Documentation/kbuild/namespaces.rst
new file mode 100644 (file)
index 0000000..982ed7b
--- /dev/null
@@ -0,0 +1,154 @@
+=================
+Symbol Namespaces
+=================
+
+The following document describes how to use Symbol Namespaces to structure the
+export surface of in-kernel symbols exported through the family of
+EXPORT_SYMBOL() macros.
+
+.. Table of Contents
+
+       === 1 Introduction
+       === 2 How to define Symbol Namespaces
+          --- 2.1 Using the EXPORT_SYMBOL macros
+          --- 2.2 Using the DEFAULT_SYMBOL_NAMESPACE define
+       === 3 How to use Symbols exported in Namespaces
+       === 4 Loading Modules that use namespaced Symbols
+       === 5 Automatically creating MODULE_IMPORT_NS statements
+
+1. Introduction
+===============
+
+Symbol Namespaces have been introduced as a means to structure the export
+surface of the in-kernel API. It allows subsystem maintainers to partition
+their exported symbols into separate namespaces. That is useful for
+documentation purposes (think of the SUBSYSTEM_DEBUG namespace) as well as for
+limiting the availability of a set of symbols for use in other parts of the
+kernel. As of today, modules that make use of symbols exported into namespaces,
+are required to import the namespace. Otherwise the kernel will, depending on
+its configuration, reject loading the module or warn about a missing import.
+
+2. How to define Symbol Namespaces
+==================================
+
+Symbols can be exported into namespaces using different methods. All of them are
+changing the way EXPORT_SYMBOL and friends are instrumented to create ksymtab
+entries.
+
+2.1 Using the EXPORT_SYMBOL macros
+==================================
+
+In addition to the macros EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL(), that allow
+exporting of kernel symbols to the kernel symbol table, variants of these are
+available to export symbols into a certain namespace: EXPORT_SYMBOL_NS() and
+EXPORT_SYMBOL_NS_GPL(). They take one additional argument: the namespace.
+Please note that due to macro expansion that argument needs to be a
+preprocessor symbol. E.g. to export the symbol `usb_stor_suspend` into the
+namespace `USB_STORAGE`, use::
+
+       EXPORT_SYMBOL_NS(usb_stor_suspend, USB_STORAGE);
+
+The corresponding ksymtab entry struct `kernel_symbol` will have the member
+`namespace` set accordingly. A symbol that is exported without a namespace will
+refer to `NULL`. There is no default namespace if none is defined. `modpost`
+and kernel/module.c make use of the namespace at build time or module load time,
+respectively.
+
+2.2 Using the DEFAULT_SYMBOL_NAMESPACE define
+=============================================
+
+Defining namespaces for all symbols of a subsystem can be very verbose and may
+become hard to maintain. Therefore a default define (DEFAULT_SYMBOL_NAMESPACE)
+has been provided, that, if set, will become the default for all EXPORT_SYMBOL()
+and EXPORT_SYMBOL_GPL() macro expansions that do not specify a namespace.
+
+There are multiple ways of specifying this define and it depends on the
+subsystem and the maintainer's preference, which one to use. The first option
+is to define the default namespace in the `Makefile` of the subsystem. E.g. to
+export all symbols defined in usb-common into the namespace USB_COMMON, add a
+line like this to drivers/usb/common/Makefile::
+
+       ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=USB_COMMON
+
+That will affect all EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL() statements. A
+symbol exported with EXPORT_SYMBOL_NS() while this definition is present, will
+still be exported into the namespace that is passed as the namespace argument
+as this argument has preference over a default symbol namespace.
+
+A second option to define the default namespace is directly in the compilation
+unit as preprocessor statement. The above example would then read::
+
+       #undef  DEFAULT_SYMBOL_NAMESPACE
+       #define DEFAULT_SYMBOL_NAMESPACE USB_COMMON
+
+within the corresponding compilation unit before any EXPORT_SYMBOL macro is
+used.
+
+3. How to use Symbols exported in Namespaces
+============================================
+
+In order to use symbols that are exported into namespaces, kernel modules need
+to explicitly import these namespaces. Otherwise the kernel might refuse to
+load the module. The module code is required to use the macro MODULE_IMPORT_NS
+for the namespaces it uses symbols from. E.g. a module using the
+usb_stor_suspend symbol from above, needs to import the namespace USB_STORAGE
+using a statement like::
+
+       MODULE_IMPORT_NS(USB_STORAGE);
+
+This will create a `modinfo` tag in the module for each imported namespace.
+This has the side effect, that the imported namespaces of a module can be
+inspected with modinfo::
+
+       $ modinfo drivers/usb/storage/ums-karma.ko
+       [...]
+       import_ns:      USB_STORAGE
+       [...]
+
+
+It is advisable to add the MODULE_IMPORT_NS() statement close to other module
+metadata definitions like MODULE_AUTHOR() or MODULE_LICENSE(). Refer to section
+5. for a way to create missing import statements automatically.
+
+4. Loading Modules that use namespaced Symbols
+==============================================
+
+At module loading time (e.g. `insmod`), the kernel will check each symbol
+referenced from the module for its availability and whether the namespace it
+might be exported to has been imported by the module. The default behaviour of
+the kernel is to reject loading modules that don't specify sufficient imports.
+An error will be logged and loading will fail with EINVAL. In order to
+allow loading of modules that don't satisfy this precondition, a configuration
+option is available: Setting MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS=y will
+enable loading regardless, but will emit a warning.
+
+5. Automatically creating MODULE_IMPORT_NS statements
+=====================================================
+
+Missing namespace imports can easily be detected at build time. In fact,
+modpost will emit a warning if a module uses a symbol from a namespace
+without importing it.
+MODULE_IMPORT_NS() statements will usually be added at a definite location
+(along with other module meta data). To make the life of module authors (and
+subsystem maintainers) easier, a script and make target are available to fix up
+missing imports. Fixing missing imports can be done with::
+
+       $ make nsdeps
+
+A typical scenario for module authors would be::
+
+       - write code that depends on a symbol from a not imported namespace
+       - `make`
+       - notice the warning of modpost telling about a missing import
+       - run `make nsdeps` to add the import to the correct code location
+
+For subsystem maintainers introducing a namespace, the steps are very similar.
+Again, `make nsdeps` will eventually add the missing namespace imports for
+in-tree modules::
+
+       - move or add symbols to a namespace (e.g. with EXPORT_SYMBOL_NS())
+       - `make` (preferably with an allmodconfig to cover all in-kernel
+         modules)
+       - notice the warning of modpost telling about a missing import
+       - run `make nsdeps` to add the import to the correct code location
+
index 5891a70..a3ddb21 100644 (file)
@@ -594,6 +594,24 @@ internal implementation issue, and not really an interface. Some
 maintainers and developers may however require EXPORT_SYMBOL_GPL()
 when adding any new APIs or functionality.
 
+:c:func:`EXPORT_SYMBOL_NS()`
+----------------------------
+
+Defined in ``include/linux/export.h``
+
+This is the variant of `EXPORT_SYMBOL()` that allows specifying a symbol
+namespace. Symbol Namespaces are documented in
+``Documentation/kbuild/namespaces.rst``.
+
+:c:func:`EXPORT_SYMBOL_NS_GPL()`
+--------------------------------
+
+Defined in ``include/linux/export.h``
+
+This is the variant of `EXPORT_SYMBOL_GPL()` that allows specifying a symbol
+namespace. Symbol Namespaces are documented in
+``Documentation/kbuild/namespaces.rst``.
+
 Routines and Conventions
 ========================
 
index fd9023c..a93c2f6 100644 (file)
@@ -1,11 +1,14 @@
 .. SPDX-License-Identifier: GPL-2.0
 
-=================
-MIPS architecture
-=================
+===========================
+MIPS-specific Documentation
+===========================
 
 .. toctree::
    :maxdepth: 2
+   :numbered:
+
+   ingenic-tcu
 
    au1xxx_ide
 
diff --git a/Documentation/mips/ingenic-tcu.rst b/Documentation/mips/ingenic-tcu.rst
new file mode 100644 (file)
index 0000000..c4ef4c4
--- /dev/null
@@ -0,0 +1,71 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================
+Ingenic JZ47xx SoCs Timer/Counter Unit hardware
+===============================================
+
+The Timer/Counter Unit (TCU) in Ingenic JZ47xx SoCs is a multi-function
+hardware block. It features up to eight channels, that can be used as
+counters, timers, or PWM.
+
+- JZ4725B, JZ4750, JZ4755 only have six TCU channels. The other SoCs all
+  have eight channels.
+
+- JZ4725B introduced a separate channel, called Operating System Timer
+  (OST). It is a 32-bit programmable timer. On JZ4760B and above, it is
+  64-bit.
+
+- Each one of the TCU channels has its own clock, which can be reparented to three
+  different clocks (pclk, ext, rtc), gated, and reclocked, through their TCSR register.
+
+    - The watchdog and OST hardware blocks also feature a TCSR register with the same
+      format in their register space.
+    - The TCU registers used to gate/ungate can also gate/ungate the watchdog and
+      OST clocks.
+
+- Each TCU channel works in one of two modes:
+
+    - mode TCU1: channels cannot work in sleep mode, but are easier to
+      operate.
+    - mode TCU2: channels can work in sleep mode, but the operation is a bit
+      more complicated than with TCU1 channels.
+
+- The mode of each TCU channel depends on the SoC used:
+
+    - On the oldest SoCs (up to JZ4740), all of the eight channels operate in
+      TCU1 mode.
+    - On JZ4725B, channel 5 operates as TCU2, the others operate as TCU1.
+    - On newest SoCs (JZ4750 and above), channels 1-2 operate as TCU2, the
+      others operate as TCU1.
+
+- Each channel can generate an interrupt. Some channels share an interrupt
+  line, some don't, and this changes between SoC versions:
+
+    - on older SoCs (JZ4740 and below), channel 0 and channel 1 have their
+      own interrupt line; channels 2-7 share the last interrupt line.
+    - On JZ4725B, channel 0 has its own interrupt; channels 1-5 share one
+      interrupt line; the OST uses the last interrupt line.
+    - on newer SoCs (JZ4750 and above), channel 5 has its own interrupt;
+      channels 0-4 and (if eight channels) 6-7 all share one interrupt line;
+      the OST uses the last interrupt line.
+
+Implementation
+==============
+
+The functionalities of the TCU hardware are spread across multiple drivers:
+
+===========  =====
+clocks       drivers/clk/ingenic/tcu.c
+interrupts   drivers/irqchip/irq-ingenic-tcu.c
+timers       drivers/clocksource/ingenic-timer.c
+OST          drivers/clocksource/ingenic-ost.c
+PWM          drivers/pwm/pwm-jz4740.c
+watchdog     drivers/watchdog/jz4740_wdt.c
+===========  =====
+
+Because various functionalities of the TCU that belong to different drivers
+and frameworks can be controlled from the same registers, all of these
+drivers access their registers through the same regmap.
+
+For more information regarding the devicetree bindings of the TCU drivers,
+have a look at Documentation/devicetree/bindings/mfd/ingenic,tcu.txt.
index 3d1cca2..c5a8432 100644 (file)
@@ -68,8 +68,10 @@ descriptors by adding their identifier to the format string
  - 'd-ng': the digest of the event, calculated with an arbitrary hash
    algorithm (field format: [<hash algo>:]digest, where the digest
    prefix is shown only if the hash algorithm is not SHA1 or MD5);
+ - 'd-modsig': the digest of the event without the appended modsig;
  - 'n-ng': the name of the event, without size limitations;
  - 'sig': the file signature;
+ - 'modsig': the appended file signature;
  - 'buf': the buffer data that was used to generate the hash without size limitations;
 
 
@@ -79,6 +81,7 @@ Below, there is the list of defined template descriptors:
  - "ima-ng" (default): its format is ``d-ng|n-ng``;
  - "ima-sig": its format is ``d-ng|n-ng|sig``;
  - "ima-buf": its format is ``d-ng|n-ng|buf``;
+ - "ima-modsig": its format is ``d-ng|n-ng|sig|d-modsig|modsig``;
 
 
 Use
index 136f1ee..4833904 100644 (file)
@@ -5309,3 +5309,16 @@ Architectures: x86
 This capability indicates that KVM supports paravirtualized Hyper-V IPI send
 hypercalls:
 HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
+8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH
+
+Architecture: x86
+
+This capability indicates that KVM running on top of Hyper-V hypervisor
+enables Direct TLB flush for its guests meaning that TLB flush
+hypercalls are handled by Level 0 hypervisor (Hyper-V) bypassing KVM.
+Due to the different ABI for hypercall parameters between Hyper-V and
+KVM, enabling this capability effectively disables all hypercall
+handling by KVM (as some KVM hypercall may be mistakenly treated as TLB
+flush hypercalls by Hyper-V) so userspace should disable KVM identification
+in CPUID and only expose Hyper-V identification. In this case, the guest
+thinks it's running on Hyper-V and only uses Hyper-V hypercalls.
index 710ce1c..0a5960b 100644 (file)
@@ -192,15 +192,14 @@ read only, or fully unmap, etc.). The device must complete the update before
 the driver callback returns.
 
 When the device driver wants to populate a range of virtual addresses, it can
-use either::
+use::
 
-  long hmm_range_snapshot(struct hmm_range *range);
-  long hmm_range_fault(struct hmm_range *range, bool block);
+  long hmm_range_fault(struct hmm_range *range, unsigned int flags);
 
-The first one (hmm_range_snapshot()) will only fetch present CPU page table
+With the HMM_RANGE_SNAPSHOT flag, it will only fetch present CPU page table
 entries and will not trigger a page fault on missing or non-present entries.
-The second one does trigger a page fault on missing or read-only entries if
-write access is requested (see below). Page faults use the generic mm page
+Without that flag, it does trigger a page fault on missing or read-only entries
+if write access is requested (see below). Page faults use the generic mm page
 fault code path just like a CPU page fault.
 
 Both functions copy CPU page table entries into their pfns array argument. Each
@@ -223,24 +222,24 @@ The usage pattern is::
       range.flags = ...;
       range.values = ...;
       range.pfn_shift = ...;
-      hmm_range_register(&range);
+      hmm_range_register(&range, mirror);
 
       /*
        * Just wait for range to be valid, safe to ignore return value as we
-       * will use the return value of hmm_range_snapshot() below under the
+       * will use the return value of hmm_range_fault() below under the
        * mmap_sem to ascertain the validity of the range.
        */
       hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC);
 
  again:
       down_read(&mm->mmap_sem);
-      ret = hmm_range_snapshot(&range);
+      ret = hmm_range_fault(&range, HMM_RANGE_SNAPSHOT);
       if (ret) {
           up_read(&mm->mmap_sem);
           if (ret == -EBUSY) {
             /*
              * No need to check hmm_range_wait_until_valid() return value
-             * on retry we will get proper error with hmm_range_snapshot()
+             * on retry we will get proper error with hmm_range_fault()
              */
             hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC);
             goto again;
@@ -340,58 +339,8 @@ Migration to and from device memory
 ===================================
 
 Because the CPU cannot access device memory, migration must use the device DMA
-engine to perform copy from and to device memory. For this we need a new
-migration helper::
-
- int migrate_vma(const struct migrate_vma_ops *ops,
-                 struct vm_area_struct *vma,
-                 unsigned long mentries,
-                 unsigned long start,
-                 unsigned long end,
-                 unsigned long *src,
-                 unsigned long *dst,
-                 void *private);
-
-Unlike other migration functions it works on a range of virtual address, there
-are two reasons for that. First, device DMA copy has a high setup overhead cost
-and thus batching multiple pages is needed as otherwise the migration overhead
-makes the whole exercise pointless. The second reason is because the
-migration might be for a range of addresses the device is actively accessing.
-
-The migrate_vma_ops struct defines two callbacks. First one (alloc_and_copy())
-controls destination memory allocation and copy operation. Second one is there
-to allow the device driver to perform cleanup operations after migration::
-
- struct migrate_vma_ops {
-     void (*alloc_and_copy)(struct vm_area_struct *vma,
-                            const unsigned long *src,
-                            unsigned long *dst,
-                            unsigned long start,
-                            unsigned long end,
-                            void *private);
-     void (*finalize_and_map)(struct vm_area_struct *vma,
-                              const unsigned long *src,
-                              const unsigned long *dst,
-                              unsigned long start,
-                              unsigned long end,
-                              void *private);
- };
-
-It is important to stress that these migration helpers allow for holes in the
-virtual address range. Some pages in the range might not be migrated for all
-the usual reasons (page is pinned, page is locked, ...). This helper does not
-fail but just skips over those pages.
-
-The alloc_and_copy() might decide to not migrate all pages in the
-range (for reasons under the callback control). For those, the callback just
-has to leave the corresponding dst entry empty.
-
-Finally, the migration of the struct page might fail (for file backed page) for
-various reasons (failure to freeze reference, or update page cache, ...). If
-that happens, then the finalize_and_map() can catch any pages that were not
-migrated. Note those pages were still copied to a new page and thus we wasted
-bandwidth but this is considered as a rare event and a price that we are
-willing to pay to keep all the code simpler.
+engine to perform copy from and to device memory. For this we need to use the
+migrate_vma_setup(), migrate_vma_pages(), and migrate_vma_finalize() helpers.
 
 
 Memory cgroup (memcg) and rss accounting
index 889b00b..ff51f4a 100644 (file)
@@ -54,9 +54,9 @@ Hugetlb-specific helpers:
 Support of split page table lock by an architecture
 ===================================================
 
-There's no need in special enabling of PTE split page table lock:
-everything required is done by pgtable_page_ctor() and pgtable_page_dtor(),
-which must be called on PTE table allocation / freeing.
+There's no need for special enabling of PTE split page table lock: everything
+required is done by pgtable_pte_page_ctor() and pgtable_pte_page_dtor(), which
+must be called on PTE table allocation / freeing.
 
 Make sure the architecture doesn't use slab allocator for page table
 allocation: slab uses page->slab_cache for its pages.
@@ -74,7 +74,7 @@ paths: i.e X86_PAE preallocate few PMDs on pgd_alloc().
 
 With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK.
 
-NOTE: pgtable_page_ctor() and pgtable_pmd_page_ctor() can fail -- it must
+NOTE: pgtable_pte_page_ctor() and pgtable_pmd_page_ctor() can fail -- it must
 be handled properly.
 
 page->ptl
@@ -94,7 +94,7 @@ trick:
    split lock with enabled DEBUG_SPINLOCK or DEBUG_LOCK_ALLOC, but costs
    one more cache line for indirect access;
 
-The spinlock_t allocated in pgtable_page_ctor() for PTE table and in
+The spinlock_t is allocated in pgtable_pte_page_ctor() for PTE table and in
 pgtable_pmd_page_ctor() for PMD table.
 
 Please, never access page->ptl directly -- use appropriate helper.
index a3985cc..223c993 100644 (file)
@@ -301,15 +301,6 @@ ixp4xx_wdt:
 
 -------------------------------------------------
 
-ks8695_wdt:
-    wdt_time:
-       Watchdog time in seconds. (default=5)
-    nowayout:
-       Watchdog cannot be stopped once started
-       (default=kernel config parameter)
-
--------------------------------------------------
-
 machzwd:
     nowayout:
        Watchdog cannot be stopped once started
@@ -375,16 +366,6 @@ nic7018_wdt:
 
 -------------------------------------------------
 
-nuc900_wdt:
-    heartbeat:
-       Watchdog heartbeats in seconds.
-       (default = 15)
-    nowayout:
-       Watchdog cannot be stopped once started
-       (default=kernel config parameter)
-
--------------------------------------------------
-
 omap_wdt:
     timer_margin:
        initial watchdog timeout (in seconds)
index fed7f97..857611c 100644 (file)
@@ -728,7 +728,7 @@ ALTERA SYSTEM MANAGER DRIVER
 M:     Thor Thayer <thor.thayer@linux.intel.com>
 S:     Maintained
 F:     drivers/mfd/altera-sysmgr.c
-F:     include/linux/mfd/altera-sysgmr.h
+F:     include/linux/mfd/altera-sysmgr.h
 
 ALTERA SYSTEM RESOURCE DRIVER FOR ARRIA10 DEVKIT
 M:     Thor Thayer <thor.thayer@linux.intel.com>
@@ -1466,6 +1466,7 @@ F:        arch/arm64/boot/dts/amlogic/
 F:     drivers/pinctrl/meson/
 F:     drivers/mmc/host/meson*
 F:     drivers/soc/amlogic/
+F:     drivers/rtc/rtc-meson*
 N:     meson
 
 ARM/Amlogic Meson SoC Sound Drivers
@@ -2920,6 +2921,8 @@ F:        drivers/video/backlight/
 F:     include/linux/backlight.h
 F:     include/linux/pwm_backlight.h
 F:     Documentation/devicetree/bindings/leds/backlight
+F:     Documentation/ABI/stable/sysfs-class-backlight
+F:     Documentation/ABI/testing/sysfs-class-backlight
 
 BATMAN ADVANCED
 M:     Marek Lindner <mareklindner@neomailbox.ch>
@@ -4337,6 +4340,12 @@ S:       Maintained
 F:     Documentation/filesystems/cramfs.txt
 F:     fs/cramfs/
 
+CREATIVE SB0540
+M:     Bastien Nocera <hadess@hadess.net>
+L:     linux-input@vger.kernel.org
+S:     Maintained
+F:     drivers/hid/hid-creative-sb0540.c
+
 CRYPTO API
 M:     Herbert Xu <herbert@gondor.apana.org.au>
 M:     "David S. Miller" <davem@davemloft.net>
@@ -5802,6 +5811,12 @@ L:       linux-edac@vger.kernel.org
 S:     Maintained
 F:     drivers/edac/amd64_edac*
 
+EDAC-ARMADA
+M:     Jan Luebbe <jlu@pengutronix.de>
+L:     linux-edac@vger.kernel.org
+S:     Maintained
+F:     drivers/edac/armada_xp_*
+
 EDAC-AST2500
 M:     Stefan Schaeckeler <sschaeck@cisco.com>
 S:     Supported
@@ -7850,6 +7865,12 @@ S:       Maintained
 F:     drivers/mfd/lpc_ich.c
 F:     drivers/gpio/gpio-ich.c
 
+ICY I2C DRIVER
+M:     Max Staudt <max@enpas.org>
+L:     linux-i2c@vger.kernel.org
+S:     Maintained
+F:     drivers/i2c/busses/i2c-icy.c
+
 IDE SUBSYSTEM
 M:     "David S. Miller" <davem@davemloft.net>
 L:     linux-ide@vger.kernel.org
@@ -9035,10 +9056,11 @@ S:      Supported
 F:     Documentation/security/keys/trusted-encrypted.rst
 F:     include/keys/trusted-type.h
 F:     security/keys/trusted.c
-F:     security/keys/trusted.h
+F:     include/keys/trusted.h
 
 KEYS/KEYRINGS:
 M:     David Howells <dhowells@redhat.com>
+M:     Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
 L:     keyrings@vger.kernel.org
 S:     Maintained
 F:     Documentation/security/keys/core.rst
@@ -11504,7 +11526,6 @@ NOKIA N900 POWER SUPPLY DRIVERS
 R:     Pali Rohár <pali.rohar@gmail.com>
 F:     include/linux/power/bq2415x_charger.h
 F:     include/linux/power/bq27xxx_battery.h
-F:     include/linux/power/isp1704_charger.h
 F:     drivers/power/supply/bq2415x_charger.c
 F:     drivers/power/supply/bq27xxx_battery.c
 F:     drivers/power/supply/bq27xxx_battery_i2c.c
@@ -11517,6 +11538,11 @@ S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/wtarreau/nolibc.git
 F:     tools/include/nolibc/
 
+NSDEPS
+M:     Matthias Maennich <maennich@google.com>
+S:     Maintained
+F:     scripts/nsdeps
+
 NTB AMD DRIVER
 M:     Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
 L:     linux-ntb@googlegroups.com
@@ -11867,6 +11893,7 @@ S:      Maintained
 F:     arch/arm/mach-omap2/
 F:     arch/arm/plat-omap/
 F:     arch/arm/configs/omap2plus_defconfig
+F:     drivers/bus/ti-sysc.c
 F:     drivers/i2c/busses/i2c-omap.c
 F:     drivers/irqchip/irq-omap-intc.c
 F:     drivers/mfd/*omap*.c
@@ -11887,6 +11914,7 @@ F:      drivers/regulator/tps65910-regulator.c
 F:     drivers/regulator/twl-regulator.c
 F:     drivers/regulator/twl6030-regulator.c
 F:     include/linux/platform_data/i2c-omap.h
+F:     include/linux/platform_data/ti-sysc.h
 
 ONION OMEGA2+ BOARD
 M:     Harvey Hunt <harveyhuntnexus@gmail.com>
@@ -12561,16 +12589,18 @@ F:    arch/x86/kernel/early-quirks.c
 
 PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS
 M:     Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+R:     Andrew Murray <andrew.murray@arm.com>
 L:     linux-pci@vger.kernel.org
 Q:     http://patchwork.ozlabs.org/project/linux-pci/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git/
 S:     Supported
 F:     drivers/pci/controller/
 
-PCIE DRIVER FOR ANNAPURNA LABS
+PCIE DRIVER FOR AMAZON ANNAPURNA LABS
 M:     Jonathan Chocron <jonnyc@amazon.com>
 L:     linux-pci@vger.kernel.org
 S:     Maintained
+F:     Documentation/devicetree/bindings/pci/pcie-al.txt
 F:     drivers/pci/controller/dwc/pcie-al.c
 
 PCIE DRIVER FOR AMLOGIC MESON
@@ -13216,9 +13246,11 @@ F:     drivers/media/rc/pwm-ir-tx.c
 
 PWM SUBSYSTEM
 M:     Thierry Reding <thierry.reding@gmail.com>
+R:     Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
 L:     linux-pwm@vger.kernel.org
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/thierry.reding/linux-pwm.git
+Q:     https://patchwork.ozlabs.org/project/linux-pwm/list/
 F:     Documentation/driver-api/pwm.rst
 F:     Documentation/devicetree/bindings/pwm/
 F:     include/linux/pwm.h
@@ -13227,6 +13259,7 @@ F:      drivers/video/backlight/pwm_bl.c
 F:     include/linux/pwm_backlight.h
 F:     drivers/gpio/gpio-mvebu.c
 F:     Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
+K:     pwm_(config|apply_state|ops)
 
 PXA GPIO DRIVER
 M:     Robert Jarzmik <robert.jarzmik@free.fr>
@@ -13735,7 +13768,7 @@ REMOTE PROCESSOR (REMOTEPROC) SUBSYSTEM
 M:     Ohad Ben-Cohen <ohad@wizery.com>
 M:     Bjorn Andersson <bjorn.andersson@linaro.org>
 L:     linux-remoteproc@vger.kernel.org
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/ohad/remoteproc.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/andersson/remoteproc.git rproc-next
 S:     Maintained
 F:     Documentation/devicetree/bindings/remoteproc/
 F:     Documentation/ABI/testing/sysfs-class-remoteproc
@@ -13748,7 +13781,7 @@ REMOTE PROCESSOR MESSAGING (RPMSG) SUBSYSTEM
 M:     Ohad Ben-Cohen <ohad@wizery.com>
 M:     Bjorn Andersson <bjorn.andersson@linaro.org>
 L:     linux-remoteproc@vger.kernel.org
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/ohad/rpmsg.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/andersson/remoteproc.git rpmsg-next
 S:     Maintained
 F:     drivers/rpmsg/
 F:     Documentation/rpmsg.txt
@@ -13768,7 +13801,7 @@ F:      drivers/clk/renesas/
 RENESAS EMEV2 I2C DRIVER
 M:     Wolfram Sang <wsa+renesas@sang-engineering.com>
 S:     Supported
-F:     Documentation/devicetree/bindings/i2c/i2c-emev2.txt
+F:     Documentation/devicetree/bindings/i2c/renesas,iic-emev2.txt
 F:     drivers/i2c/busses/i2c-emev2.c
 
 RENESAS ETHERNET DRIVERS
@@ -13790,15 +13823,15 @@ F:    drivers/iio/adc/rcar-gyroadc.c
 RENESAS R-CAR I2C DRIVERS
 M:     Wolfram Sang <wsa+renesas@sang-engineering.com>
 S:     Supported
-F:     Documentation/devicetree/bindings/i2c/i2c-rcar.txt
-F:     Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt
+F:     Documentation/devicetree/bindings/i2c/renesas,i2c.txt
+F:     Documentation/devicetree/bindings/i2c/renesas,iic.txt
 F:     drivers/i2c/busses/i2c-rcar.c
 F:     drivers/i2c/busses/i2c-sh_mobile.c
 
 RENESAS RIIC DRIVER
 M:     Chris Brandt <chris.brandt@renesas.com>
 S:     Supported
-F:     Documentation/devicetree/bindings/i2c/i2c-riic.txt
+F:     Documentation/devicetree/bindings/i2c/renesas,riic.txt
 F:     drivers/i2c/busses/i2c-riic.c
 
 RENESAS USB PHY DRIVER
@@ -16042,6 +16075,7 @@ THERMAL
 M:     Zhang Rui <rui.zhang@intel.com>
 M:     Eduardo Valentin <edubezval@gmail.com>
 R:     Daniel Lezcano <daniel.lezcano@linaro.org>
+R:     Amit Kucheria <amit.kucheria@verdurent.com>
 L:     linux-pm@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux.git
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal.git
@@ -17246,6 +17280,18 @@ S:     Supported
 F:     drivers/s390/virtio/
 F:     arch/s390/include/uapi/asm/virtio-ccw.h
 
+VIRTIO FILE SYSTEM
+M:     Vivek Goyal <vgoyal@redhat.com>
+M:     Stefan Hajnoczi <stefanha@redhat.com>
+M:     Miklos Szeredi <miklos@szeredi.hu>
+L:     virtualization@lists.linux-foundation.org
+L:     linux-fsdevel@vger.kernel.org
+W:     https://virtio-fs.gitlab.io/
+S:     Supported
+F:     fs/fuse/virtio_fs.c
+F:     include/uapi/linux/virtio_fs.h
+F:     Documentation/filesystems/virtiofs.rst
+
 VIRTIO GPU DRIVER
 M:     David Airlie <airlied@linux.ie>
 M:     Gerd Hoffmann <kraxel@redhat.com>
index 656a8c9..d456746 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1498,6 +1498,9 @@ help:
        @echo  '  headerdep       - Detect inclusion cycles in headers'
        @echo  '  coccicheck      - Check with Coccinelle'
        @echo  ''
+       @echo  'Tools:'
+       @echo  '  nsdeps          - Generate missing symbol namespace dependencies'
+       @echo  ''
        @echo  'Kernel selftest:'
        @echo  '  kselftest       - Build and run kernel selftest (run as root)'
        @echo  '                    Build, install, and boot kernel before'
@@ -1679,7 +1682,7 @@ clean: $(clean-dirs)
                -o -name '*.ko.*' \
                -o -name '*.dtb' -o -name '*.dtb.S' -o -name '*.dt.yaml' \
                -o -name '*.dwo' -o -name '*.lst' \
-               -o -name '*.su' -o -name '*.mod' \
+               -o -name '*.su' -o -name '*.mod' -o -name '*.ns_deps' \
                -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \
                -o -name '*.lex.c' -o -name '*.tab.[ch]' \
                -o -name '*.asn1.[ch]' \
@@ -1697,6 +1700,15 @@ quiet_cmd_tags = GEN     $@
 tags TAGS cscope gtags: FORCE
        $(call cmd,tags)
 
+# Script to generate missing namespace dependencies
+# ---------------------------------------------------------------------------
+
+PHONY += nsdeps
+
+nsdeps: modules
+       $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost nsdeps
+       $(Q)$(CONFIG_SHELL) $(srctree)/scripts/$@
+
 # Scripts to check various things for consistency
 # ---------------------------------------------------------------------------
 
index 0fcf8ec..5f8a5d8 100644 (file)
@@ -706,6 +706,17 @@ config HAVE_ARCH_COMPAT_MMAP_BASES
          and vice-versa 32-bit applications to call 64-bit mmap().
          Required for applications doing different bitness syscalls.
 
+# This allows the use of a set of generic functions to determine the mmap
+# base address, giving priority to the top-down scheme only if the process
+# is not in legacy mode (compat task, unlimited stack size or
+# sysctl_legacy_va_layout).
+# An architecture that selects this option can provide its own version of:
+# - STACK_RND_MASK
+config ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+       bool
+       depends on MMU
+       select ARCH_HAS_ELF_RANDOMIZE
+
 config HAVE_COPY_THREAD_TLS
        bool
        help
index 71ded3b..eb91f1e 100644 (file)
@@ -53,6 +53,4 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
        free_page((unsigned long)pmd);
 }
 
-#define check_pgt_cache()      do { } while (0)
-
 #endif /* _ALPHA_PGALLOC_H */
index 89c2032..065b57f 100644 (file)
@@ -359,11 +359,6 @@ extern void paging_init(void);
 
 #include <asm-generic/pgtable.h>
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
 /* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT.  */
 #define HAVE_ARCH_UNMAPPED_AREA
 
index ac23379..a18ec7f 100644 (file)
@@ -68,6 +68,9 @@
 #define MADV_WIPEONFORK 18             /* Zero memory on fork, child only */
 #define MADV_KEEPONFORK 19             /* Undo MADV_WIPEONFORK */
 
+#define MADV_COLD      20              /* deactivate these pages */
+#define MADV_PAGEOUT   21              /* reclaim these pages */
+
 /* compatibility flags */
 #define MAP_FILE       0
 
index 9bdb8ed..b747f2e 100644 (file)
@@ -108,7 +108,7 @@ pte_alloc_one(struct mm_struct *mm)
                return 0;
        memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
        page = virt_to_page(pte_pg);
-       if (!pgtable_page_ctor(page)) {
+       if (!pgtable_pte_page_ctor(page)) {
                __free_page(page);
                return 0;
        }
@@ -123,13 +123,12 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t ptep)
 {
-       pgtable_page_dtor(virt_to_page(ptep));
+       pgtable_pte_page_dtor(virt_to_page(ptep));
        free_pages((unsigned long)ptep, __get_order_pte());
 }
 
 #define __pte_free_tlb(tlb, pte, addr)  pte_free((tlb)->mm, pte)
 
-#define check_pgt_cache()   do { } while (0)
 #define pmd_pgtable(pmd)       ((pgtable_t) pmd_page_vaddr(pmd))
 
 #endif /* _ASM_ARC_PGALLOC_H */
index 1d87c18..7addd03 100644 (file)
@@ -395,11 +395,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 /* to cope with aliasing VIPT cache */
 #define HAVE_ARCH_UNMAPPED_AREA
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
 #endif /* __ASSEMBLY__ */
 
 #endif
index aa1d3b2..8a50efb 100644 (file)
@@ -34,6 +34,7 @@ config ARM
        select ARCH_SUPPORTS_ATOMIC_RMW
        select ARCH_USE_BUILTIN_BSWAP
        select ARCH_USE_CMPXCHG_LOCKREF
+       select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
        select ARCH_WANT_IPC_PARSE_VERSION
        select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
        select BUILDTIME_EXTABLE_SORT if MMU
@@ -82,7 +83,7 @@ config ARM
        select HAVE_FAST_GUP if ARM_LPAE
        select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
        select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
-       select HAVE_FUNCTION_TRACER if !XIP_KERNEL
+       select HAVE_FUNCTION_TRACER if !XIP_KERNEL && (CC_IS_GCC || CLANG_VERSION >= 100000)
        select HAVE_GCC_PLUGINS
        select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)
        select HAVE_IDE if PCI || ISA || PCMCIA
@@ -1476,8 +1477,9 @@ config ARM_PATCH_IDIV
          code to do integer division.
 
 config AEABI
-       bool "Use the ARM EABI to compile the kernel" if !CPU_V7 && !CPU_V7M && !CPU_V6 && !CPU_V6K
-       default CPU_V7 || CPU_V7M || CPU_V6 || CPU_V6K
+       bool "Use the ARM EABI to compile the kernel" if !CPU_V7 && \
+               !CPU_V7M && !CPU_V6 && !CPU_V6K && !CC_IS_CLANG
+       default CPU_V7 || CPU_V7M || CPU_V6 || CPU_V6K || CC_IS_CLANG
        help
          This option allows for the kernel to be compiled using the latest
          ARM ABI (aka EABI).  This is only useful if you are using a user
index fe7e9b5..8bcbd0c 100644 (file)
@@ -56,7 +56,7 @@ choice
 
 config UNWINDER_FRAME_POINTER
        bool "Frame pointer unwinder"
-       depends on !THUMB2_KERNEL && !CC_IS_CLANG
+       depends on !THUMB2_KERNEL
        select ARCH_WANT_FRAME_POINTERS
        select FRAME_POINTER
        help
index be2fc3e..db857d0 100644 (file)
@@ -36,7 +36,10 @@ KBUILD_CFLAGS        += $(call cc-option,-mno-unaligned-access)
 endif
 
 ifeq ($(CONFIG_FRAME_POINTER),y)
-KBUILD_CFLAGS  +=-fno-omit-frame-pointer -mapcs -mno-sched-prolog
+KBUILD_CFLAGS  +=-fno-omit-frame-pointer
+ifeq ($(CONFIG_CC_IS_GCC),y)
+KBUILD_CFLAGS += -mapcs -mno-sched-prolog
+endif
 endif
 
 ifeq ($(CONFIG_CPU_BIG_ENDIAN),y)
@@ -112,6 +115,10 @@ ifeq ($(CONFIG_ARM_UNWIND),y)
 CFLAGS_ABI     +=-funwind-tables
 endif
 
+ifeq ($(CONFIG_CC_IS_CLANG),y)
+CFLAGS_ABI     += -meabi gnu
+endif
+
 # Accept old syntax despite ".syntax unified"
 AFLAGS_NOWARN  :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W)
 
@@ -266,14 +273,9 @@ endif
 
 export TEXT_OFFSET GZFLAGS MMUEXT
 
-# Do we have FASTFPE?
-FASTFPE                :=arch/arm/fastfpe
-ifeq ($(FASTFPE),$(wildcard $(FASTFPE)))
-FASTFPE_OBJ    :=$(FASTFPE)/
-endif
-
 core-$(CONFIG_FPE_NWFPE)       += arch/arm/nwfpe/
-core-$(CONFIG_FPE_FASTFPE)     += $(FASTFPE_OBJ)
+# Put arch/arm/fastfpe/ to use this.
+core-$(CONFIG_FPE_FASTFPE)     += $(patsubst $(srctree)/%,%,$(wildcard $(srctree)/arch/arm/fastfpe/))
 core-$(CONFIG_VFP)             += arch/arm/vfp/
 core-$(CONFIG_XEN)             += arch/arm/xen/
 core-$(CONFIG_KVM_ARM_HOST)    += arch/arm/kvm/
@@ -286,6 +288,10 @@ core-y                             += arch/arm/net/
 core-y                         += arch/arm/crypto/
 core-y                         += $(machdirs) $(platdirs)
 
+# For cleaning
+core-                          += $(patsubst %,arch/arm/mach-%/, $(machine-))
+core-                          += $(patsubst %,arch/arm/plat-%/, $(plat-))
+
 drivers-$(CONFIG_OPROFILE)      += arch/arm/oprofile/
 
 libs-y                         := arch/arm/lib/ $(libs-y)
index e59d146..93dffed 100644 (file)
  AR_CLASS(     .arm    )
 start:
                .type   start,#function
+               /*
+                * These 7 nops along with the 1 nop immediately below for
+                * !THUMB2 form 8 nops that make the compressed kernel bootable
+                * on legacy ARM systems that were assuming the kernel in a.out
+                * binary format. The boot loaders on these systems would
+                * jump 32 bytes into the image to skip the a.out header.
+                * with these 8 nops filling exactly 32 bytes, things still
+                * work as expected on these legacy systems. Thumb2 mode keeps
+                * 7 of the nops as it turns out that some boot loaders
+                * were patching the initial instructions of the kernel, i.e
+                * had started to exploit this "patch area".
+                */
                .rept   7
                __nop
                .endr
 #ifndef CONFIG_THUMB2_KERNEL
-               mov     r0, r0
+               __nop
 #else
  AR_CLASS(     sub     pc, pc, #3      )       @ A/R: switch to Thumb2 mode
   M_CLASS(     nop.w                   )       @ M: already in Thumb2 mode
index a24a6a1..b21b3a6 100644 (file)
@@ -336,7 +336,8 @@ dtb-$(CONFIG_MACH_MESON8) += \
 dtb-$(CONFIG_ARCH_MMP) += \
        pxa168-aspenite.dtb \
        pxa910-dkb.dtb \
-       mmp2-brownstone.dtb
+       mmp2-brownstone.dtb \
+       mmp2-olpc-xo-1-75.dtb
 dtb-$(CONFIG_ARCH_MPS2) += \
        mps2-an385.dtb \
        mps2-an399.dtb
@@ -1278,6 +1279,7 @@ dtb-$(CONFIG_ARCH_MILBEAUT) += milbeaut-m10v-evb.dtb
 dtb-$(CONFIG_ARCH_ZX) += zx296702-ad1.dtb
 dtb-$(CONFIG_ARCH_ASPEED) += \
        aspeed-ast2500-evb.dtb \
+       aspeed-ast2600-evb.dtb \
        aspeed-bmc-arm-centriq2400-rep.dtb \
        aspeed-bmc-arm-stardragon4800-rep2.dtb \
        aspeed-bmc-facebook-cmm.dtb \
index 46849d6..9915c89 100644 (file)
 
                target-module@100000 {                  /* 0x4a100000, ap 3 08.0 */
                        compatible = "ti,sysc-omap4-simple", "ti,sysc";
-                       ti,hwmods = "cpgmac0";
                        reg = <0x101200 0x4>,
                              <0x101208 0x4>,
                              <0x101204 0x4>;
 
                                davinci_mdio: mdio@1000 {
                                        compatible = "ti,cpsw-mdio","ti,davinci_mdio";
+                                       clocks = <&cpsw_125mhz_clkctrl AM3_CPSW_125MHZ_CPGMAC0_CLKCTRL 0>;
+                                       clock-names = "fck";
                                        #address-cells = <1>;
                                        #size-cells = <0>;
-                                       ti,hwmods = "davinci_mdio";
                                        bus_freq = <1000000>;
                                        reg = <0x1000 0x100>;
                                        status = "disabled";
index 23ea381..bf30020 100644 (file)
                        interrupts = <24>;
                        clocks = <&hecc_ck>;
                };
+
+               /*
+                * On am3517 the OCP registers do not seem to be accessible
+                * similar to the omap34xx. Maybe SGX is permanently set to
+                * "OCP bypass mode", or maybe there is OCP_SYSCONFIG that is
+                * write-only at 0x50000e10. We detect SGX based on the SGX
+                * revision register instead of the unreadable OCP revision
+                * register.
+                */
+               sgx_module: target-module@50000000 {
+                       compatible = "ti,sysc-omap2", "ti,sysc";
+                       reg = <0x50000014 0x4>;
+                       reg-names = "rev";
+                       clocks = <&sgx_fck>, <&sgx_ick>;
+                       clock-names = "fck", "ick";
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       ranges = <0 0x50000000 0x4000>;
+
+                       /*
+                        * Closed source PowerVR driver, no child device
+                        * binding or driver in mainline
+                        */
+               };
        };
 };
 
index 04bee4f..59770dd 100644 (file)
 
                target-module@100000 {                  /* 0x4a100000, ap 3 04.0 */
                        compatible = "ti,sysc-omap4-simple", "ti,sysc";
-                       ti,hwmods = "cpgmac0";
                        reg = <0x101200 0x4>,
                              <0x101208 0x4>,
                              <0x101204 0x4>;
                                davinci_mdio: mdio@1000 {
                                        compatible = "ti,am4372-mdio","ti,cpsw-mdio","ti,davinci_mdio";
                                        reg = <0x1000 0x100>;
+                                       clocks = <&cpsw_125mhz_clkctrl AM4_CPSW_125MHZ_CPGMAC0_CLKCTRL 0>;
+                                       clock-names = "fck";
                                        #address-cells = <1>;
                                        #size-cells = <0>;
-                                       clocks = <&cpsw_125mhz_gclk>;
-                                       clock-names = "fck";
-                                       ti,hwmods = "davinci_mdio";
                                        bus_freq = <1000000>;
                                        status = "disabled";
                                };
diff --git a/arch/arm/boot/dts/aspeed-ast2600-evb.dts b/arch/arm/boot/dts/aspeed-ast2600-evb.dts
new file mode 100644 (file)
index 0000000..9870553
--- /dev/null
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright 2019 IBM Corp.
+
+/dts-v1/;
+
+#include "aspeed-g6.dtsi"
+
+/ {
+       model = "AST2600 EVB";
+       compatible = "aspeed,ast2600";
+
+       aliases {
+               serial4 = &uart5;
+       };
+
+       chosen {
+               bootargs = "console=ttyS4,115200n8";
+       };
+
+       memory@80000000 {
+               device_type = "memory";
+               reg = <0x80000000 0x80000000>;
+       };
+};
+
+&mdio1 {
+       status = "okay";
+
+       ethphy1: ethernet-phy@0 {
+               compatible = "ethernet-phy-ieee802.3-c22";
+               reg = <0>;
+       };
+};
+
+&mdio2 {
+       status = "okay";
+
+       ethphy2: ethernet-phy@0 {
+               compatible = "ethernet-phy-ieee802.3-c22";
+               reg = <0>;
+       };
+};
+
+&mdio3 {
+       status = "okay";
+
+       ethphy3: ethernet-phy@0 {
+               compatible = "ethernet-phy-ieee802.3-c22";
+               reg = <0>;
+       };
+};
+
+&mac1 {
+       status = "okay";
+
+       phy-mode = "rgmii";
+       phy-handle = <&ethphy1>;
+};
+
+&mac2 {
+       status = "okay";
+
+       phy-mode = "rgmii";
+       phy-handle = <&ethphy2>;
+};
+
+&mac3 {
+       status = "okay";
+
+       phy-mode = "rgmii";
+       phy-handle = <&ethphy3>;
+};
+
+&emmc {
+       status = "okay";
+};
+
+&rtc {
+       status = "okay";
+};
diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
new file mode 100644 (file)
index 0000000..5b8bf58
--- /dev/null
@@ -0,0 +1,1154 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright 2019 IBM Corp.
+
+&pinctrl {
+       pinctrl_adc0_default: adc0_default {
+               function = "ADC0";
+               groups = "ADC0";
+       };
+
+       pinctrl_adc1_default: adc1_default {
+               function = "ADC1";
+               groups = "ADC1";
+       };
+
+       pinctrl_adc10_default: adc10_default {
+               function = "ADC10";
+               groups = "ADC10";
+       };
+
+       pinctrl_adc11_default: adc11_default {
+               function = "ADC11";
+               groups = "ADC11";
+       };
+
+       pinctrl_adc12_default: adc12_default {
+               function = "ADC12";
+               groups = "ADC12";
+       };
+
+       pinctrl_adc13_default: adc13_default {
+               function = "ADC13";
+               groups = "ADC13";
+       };
+
+       pinctrl_adc14_default: adc14_default {
+               function = "ADC14";
+               groups = "ADC14";
+       };
+
+       pinctrl_adc15_default: adc15_default {
+               function = "ADC15";
+               groups = "ADC15";
+       };
+
+       pinctrl_adc2_default: adc2_default {
+               function = "ADC2";
+               groups = "ADC2";
+       };
+
+       pinctrl_adc3_default: adc3_default {
+               function = "ADC3";
+               groups = "ADC3";
+       };
+
+       pinctrl_adc4_default: adc4_default {
+               function = "ADC4";
+               groups = "ADC4";
+       };
+
+       pinctrl_adc5_default: adc5_default {
+               function = "ADC5";
+               groups = "ADC5";
+       };
+
+       pinctrl_adc6_default: adc6_default {
+               function = "ADC6";
+               groups = "ADC6";
+       };
+
+       pinctrl_adc7_default: adc7_default {
+               function = "ADC7";
+               groups = "ADC7";
+       };
+
+       pinctrl_adc8_default: adc8_default {
+               function = "ADC8";
+               groups = "ADC8";
+       };
+
+       pinctrl_adc9_default: adc9_default {
+               function = "ADC9";
+               groups = "ADC9";
+       };
+
+       pinctrl_bmcint_default: bmcint_default {
+               function = "BMCINT";
+               groups = "BMCINT";
+       };
+
+       pinctrl_espi_default: espi_default {
+               function = "ESPI";
+               groups = "ESPI";
+       };
+
+       pinctrl_espialt_default: espialt_default {
+               function = "ESPIALT";
+               groups = "ESPIALT";
+       };
+
+       pinctrl_fsi1_default: fsi1_default {
+               function = "FSI1";
+               groups = "FSI1";
+       };
+
+       pinctrl_fsi2_default: fsi2_default {
+               function = "FSI2";
+               groups = "FSI2";
+       };
+
+       pinctrl_fwspiabr_default: fwspiabr_default {
+               function = "FWSPIABR";
+               groups = "FWSPIABR";
+       };
+
+       pinctrl_fwspid_default: fwspid_default {
+               function = "FWSPID";
+               groups = "FWSPID";
+       };
+
+       pinctrl_fwqspid_default: fwqspid_default {
+               function = "FWQSPID";
+               groups = "FWQSPID";
+       };
+
+       pinctrl_fwspiwp_default: fwspiwp_default {
+               function = "FWSPIWP";
+               groups = "FWSPIWP";
+       };
+
+       pinctrl_gpit0_default: gpit0_default {
+               function = "GPIT0";
+               groups = "GPIT0";
+       };
+
+       pinctrl_gpit1_default: gpit1_default {
+               function = "GPIT1";
+               groups = "GPIT1";
+       };
+
+       pinctrl_gpit2_default: gpit2_default {
+               function = "GPIT2";
+               groups = "GPIT2";
+       };
+
+       pinctrl_gpit3_default: gpit3_default {
+               function = "GPIT3";
+               groups = "GPIT3";
+       };
+
+       pinctrl_gpit4_default: gpit4_default {
+               function = "GPIT4";
+               groups = "GPIT4";
+       };
+
+       pinctrl_gpit5_default: gpit5_default {
+               function = "GPIT5";
+               groups = "GPIT5";
+       };
+
+       pinctrl_gpit6_default: gpit6_default {
+               function = "GPIT6";
+               groups = "GPIT6";
+       };
+
+       pinctrl_gpit7_default: gpit7_default {
+               function = "GPIT7";
+               groups = "GPIT7";
+       };
+
+       pinctrl_gpiu0_default: gpiu0_default {
+               function = "GPIU0";
+               groups = "GPIU0";
+       };
+
+       pinctrl_gpiu1_default: gpiu1_default {
+               function = "GPIU1";
+               groups = "GPIU1";
+       };
+
+       pinctrl_gpiu2_default: gpiu2_default {
+               function = "GPIU2";
+               groups = "GPIU2";
+       };
+
+       pinctrl_gpiu3_default: gpiu3_default {
+               function = "GPIU3";
+               groups = "GPIU3";
+       };
+
+       pinctrl_gpiu4_default: gpiu4_default {
+               function = "GPIU4";
+               groups = "GPIU4";
+       };
+
+       pinctrl_gpiu5_default: gpiu5_default {
+               function = "GPIU5";
+               groups = "GPIU5";
+       };
+
+       pinctrl_gpiu6_default: gpiu6_default {
+               function = "GPIU6";
+               groups = "GPIU6";
+       };
+
+       pinctrl_gpiu7_default: gpiu7_default {
+               function = "GPIU7";
+               groups = "GPIU7";
+       };
+
+       pinctrl_hvi3c3_default: hvi3c3_default {
+               function = "HVI3C3";
+               groups = "HVI3C3";
+       };
+
+       pinctrl_hvi3c4_default: hvi3c4_default {
+               function = "HVI3C4";
+               groups = "HVI3C4";
+       };
+
+       pinctrl_i2c1_default: i2c1_default {
+               function = "I2C1";
+               groups = "I2C1";
+       };
+
+       pinctrl_i2c10_default: i2c10_default {
+               function = "I2C10";
+               groups = "I2C10";
+       };
+
+       pinctrl_i2c11_default: i2c11_default {
+               function = "I2C11";
+               groups = "I2C11";
+       };
+
+       pinctrl_i2c12_default: i2c12_default {
+               function = "I2C12";
+               groups = "I2C12";
+       };
+
+       pinctrl_i2c13_default: i2c13_default {
+               function = "I2C13";
+               groups = "I2C13";
+       };
+
+       pinctrl_i2c14_default: i2c14_default {
+               function = "I2C14";
+               groups = "I2C14";
+       };
+
+       pinctrl_i2c15_default: i2c15_default {
+               function = "I2C15";
+               groups = "I2C15";
+       };
+
+       pinctrl_i2c16_default: i2c16_default {
+               function = "I2C16";
+               groups = "I2C16";
+       };
+
+       pinctrl_i2c2_default: i2c2_default {
+               function = "I2C2";
+               groups = "I2C2";
+       };
+
+       pinctrl_i2c3_default: i2c3_default {
+               function = "I2C3";
+               groups = "I2C3";
+       };
+
+       pinctrl_i2c4_default: i2c4_default {
+               function = "I2C4";
+               groups = "I2C4";
+       };
+
+       pinctrl_i2c5_default: i2c5_default {
+               function = "I2C5";
+               groups = "I2C5";
+       };
+
+       pinctrl_i2c6_default: i2c6_default {
+               function = "I2C6";
+               groups = "I2C6";
+       };
+
+       pinctrl_i2c7_default: i2c7_default {
+               function = "I2C7";
+               groups = "I2C7";
+       };
+
+       pinctrl_i2c8_default: i2c8_default {
+               function = "I2C8";
+               groups = "I2C8";
+       };
+
+       pinctrl_i2c9_default: i2c9_default {
+               function = "I2C9";
+               groups = "I2C9";
+       };
+
+       pinctrl_i3c3_default: i3c3_default {
+               function = "I3C3";
+               groups = "I3C3";
+       };
+
+       pinctrl_i3c4_default: i3c4_default {
+               function = "I3C4";
+               groups = "I3C4";
+       };
+
+       pinctrl_i3c5_default: i3c5_default {
+               function = "I3C5";
+               groups = "I3C5";
+       };
+
+       pinctrl_i3c6_default: i3c6_default {
+               function = "I3C6";
+               groups = "I3C6";
+       };
+
+       pinctrl_jtagm_default: jtagm_default {
+               function = "JTAGM";
+               groups = "JTAGM";
+       };
+
+       pinctrl_lhpd_default: lhpd_default {
+               function = "LHPD";
+               groups = "LHPD";
+       };
+
+       pinctrl_lhsirq_default: lhsirq_default {
+               function = "LHSIRQ";
+               groups = "LHSIRQ";
+       };
+
+       pinctrl_lpc_default: lpc_default {
+               function = "LPC";
+               groups = "LPC";
+       };
+
+       pinctrl_lpchc_default: lpchc_default {
+               function = "LPCHC";
+               groups = "LPCHC";
+       };
+
+       pinctrl_lpcpd_default: lpcpd_default {
+               function = "LPCPD";
+               groups = "LPCPD";
+       };
+
+       pinctrl_lpcpme_default: lpcpme_default {
+               function = "LPCPME";
+               groups = "LPCPME";
+       };
+
+       pinctrl_lpcsmi_default: lpcsmi_default {
+               function = "LPCSMI";
+               groups = "LPCSMI";
+       };
+
+       pinctrl_lsirq_default: lsirq_default {
+               function = "LSIRQ";
+               groups = "LSIRQ";
+       };
+
+       pinctrl_maclink1_default: maclink1_default {
+               function = "MACLINK1";
+               groups = "MACLINK1";
+       };
+
+       pinctrl_maclink2_default: maclink2_default {
+               function = "MACLINK2";
+               groups = "MACLINK2";
+       };
+
+       pinctrl_maclink3_default: maclink3_default {
+               function = "MACLINK3";
+               groups = "MACLINK3";
+       };
+
+       pinctrl_maclink4_default: maclink4_default {
+               function = "MACLINK4";
+               groups = "MACLINK4";
+       };
+
+       pinctrl_mdio1_default: mdio1_default {
+               function = "MDIO1";
+               groups = "MDIO1";
+       };
+
+       pinctrl_mdio2_default: mdio2_default {
+               function = "MDIO2";
+               groups = "MDIO2";
+       };
+
+       pinctrl_mdio3_default: mdio3_default {
+               function = "MDIO3";
+               groups = "MDIO3";
+       };
+
+       pinctrl_mdio4_default: mdio4_default {
+               function = "MDIO4";
+               groups = "MDIO4";
+       };
+
+       pinctrl_ncts1_default: ncts1_default {
+               function = "NCTS1";
+               groups = "NCTS1";
+       };
+
+       pinctrl_ncts2_default: ncts2_default {
+               function = "NCTS2";
+               groups = "NCTS2";
+       };
+
+       pinctrl_ncts3_default: ncts3_default {
+               function = "NCTS3";
+               groups = "NCTS3";
+       };
+
+       pinctrl_ncts4_default: ncts4_default {
+               function = "NCTS4";
+               groups = "NCTS4";
+       };
+
+       pinctrl_ndcd1_default: ndcd1_default {
+               function = "NDCD1";
+               groups = "NDCD1";
+       };
+
+       pinctrl_ndcd2_default: ndcd2_default {
+               function = "NDCD2";
+               groups = "NDCD2";
+       };
+
+       pinctrl_ndcd3_default: ndcd3_default {
+               function = "NDCD3";
+               groups = "NDCD3";
+       };
+
+       pinctrl_ndcd4_default: ndcd4_default {
+               function = "NDCD4";
+               groups = "NDCD4";
+       };
+
+       pinctrl_ndsr1_default: ndsr1_default {
+               function = "NDSR1";
+               groups = "NDSR1";
+       };
+
+       pinctrl_ndsr2_default: ndsr2_default {
+               function = "NDSR2";
+               groups = "NDSR2";
+       };
+
+       pinctrl_ndsr3_default: ndsr3_default {
+               function = "NDSR3";
+               groups = "NDSR3";
+       };
+
+       pinctrl_ndsr4_default: ndsr4_default {
+               function = "NDSR4";
+               groups = "NDSR4";
+       };
+
+       pinctrl_ndtr1_default: ndtr1_default {
+               function = "NDTR1";
+               groups = "NDTR1";
+       };
+
+       pinctrl_ndtr2_default: ndtr2_default {
+               function = "NDTR2";
+               groups = "NDTR2";
+       };
+
+       pinctrl_ndtr3_default: ndtr3_default {
+               function = "NDTR3";
+               groups = "NDTR3";
+       };
+
+       pinctrl_ndtr4_default: ndtr4_default {
+               function = "NDTR4";
+               groups = "NDTR4";
+       };
+
+       pinctrl_nri1_default: nri1_default {
+               function = "NRI1";
+               groups = "NRI1";
+       };
+
+       pinctrl_nri2_default: nri2_default {
+               function = "NRI2";
+               groups = "NRI2";
+       };
+
+       pinctrl_nri3_default: nri3_default {
+               function = "NRI3";
+               groups = "NRI3";
+       };
+
+       pinctrl_nri4_default: nri4_default {
+               function = "NRI4";
+               groups = "NRI4";
+       };
+
+       pinctrl_nrts1_default: nrts1_default {
+               function = "NRTS1";
+               groups = "NRTS1";
+       };
+
+       pinctrl_nrts2_default: nrts2_default {
+               function = "NRTS2";
+               groups = "NRTS2";
+       };
+
+       pinctrl_nrts3_default: nrts3_default {
+               function = "NRTS3";
+               groups = "NRTS3";
+       };
+
+       pinctrl_nrts4_default: nrts4_default {
+               function = "NRTS4";
+               groups = "NRTS4";
+       };
+
+       pinctrl_oscclk_default: oscclk_default {
+               function = "OSCCLK";
+               groups = "OSCCLK";
+       };
+
+       pinctrl_pewake_default: pewake_default {
+               function = "PEWAKE";
+               groups = "PEWAKE";
+       };
+
+       pinctrl_pwm0_default: pwm0_default {
+               function = "PWM0";
+               groups = "PWM0";
+       };
+
+       pinctrl_pwm1_default: pwm1_default {
+               function = "PWM1";
+               groups = "PWM1";
+       };
+
+       pinctrl_pwm10g0_default: pwm10g0_default {
+               function = "PWM10";
+               groups = "PWM10G0";
+       };
+
+       pinctrl_pwm10g1_default: pwm10g1_default {
+               function = "PWM10";
+               groups = "PWM10G1";
+       };
+
+       pinctrl_pwm11g0_default: pwm11g0_default {
+               function = "PWM11";
+               groups = "PWM11G0";
+       };
+
+       pinctrl_pwm11g1_default: pwm11g1_default {
+               function = "PWM11";
+               groups = "PWM11G1";
+       };
+
+       pinctrl_pwm12g0_default: pwm12g0_default {
+               function = "PWM12";
+               groups = "PWM12G0";
+       };
+
+       pinctrl_pwm12g1_default: pwm12g1_default {
+               function = "PWM12";
+               groups = "PWM12G1";
+       };
+
+       pinctrl_pwm13g0_default: pwm13g0_default {
+               function = "PWM13";
+               groups = "PWM13G0";
+       };
+
+       pinctrl_pwm13g1_default: pwm13g1_default {
+               function = "PWM13";
+               groups = "PWM13G1";
+       };
+
+       pinctrl_pwm14g0_default: pwm14g0_default {
+               function = "PWM14";
+               groups = "PWM14G0";
+       };
+
+       pinctrl_pwm14g1_default: pwm14g1_default {
+               function = "PWM14";
+               groups = "PWM14G1";
+       };
+
+       pinctrl_pwm15g0_default: pwm15g0_default {
+               function = "PWM15";
+               groups = "PWM15G0";
+       };
+
+       pinctrl_pwm15g1_default: pwm15g1_default {
+               function = "PWM15";
+               groups = "PWM15G1";
+       };
+
+       pinctrl_pwm2_default: pwm2_default {
+               function = "PWM2";
+               groups = "PWM2";
+       };
+
+       pinctrl_pwm3_default: pwm3_default {
+               function = "PWM3";
+               groups = "PWM3";
+       };
+
+       pinctrl_pwm4_default: pwm4_default {
+               function = "PWM4";
+               groups = "PWM4";
+       };
+
+       pinctrl_pwm5_default: pwm5_default {
+               function = "PWM5";
+               groups = "PWM5";
+       };
+
+       pinctrl_pwm6_default: pwm6_default {
+               function = "PWM6";
+               groups = "PWM6";
+       };
+
+       pinctrl_pwm7_default: pwm7_default {
+               function = "PWM7";
+               groups = "PWM7";
+       };
+
+       pinctrl_pwm8g0_default: pwm8g0_default {
+               function = "PWM8";
+               groups = "PWM8G0";
+       };
+
+       pinctrl_pwm8g1_default: pwm8g1_default {
+               function = "PWM8";
+               groups = "PWM8G1";
+       };
+
+       pinctrl_pwm9g0_default: pwm9g0_default {
+               function = "PWM9";
+               groups = "PWM9G0";
+       };
+
+       pinctrl_pwm9g1_default: pwm9g1_default {
+               function = "PWM9";
+               groups = "PWM9G1";
+       };
+
+       pinctrl_qspi1_default: qspi1_default {
+               function = "QSPI1";
+               groups = "QSPI1";
+       };
+
+       pinctrl_qspi2_default: qspi2_default {
+               function = "QSPI2";
+               groups = "QSPI2";
+       };
+
+       pinctrl_rgmii1_default: rgmii1_default {
+               function = "RGMII1";
+               groups = "RGMII1";
+       };
+
+       pinctrl_rgmii2_default: rgmii2_default {
+               function = "RGMII2";
+               groups = "RGMII2";
+       };
+
+       pinctrl_rgmii3_default: rgmii3_default {
+               function = "RGMII3";
+               groups = "RGMII3";
+       };
+
+       pinctrl_rgmii4_default: rgmii4_default {
+               function = "RGMII4";
+               groups = "RGMII4";
+       };
+
+       pinctrl_rmii1_default: rmii1_default {
+               function = "RMII1";
+               groups = "RMII1";
+       };
+
+       pinctrl_rmii2_default: rmii2_default {
+               function = "RMII2";
+               groups = "RMII2";
+       };
+
+       pinctrl_rmii3_default: rmii3_default {
+               function = "RMII3";
+               groups = "RMII3";
+       };
+
+       pinctrl_rmii4_default: rmii4_default {
+               function = "RMII4";
+               groups = "RMII4";
+       };
+
+       pinctrl_rxd1_default: rxd1_default {
+               function = "RXD1";
+               groups = "RXD1";
+       };
+
+       pinctrl_rxd2_default: rxd2_default {
+               function = "RXD2";
+               groups = "RXD2";
+       };
+
+       pinctrl_rxd3_default: rxd3_default {
+               function = "RXD3";
+               groups = "RXD3";
+       };
+
+       pinctrl_rxd4_default: rxd4_default {
+               function = "RXD4";
+               groups = "RXD4";
+       };
+
+       pinctrl_salt1_default: salt1_default {
+               function = "SALT1";
+               groups = "SALT1";
+       };
+
+       pinctrl_salt10g0_default: salt10g0_default {
+               function = "SALT10";
+               groups = "SALT10G0";
+       };
+
+       pinctrl_salt10g1_default: salt10g1_default {
+               function = "SALT10";
+               groups = "SALT10G1";
+       };
+
+       pinctrl_salt11g0_default: salt11g0_default {
+               function = "SALT11";
+               groups = "SALT11G0";
+       };
+
+       pinctrl_salt11g1_default: salt11g1_default {
+               function = "SALT11";
+               groups = "SALT11G1";
+       };
+
+       pinctrl_salt12g0_default: salt12g0_default {
+               function = "SALT12";
+               groups = "SALT12G0";
+       };
+
+       pinctrl_salt12g1_default: salt12g1_default {
+               function = "SALT12";
+               groups = "SALT12G1";
+       };
+
+       pinctrl_salt13g0_default: salt13g0_default {
+               function = "SALT13";
+               groups = "SALT13G0";
+       };
+
+       pinctrl_salt13g1_default: salt13g1_default {
+               function = "SALT13";
+               groups = "SALT13G1";
+       };
+
+       pinctrl_salt14g0_default: salt14g0_default {
+               function = "SALT14";
+               groups = "SALT14G0";
+       };
+
+       pinctrl_salt14g1_default: salt14g1_default {
+               function = "SALT14";
+               groups = "SALT14G1";
+       };
+
+       pinctrl_salt15g0_default: salt15g0_default {
+               function = "SALT15";
+               groups = "SALT15G0";
+       };
+
+       pinctrl_salt15g1_default: salt15g1_default {
+               function = "SALT15";
+               groups = "SALT15G1";
+       };
+
+       pinctrl_salt16g0_default: salt16g0_default {
+               function = "SALT16";
+               groups = "SALT16G0";
+       };
+
+       pinctrl_salt16g1_default: salt16g1_default {
+               function = "SALT16";
+               groups = "SALT16G1";
+       };
+
+       pinctrl_salt2_default: salt2_default {
+               function = "SALT2";
+               groups = "SALT2";
+       };
+
+       pinctrl_salt3_default: salt3_default {
+               function = "SALT3";
+               groups = "SALT3";
+       };
+
+       pinctrl_salt4_default: salt4_default {
+               function = "SALT4";
+               groups = "SALT4";
+       };
+
+       pinctrl_salt5_default: salt5_default {
+               function = "SALT5";
+               groups = "SALT5";
+       };
+
+       pinctrl_salt6_default: salt6_default {
+               function = "SALT6";
+               groups = "SALT6";
+       };
+
+       pinctrl_salt7_default: salt7_default {
+               function = "SALT7";
+               groups = "SALT7";
+       };
+
+       pinctrl_salt8_default: salt8_default {
+               function = "SALT8";
+               groups = "SALT8";
+       };
+
+       pinctrl_salt9g0_default: salt9g0_default {
+               function = "SALT9";
+               groups = "SALT9G0";
+       };
+
+       pinctrl_salt9g1_default: salt9g1_default {
+               function = "SALT9";
+               groups = "SALT9G1";
+       };
+
+       pinctrl_sd1_default: sd1_default {
+               function = "SD1";
+               groups = "SD1";
+       };
+
+       pinctrl_sd2_default: sd2_default {
+               function = "SD2";
+               groups = "SD2";
+       };
+
+       pinctrl_sd3_default: sd3_default {
+               function = "SD3";
+               groups = "SD3";
+       };
+
+       pinctrl_emmc_default: emmc_default {
+               function = "SD3";
+               groups = "EMMC";
+       };
+
+       pinctrl_sgpm1_default: sgpm1_default {
+               function = "SGPM1";
+               groups = "SGPM1";
+       };
+
+       pinctrl_sgps1_default: sgps1_default {
+               function = "SGPS1";
+               groups = "SGPS1";
+       };
+
+       pinctrl_sioonctrl_default: sioonctrl_default {
+               function = "SIOONCTRL";
+               groups = "SIOONCTRL";
+       };
+
+       pinctrl_siopbi_default: siopbi_default {
+               function = "SIOPBI";
+               groups = "SIOPBI";
+       };
+
+       pinctrl_siopbo_default: siopbo_default {
+               function = "SIOPBO";
+               groups = "SIOPBO";
+       };
+
+       pinctrl_siopwreq_default: siopwreq_default {
+               function = "SIOPWREQ";
+               groups = "SIOPWREQ";
+       };
+
+       pinctrl_siopwrgd_default: siopwrgd_default {
+               function = "SIOPWRGD";
+               groups = "SIOPWRGD";
+       };
+
+       pinctrl_sios3_default: sios3_default {
+               function = "SIOS3";
+               groups = "SIOS3";
+       };
+
+       pinctrl_sios5_default: sios5_default {
+               function = "SIOS5";
+               groups = "SIOS5";
+       };
+
+       pinctrl_siosci_default: siosci_default {
+               function = "SIOSCI";
+               groups = "SIOSCI";
+       };
+
+       pinctrl_spi1_default: spi1_default {
+               function = "SPI1";
+               groups = "SPI1";
+       };
+
+       pinctrl_spi1abr_default: spi1abr_default {
+               function = "SPI1ABR";
+               groups = "SPI1ABR";
+       };
+
+       pinctrl_spi1cs1_default: spi1cs1_default {
+               function = "SPI1CS1";
+               groups = "SPI1CS1";
+       };
+
+       pinctrl_spi1wp_default: spi1wp_default {
+               function = "SPI1WP";
+               groups = "SPI1WP";
+       };
+
+       pinctrl_spi2_default: spi2_default {
+               function = "SPI2";
+               groups = "SPI2";
+       };
+
+       pinctrl_spi2cs1_default: spi2cs1_default {
+               function = "SPI2CS1";
+               groups = "SPI2CS1";
+       };
+
+       pinctrl_spi2cs2_default: spi2cs2_default {
+               function = "SPI2CS2";
+               groups = "SPI2CS2";
+       };
+
+       pinctrl_tach0_default: tach0_default {
+               function = "TACH0";
+               groups = "TACH0";
+       };
+
+       pinctrl_tach1_default: tach1_default {
+               function = "TACH1";
+               groups = "TACH1";
+       };
+
+       pinctrl_tach10_default: tach10_default {
+               function = "TACH10";
+               groups = "TACH10";
+       };
+
+       pinctrl_tach11_default: tach11_default {
+               function = "TACH11";
+               groups = "TACH11";
+       };
+
+       pinctrl_tach12_default: tach12_default {
+               function = "TACH12";
+               groups = "TACH12";
+       };
+
+       pinctrl_tach13_default: tach13_default {
+               function = "TACH13";
+               groups = "TACH13";
+       };
+
+       pinctrl_tach14_default: tach14_default {
+               function = "TACH14";
+               groups = "TACH14";
+       };
+
+       pinctrl_tach15_default: tach15_default {
+               function = "TACH15";
+               groups = "TACH15";
+       };
+
+       pinctrl_tach2_default: tach2_default {
+               function = "TACH2";
+               groups = "TACH2";
+       };
+
+       pinctrl_tach3_default: tach3_default {
+               function = "TACH3";
+               groups = "TACH3";
+       };
+
+       pinctrl_tach4_default: tach4_default {
+               function = "TACH4";
+               groups = "TACH4";
+       };
+
+       pinctrl_tach5_default: tach5_default {
+               function = "TACH5";
+               groups = "TACH5";
+       };
+
+       pinctrl_tach6_default: tach6_default {
+               function = "TACH6";
+               groups = "TACH6";
+       };
+
+       pinctrl_tach7_default: tach7_default {
+               function = "TACH7";
+               groups = "TACH7";
+       };
+
+       pinctrl_tach8_default: tach8_default {
+               function = "TACH8";
+               groups = "TACH8";
+       };
+
+       pinctrl_tach9_default: tach9_default {
+               function = "TACH9";
+               groups = "TACH9";
+       };
+
+       pinctrl_thru0_default: thru0_default {
+               function = "THRU0";
+               groups = "THRU0";
+       };
+
+       pinctrl_thru1_default: thru1_default {
+               function = "THRU1";
+               groups = "THRU1";
+       };
+
+       pinctrl_thru2_default: thru2_default {
+               function = "THRU2";
+               groups = "THRU2";
+       };
+
+       pinctrl_thru3_default: thru3_default {
+               function = "THRU3";
+               groups = "THRU3";
+       };
+
+       pinctrl_txd1_default: txd1_default {
+               function = "TXD1";
+               groups = "TXD1";
+       };
+
+       pinctrl_txd2_default: txd2_default {
+               function = "TXD2";
+               groups = "TXD2";
+       };
+
+       pinctrl_txd3_default: txd3_default {
+               function = "TXD3";
+               groups = "TXD3";
+       };
+
+       pinctrl_txd4_default: txd4_default {
+               function = "TXD4";
+               groups = "TXD4";
+       };
+
+       pinctrl_uart10_default: uart10_default {
+               function = "UART10";
+               groups = "UART10";
+       };
+
+       pinctrl_uart11_default: uart11_default {
+               function = "UART11";
+               groups = "UART11";
+       };
+
+       pinctrl_uart12g0_default: uart12g0_default {
+               function = "UART12";
+               groups = "UART12G0";
+       };
+
+       pinctrl_uart12g1_default: uart12g1_default {
+               function = "UART12";
+               groups = "UART12G1";
+       };
+
+       pinctrl_uart13g0_default: uart13g0_default {
+               function = "UART13";
+               groups = "UART13G0";
+       };
+
+       pinctrl_uart13g1_default: uart13g1_default {
+               function = "UART13";
+               groups = "UART13G1";
+       };
+
+       pinctrl_uart6_default: uart6_default {
+               function = "UART6";
+               groups = "UART6";
+       };
+
+       pinctrl_uart7_default: uart7_default {
+               function = "UART7";
+               groups = "UART7";
+       };
+
+       pinctrl_uart8_default: uart8_default {
+               function = "UART8";
+               groups = "UART8";
+       };
+
+       pinctrl_uart9_default: uart9_default {
+               function = "UART9";
+               groups = "UART9";
+       };
+
+       pinctrl_vb_default: vb_default {
+               function = "VB";
+               groups = "VB";
+       };
+
+       pinctrl_vgahs_default: vgahs_default {
+               function = "VGAHS";
+               groups = "VGAHS";
+       };
+
+       pinctrl_vgavs_default: vgavs_default {
+               function = "VGAVS";
+               groups = "VGAVS";
+       };
+
+       pinctrl_wdtrst1_default: wdtrst1_default {
+               function = "WDTRST1";
+               groups = "WDTRST1";
+       };
+
+       pinctrl_wdtrst2_default: wdtrst2_default {
+               function = "WDTRST2";
+               groups = "WDTRST2";
+       };
+
+       pinctrl_wdtrst3_default: wdtrst3_default {
+               function = "WDTRST3";
+               groups = "WDTRST3";
+       };
+
+       pinctrl_wdtrst4_default: wdtrst4_default {
+               function = "WDTRST4";
+               groups = "WDTRST4";
+       };
+};
diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi
new file mode 100644 (file)
index 0000000..3a1422f
--- /dev/null
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright 2019 IBM Corp.
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/clock/ast2600-clock.h>
+
+/ {
+       model = "Aspeed BMC";
+       compatible = "aspeed,ast2600";
+       #address-cells = <1>;
+       #size-cells = <1>;
+       interrupt-parent = <&gic>;
+
+       aliases {
+               serial4 = &uart5;
+       };
+
+
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+               enable-method = "aspeed,ast2600-smp";
+
+               cpu@f00 {
+                       compatible = "arm,cortex-a7";
+                       device_type = "cpu";
+                       reg = <0xf00>;
+               };
+
+               cpu@f01 {
+                       compatible = "arm,cortex-a7";
+                       device_type = "cpu";
+                       reg = <0xf01>;
+               };
+       };
+
+       timer {
+               compatible = "arm,armv7-timer";
+               interrupt-parent = <&gic>;
+               interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>;
+               clocks = <&syscon ASPEED_CLK_HPLL>;
+               arm,cpu-registers-not-fw-configured;
+       };
+
+       ahb {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <1>;
+               device_type = "soc";
+               ranges;
+
+               gic: interrupt-controller@40461000 {
+                       compatible = "arm,cortex-a7-gic";
+                       interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_HIGH)>;
+                       #interrupt-cells = <3>;
+                       interrupt-controller;
+                       interrupt-parent = <&gic>;
+                       reg = <0x40461000 0x1000>,
+                           <0x40462000 0x1000>,
+                           <0x40464000 0x2000>,
+                           <0x40466000 0x2000>;
+               };
+
+               mdio0: mdio@1e650000 {
+                       compatible = "aspeed,ast2600-mdio";
+                       reg = <0x1e650000 0x8>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       status = "disabled";
+               };
+
+               mdio1: mdio@1e650008 {
+                       compatible = "aspeed,ast2600-mdio";
+                       reg = <0x1e650008 0x8>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       status = "disabled";
+               };
+
+               mdio2: mdio@1e650010 {
+                       compatible = "aspeed,ast2600-mdio";
+                       reg = <0x1e650010 0x8>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       status = "disabled";
+               };
+
+               mdio3: mdio@1e650018 {
+                       compatible = "aspeed,ast2600-mdio";
+                       reg = <0x1e650018 0x8>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       status = "disabled";
+               };
+
+               mac0: ftgmac@1e660000 {
+                       compatible = "aspeed,ast2600-mac", "faraday,ftgmac100";
+                       reg = <0x1e660000 0x180>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&syscon ASPEED_CLK_GATE_MAC1CLK>;
+                       status = "disabled";
+               };
+
+               mac1: ftgmac@1e680000 {
+                       compatible = "aspeed,ast2600-mac", "faraday,ftgmac100";
+                       reg = <0x1e680000 0x180>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&syscon ASPEED_CLK_GATE_MAC2CLK>;
+                       status = "disabled";
+               };
+
+               mac2: ftgmac@1e670000 {
+                       compatible = "aspeed,ast2600-mac", "faraday,ftgmac100";
+                       reg = <0x1e670000 0x180>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&syscon ASPEED_CLK_GATE_MAC3CLK>;
+                       status = "disabled";
+               };
+
+               mac3: ftgmac@1e690000 {
+                       compatible = "aspeed,ast2600-mac", "faraday,ftgmac100";
+                       reg = <0x1e690000 0x180>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&syscon ASPEED_CLK_GATE_MAC4CLK>;
+                       status = "disabled";
+               };
+
+               apb {
+                       compatible = "simple-bus";
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       ranges;
+
+                       syscon: syscon@1e6e2000 {
+                               compatible = "aspeed,ast2600-scu", "syscon", "simple-mfd";
+                               reg = <0x1e6e2000 0x1000>;
+                               ranges = <0 0x1e6e2000 0x1000>;
+                               #address-cells = <1>;
+                               #size-cells = <1>;
+                               #clock-cells = <1>;
+                               #reset-cells = <1>;
+
+                               pinctrl: pinctrl {
+                                       compatible = "aspeed,ast2600-pinctrl";
+                               };
+
+                               smp-memram@180 {
+                                       compatible = "aspeed,ast2600-smpmem";
+                                       reg = <0x180 0x40>;
+                               };
+                       };
+
+                       rng: hwrng@1e6e2524 {
+                               compatible = "timeriomem_rng";
+                               reg = <0x1e6e2524 0x4>;
+                               period = <1>;
+                               quality = <100>;
+                       };
+
+                       rtc: rtc@1e781000 {
+                               compatible = "aspeed,ast2600-rtc";
+                               reg = <0x1e781000 0x18>;
+                               interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
+                               status = "disabled";
+                       };
+
+                       uart5: serial@1e784000 {
+                               compatible = "ns16550a";
+                               reg = <0x1e784000 0x1000>;
+                               reg-shift = <2>;
+                               interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>;
+                               clocks = <&syscon ASPEED_CLK_GATE_UART5CLK>;
+                               no-loopback-test;
+                       };
+
+                       wdt1: watchdog@1e785000 {
+                               compatible = "aspeed,ast2600-wdt";
+                               reg = <0x1e785000 0x40>;
+                       };
+
+                       wdt2: watchdog@1e785040 {
+                               compatible = "aspeed,ast2600-wdt";
+                               reg = <0x1e785040 0x40>;
+                               status = "disabled";
+                       };
+
+                       wdt3: watchdog@1e785080 {
+                               compatible = "aspeed,ast2600-wdt";
+                               reg = <0x1e785080 0x40>;
+                               status = "disabled";
+                       };
+
+                       wdt4: watchdog@1e7850c0 {
+                               compatible = "aspeed,ast2600-wdt";
+                               reg = <0x1e7850c0 0x40>;
+                               status = "disabled";
+                       };
+
+                       sdc: sdc@1e740000 {
+                               compatible = "aspeed,ast2600-sd-controller";
+                               reg = <0x1e740000 0x100>;
+                               #address-cells = <1>;
+                               #size-cells = <1>;
+                               ranges = <0 0x1e740000 0x10000>;
+                               clocks = <&syscon ASPEED_CLK_GATE_SDCLK>;
+                               status = "disabled";
+
+                               sdhci0: sdhci@1e740100 {
+                                       compatible = "aspeed,ast2600-sdhci", "sdhci";
+                                       reg = <0x100 0x100>;
+                                       interrupts = <GIC_SPI 43 IRQ_TYPE_LEVEL_HIGH>;
+                                       sdhci,auto-cmd12;
+                                       clocks = <&syscon ASPEED_CLK_SDIO>;
+                                       status = "disabled";
+                               };
+
+                               sdhci1: sdhci@1e740200 {
+                                       compatible = "aspeed,ast2600-sdhci", "sdhci";
+                                       reg = <0x200 0x100>;
+                                       interrupts = <GIC_SPI 43 IRQ_TYPE_LEVEL_HIGH>;
+                                       sdhci,auto-cmd12;
+                                       clocks = <&syscon ASPEED_CLK_SDIO>;
+                                       status = "disabled";
+                               };
+                       };
+
+                       emmc: sdc@1e750000 {
+                               compatible = "aspeed,ast2600-sd-controller";
+                               reg = <0x1e750000 0x100>;
+                               #address-cells = <1>;
+                               #size-cells = <1>;
+                               ranges = <0 0x1e750000 0x10000>;
+                               clocks = <&syscon ASPEED_CLK_GATE_EMMCCLK>;
+                               status = "disabled";
+
+                               sdhci@1e750100 {
+                                       compatible = "aspeed,ast2600-sdhci";
+                                       reg = <0x100 0x100>;
+                                       sdhci,auto-cmd12;
+                                       interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
+                                       clocks = <&syscon ASPEED_CLK_EMMC>;
+                                       pinctrl-names = "default";
+                                       pinctrl-0 = <&pinctrl_emmc_default>;
+                               };
+                       };
+               };
+       };
+};
+
+#include "aspeed-g6-pinctrl.dtsi"
index 21e5914..ea0e7c1 100644 (file)
 
                target-module@20000 {                   /* 0x48020000, ap 3 04.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "uart3";
                        reg = <0x20050 0x4>,
                              <0x20054 0x4>,
                              <0x20058 0x4>;
 
                gpio7_target: target-module@51000 {             /* 0x48051000, ap 45 2e.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "gpio7";
                        reg = <0x51000 0x4>,
                              <0x51010 0x4>,
                              <0x51114 0x4>;
 
                target-module@53000 {                   /* 0x48053000, ap 35 36.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "gpio8";
                        reg = <0x53000 0x4>,
                              <0x53010 0x4>,
                              <0x53114 0x4>;
 
                target-module@55000 {                   /* 0x48055000, ap 13 0e.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "gpio2";
                        reg = <0x55000 0x4>,
                              <0x55010 0x4>,
                              <0x55114 0x4>;
 
                target-module@57000 {                   /* 0x48057000, ap 15 06.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "gpio3";
                        reg = <0x57000 0x4>,
                              <0x57010 0x4>,
                              <0x57114 0x4>;
 
                target-module@59000 {                   /* 0x48059000, ap 17 16.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "gpio4";
                        reg = <0x59000 0x4>,
                              <0x59010 0x4>,
                              <0x59114 0x4>;
 
                target-module@5b000 {                   /* 0x4805b000, ap 19 1e.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "gpio5";
                        reg = <0x5b000 0x4>,
                              <0x5b010 0x4>,
                              <0x5b114 0x4>;
 
                target-module@5d000 {                   /* 0x4805d000, ap 21 26.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "gpio6";
                        reg = <0x5d000 0x4>,
                              <0x5d010 0x4>,
                              <0x5d114 0x4>;
 
                target-module@60000 {                   /* 0x48060000, ap 23 32.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "i2c3";
                        reg = <0x60000 0x8>,
                              <0x60010 0x8>,
                              <0x60090 0x8>;
 
                target-module@66000 {                   /* 0x48066000, ap 63 14.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "uart5";
                        reg = <0x66050 0x4>,
                              <0x66054 0x4>,
                              <0x66058 0x4>;
 
                target-module@68000 {                   /* 0x48068000, ap 53 1c.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "uart6";
                        reg = <0x68050 0x4>,
                              <0x68054 0x4>,
                              <0x68058 0x4>;
 
                target-module@6a000 {                   /* 0x4806a000, ap 24 24.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "uart1";
                        reg = <0x6a050 0x4>,
                              <0x6a054 0x4>,
                              <0x6a058 0x4>;
 
                target-module@6c000 {                   /* 0x4806c000, ap 26 2c.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "uart2";
                        reg = <0x6c050 0x4>,
                              <0x6c054 0x4>,
                              <0x6c058 0x4>;
 
                target-module@6e000 {                   /* 0x4806e000, ap 28 0c.1 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "uart4";
                        reg = <0x6e050 0x4>,
                              <0x6e054 0x4>,
                              <0x6e058 0x4>;
 
                target-module@70000 {                   /* 0x48070000, ap 30 22.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "i2c1";
                        reg = <0x70000 0x8>,
                              <0x70010 0x8>,
                              <0x70090 0x8>;
 
                target-module@72000 {                   /* 0x48072000, ap 32 2a.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "i2c2";
                        reg = <0x72000 0x8>,
                              <0x72010 0x8>,
                              <0x72090 0x8>;
 
                target-module@7a000 {                   /* 0x4807a000, ap 81 3a.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "i2c4";
                        reg = <0x7a000 0x8>,
                              <0x7a010 0x8>,
                              <0x7a090 0x8>;
 
                target-module@7c000 {                   /* 0x4807c000, ap 83 4a.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "i2c5";
                        reg = <0x7c000 0x8>,
                              <0x7c010 0x8>,
                              <0x7c090 0x8>;
 
                target-module@98000 {                   /* 0x48098000, ap 47 08.0 */
                        compatible = "ti,sysc-omap4", "ti,sysc";
-                       ti,hwmods = "mcspi1";
                        reg = <0x98000 0x4>,
                              <0x98010 0x4>;
                        reg-names = "rev", "sysc";
 
                target-module@9a000 {                   /* 0x4809a000, ap 49 10.0 */
                        compatible = "ti,sysc-omap4", "ti,sysc";
-                       ti,hwmods = "mcspi2";
                        reg = <0x9a000 0x4>,
                              <0x9a010 0x4>;
                        reg-names = "rev", "sysc";
 
                target-module@9c000 {                   /* 0x4809c000, ap 51 38.0 */
                        compatible = "ti,sysc-omap4", "ti,sysc";
-                       ti,hwmods = "mmc1";
                        reg = <0x9c000 0x4>,
                              <0x9c010 0x4>;
                        reg-names = "rev", "sysc";
 
                target-module@ad000 {                   /* 0x480ad000, ap 61 20.0 */
                        compatible = "ti,sysc-omap4", "ti,sysc";
-                       ti,hwmods = "mmc3";
                        reg = <0xad000 0x4>,
                              <0xad010 0x4>;
                        reg-names = "rev", "sysc";
 
                target-module@b4000 {                   /* 0x480b4000, ap 65 40.0 */
                        compatible = "ti,sysc-omap4", "ti,sysc";
-                       ti,hwmods = "mmc2";
                        reg = <0xb4000 0x4>,
                              <0xb4010 0x4>;
                        reg-names = "rev", "sysc";
 
                target-module@b8000 {                   /* 0x480b8000, ap 67 48.0 */
                        compatible = "ti,sysc-omap4", "ti,sysc";
-                       ti,hwmods = "mcspi3";
                        reg = <0xb8000 0x4>,
                              <0xb8010 0x4>;
                        reg-names = "rev", "sysc";
 
                target-module@ba000 {                   /* 0x480ba000, ap 69 18.0 */
                        compatible = "ti,sysc-omap4", "ti,sysc";
-                       ti,hwmods = "mcspi4";
                        reg = <0xba000 0x4>,
                              <0xba010 0x4>;
                        reg-names = "rev", "sysc";
 
                target-module@d1000 {                   /* 0x480d1000, ap 71 28.0 */
                        compatible = "ti,sysc-omap4", "ti,sysc";
-                       ti,hwmods = "mmc4";
                        reg = <0xd1000 0x4>,
                              <0xd1010 0x4>;
                        reg-names = "rev", "sysc";
 
                target-module@20000 {                   /* 0x48420000, ap 47 02.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "uart7";
                        reg = <0x20050 0x4>,
                              <0x20054 0x4>,
                              <0x20058 0x4>;
 
                target-module@22000 {                   /* 0x48422000, ap 49 0a.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "uart8";
                        reg = <0x22050 0x4>,
                              <0x22054 0x4>,
                              <0x22058 0x4>;
 
                target-module@24000 {                   /* 0x48424000, ap 51 12.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "uart9";
                        reg = <0x24050 0x4>,
                              <0x24054 0x4>,
                              <0x24058 0x4>;
 
                target-module@60000 {                   /* 0x48460000, ap 9 0e.0 */
                        compatible = "ti,sysc-dra7-mcasp", "ti,sysc";
-                       ti,hwmods = "mcasp1";
                        reg = <0x60000 0x4>,
                              <0x60004 0x4>;
                        reg-names = "rev", "sysc";
 
                target-module@64000 {                   /* 0x48464000, ap 11 1e.0 */
                        compatible = "ti,sysc-dra7-mcasp", "ti,sysc";
-                       ti,hwmods = "mcasp2";
                        reg = <0x64000 0x4>,
                              <0x64004 0x4>;
                        reg-names = "rev", "sysc";
 
                target-module@68000 {                   /* 0x48468000, ap 13 26.0 */
                        compatible = "ti,sysc-dra7-mcasp", "ti,sysc";
-                       ti,hwmods = "mcasp3";
                        reg = <0x68000 0x4>,
                              <0x68004 0x4>;
                        reg-names = "rev", "sysc";
 
                target-module@6c000 {                   /* 0x4846c000, ap 15 2e.0 */
                        compatible = "ti,sysc-dra7-mcasp", "ti,sysc";
-                       ti,hwmods = "mcasp4";
                        reg = <0x6c000 0x4>,
                              <0x6c004 0x4>;
                        reg-names = "rev", "sysc";
 
                target-module@70000 {                   /* 0x48470000, ap 19 36.0 */
                        compatible = "ti,sysc-dra7-mcasp", "ti,sysc";
-                       ti,hwmods = "mcasp5";
                        reg = <0x70000 0x4>,
                              <0x70004 0x4>;
                        reg-names = "rev", "sysc";
 
                target-module@74000 {                   /* 0x48474000, ap 35 14.0 */
                        compatible = "ti,sysc-dra7-mcasp", "ti,sysc";
-                       ti,hwmods = "mcasp6";
                        reg = <0x74000 0x4>,
                              <0x74004 0x4>;
                        reg-names = "rev", "sysc";
 
                target-module@78000 {                   /* 0x48478000, ap 39 0c.0 */
                        compatible = "ti,sysc-dra7-mcasp", "ti,sysc";
-                       ti,hwmods = "mcasp7";
                        reg = <0x78000 0x4>,
                              <0x78004 0x4>;
                        reg-names = "rev", "sysc";
 
                target-module@7c000 {                   /* 0x4847c000, ap 43 04.0 */
                        compatible = "ti,sysc-dra7-mcasp", "ti,sysc";
-                       ti,hwmods = "mcasp8";
                        reg = <0x7c000 0x4>,
                              <0x7c004 0x4>;
                        reg-names = "rev", "sysc";
 
                target-module@84000 {                   /* 0x48484000, ap 3 10.0 */
                        compatible = "ti,sysc-omap4-simple", "ti,sysc";
-                       ti,hwmods = "gmac";
                        reg = <0x85200 0x4>,
                              <0x85208 0x4>,
                              <0x85204 0x4>;
 
                                davinci_mdio: mdio@1000 {
                                        compatible = "ti,cpsw-mdio","ti,davinci_mdio";
+                                       clocks = <&gmac_clkctrl DRA7_GMAC_GMAC_CLKCTRL 0>;
+                                       clock-names = "fck";
                                        #address-cells = <1>;
                                        #size-cells = <0>;
-                                       ti,hwmods = "davinci_mdio";
                                        bus_freq = <1000000>;
                                        reg = <0x1000 0x100>;
                                };
 
                target-module@0 {                       /* 0x4ae10000, ap 5 20.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "gpio1";
                        reg = <0x0 0x4>,
                              <0x10 0x4>,
                              <0x114 0x4>;
 
                target-module@b000 {                    /* 0x4ae2b000, ap 28 02.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "uart10";
                        reg = <0xb050 0x4>,
                              <0xb054 0x4>,
                              <0xb058 0x4>;
index bfaa2de..e2030ba 100644 (file)
@@ -72,7 +72,6 @@
                        reg = <0>;
                        /* 50 ns min period = 20 MHz */
                        spi-max-frequency = <20000000>;
-                       spi-cpol; /* Clock active low */
                        vcc-supply = <&vdisp>;
                        iovcc-supply = <&vdisp>;
                        vci-supply = <&vdisp>;
index 464df42..2f6977a 100644 (file)
                        #address-cells = <3>;
                        #size-cells = <2>;
                        device_type = "pci";
-                       num-lanes = <4>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x40 0x00010000 0x0 0x00010000   /* downstream I/O */
                        #address-cells = <3>;
                        #size-cells = <2>;
                        device_type = "pci";
-                       num-lanes = <4>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x48 0x00010000 0x0 0x00010000   /* downstream I/O */
diff --git a/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts b/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts
new file mode 100644 (file)
index 0000000..6cfa0d4
--- /dev/null
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
+/*
+ * OLPC XO 1.75 Laptop.
+ *
+ * Copyright (C) 2018,2019 Lubomir Rintel <lkundrak@v3.sk>
+ */
+
+/dts-v1/;
+#include "mmp2.dtsi"
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/linux-event-codes.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+       model = "OLPC XO-1.75";
+       compatible = "olpc,xo-1.75", "mrvl,mmp2";
+
+       chosen {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               ranges;
+
+               framebuffer@1fc00000 {
+                       compatible = "simple-framebuffer";
+                       reg = <0x1fc00000 (1200 * 900 * 2)>;
+                       width = <1200>;
+                       height = <900>;
+                       stride = <(1200 * 2)>;
+                       format = "r5g6b5";
+                       clocks = <&soc_clocks MMP2_CLK_DISP0_LCDC>,
+                                <&soc_clocks MMP2_CLK_DISP0>;
+               };
+       };
+
+       memory {
+               linux,usable-memory = <0x0 0x1f800000>;
+               available = <0xcf000 0x1ef31000 0x1000 0xbf000>;
+               reg = <0x0 0x20000000>;
+               device_type = "memory";
+       };
+
+       gpio-keys {
+               compatible = "gpio-keys";
+
+               lid {
+                       label = "Lid";
+                       gpios = <&gpio 129 GPIO_ACTIVE_LOW>;
+                       linux,input-type = <EV_SW>;
+                       linux,code = <SW_LID>;
+                       wakeup-source;
+               };
+
+               tablet_mode {
+                       label = "E-Book Mode";
+                       gpios = <&gpio 128 GPIO_ACTIVE_LOW>;
+                       linux,input-type = <EV_SW>;
+                       linux,code = <SW_TABLET_MODE>;
+                       wakeup-source;
+               };
+
+               microphone_insert {
+                       label = "Microphone Plug";
+                       gpios = <&gpio 96 GPIO_ACTIVE_HIGH>;
+                       linux,input-type = <EV_SW>;
+                       linux,code = <SW_MICROPHONE_INSERT>;
+                       debounce-interval = <100>;
+                       wakeup-source;
+               };
+
+               headphone_insert {
+                       label = "Headphone Plug";
+                       gpios = <&gpio 97 GPIO_ACTIVE_HIGH>;
+                       linux,input-type = <EV_SW>;
+                       linux,code = <SW_HEADPHONE_INSERT>;
+                       debounce-interval = <100>;
+                       wakeup-source;
+               };
+       };
+
+       camera_i2c {
+               compatible = "i2c-gpio";
+               gpios = <&gpio 109 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>,
+                       <&gpio 108 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               i2c-gpio,timeout-ms = <1000>;
+               status = "okay";
+
+               camera@21 {
+                       compatible = "ovti,ov7670";
+                       reg = <0x21>;
+                       reset-gpios = <&gpio 102 GPIO_ACTIVE_LOW>;
+                       powerdown-gpios = <&gpio 150 GPIO_ACTIVE_LOW>;
+                       clocks = <&camera0>;
+                       clock-names = "xclk";
+
+                       port {
+                               ov7670_0: endpoint {
+                                       hsync-active = <1>;
+                                       vsync-active = <1>;
+                                       remote-endpoint = <&camera0_0>;
+                               };
+                       };
+               };
+       };
+
+       battery {
+               compatible = "olpc,xo1.5-battery", "olpc,xo1-battery";
+       };
+
+       wlan_reg: fixedregulator0 {
+               compatible = "regulator-fixed";
+               regulator-name = "wlan";
+               regulator-min-microvolt = <3300000>;
+               regulator-max-microvolt = <3300000>;
+               gpio = <&gpio 34 GPIO_ACTIVE_HIGH>;
+               enable-active-high;
+       };
+
+       wlan_pwrseq: pwrseq0 {
+               compatible = "mmc-pwrseq-sd8787";
+               powerdown-gpios = <&gpio 57 GPIO_ACTIVE_HIGH>;
+               reset-gpios = <&gpio 58 GPIO_ACTIVE_HIGH>;
+       };
+
+       soc {
+               axi@d4200000 {
+                       ap-sp@d4290000 {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+                               compatible = "olpc,ap-sp";
+                               interrupts = <40>;
+                               reg = <0xd4290000 0x1000>;
+                               data-gpios = <&gpio 72 GPIO_ACTIVE_HIGH>;
+                               clk-gpios = <&gpio 71 GPIO_ACTIVE_HIGH>;
+                               status = "okay";
+                       };
+               };
+       };
+};
+
+&uart3 {
+       status = "okay";
+};
+
+&uart4 {
+       status = "okay";
+};
+
+&rtc {
+       status = "okay";
+};
+
+&usb_phy0 {
+       status = "okay";
+};
+
+&usb_otg0 {
+       status = "okay";
+};
+
+&mmc1 {
+       clock-frequency = <50000000>;
+       no-1-8-v;
+       mrvl,clk-delay-cycles = <31>;
+       broken-cd;
+       status = "okay";
+};
+
+&mmc2 {
+       clock-frequency = <50000000>;
+       no-1-8-v;
+       bus-width = <4>;
+       non-removable;
+       broken-cd;
+       wakeup-source;
+       keep-power-in-suspend;
+       mmc-pwrseq = <&wlan_pwrseq>;
+       vmmc-supply = <&wlan_reg>;
+       status = "okay";
+};
+
+&mmc3 {
+       clock-frequency = <50000000>;
+       no-1-8-v;
+       bus-width = <8>;
+       non-removable;
+       broken-cd;
+       mrvl,clk-delay-cycles = <31>;
+       status = "okay";
+};
+
+&twsi1 {
+       status = "okay";
+
+       audio-codec@1a {
+               compatible = "realtek,alc5631";
+               reg = <0x1a>;
+               status = "okay";
+       };
+};
+
+&twsi2 {
+       status = "okay";
+
+       rtc@68 {
+               compatible = "dallas,ds1338";
+               reg = <0x68>;
+               status = "okay";
+       };
+};
+
+&twsi6 {
+       status = "okay";
+
+       accelerometer@1d {
+               compatible = "st,lis331dlh", "st,lis3lv02d";
+               reg = <0x1d>;
+               status = "okay";
+       };
+};
+
+&ssp3 {
+       #address-cells = <0>;
+       spi-slave;
+       status = "okay";
+       ready-gpio = <&gpio 125 GPIO_ACTIVE_HIGH>;
+
+       slave {
+               compatible = "olpc,xo1.75-ec";
+               spi-cpha;
+               cmd-gpio = <&gpio 155 GPIO_ACTIVE_HIGH>;
+       };
+};
+
+&camera0 {
+       status = "okay";
+
+       port {
+               camera0_0: endpoint {
+                       remote-endpoint = <&ov7670_0>;
+               };
+       };
+};
index b6f4074..6a2f072 100644 (file)
                                mrvl,intc-nr-irqs = <2>;
                        };
 
-                       usb_otg_phy0: usb-otg-phy@d4207000 {
+                       usb_phy0: usb-phy@d4207000 {
                                compatible = "marvell,mmp2-usb-phy";
                                reg = <0xd4207000 0x40>;
                                #phy-cells = <0>;
                                interrupts = <44>;
                                clocks = <&soc_clocks MMP2_CLK_USB>;
                                clock-names = "USBCLK";
-                               phys = <&usb_otg_phy0>;
+                               phys = <&usb_phy0>;
                                phy-names = "usb";
                                status = "disabled";
                        };
                                interrupts = <54>;
                                status = "disabled";
                        };
+
+                       camera0: camera@d420a000 {
+                               compatible = "marvell,mmp2-ccic";
+                               reg = <0xd420a000 0x800>;
+                               interrupts = <42>;
+                               clocks = <&soc_clocks MMP2_CLK_CCIC0>;
+                               clock-names = "axi";
+                               #clock-cells = <0>;
+                               clock-output-names = "mclk";
+                               status = "disabled";
+                       };
+
+                       camera1: camera@d420a800 {
+                               compatible = "marvell,mmp2-ccic";
+                               reg = <0xd420a800 0x800>;
+                               interrupts = <30>;
+                               clocks = <&soc_clocks MMP2_CLK_CCIC1>;
+                               clock-names = "axi";
+                               #clock-cells = <0>;
+                               clock-output-names = "mclk";
+                               status = "disabled";
+                       };
                };
 
                apb@d4000000 {  /* APB */
                                interrupts = <27>;
                                clocks = <&soc_clocks MMP2_CLK_UART0>;
                                resets = <&soc_clocks MMP2_CLK_UART0>;
+                               reg-shift = <2>;
                                status = "disabled";
                        };
 
                                interrupts = <28>;
                                clocks = <&soc_clocks MMP2_CLK_UART1>;
                                resets = <&soc_clocks MMP2_CLK_UART1>;
+                               reg-shift = <2>;
                                status = "disabled";
                        };
 
                                interrupts = <24>;
                                clocks = <&soc_clocks MMP2_CLK_UART2>;
                                resets = <&soc_clocks MMP2_CLK_UART2>;
+                               reg-shift = <2>;
                                status = "disabled";
                        };
 
                                interrupts = <46>;
                                clocks = <&soc_clocks MMP2_CLK_UART3>;
                                resets = <&soc_clocks MMP2_CLK_UART3>;
+                               reg-shift = <2>;
                                status = "disabled";
                        };
 
                                status = "disabled";
                        };
 
-                       ssp1: ssp@d4035000 {
+                       ssp1: spi@d4035000 {
                                compatible = "marvell,mmp2-ssp";
                                reg = <0xd4035000 0x1000>;
                                clocks = <&soc_clocks MMP2_CLK_SSP0>;
                                interrupts = <0>;
+                               #address-cells = <1>;
+                               #size-cells = <0>;
                                status = "disabled";
                        };
 
-                       ssp2: ssp@d4036000 {
+                       ssp2: spi@d4036000 {
                                compatible = "marvell,mmp2-ssp";
                                reg = <0xd4036000 0x1000>;
                                clocks = <&soc_clocks MMP2_CLK_SSP1>;
                                interrupts = <1>;
+                               #address-cells = <1>;
+                               #size-cells = <0>;
                                status = "disabled";
                        };
 
-                       ssp3: ssp@d4037000 {
+                       ssp3: spi@d4037000 {
                                compatible = "marvell,mmp2-ssp";
                                reg = <0xd4037000 0x1000>;
                                clocks = <&soc_clocks MMP2_CLK_SSP2>;
                                interrupts = <20>;
+                               #address-cells = <1>;
+                               #size-cells = <0>;
                                status = "disabled";
                        };
 
-                       ssp4: ssp@d4039000 {
+                       ssp4: spi@d4039000 {
                                compatible = "marvell,mmp2-ssp";
                                reg = <0xd4039000 0x1000>;
                                clocks = <&soc_clocks MMP2_CLK_SSP3>;
                                interrupts = <21>;
+                               #address-cells = <1>;
+                               #size-cells = <0>;
                                status = "disabled";
                        };
                };
 
-               soc_clocks: clocks{
+               soc_clocks: clocks {
                        compatible = "marvell,mmp2-clock";
                        reg = <0xd4050000 0x1000>,
                              <0xd4282800 0x400>,
index f572a47..7b09cbe 100644 (file)
                                interrupts = <18>;
                        };
                };
+
+               /*
+                * On omap34xx the OCP registers do not seem to be accessible
+                * at all unlike on 36xx. Maybe SGX is permanently set to
+                * "OCP bypass mode", or maybe there is OCP_SYSCONFIG that is
+                * write-only at 0x50000e10. We detect SGX based on the SGX
+                * revision register instead of the unreadable OCP revision
+                * register. Also note that on early 34xx es1 revision there
+                * are also different clocks, but we do not have any dts users
+                * for it.
+                */
+               sgx_module: target-module@50000000 {
+                       compatible = "ti,sysc-omap2", "ti,sysc";
+                       reg = <0x50000014 0x4>;
+                       reg-names = "rev";
+                       clocks = <&sgx_fck>, <&sgx_ick>;
+                       clock-names = "fck", "ick";
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       ranges = <0 0x50000000 0x4000>;
+
+                       /*
+                        * Closed source PowerVR driver, no child device
+                        * binding or driver in mainline
+                        */
+               };
        };
 
        thermal_zones: thermal-zones {
index 6fb23ad..1e552f0 100644 (file)
                                interrupts = <18>;
                        };
                };
+
+               /*
+                * Note that the sysconfig register layout is a subset of the
+                * "ti,sysc-omap4" type register with just sidle and midle bits
+                * available while omap34xx has "ti,sysc-omap2" type sysconfig.
+                */
+               sgx_module: target-module@50000000 {
+                       compatible = "ti,sysc-omap4", "ti,sysc";
+                       reg = <0x5000fe00 0x4>,
+                             <0x5000fe10 0x4>;
+                       reg-names = "rev", "sysc";
+                       ti,sysc-midle = <SYSC_IDLE_FORCE>,
+                                       <SYSC_IDLE_NO>,
+                                       <SYSC_IDLE_SMART>;
+                       ti,sysc-sidle = <SYSC_IDLE_FORCE>,
+                                       <SYSC_IDLE_NO>,
+                                       <SYSC_IDLE_SMART>;
+                       clocks = <&sgx_fck>, <&sgx_ick>;
+                       clock-names = "fck", "ick";
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       ranges = <0 0x50000000 0x2000000>;
+
+                       /*
+                        * Closed source PowerVR driver, no child device
+                        * binding or driver in mainline
+                        */
+               };
        };
 
        thermal_zones: thermal-zones {
index 67072df..8e6662b 100644 (file)
 
                target-module@30000 {                   /* 0x40130000, ap 14 0e.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "wd_timer3";
                        reg = <0x30000 0x4>,
                              <0x30010 0x4>,
                              <0x30014 0x4>;
index bea05dc..d60d5e0 100644 (file)
                        };
                };
 
+               /* d2d mdm */
                target-module@36000 {                   /* 0x4a0b6000, ap 69 60.0 */
-                       compatible = "ti,sysc";
-                       status = "disabled";
+                       compatible = "ti,sysc-omap2", "ti,sysc";
+                       reg = <0x36000 0x4>,
+                             <0x36010 0x4>,
+                             <0x36014 0x4>;
+                       reg-names = "rev", "sysc", "syss";
+                       ti,sysc-mask = <(SYSC_OMAP2_SOFTRESET | SYSC_OMAP2_AUTOIDLE)>;
+                       ti,sysc-sidle = <SYSC_IDLE_FORCE>,
+                                       <SYSC_IDLE_NO>,
+                                       <SYSC_IDLE_SMART>,
+                                       <SYSC_IDLE_SMART_WKUP>;
+                       ti,syss-mask = <1>;
+                       /* Domains (V, P, C): core, core_pwrdm, d2d_clkdm */
+                       clocks = <&d2d_clkctrl OMAP4_C2C_CLKCTRL 0>;
+                       clock-names = "fck";
                        #address-cells = <1>;
                        #size-cells = <1>;
                        ranges = <0x0 0x36000 0x1000>;
                };
 
+               /* d2d mpu */
                target-module@4d000 {                   /* 0x4a0cd000, ap 78 58.0 */
-                       compatible = "ti,sysc";
-                       status = "disabled";
+                       compatible = "ti,sysc-omap2", "ti,sysc";
+                       reg = <0x4d000 0x4>,
+                             <0x4d010 0x4>,
+                             <0x4d014 0x4>;
+                       reg-names = "rev", "sysc", "syss";
+                       ti,sysc-mask = <(SYSC_OMAP2_SOFTRESET | SYSC_OMAP2_AUTOIDLE)>;
+                       ti,sysc-sidle = <SYSC_IDLE_FORCE>,
+                                       <SYSC_IDLE_NO>,
+                                       <SYSC_IDLE_SMART>,
+                                       <SYSC_IDLE_SMART_WKUP>;
+                       ti,syss-mask = <1>;
+                       /* Domains (V, P, C): core, core_pwrdm, d2d_clkdm */
+                       clocks = <&d2d_clkctrl OMAP4_C2C_CLKCTRL 0>;
+                       clock-names = "fck";
                        #address-cells = <1>;
                        #size-cells = <1>;
                        ranges = <0x0 0x4d000 0x1000>;
 
                target-module@4000 {                    /* 0x4a314000, ap 7 18.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "wd_timer2";
                        reg = <0x4000 0x4>,
                              <0x4010 0x4>,
                              <0x4014 0x4>;
 
                target-module@60000 {                   /* 0x48060000, ap 25 1e.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "i2c3";
                        reg = <0x60000 0x8>,
                              <0x60010 0x8>,
                              <0x60090 0x8>;
 
                target-module@70000 {                   /* 0x48070000, ap 32 28.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "i2c1";
                        reg = <0x70000 0x8>,
                              <0x70010 0x8>,
                              <0x70090 0x8>;
 
                target-module@72000 {                   /* 0x48072000, ap 34 30.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "i2c2";
                        reg = <0x72000 0x8>,
                              <0x72010 0x8>,
                              <0x72090 0x8>;
 
                target-module@150000 {                  /* 0x48350000, ap 77 4c.0 */
                        compatible = "ti,sysc-omap2", "ti,sysc";
-                       ti,hwmods = "i2c4";
                        reg = <0x150000 0x8>,
                              <0x150010 0x8>,
                              <0x150090 0x8>;
index c43e52f..7cc95bc 100644 (file)
 
                target-module@56000000 {
                        compatible = "ti,sysc-omap4", "ti,sysc";
-                       ti,hwmods = "gpu";
                        reg = <0x5601fc00 0x4>,
                              <0x5601fc10 0x4>;
                        reg-names = "rev", "sysc";
index edfd26c..1fb7937 100644 (file)
                        ports-implemented = <0x1>;
                };
 
+               target-module@56000000 {
+                       compatible = "ti,sysc-omap4", "ti,sysc";
+                       reg = <0x5600fe00 0x4>,
+                             <0x5600fe10 0x4>;
+                       reg-names = "rev", "sysc";
+                       ti,sysc-midle = <SYSC_IDLE_FORCE>,
+                                       <SYSC_IDLE_NO>,
+                                       <SYSC_IDLE_SMART>;
+                       ti,sysc-sidle = <SYSC_IDLE_FORCE>,
+                                       <SYSC_IDLE_NO>,
+                                       <SYSC_IDLE_SMART>;
+                       clocks = <&gpu_clkctrl OMAP5_GPU_CLKCTRL 0>;
+                       clock-names = "fck";
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       ranges = <0 0x56000000 0x2000000>;
+
+                       /*
+                        * Closed source PowerVR driver, no child device
+                        * binding or driver in mainline
+                        */
+               };
+
                dss: dss@58000000 {
                        compatible = "ti,omap5-dss";
                        reg = <0x58000000 0x80>;
index 33e8dd9..fac2e57 100644 (file)
                };
        };
 
+       gpu_cm: clock-controller@1500 {
+               compatible = "ti,omap4-cm";
+               reg = <0x1500 0x100>;
+               #address-cells = <1>;
+               #size-cells = <1>;
+               ranges = <0 0x1500 0x100>;
+
+               gpu_clkctrl: clk@20 {
+                       compatible = "ti,clkctrl";
+                       reg = <0x20 0x4>;
+                       #clock-cells = <2>;
+               };
+       };
+
        l3init_cm: l3init_cm@1600 {
                compatible = "ti,omap4-cm";
                reg = <0x1600 0x100>;
similarity index 50%
rename from arch/arm/mm/cache-aurora-l2.h
rename to arch/arm/include/asm/hardware/cache-aurora-l2.h
index c861247..39769ff 100644 (file)
@@ -31,6 +31,9 @@
 #define AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU \
        (3 << AURORA_ACR_REPLACEMENT_OFFSET)
 
+#define AURORA_ACR_PARITY_EN   (1 << 21)
+#define AURORA_ACR_ECC_EN      (1 << 20)
+
 #define AURORA_ACR_FORCE_WRITE_POLICY_OFFSET   0
 #define AURORA_ACR_FORCE_WRITE_POLICY_MASK     \
        (0x3 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
 #define AURORA_ACR_FORCE_WRITE_THRO_POLICY     \
        (2 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
 
-#define MAX_RANGE_SIZE         1024
+#define AURORA_ERR_CNT_REG          0x600
+#define AURORA_ERR_ATTR_CAP_REG     0x608
+#define AURORA_ERR_ADDR_CAP_REG     0x60c
+#define AURORA_ERR_WAY_CAP_REG      0x610
+#define AURORA_ERR_INJECT_CTL_REG   0x614
+#define AURORA_ERR_INJECT_MASK_REG  0x618
+
+#define AURORA_ERR_CNT_CLR_OFFSET         31
+#define AURORA_ERR_CNT_CLR                \
+       (0x1 << AURORA_ERR_CNT_CLR_OFFSET)
+#define AURORA_ERR_CNT_UE_OFFSET          16
+#define AURORA_ERR_CNT_UE_MASK             \
+       (0x7fff << AURORA_ERR_CNT_UE_OFFSET)
+#define AURORA_ERR_CNT_CE_OFFSET           0
+#define AURORA_ERR_CNT_CE_MASK             \
+       (0xffff << AURORA_ERR_CNT_CE_OFFSET)
+
+#define AURORA_ERR_ATTR_SRC_OFF           16
+#define AURORA_ERR_ATTR_SRC_MSK            \
+       (0x7 << AURORA_ERR_ATTR_SRC_OFF)
+#define AURORA_ERR_ATTR_TXN_OFF           12
+#define AURORA_ERR_ATTR_TXN_MSK            \
+       (0xf << AURORA_ERR_ATTR_TXN_OFF)
+#define AURORA_ERR_ATTR_ERR_OFF            8
+#define AURORA_ERR_ATTR_ERR_MSK            \
+       (0x3 << AURORA_ERR_ATTR_ERR_OFF)
+#define AURORA_ERR_ATTR_CAP_VALID_OFF      0
+#define AURORA_ERR_ATTR_CAP_VALID          \
+       (0x1 << AURORA_ERR_ATTR_CAP_VALID_OFF)
+
+#define AURORA_ERR_ADDR_CAP_ADDR_MASK 0xffffffe0
+
+#define AURORA_ERR_WAY_IDX_OFF             8
+#define AURORA_ERR_WAY_IDX_MSK             \
+       (0xfff << AURORA_ERR_WAY_IDX_OFF)
+#define AURORA_ERR_WAY_CAP_WAY_OFFSET      1
+#define AURORA_ERR_WAY_CAP_WAY_MASK        \
+       (0xf << AURORA_ERR_WAY_CAP_WAY_OFFSET)
+
+#define AURORA_ERR_INJECT_CTL_ADDR_MASK 0xfffffff0
+#define AURORA_ERR_ATTR_TXN_OFF   12
+#define AURORA_ERR_INJECT_CTL_EN_MASK          0x3
+#define AURORA_ERR_INJECT_CTL_EN_PARITY        0x2
+#define AURORA_ERR_INJECT_CTL_EN_ECC           0x1
+
+#define AURORA_MAX_RANGE_SIZE  1024
 
 #define AURORA_WAY_SIZE_SHIFT  2
 
index a2a68b7..069da39 100644 (file)
@@ -15,8 +15,6 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
-#define check_pgt_cache()              do { } while (0)
-
 #ifdef CONFIG_MMU
 
 #define _PAGE_USER_TABLE       (PMD_TYPE_TABLE | PMD_BIT4 | PMD_DOMAIN(DOMAIN_USER))
index d0de24f..010fa1a 100644 (file)
@@ -71,11 +71,6 @@ typedef pte_t *pte_addr_t;
 extern unsigned int kobjsize(const void *objp);
 
 /*
- * No page table caches to initialise.
- */
-#define pgtable_cache_init()   do { } while (0)
-
-/*
  * All 32bit addresses are effectively valid for vmalloc...
  * Sort of meaningless for non-VM targets.
  */
index f2e990d..3ae120c 100644 (file)
@@ -368,8 +368,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 
-#define pgtable_cache_init() do { } while (0)
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* CONFIG_MMU */
index 20c2f42..614bf82 100644 (file)
@@ -140,8 +140,6 @@ static inline void prefetchw(const void *ptr)
 #endif
 #endif
 
-#define HAVE_ARCH_PICK_MMAP_LAYOUT
-
 #endif
 
 #endif /* __ASM_ARM_PROCESSOR_H */
index b75ea15..669474a 100644 (file)
@@ -44,7 +44,7 @@ static inline void __tlb_remove_table(void *_table)
 static inline void
 __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr)
 {
-       pgtable_page_dtor(pte);
+       pgtable_pte_page_dtor(pte);
 
 #ifndef CONFIG_ARM_LPAE
        /*
index a4fb0f8..2924d79 100644 (file)
@@ -697,9 +697,9 @@ static struct attribute_group armv7_pmuv2_events_attr_group = {
 /*
  * Event filters for PMUv2
  */
-#define        ARMV7_EXCLUDE_PL1       (1 << 31)
-#define        ARMV7_EXCLUDE_USER      (1 << 30)
-#define        ARMV7_INCLUDE_HYP       (1 << 27)
+#define        ARMV7_EXCLUDE_PL1       BIT(31)
+#define        ARMV7_EXCLUDE_USER      BIT(30)
+#define        ARMV7_INCLUDE_HYP       BIT(27)
 
 /*
  * Secure debug enable reg
index f934a67..9485acc 100644 (file)
@@ -319,11 +319,6 @@ unsigned long get_wchan(struct task_struct *p)
        return 0;
 }
 
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-       return randomize_page(mm->brk, 0x02000000);
-}
-
 #ifdef CONFIG_MMU
 #ifdef CONFIG_KUSER_HELPERS
 /*
index 8872acf..9bf16c9 100644 (file)
@@ -194,7 +194,6 @@ static int __init vdso_init(void)
        }
 
        text_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
-       pr_debug("vdso: %i text pages at base %p\n", text_pages, vdso_start);
 
        /* Allocate the VDSO text pagelist */
        vdso_text_pagelist = kcalloc(text_pages, sizeof(struct page *),
index b25c545..6d2ba45 100644 (file)
@@ -5,7 +5,7 @@
 # Copyright (C) 1995-2000 Russell King
 #
 
-lib-y          := backtrace.o changebit.o csumipv6.o csumpartial.o   \
+lib-y          := changebit.o csumipv6.o csumpartial.o               \
                   csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
                   delay.o delay-loop.o findbit.o memchr.o memcpy.o   \
                   memmove.o memset.o setbit.o                        \
@@ -19,6 +19,12 @@ lib-y                := backtrace.o changebit.o csumipv6.o csumpartial.o   \
 mmu-y          := clear_user.o copy_page.o getuser.o putuser.o       \
                   copy_from_user.o copy_to_user.o
 
+ifdef CONFIG_CC_IS_CLANG
+  lib-y        += backtrace-clang.o
+else
+  lib-y        += backtrace.o
+endif
+
 # using lib_ here won't override already available weak symbols
 obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o
 
diff --git a/arch/arm/lib/backtrace-clang.S b/arch/arm/lib/backtrace-clang.S
new file mode 100644 (file)
index 0000000..2ff3751
--- /dev/null
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *  linux/arch/arm/lib/backtrace-clang.S
+ *
+ *  Copyright (C) 2019 Nathan Huckleberry
+ *
+ */
+#include <linux/kern_levels.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+               .text
+
+/* fp is 0 or stack frame */
+
+#define frame  r4
+#define sv_fp  r5
+#define sv_pc  r6
+#define mask   r7
+#define sv_lr  r8
+
+ENTRY(c_backtrace)
+
+#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
+               ret     lr
+ENDPROC(c_backtrace)
+#else
+
+
+/*
+ * Clang does not store pc or sp in function prologues so we don't know exactly
+ * where the function starts.
+ *
+ * We can treat the current frame's lr as the saved pc and the preceding
+ * frame's lr as the current frame's lr, but we can't trace the most recent
+ * call.  Inserting a false stack frame allows us to reference the function
+ * called last in the stacktrace.
+ *
+ * If the call instruction was a bl we can look at the caller's branch
+ * instruction to calculate the saved pc.  We can recover the pc in most cases,
+ * but in cases such as calling function pointers we cannot. In this case,
+ * default to using the lr. This will be some address in the function, but will
+ * not be the function start.
+ *
+ * Unfortunately due to the stack frame layout we can't dump r0 - r3, but these
+ * are less frequently saved.
+ *
+ * Stack frame layout:
+ *             <larger addresses>
+ *             saved lr
+ *     frame=> saved fp
+ *             optionally saved caller registers (r4 - r10)
+ *             optionally saved arguments (r0 - r3)
+ *             <top of stack frame>
+ *             <smaller addresses>
+ *
+ * Functions start with the following code sequence:
+ * corrected pc =>  stmfd sp!, {..., fp, lr}
+ *             add fp, sp, #x
+ *             stmfd sp!, {r0 - r3} (optional)
+ *
+ *
+ *
+ *
+ *
+ *
+ * The diagram below shows an example stack setup for dump_stack.
+ *
+ * The frame for c_backtrace has pointers to the code of dump_stack. This is
+ * why the frame of c_backtrace is used for the pc calculation of
+ * dump_stack. This is why we must move back a frame to print dump_stack.
+ *
+ * The stored locals for dump_stack are in dump_stack's frame. This means that
+ * to fully print dump_stack's frame we need both the frame for dump_stack (for
+ * locals) and the frame that was called by dump_stack (for pc).
+ *
+ * To print locals we must know where the function start is. If we read the
+ * function prologue opcodes we can determine which variables are stored in the
+ * stack frame.
+ *
+ * To find the function start of dump_stack we can look at the stored LR of
+ * show_stack. It points at the instruction directly after the bl dump_stack.
+ * We can then read the offset from the bl opcode to determine where the branch
+ * takes us.  The address calculated must be the start of dump_stack.
+ *
+ * c_backtrace frame           dump_stack:
+ * {[LR]    }  ============|   ...
+ * {[FP]    }  =======|    |   bl c_backtrace
+ *                    |    |=> ...
+ * {[R4-R10]}         |
+ * {[R0-R3] }         |        show_stack:
+ * dump_stack frame   |        ...
+ * {[LR]    } =============|   bl dump_stack
+ * {[FP]    } <=======|    |=> ...
+ * {[R4-R10]}
+ * {[R0-R3] }
+ */
+
+               stmfd   sp!, {r4 - r9, fp, lr}  @ Save an extra register
+                                               @ to ensure 8 byte alignment
+               movs    frame, r0               @ if frame pointer is zero
+               beq     no_frame                @ we have no stack frames
+               tst     r1, #0x10               @ 26 or 32-bit mode?
+               moveq   mask, #0xfc000003
+               movne   mask, #0                @ mask for 32-bit
+
+/*
+ * Switches the current frame to be the frame for dump_stack.
+ */
+               add     frame, sp, #24          @ switch to false frame
+for_each_frame:        tst     frame, mask             @ Check for address exceptions
+               bne     no_frame
+
+/*
+ * sv_fp is the stack frame with the locals for the current considered
+ * function.
+ *
+ * sv_pc is the saved lr from the frame above. This is a pointer to a code
+ * address within the current considered function, but it is not the function
+ * start. This value gets updated to be the function start later if it is
+ * possible.
+ */
+1001:          ldr     sv_pc, [frame, #4]      @ get saved 'pc'
+1002:          ldr     sv_fp, [frame, #0]      @ get saved fp
+
+               teq     sv_fp, mask             @ make sure next frame exists
+               beq     no_frame
+
+/*
+ * sv_lr is the lr from the function that called the current function. This is
+ * a pointer to a code address in the current function's caller.  sv_lr-4 is
+ * the instruction used to call the current function.
+ *
+ * This sv_lr can be used to calculate the function start if the function was
+ * called using a bl instruction. If the function start can be recovered sv_pc
+ * is overwritten with the function start.
+ *
+ * If the current function was called using a function pointer we cannot
+ * recover the function start and instead continue with sv_pc as an arbitrary
+ * value within the current function. If this is the case we cannot print
+ * registers for the current function, but the stacktrace is still printed
+ * properly.
+ */
+1003:          ldr     sv_lr, [sv_fp, #4]      @ get saved lr from next frame
+
+               ldr     r0, [sv_lr, #-4]        @ get call instruction
+               ldr     r3, .Lopcode+4
+               and     r2, r3, r0              @ is this a bl call
+               teq     r2, r3
+               bne     finished_setup          @ give up if it's not
+               and     r0, #0xffffff           @ get call offset 24-bit int
+               lsl     r0, r0, #8              @ sign extend offset
+               asr     r0, r0, #8
+               ldr     sv_pc, [sv_fp, #4]      @ get lr address
+               add     sv_pc, sv_pc, #-4       @ get call instruction address
+               add     sv_pc, sv_pc, #8        @ take care of prefetch
+               add     sv_pc, sv_pc, r0, lsl #2@ find function start
+
+finished_setup:
+
+               bic     sv_pc, sv_pc, mask      @ mask PC/LR for the mode
+
+/*
+ * Print the function (sv_pc) and where it was called from (sv_lr).
+ */
+1004:          mov     r0, sv_pc
+
+               mov     r1, sv_lr
+               mov     r2, frame
+               bic     r1, r1, mask            @ mask PC/LR for the mode
+               bl      dump_backtrace_entry
+
+/*
+ * Test if the function start is a stmfd instruction to determine which
+ * registers were stored in the function prologue.
+ *
+ * If we could not recover the sv_pc because we were called through a function
+ * pointer the comparison will fail and no registers will print. Unwinding will
+ * continue as if there had been no registers stored in this frame.
+ */
+1005:          ldr     r1, [sv_pc, #0]         @ if stmfd sp!, {..., fp, lr}
+               ldr     r3, .Lopcode            @ instruction exists,
+               teq     r3, r1, lsr #11
+               ldr     r0, [frame]             @ locals are stored in
+                                               @ the preceding frame
+               subeq   r0, r0, #4
+               bleq    dump_backtrace_stm      @ dump saved registers
+
+/*
+ * Stop if we are out of frames or if the next frame is invalid.
+ */
+               teq     sv_fp, #0               @ zero saved fp means
+               beq     no_frame                @ no further frames
+
+               cmp     sv_fp, frame            @ next frame must be
+               mov     frame, sv_fp            @ above the current frame
+               bhi     for_each_frame
+
+1006:          adr     r0, .Lbad
+               mov     r1, frame
+               bl      printk
+no_frame:      ldmfd   sp!, {r4 - r9, fp, pc}
+ENDPROC(c_backtrace)
+               .pushsection __ex_table,"a"
+               .align  3
+               .long   1001b, 1006b
+               .long   1002b, 1006b
+               .long   1003b, 1006b
+               .long   1004b, 1006b
+               .long   1005b, 1006b
+               .popsection
+
+.Lbad:         .asciz  "Backtrace aborted due to bad frame pointer <%p>\n"
+               .align
+.Lopcode:      .word   0xe92d4800 >> 11        @ stmfd sp!, {... fp, lr}
+               .word   0x0b000000              @ bl if these bits are set
+
+#endif
index f837866..9dab1f5 100644 (file)
@@ -19,6 +19,7 @@ menuconfig ARCH_EXYNOS
        select EXYNOS_SROM
        select EXYNOS_PM_DOMAINS if PM_GENERIC_DOMAINS
        select GPIOLIB
+       select HAVE_ARM_ARCH_TIMER if ARCH_EXYNOS5 && VIRTUALIZATION
        select HAVE_ARM_SCU if SMP
        select HAVE_S3C2410_I2C if I2C
        select HAVE_S3C2410_WATCHDOG if WATCHDOG
index aaa6092..3de3d7a 100644 (file)
@@ -30,7 +30,6 @@ extern struct omap_hwmod_ocp_if am33xx_l3_main__gfx;
 extern struct omap_hwmod_ocp_if am33xx_l4_wkup__rtc;
 extern struct omap_hwmod_ocp_if am33xx_l4_per__dcan0;
 extern struct omap_hwmod_ocp_if am33xx_l4_per__dcan1;
-extern struct omap_hwmod_ocp_if am33xx_cpgmac0__mdio;
 extern struct omap_hwmod_ocp_if am33xx_l4_ls__elm;
 extern struct omap_hwmod_ocp_if am33xx_l4_ls__epwmss0;
 extern struct omap_hwmod_ocp_if am33xx_l4_ls__epwmss1;
@@ -72,8 +71,6 @@ extern struct omap_hwmod am33xx_rng_hwmod;
 extern struct omap_hwmod am33xx_ocmcram_hwmod;
 extern struct omap_hwmod am33xx_smartreflex0_hwmod;
 extern struct omap_hwmod am33xx_smartreflex1_hwmod;
-extern struct omap_hwmod am33xx_cpgmac0_hwmod;
-extern struct omap_hwmod am33xx_mdio_hwmod;
 extern struct omap_hwmod am33xx_dcan0_hwmod;
 extern struct omap_hwmod am33xx_dcan1_hwmod;
 extern struct omap_hwmod am33xx_elm_hwmod;
index 47a0e30..63698ff 100644 (file)
@@ -122,12 +122,6 @@ struct omap_hwmod_ocp_if am33xx_l4_per__dcan1 = {
        .user           = OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-struct omap_hwmod_ocp_if am33xx_cpgmac0__mdio = {
-       .master         = &am33xx_cpgmac0_hwmod,
-       .slave          = &am33xx_mdio_hwmod,
-       .user           = OCP_USER_MPU,
-};
-
 struct omap_hwmod_ocp_if am33xx_l4_ls__elm = {
        .master         = &am33xx_l4_ls_hwmod,
        .slave          = &am33xx_elm_hwmod,
index adb6271..dd939e1 100644 (file)
@@ -350,54 +350,6 @@ struct omap_hwmod_class am33xx_control_hwmod_class = {
 };
 
 /*
- * 'cpgmac' class
- * cpsw/cpgmac sub system
- */
-static struct omap_hwmod_class_sysconfig am33xx_cpgmac_sysc = {
-       .rev_offs       = 0x0,
-       .sysc_offs      = 0x8,
-       .syss_offs      = 0x4,
-       .sysc_flags     = (SYSC_HAS_SIDLEMODE | SYSC_HAS_MIDLEMODE |
-                          SYSS_HAS_RESET_STATUS),
-       .idlemodes      = (SIDLE_FORCE | SIDLE_NO | MSTANDBY_FORCE |
-                          MSTANDBY_NO),
-       .sysc_fields    = &omap_hwmod_sysc_type3,
-};
-
-static struct omap_hwmod_class am33xx_cpgmac0_hwmod_class = {
-       .name           = "cpgmac0",
-       .sysc           = &am33xx_cpgmac_sysc,
-};
-
-struct omap_hwmod am33xx_cpgmac0_hwmod = {
-       .name           = "cpgmac0",
-       .class          = &am33xx_cpgmac0_hwmod_class,
-       .clkdm_name     = "cpsw_125mhz_clkdm",
-       .flags          = (HWMOD_SWSUP_SIDLE | HWMOD_SWSUP_MSTANDBY),
-       .main_clk       = "cpsw_125mhz_gclk",
-       .mpu_rt_idx     = 1,
-       .prcm           = {
-               .omap4  = {
-                       .modulemode     = MODULEMODE_SWCTRL,
-               },
-       },
-};
-
-/*
- * mdio class
- */
-static struct omap_hwmod_class am33xx_mdio_hwmod_class = {
-       .name           = "davinci_mdio",
-};
-
-struct omap_hwmod am33xx_mdio_hwmod = {
-       .name           = "davinci_mdio",
-       .class          = &am33xx_mdio_hwmod_class,
-       .clkdm_name     = "cpsw_125mhz_clkdm",
-       .main_clk       = "cpsw_125mhz_gclk",
-};
-
-/*
  * dcan class
  */
 static struct omap_hwmod_class am33xx_dcan_hwmod_class = {
@@ -1072,7 +1024,6 @@ static void omap_hwmod_am33xx_clkctrl(void)
        CLKCTRL(am33xx_tptc1_hwmod, AM33XX_CM_PER_TPTC1_CLKCTRL_OFFSET);
        CLKCTRL(am33xx_tptc2_hwmod, AM33XX_CM_PER_TPTC2_CLKCTRL_OFFSET);
        CLKCTRL(am33xx_gfx_hwmod, AM33XX_CM_GFX_GFX_CLKCTRL_OFFSET);
-       CLKCTRL(am33xx_cpgmac0_hwmod, AM33XX_CM_PER_CPGMAC0_CLKCTRL_OFFSET);
        CLKCTRL(am33xx_pruss_hwmod, AM33XX_CM_PER_PRUSS_CLKCTRL_OFFSET);
        CLKCTRL(am33xx_mpu_hwmod , AM33XX_CM_MPU_MPU_CLKCTRL_OFFSET);
        CLKCTRL(am33xx_l3_instr_hwmod , AM33XX_CM_PER_L3_INSTR_CLKCTRL_OFFSET);
@@ -1134,7 +1085,6 @@ static void omap_hwmod_am43xx_clkctrl(void)
        CLKCTRL(am33xx_tptc1_hwmod, AM43XX_CM_PER_TPTC1_CLKCTRL_OFFSET);
        CLKCTRL(am33xx_tptc2_hwmod, AM43XX_CM_PER_TPTC2_CLKCTRL_OFFSET);
        CLKCTRL(am33xx_gfx_hwmod, AM43XX_CM_GFX_GFX_CLKCTRL_OFFSET);
-       CLKCTRL(am33xx_cpgmac0_hwmod, AM43XX_CM_PER_CPGMAC0_CLKCTRL_OFFSET);
        CLKCTRL(am33xx_pruss_hwmod, AM43XX_CM_PER_PRUSS_CLKCTRL_OFFSET);
        CLKCTRL(am33xx_mpu_hwmod , AM43XX_CM_MPU_MPU_CLKCTRL_OFFSET);
        CLKCTRL(am33xx_l3_instr_hwmod , AM43XX_CM_PER_L3_INSTR_CLKCTRL_OFFSET);
index c965af2..2bcb634 100644 (file)
@@ -372,13 +372,6 @@ static struct omap_hwmod_ocp_if am33xx_l4_wkup__adc_tsc = {
        .user           = OCP_USER_MPU,
 };
 
-static struct omap_hwmod_ocp_if am33xx_l4_hs__cpgmac0 = {
-       .master         = &am33xx_l4_hs_hwmod,
-       .slave          = &am33xx_cpgmac0_hwmod,
-       .clk            = "cpsw_125mhz_gclk",
-       .user           = OCP_USER_MPU,
-};
-
 static struct omap_hwmod_ocp_if am33xx_l3_main__lcdc = {
        .master         = &am33xx_l3_main_hwmod,
        .slave          = &am33xx_lcdc_hwmod,
@@ -462,8 +455,6 @@ static struct omap_hwmod_ocp_if *am33xx_hwmod_ocp_ifs[] __initdata = {
        &am33xx_l3_main__tptc2,
        &am33xx_l3_main__ocmc,
        &am33xx_l3_s__usbss,
-       &am33xx_l4_hs__cpgmac0,
-       &am33xx_cpgmac0__mdio,
        &am33xx_l3_main__sha0,
        &am33xx_l3_main__aes0,
        &am33xx_l4_per__rng,
index 69571ab..5c3db6b 100644 (file)
@@ -597,13 +597,6 @@ static struct omap_hwmod_ocp_if am43xx_l4_wkup__adc_tsc = {
        .user           = OCP_USER_MPU,
 };
 
-static struct omap_hwmod_ocp_if am43xx_l4_hs__cpgmac0 = {
-       .master         = &am43xx_l4_hs_hwmod,
-       .slave          = &am33xx_cpgmac0_hwmod,
-       .clk            = "cpsw_125mhz_gclk",
-       .user           = OCP_USER_MPU,
-};
-
 static struct omap_hwmod_ocp_if am43xx_l4_wkup__timer1 = {
        .master         = &am33xx_l4_wkup_hwmod,
        .slave          = &am33xx_timer1_hwmod,
@@ -859,8 +852,6 @@ static struct omap_hwmod_ocp_if *am43xx_hwmod_ocp_ifs[] __initdata = {
        &am33xx_l3_main__tptc1,
        &am33xx_l3_main__tptc2,
        &am33xx_l3_main__ocmc,
-       &am43xx_l4_hs__cpgmac0,
-       &am33xx_cpgmac0__mdio,
        &am33xx_l3_main__sha0,
        &am33xx_l3_main__aes0,
        &am43xx_l3_main__des,
index a6f2a10..28ea296 100644 (file)
@@ -28,7 +28,6 @@
 #include "cm2_44xx.h"
 #include "prm44xx.h"
 #include "prm-regbits-44xx.h"
-#include "wd_timer.h"
 
 /* Base offset for all OMAP4 interrupts external to MPUSS */
 #define OMAP44XX_IRQ_GIC_START 32
@@ -276,29 +275,6 @@ static struct omap_hwmod omap44xx_aess_hwmod = {
 };
 
 /*
- * 'c2c' class
- * chip 2 chip interface used to plug the ape soc (omap) with an external modem
- * soc
- */
-
-static struct omap_hwmod_class omap44xx_c2c_hwmod_class = {
-       .name   = "c2c",
-};
-
-/* c2c */
-static struct omap_hwmod omap44xx_c2c_hwmod = {
-       .name           = "c2c",
-       .class          = &omap44xx_c2c_hwmod_class,
-       .clkdm_name     = "d2d_clkdm",
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs = OMAP4_CM_D2D_SAD2D_CLKCTRL_OFFSET,
-                       .context_offs = OMAP4_RM_D2D_SAD2D_CONTEXT_OFFSET,
-               },
-       },
-};
-
-/*
  * 'counter' class
  * 32-bit ordinary counter, clocked by the falling edge of the 32 khz clock
  */
@@ -1086,41 +1062,6 @@ static struct omap_hwmod omap44xx_gpmc_hwmod = {
 };
 
 /*
- * 'gpu' class
- * 2d/3d graphics accelerator
- */
-
-static struct omap_hwmod_class_sysconfig omap44xx_gpu_sysc = {
-       .rev_offs       = 0x1fc00,
-       .sysc_offs      = 0x1fc10,
-       .sysc_flags     = (SYSC_HAS_MIDLEMODE | SYSC_HAS_SIDLEMODE),
-       .idlemodes      = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
-                          SIDLE_SMART_WKUP | MSTANDBY_FORCE | MSTANDBY_NO |
-                          MSTANDBY_SMART | MSTANDBY_SMART_WKUP),
-       .sysc_fields    = &omap_hwmod_sysc_type2,
-};
-
-static struct omap_hwmod_class omap44xx_gpu_hwmod_class = {
-       .name   = "gpu",
-       .sysc   = &omap44xx_gpu_sysc,
-};
-
-/* gpu */
-static struct omap_hwmod omap44xx_gpu_hwmod = {
-       .name           = "gpu",
-       .class          = &omap44xx_gpu_hwmod_class,
-       .clkdm_name     = "l3_gfx_clkdm",
-       .main_clk       = "sgx_clk_mux",
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs = OMAP4_CM_GFX_GFX_CLKCTRL_OFFSET,
-                       .context_offs = OMAP4_RM_GFX_GFX_CONTEXT_OFFSET,
-                       .modulemode   = MODULEMODE_SWCTRL,
-               },
-       },
-};
-
-/*
  * 'hdq1w' class
  * hdq / 1-wire serial interface controller
  */
@@ -2434,61 +2375,6 @@ static struct omap_hwmod omap44xx_usb_tll_hs_hwmod = {
 };
 
 /*
- * 'wd_timer' class
- * 32-bit watchdog upward counter that generates a pulse on the reset pin on
- * overflow condition
- */
-
-static struct omap_hwmod_class_sysconfig omap44xx_wd_timer_sysc = {
-       .rev_offs       = 0x0000,
-       .sysc_offs      = 0x0010,
-       .syss_offs      = 0x0014,
-       .sysc_flags     = (SYSC_HAS_EMUFREE | SYSC_HAS_SIDLEMODE |
-                          SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS),
-       .idlemodes      = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
-                          SIDLE_SMART_WKUP),
-       .sysc_fields    = &omap_hwmod_sysc_type1,
-};
-
-static struct omap_hwmod_class omap44xx_wd_timer_hwmod_class = {
-       .name           = "wd_timer",
-       .sysc           = &omap44xx_wd_timer_sysc,
-       .pre_shutdown   = &omap2_wd_timer_disable,
-       .reset          = &omap2_wd_timer_reset,
-};
-
-/* wd_timer2 */
-static struct omap_hwmod omap44xx_wd_timer2_hwmod = {
-       .name           = "wd_timer2",
-       .class          = &omap44xx_wd_timer_hwmod_class,
-       .clkdm_name     = "l4_wkup_clkdm",
-       .main_clk       = "sys_32k_ck",
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs = OMAP4_CM_WKUP_WDT2_CLKCTRL_OFFSET,
-                       .context_offs = OMAP4_RM_WKUP_WDT2_CONTEXT_OFFSET,
-                       .modulemode   = MODULEMODE_SWCTRL,
-               },
-       },
-};
-
-/* wd_timer3 */
-static struct omap_hwmod omap44xx_wd_timer3_hwmod = {
-       .name           = "wd_timer3",
-       .class          = &omap44xx_wd_timer_hwmod_class,
-       .clkdm_name     = "abe_clkdm",
-       .main_clk       = "sys_32k_ck",
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs = OMAP4_CM1_ABE_WDT3_CLKCTRL_OFFSET,
-                       .context_offs = OMAP4_RM_ABE_WDT3_CONTEXT_OFFSET,
-                       .modulemode   = MODULEMODE_SWCTRL,
-               },
-       },
-};
-
-
-/*
  * interfaces
  */
 
@@ -2596,14 +2482,6 @@ static struct omap_hwmod_ocp_if omap44xx_fdif__l3_main_2 = {
        .user           = OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* gpu -> l3_main_2 */
-static struct omap_hwmod_ocp_if omap44xx_gpu__l3_main_2 = {
-       .master         = &omap44xx_gpu_hwmod,
-       .slave          = &omap44xx_l3_main_2_hwmod,
-       .clk            = "l3_div_ck",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
 /* hsi -> l3_main_2 */
 static struct omap_hwmod_ocp_if omap44xx_hsi__l3_main_2 = {
        .master         = &omap44xx_hsi_hwmod,
@@ -2788,14 +2666,6 @@ static struct omap_hwmod_ocp_if __maybe_unused omap44xx_l4_abe__aess_dma = {
        .user           = OCP_USER_SDMA,
 };
 
-/* l3_main_2 -> c2c */
-static struct omap_hwmod_ocp_if omap44xx_l3_main_2__c2c = {
-       .master         = &omap44xx_l3_main_2_hwmod,
-       .slave          = &omap44xx_c2c_hwmod,
-       .clk            = "l3_div_ck",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
 /* l4_wkup -> counter_32k */
 static struct omap_hwmod_ocp_if omap44xx_l4_wkup__counter_32k = {
        .master         = &omap44xx_l4_wkup_hwmod,
@@ -3028,14 +2898,6 @@ static struct omap_hwmod_ocp_if omap44xx_l3_main_2__gpmc = {
        .user           = OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l3_main_2 -> gpu */
-static struct omap_hwmod_ocp_if omap44xx_l3_main_2__gpu = {
-       .master         = &omap44xx_l3_main_2_hwmod,
-       .slave          = &omap44xx_gpu_hwmod,
-       .clk            = "l3_div_ck",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
 /* l4_per -> hdq1w */
 static struct omap_hwmod_ocp_if omap44xx_l4_per__hdq1w = {
        .master         = &omap44xx_l4_per_hwmod,
@@ -3396,30 +3258,6 @@ static struct omap_hwmod_ocp_if omap44xx_l4_cfg__usb_tll_hs = {
        .user           = OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l4_wkup -> wd_timer2 */
-static struct omap_hwmod_ocp_if omap44xx_l4_wkup__wd_timer2 = {
-       .master         = &omap44xx_l4_wkup_hwmod,
-       .slave          = &omap44xx_wd_timer2_hwmod,
-       .clk            = "l4_wkup_clk_mux_ck",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_abe -> wd_timer3 */
-static struct omap_hwmod_ocp_if omap44xx_l4_abe__wd_timer3 = {
-       .master         = &omap44xx_l4_abe_hwmod,
-       .slave          = &omap44xx_wd_timer3_hwmod,
-       .clk            = "ocp_abe_iclk",
-       .user           = OCP_USER_MPU,
-};
-
-/* l4_abe -> wd_timer3 (dma) */
-static struct omap_hwmod_ocp_if omap44xx_l4_abe__wd_timer3_dma = {
-       .master         = &omap44xx_l4_abe_hwmod,
-       .slave          = &omap44xx_wd_timer3_hwmod,
-       .clk            = "ocp_abe_iclk",
-       .user           = OCP_USER_SDMA,
-};
-
 /* mpu -> emif1 */
 static struct omap_hwmod_ocp_if omap44xx_mpu__emif1 = {
        .master         = &omap44xx_mpu_hwmod,
@@ -3450,7 +3288,6 @@ static struct omap_hwmod_ocp_if *omap44xx_hwmod_ocp_ifs[] __initdata = {
        &omap44xx_debugss__l3_main_2,
        &omap44xx_dma_system__l3_main_2,
        &omap44xx_fdif__l3_main_2,
-       &omap44xx_gpu__l3_main_2,
        &omap44xx_hsi__l3_main_2,
        &omap44xx_ipu__l3_main_2,
        &omap44xx_iss__l3_main_2,
@@ -3474,7 +3311,6 @@ static struct omap_hwmod_ocp_if *omap44xx_hwmod_ocp_ifs[] __initdata = {
        &omap44xx_l4_cfg__ocp_wp_noc,
        &omap44xx_l4_abe__aess,
        &omap44xx_l4_abe__aess_dma,
-       &omap44xx_l3_main_2__c2c,
        &omap44xx_l4_wkup__counter_32k,
        &omap44xx_l4_cfg__ctrl_module_core,
        &omap44xx_l4_cfg__ctrl_module_pad_core,
@@ -3503,7 +3339,6 @@ static struct omap_hwmod_ocp_if *omap44xx_hwmod_ocp_ifs[] __initdata = {
        &omap44xx_l4_per__elm,
        &omap44xx_l4_cfg__fdif,
        &omap44xx_l3_main_2__gpmc,
-       &omap44xx_l3_main_2__gpu,
        &omap44xx_l4_per__hdq1w,
        &omap44xx_l4_cfg__hsi,
        &omap44xx_l3_main_2__ipu,
@@ -3551,9 +3386,6 @@ static struct omap_hwmod_ocp_if *omap44xx_hwmod_ocp_ifs[] __initdata = {
        &omap44xx_l4_cfg__usb_host_hs,
        &omap44xx_l4_cfg__usb_otg_hs,
        &omap44xx_l4_cfg__usb_tll_hs,
-       &omap44xx_l4_wkup__wd_timer2,
-       &omap44xx_l4_abe__wd_timer3,
-       &omap44xx_l4_abe__wd_timer3_dma,
        &omap44xx_mpu__emif1,
        &omap44xx_mpu__emif2,
        &omap44xx_l3_main_2__aes1,
index 1ec21e9..e5bd549 100644 (file)
@@ -285,56 +285,6 @@ static struct omap_hwmod dra7xx_ctrl_module_wkup_hwmod = {
 };
 
 /*
- * 'gmac' class
- * cpsw/gmac sub system
- */
-static struct omap_hwmod_class_sysconfig dra7xx_gmac_sysc = {
-       .rev_offs       = 0x0,
-       .sysc_offs      = 0x8,
-       .syss_offs      = 0x4,
-       .sysc_flags     = (SYSC_HAS_SIDLEMODE | SYSC_HAS_MIDLEMODE |
-                          SYSS_HAS_RESET_STATUS),
-       .idlemodes      = (SIDLE_FORCE | SIDLE_NO | MSTANDBY_FORCE |
-                          MSTANDBY_NO),
-       .sysc_fields    = &omap_hwmod_sysc_type3,
-};
-
-static struct omap_hwmod_class dra7xx_gmac_hwmod_class = {
-       .name           = "gmac",
-       .sysc           = &dra7xx_gmac_sysc,
-};
-
-static struct omap_hwmod dra7xx_gmac_hwmod = {
-       .name           = "gmac",
-       .class          = &dra7xx_gmac_hwmod_class,
-       .clkdm_name     = "gmac_clkdm",
-       .flags          = (HWMOD_SWSUP_SIDLE | HWMOD_SWSUP_MSTANDBY),
-       .main_clk       = "dpll_gmac_ck",
-       .mpu_rt_idx     = 1,
-       .prcm           = {
-               .omap4  = {
-                       .clkctrl_offs   = DRA7XX_CM_GMAC_GMAC_CLKCTRL_OFFSET,
-                       .context_offs   = DRA7XX_RM_GMAC_GMAC_CONTEXT_OFFSET,
-                       .modulemode     = MODULEMODE_SWCTRL,
-               },
-       },
-};
-
-/*
- * 'mdio' class
- */
-static struct omap_hwmod_class dra7xx_mdio_hwmod_class = {
-       .name           = "davinci_mdio",
-};
-
-static struct omap_hwmod dra7xx_mdio_hwmod = {
-       .name           = "davinci_mdio",
-       .class          = &dra7xx_mdio_hwmod_class,
-       .clkdm_name     = "gmac_clkdm",
-       .main_clk       = "dpll_gmac_ck",
-};
-
-/*
  * 'dcan' class
  *
  */
@@ -1047,281 +997,6 @@ static struct omap_hwmod dra7xx_mailbox13_hwmod = {
 };
 
 /*
- * 'mcspi' class
- *
- */
-
-static struct omap_hwmod_class_sysconfig dra7xx_mcspi_sysc = {
-       .rev_offs       = 0x0000,
-       .sysc_offs      = 0x0010,
-       .sysc_flags     = (SYSC_HAS_EMUFREE | SYSC_HAS_RESET_STATUS |
-                          SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET),
-       .idlemodes      = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
-                          SIDLE_SMART_WKUP),
-       .sysc_fields    = &omap_hwmod_sysc_type2,
-};
-
-static struct omap_hwmod_class dra7xx_mcspi_hwmod_class = {
-       .name   = "mcspi",
-       .sysc   = &dra7xx_mcspi_sysc,
-};
-
-/* mcspi1 */
-static struct omap_hwmod dra7xx_mcspi1_hwmod = {
-       .name           = "mcspi1",
-       .class          = &dra7xx_mcspi_hwmod_class,
-       .clkdm_name     = "l4per_clkdm",
-       .main_clk       = "func_48m_fclk",
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs = DRA7XX_CM_L4PER_MCSPI1_CLKCTRL_OFFSET,
-                       .context_offs = DRA7XX_RM_L4PER_MCSPI1_CONTEXT_OFFSET,
-                       .modulemode   = MODULEMODE_SWCTRL,
-               },
-       },
-};
-
-/* mcspi2 */
-static struct omap_hwmod dra7xx_mcspi2_hwmod = {
-       .name           = "mcspi2",
-       .class          = &dra7xx_mcspi_hwmod_class,
-       .clkdm_name     = "l4per_clkdm",
-       .main_clk       = "func_48m_fclk",
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs = DRA7XX_CM_L4PER_MCSPI2_CLKCTRL_OFFSET,
-                       .context_offs = DRA7XX_RM_L4PER_MCSPI2_CONTEXT_OFFSET,
-                       .modulemode   = MODULEMODE_SWCTRL,
-               },
-       },
-};
-
-/* mcspi3 */
-static struct omap_hwmod dra7xx_mcspi3_hwmod = {
-       .name           = "mcspi3",
-       .class          = &dra7xx_mcspi_hwmod_class,
-       .clkdm_name     = "l4per_clkdm",
-       .main_clk       = "func_48m_fclk",
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs = DRA7XX_CM_L4PER_MCSPI3_CLKCTRL_OFFSET,
-                       .context_offs = DRA7XX_RM_L4PER_MCSPI3_CONTEXT_OFFSET,
-                       .modulemode   = MODULEMODE_SWCTRL,
-               },
-       },
-};
-
-/* mcspi4 */
-static struct omap_hwmod dra7xx_mcspi4_hwmod = {
-       .name           = "mcspi4",
-       .class          = &dra7xx_mcspi_hwmod_class,
-       .clkdm_name     = "l4per_clkdm",
-       .main_clk       = "func_48m_fclk",
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs = DRA7XX_CM_L4PER_MCSPI4_CLKCTRL_OFFSET,
-                       .context_offs = DRA7XX_RM_L4PER_MCSPI4_CONTEXT_OFFSET,
-                       .modulemode   = MODULEMODE_SWCTRL,
-               },
-       },
-};
-
-/*
- * 'mcasp' class
- *
- */
-static struct omap_hwmod_class_sysconfig dra7xx_mcasp_sysc = {
-       .rev_offs       = 0,
-       .sysc_offs      = 0x0004,
-       .sysc_flags     = SYSC_HAS_SIDLEMODE,
-       .idlemodes      = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
-       .sysc_fields    = &omap_hwmod_sysc_type3,
-};
-
-static struct omap_hwmod_class dra7xx_mcasp_hwmod_class = {
-       .name   = "mcasp",
-       .sysc   = &dra7xx_mcasp_sysc,
-};
-
-/* mcasp1 */
-static struct omap_hwmod_opt_clk mcasp1_opt_clks[] = {
-       { .role = "ahclkx", .clk = "mcasp1_ahclkx_mux" },
-       { .role = "ahclkr", .clk = "mcasp1_ahclkr_mux" },
-};
-
-static struct omap_hwmod dra7xx_mcasp1_hwmod = {
-       .name           = "mcasp1",
-       .class          = &dra7xx_mcasp_hwmod_class,
-       .clkdm_name     = "ipu_clkdm",
-       .main_clk       = "mcasp1_aux_gfclk_mux",
-       .flags          = HWMOD_OPT_CLKS_NEEDED,
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs = DRA7XX_CM_IPU_MCASP1_CLKCTRL_OFFSET,
-                       .context_offs = DRA7XX_RM_IPU_MCASP1_CONTEXT_OFFSET,
-                       .modulemode   = MODULEMODE_SWCTRL,
-               },
-       },
-       .opt_clks       = mcasp1_opt_clks,
-       .opt_clks_cnt   = ARRAY_SIZE(mcasp1_opt_clks),
-};
-
-/* mcasp2 */
-static struct omap_hwmod_opt_clk mcasp2_opt_clks[] = {
-       { .role = "ahclkx", .clk = "mcasp2_ahclkx_mux" },
-       { .role = "ahclkr", .clk = "mcasp2_ahclkr_mux" },
-};
-
-static struct omap_hwmod dra7xx_mcasp2_hwmod = {
-       .name           = "mcasp2",
-       .class          = &dra7xx_mcasp_hwmod_class,
-       .clkdm_name     = "l4per2_clkdm",
-       .main_clk       = "mcasp2_aux_gfclk_mux",
-       .flags          = HWMOD_OPT_CLKS_NEEDED,
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs = DRA7XX_CM_L4PER2_MCASP2_CLKCTRL_OFFSET,
-                       .context_offs = DRA7XX_RM_L4PER2_MCASP2_CONTEXT_OFFSET,
-                       .modulemode   = MODULEMODE_SWCTRL,
-               },
-       },
-       .opt_clks       = mcasp2_opt_clks,
-       .opt_clks_cnt   = ARRAY_SIZE(mcasp2_opt_clks),
-};
-
-/* mcasp3 */
-static struct omap_hwmod_opt_clk mcasp3_opt_clks[] = {
-       { .role = "ahclkx", .clk = "mcasp3_ahclkx_mux" },
-};
-
-static struct omap_hwmod dra7xx_mcasp3_hwmod = {
-       .name           = "mcasp3",
-       .class          = &dra7xx_mcasp_hwmod_class,
-       .clkdm_name     = "l4per2_clkdm",
-       .main_clk       = "mcasp3_aux_gfclk_mux",
-       .flags          = HWMOD_OPT_CLKS_NEEDED,
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs = DRA7XX_CM_L4PER2_MCASP3_CLKCTRL_OFFSET,
-                       .context_offs = DRA7XX_RM_L4PER2_MCASP3_CONTEXT_OFFSET,
-                       .modulemode   = MODULEMODE_SWCTRL,
-               },
-       },
-       .opt_clks       = mcasp3_opt_clks,
-       .opt_clks_cnt   = ARRAY_SIZE(mcasp3_opt_clks),
-};
-
-/* mcasp4 */
-static struct omap_hwmod_opt_clk mcasp4_opt_clks[] = {
-       { .role = "ahclkx", .clk = "mcasp4_ahclkx_mux" },
-};
-
-static struct omap_hwmod dra7xx_mcasp4_hwmod = {
-       .name           = "mcasp4",
-       .class          = &dra7xx_mcasp_hwmod_class,
-       .clkdm_name     = "l4per2_clkdm",
-       .main_clk       = "mcasp4_aux_gfclk_mux",
-       .flags          = HWMOD_OPT_CLKS_NEEDED,
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs = DRA7XX_CM_L4PER2_MCASP4_CLKCTRL_OFFSET,
-                       .context_offs = DRA7XX_RM_L4PER2_MCASP4_CONTEXT_OFFSET,
-                       .modulemode   = MODULEMODE_SWCTRL,
-               },
-       },
-       .opt_clks       = mcasp4_opt_clks,
-       .opt_clks_cnt   = ARRAY_SIZE(mcasp4_opt_clks),
-};
-
-/* mcasp5 */
-static struct omap_hwmod_opt_clk mcasp5_opt_clks[] = {
-       { .role = "ahclkx", .clk = "mcasp5_ahclkx_mux" },
-};
-
-static struct omap_hwmod dra7xx_mcasp5_hwmod = {
-       .name           = "mcasp5",
-       .class          = &dra7xx_mcasp_hwmod_class,
-       .clkdm_name     = "l4per2_clkdm",
-       .main_clk       = "mcasp5_aux_gfclk_mux",
-       .flags          = HWMOD_OPT_CLKS_NEEDED,
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs = DRA7XX_CM_L4PER2_MCASP5_CLKCTRL_OFFSET,
-                       .context_offs = DRA7XX_RM_L4PER2_MCASP5_CONTEXT_OFFSET,
-                       .modulemode   = MODULEMODE_SWCTRL,
-               },
-       },
-       .opt_clks       = mcasp5_opt_clks,
-       .opt_clks_cnt   = ARRAY_SIZE(mcasp5_opt_clks),
-};
-
-/* mcasp6 */
-static struct omap_hwmod_opt_clk mcasp6_opt_clks[] = {
-       { .role = "ahclkx", .clk = "mcasp6_ahclkx_mux" },
-};
-
-static struct omap_hwmod dra7xx_mcasp6_hwmod = {
-       .name           = "mcasp6",
-       .class          = &dra7xx_mcasp_hwmod_class,
-       .clkdm_name     = "l4per2_clkdm",
-       .main_clk       = "mcasp6_aux_gfclk_mux",
-       .flags          = HWMOD_OPT_CLKS_NEEDED,
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs = DRA7XX_CM_L4PER2_MCASP6_CLKCTRL_OFFSET,
-                       .context_offs = DRA7XX_RM_L4PER2_MCASP6_CONTEXT_OFFSET,
-                       .modulemode   = MODULEMODE_SWCTRL,
-               },
-       },
-       .opt_clks       = mcasp6_opt_clks,
-       .opt_clks_cnt   = ARRAY_SIZE(mcasp6_opt_clks),
-};
-
-/* mcasp7 */
-static struct omap_hwmod_opt_clk mcasp7_opt_clks[] = {
-       { .role = "ahclkx", .clk = "mcasp7_ahclkx_mux" },
-};
-
-static struct omap_hwmod dra7xx_mcasp7_hwmod = {
-       .name           = "mcasp7",
-       .class          = &dra7xx_mcasp_hwmod_class,
-       .clkdm_name     = "l4per2_clkdm",
-       .main_clk       = "mcasp7_aux_gfclk_mux",
-       .flags          = HWMOD_OPT_CLKS_NEEDED,
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs = DRA7XX_CM_L4PER2_MCASP7_CLKCTRL_OFFSET,
-                       .context_offs = DRA7XX_RM_L4PER2_MCASP7_CONTEXT_OFFSET,
-                       .modulemode   = MODULEMODE_SWCTRL,
-               },
-       },
-       .opt_clks       = mcasp7_opt_clks,
-       .opt_clks_cnt   = ARRAY_SIZE(mcasp7_opt_clks),
-};
-
-/* mcasp8 */
-static struct omap_hwmod_opt_clk mcasp8_opt_clks[] = {
-       { .role = "ahclkx", .clk = "mcasp8_ahclkx_mux" },
-};
-
-static struct omap_hwmod dra7xx_mcasp8_hwmod = {
-       .name           = "mcasp8",
-       .class          = &dra7xx_mcasp_hwmod_class,
-       .clkdm_name     = "l4per2_clkdm",
-       .main_clk       = "mcasp8_aux_gfclk_mux",
-       .flags          = HWMOD_OPT_CLKS_NEEDED,
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs = DRA7XX_CM_L4PER2_MCASP8_CLKCTRL_OFFSET,
-                       .context_offs = DRA7XX_RM_L4PER2_MCASP8_CONTEXT_OFFSET,
-                       .modulemode   = MODULEMODE_SWCTRL,
-               },
-       },
-       .opt_clks       = mcasp8_opt_clks,
-       .opt_clks_cnt   = ARRAY_SIZE(mcasp8_opt_clks),
-};
-
-/*
  * 'mpu' class
  *
  */
@@ -2303,19 +1978,6 @@ static struct omap_hwmod_ocp_if dra7xx_l4_wkup__ctrl_module_wkup = {
        .user           = OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-static struct omap_hwmod_ocp_if dra7xx_l4_per2__cpgmac0 = {
-       .master         = &dra7xx_l4_per2_hwmod,
-       .slave          = &dra7xx_gmac_hwmod,
-       .clk            = "dpll_gmac_ck",
-       .user           = OCP_USER_MPU,
-};
-
-static struct omap_hwmod_ocp_if dra7xx_gmac__mdio = {
-       .master         = &dra7xx_gmac_hwmod,
-       .slave          = &dra7xx_mdio_hwmod,
-       .user           = OCP_USER_MPU,
-};
-
 /* l4_wkup -> dcan1 */
 static struct omap_hwmod_ocp_if dra7xx_l4_wkup__dcan1 = {
        .master         = &dra7xx_l4_wkup_hwmod,
@@ -2412,94 +2074,6 @@ static struct omap_hwmod_ocp_if dra7xx_l3_main_1__sha0 = {
        .user           = OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l4_per2 -> mcasp1 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per2__mcasp1 = {
-       .master         = &dra7xx_l4_per2_hwmod,
-       .slave          = &dra7xx_mcasp1_hwmod,
-       .clk            = "l4_root_clk_div",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l3_main_1 -> mcasp1 */
-static struct omap_hwmod_ocp_if dra7xx_l3_main_1__mcasp1 = {
-       .master         = &dra7xx_l3_main_1_hwmod,
-       .slave          = &dra7xx_mcasp1_hwmod,
-       .clk            = "l3_iclk_div",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per2 -> mcasp2 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per2__mcasp2 = {
-       .master         = &dra7xx_l4_per2_hwmod,
-       .slave          = &dra7xx_mcasp2_hwmod,
-       .clk            = "l4_root_clk_div",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l3_main_1 -> mcasp2 */
-static struct omap_hwmod_ocp_if dra7xx_l3_main_1__mcasp2 = {
-       .master         = &dra7xx_l3_main_1_hwmod,
-       .slave          = &dra7xx_mcasp2_hwmod,
-       .clk            = "l3_iclk_div",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per2 -> mcasp3 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per2__mcasp3 = {
-       .master         = &dra7xx_l4_per2_hwmod,
-       .slave          = &dra7xx_mcasp3_hwmod,
-       .clk            = "l4_root_clk_div",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l3_main_1 -> mcasp3 */
-static struct omap_hwmod_ocp_if dra7xx_l3_main_1__mcasp3 = {
-       .master         = &dra7xx_l3_main_1_hwmod,
-       .slave          = &dra7xx_mcasp3_hwmod,
-       .clk            = "l3_iclk_div",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per2 -> mcasp4 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per2__mcasp4 = {
-       .master         = &dra7xx_l4_per2_hwmod,
-       .slave          = &dra7xx_mcasp4_hwmod,
-       .clk            = "l4_root_clk_div",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per2 -> mcasp5 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per2__mcasp5 = {
-       .master         = &dra7xx_l4_per2_hwmod,
-       .slave          = &dra7xx_mcasp5_hwmod,
-       .clk            = "l4_root_clk_div",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per2 -> mcasp6 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per2__mcasp6 = {
-       .master         = &dra7xx_l4_per2_hwmod,
-       .slave          = &dra7xx_mcasp6_hwmod,
-       .clk            = "l4_root_clk_div",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per2 -> mcasp7 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per2__mcasp7 = {
-       .master         = &dra7xx_l4_per2_hwmod,
-       .slave          = &dra7xx_mcasp7_hwmod,
-       .clk            = "l4_root_clk_div",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per2 -> mcasp8 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per2__mcasp8 = {
-       .master         = &dra7xx_l4_per2_hwmod,
-       .slave          = &dra7xx_mcasp8_hwmod,
-       .clk            = "l4_root_clk_div",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
 /* l4_per1 -> elm */
 static struct omap_hwmod_ocp_if dra7xx_l4_per1__elm = {
        .master         = &dra7xx_l4_per1_hwmod,
@@ -2628,38 +2202,6 @@ static struct omap_hwmod_ocp_if dra7xx_l4_per3__mailbox13 = {
        .user           = OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l4_per1 -> mcspi1 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per1__mcspi1 = {
-       .master         = &dra7xx_l4_per1_hwmod,
-       .slave          = &dra7xx_mcspi1_hwmod,
-       .clk            = "l3_iclk_div",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per1 -> mcspi2 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per1__mcspi2 = {
-       .master         = &dra7xx_l4_per1_hwmod,
-       .slave          = &dra7xx_mcspi2_hwmod,
-       .clk            = "l3_iclk_div",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per1 -> mcspi3 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per1__mcspi3 = {
-       .master         = &dra7xx_l4_per1_hwmod,
-       .slave          = &dra7xx_mcspi3_hwmod,
-       .clk            = "l3_iclk_div",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_per1 -> mcspi4 */
-static struct omap_hwmod_ocp_if dra7xx_l4_per1__mcspi4 = {
-       .master         = &dra7xx_l4_per1_hwmod,
-       .slave          = &dra7xx_mcspi4_hwmod,
-       .clk            = "l3_iclk_div",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
 /* l4_cfg -> mpu */
 static struct omap_hwmod_ocp_if dra7xx_l4_cfg__mpu = {
        .master         = &dra7xx_l4_cfg_hwmod,
@@ -3021,19 +2563,6 @@ static struct omap_hwmod_ocp_if *dra7xx_hwmod_ocp_ifs[] __initdata = {
        &dra7xx_l4_wkup__ctrl_module_wkup,
        &dra7xx_l4_wkup__dcan1,
        &dra7xx_l4_per2__dcan2,
-       &dra7xx_l4_per2__cpgmac0,
-       &dra7xx_l4_per2__mcasp1,
-       &dra7xx_l3_main_1__mcasp1,
-       &dra7xx_l4_per2__mcasp2,
-       &dra7xx_l3_main_1__mcasp2,
-       &dra7xx_l4_per2__mcasp3,
-       &dra7xx_l3_main_1__mcasp3,
-       &dra7xx_l4_per2__mcasp4,
-       &dra7xx_l4_per2__mcasp5,
-       &dra7xx_l4_per2__mcasp6,
-       &dra7xx_l4_per2__mcasp7,
-       &dra7xx_l4_per2__mcasp8,
-       &dra7xx_gmac__mdio,
        &dra7xx_l4_cfg__dma_system,
        &dra7xx_l3_main_1__tpcc,
        &dra7xx_l3_main_1__tptc0,
@@ -3060,10 +2589,6 @@ static struct omap_hwmod_ocp_if *dra7xx_hwmod_ocp_ifs[] __initdata = {
        &dra7xx_l4_per3__mailbox11,
        &dra7xx_l4_per3__mailbox12,
        &dra7xx_l4_per3__mailbox13,
-       &dra7xx_l4_per1__mcspi1,
-       &dra7xx_l4_per1__mcspi2,
-       &dra7xx_l4_per1__mcspi3,
-       &dra7xx_l4_per1__mcspi4,
        &dra7xx_l4_cfg__mpu,
        &dra7xx_l4_cfg__ocp2scp1,
        &dra7xx_l4_cfg__ocp2scp3,
index 5b251c8..12c26eb 100644 (file)
@@ -18,8 +18,8 @@
 #include <asm/cp15.h>
 #include <asm/cputype.h>
 #include <asm/hardware/cache-l2x0.h>
+#include <asm/hardware/cache-aurora-l2.h>
 #include "cache-tauros3.h"
-#include "cache-aurora-l2.h"
 
 struct l2c_init_data {
        const char *type;
@@ -1352,8 +1352,8 @@ static unsigned long aurora_range_end(unsigned long start, unsigned long end)
         * since cache range operations stall the CPU pipeline
         * until completion.
         */
-       if (end > start + MAX_RANGE_SIZE)
-               end = start + MAX_RANGE_SIZE;
+       if (end > start + AURORA_MAX_RANGE_SIZE)
+               end = start + AURORA_MAX_RANGE_SIZE;
 
        /*
         * Cache range operations can't straddle a page boundary.
@@ -1493,6 +1493,18 @@ static void __init aurora_of_parse(const struct device_node *np,
                mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
        }
 
+       if (of_property_read_bool(np, "marvell,ecc-enable")) {
+               mask |= AURORA_ACR_ECC_EN;
+               val |= AURORA_ACR_ECC_EN;
+       }
+
+       if (of_property_read_bool(np, "arm,parity-enable")) {
+               mask |= AURORA_ACR_PARITY_EN;
+               val |= AURORA_ACR_PARITY_EN;
+       } else if (of_property_read_bool(np, "arm,parity-disable")) {
+               mask |= AURORA_ACR_PARITY_EN;
+       }
+
        *aux_val &= ~mask;
        *aux_val |= val;
        *aux_mask &= ~mask;
index 890eeaa..bd0f482 100644 (file)
@@ -191,7 +191,7 @@ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
 {
        unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;
 
-       if (fsr & FSR_WRITE)
+       if ((fsr & FSR_WRITE) && !(fsr & FSR_CM))
                mask = VM_WRITE;
        if (fsr & FSR_LNX_PF)
                mask = VM_EXEC;
@@ -262,7 +262,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 
        if (user_mode(regs))
                flags |= FAULT_FLAG_USER;
-       if (fsr & FSR_WRITE)
+       if ((fsr & FSR_WRITE) && !(fsr & FSR_CM))
                flags |= FAULT_FLAG_WRITE;
 
        /*
index c063708..9ecc209 100644 (file)
@@ -6,6 +6,7 @@
  * Fault status register encodings.  We steal bit 31 for our own purposes.
  */
 #define FSR_LNX_PF             (1 << 31)
+#define FSR_CM                 (1 << 13)
 #define FSR_WRITE              (1 << 11)
 #define FSR_FS4                        (1 << 10)
 #define FSR_FS3_0              (15)
index 6ecbda8..6d89db7 100644 (file)
@@ -204,18 +204,17 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)
         * coherent with the kernels mapping.
         */
        if (!PageHighMem(page)) {
-               size_t page_size = PAGE_SIZE << compound_order(page);
-               __cpuc_flush_dcache_area(page_address(page), page_size);
+               __cpuc_flush_dcache_area(page_address(page), page_size(page));
        } else {
                unsigned long i;
                if (cache_is_vipt_nonaliasing()) {
-                       for (i = 0; i < (1 << compound_order(page)); i++) {
+                       for (i = 0; i < compound_nr(page); i++) {
                                void *addr = kmap_atomic(page + i);
                                __cpuc_flush_dcache_area(addr, PAGE_SIZE);
                                kunmap_atomic(addr);
                        }
                } else {
-                       for (i = 0; i < (1 << compound_order(page)); i++) {
+                       for (i = 0; i < compound_nr(page); i++) {
                                void *addr = kmap_high_get(page + i);
                                if (addr) {
                                        __cpuc_flush_dcache_area(addr, PAGE_SIZE);
index f866870..b8d912a 100644 (file)
        ((((addr)+SHMLBA-1)&~(SHMLBA-1)) +      \
         (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1)))
 
-/* gap between mmap and stack */
-#define MIN_GAP (128*1024*1024UL)
-#define MAX_GAP ((TASK_SIZE)/6*5)
-
-static int mmap_is_legacy(struct rlimit *rlim_stack)
-{
-       if (current->personality & ADDR_COMPAT_LAYOUT)
-               return 1;
-
-       if (rlim_stack->rlim_cur == RLIM_INFINITY)
-               return 1;
-
-       return sysctl_legacy_va_layout;
-}
-
-static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
-{
-       unsigned long gap = rlim_stack->rlim_cur;
-
-       if (gap < MIN_GAP)
-               gap = MIN_GAP;
-       else if (gap > MAX_GAP)
-               gap = MAX_GAP;
-
-       return PAGE_ALIGN(TASK_SIZE - gap - rnd);
-}
-
 /*
  * We need to ensure that shared mappings are correctly aligned to
  * avoid aliasing issues with VIPT caches.  We need to ensure that
@@ -171,31 +144,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
        return addr;
 }
 
-unsigned long arch_mmap_rnd(void)
-{
-       unsigned long rnd;
-
-       rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
-
-       return rnd << PAGE_SHIFT;
-}
-
-void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
-{
-       unsigned long random_factor = 0UL;
-
-       if (current->flags & PF_RANDOMIZE)
-               random_factor = arch_mmap_rnd();
-
-       if (mmap_is_legacy(rlim_stack)) {
-               mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
-               mm->get_unmapped_area = arch_get_unmapped_area;
-       } else {
-               mm->mmap_base = mmap_base(random_factor, rlim_stack);
-               mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-       }
-}
-
 /*
  * You really shouldn't be using read() or write() on /dev/mem.  This
  * might go away in the future.
index d9a0038..48c2888 100644 (file)
@@ -731,7 +731,7 @@ static void *__init late_alloc(unsigned long sz)
 {
        void *ptr = (void *)__get_free_pages(GFP_PGTABLE_KERNEL, get_order(sz));
 
-       if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
+       if (!ptr || !pgtable_pte_page_ctor(virt_to_page(ptr)))
                BUG();
        return ptr;
 }
@@ -1177,10 +1177,29 @@ void __init adjust_lowmem_bounds(void)
         */
        vmalloc_limit = (u64)(uintptr_t)vmalloc_min - PAGE_OFFSET + PHYS_OFFSET;
 
+       /*
+        * The first usable region must be PMD aligned. Mark its start
+        * as MEMBLOCK_NOMAP if it isn't
+        */
+       for_each_memblock(memory, reg) {
+               if (!memblock_is_nomap(reg)) {
+                       if (!IS_ALIGNED(reg->base, PMD_SIZE)) {
+                               phys_addr_t len;
+
+                               len = round_up(reg->base, PMD_SIZE) - reg->base;
+                               memblock_mark_nomap(reg->base, len);
+                       }
+                       break;
+               }
+       }
+
        for_each_memblock(memory, reg) {
                phys_addr_t block_start = reg->base;
                phys_addr_t block_end = reg->base + reg->size;
 
+               if (memblock_is_nomap(reg))
+                       continue;
+
                if (reg->base < vmalloc_limit) {
                        if (block_end > lowmem_limit)
                                /*
index ce42cc6..71d85ff 100644 (file)
@@ -62,6 +62,7 @@ void samsung_wdt_reset(void)
 #ifdef CONFIG_OF
 static const struct of_device_id s3c2410_wdt_match[] = {
        { .compatible = "samsung,s3c2410-wdt" },
+       { .compatible = "samsung,s3c6410-wdt" },
        {},
 };
 
index 37c6109..41a9b42 100644 (file)
@@ -15,7 +15,6 @@ config ARM64
        select ARCH_HAS_DMA_COHERENT_TO_PFN
        select ARCH_HAS_DMA_PREP_COHERENT
        select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
-       select ARCH_HAS_ELF_RANDOMIZE
        select ARCH_HAS_FAST_MULTIPLIER
        select ARCH_HAS_FORTIFY_SOURCE
        select ARCH_HAS_GCOV_PROFILE_ALL
@@ -71,6 +70,7 @@ config ARM64
        select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG
        select ARCH_SUPPORTS_NUMA_BALANCING
        select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
+       select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
        select ARCH_WANT_FRAME_POINTERS
        select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
        select ARCH_HAS_UBSAN_SANITIZE_ALL
@@ -982,7 +982,7 @@ config KEXEC_FILE
          for kernel and initramfs as opposed to list of segments as
          accepted by previous system call.
 
-config KEXEC_VERIFY_SIG
+config KEXEC_SIG
        bool "Verify kernel signature during kexec_file_load() syscall"
        depends on KEXEC_FILE
        help
@@ -997,13 +997,13 @@ config KEXEC_VERIFY_SIG
 config KEXEC_IMAGE_VERIFY_SIG
        bool "Enable Image signature verification support"
        default y
-       depends on KEXEC_VERIFY_SIG
+       depends on KEXEC_SIG
        depends on EFI && SIGNED_PE_FILE_VERIFICATION
        help
          Enable Image signature verification support.
 
 comment "Support for PE file signature verification disabled"
-       depends on KEXEC_VERIFY_SIG
+       depends on KEXEC_SIG
        depends on !EFI || !SIGNED_PE_FILE_VERIFICATION
 
 config CRASH_DUMP
index 124a7e2..3379193 100644 (file)
                        #address-cells = <3>;
                        #size-cells = <2>;
                        device_type = "pci";
-                       num-lanes = <4>;
                        num-viewport = <2>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x40 0x00010000 0x0 0x00010000   /* downstream I/O */
index 71d9ed9..c084c7a 100644 (file)
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <4>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x40 0x00010000 0x0 0x00010000   /* downstream I/O */
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <2>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x48 0x00010000 0x0 0x00010000   /* downstream I/O */
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <2>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x50 0x00010000 0x0 0x00010000   /* downstream I/O */
index b0ef08b..d4c1da3 100644 (file)
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <4>;
                        num-viewport = <8>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x40 0x00010000 0x0 0x00010000   /* downstream I/O */
                        reg-names = "regs", "addr_space";
                        num-ib-windows = <6>;
                        num-ob-windows = <8>;
-                       num-lanes = <2>;
                        status = "disabled";
                };
 
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <2>;
                        num-viewport = <8>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x48 0x00010000 0x0 0x00010000   /* downstream I/O */
                        reg-names = "regs", "addr_space";
                        num-ib-windows = <6>;
                        num-ob-windows = <8>;
-                       num-lanes = <2>;
                        status = "disabled";
                };
 
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <2>;
                        num-viewport = <8>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x50 0x00010000 0x0 0x00010000   /* downstream I/O */
                        reg-names = "regs", "addr_space";
                        num-ib-windows = <6>;
                        num-ob-windows = <8>;
-                       num-lanes = <2>;
                        status = "disabled";
                };
 
index d1469b0..c676d07 100644 (file)
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <4>;
                        num-viewport = <256>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x20 0x00010000 0x0 0x00010000   /* downstream I/O */
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <4>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x28 0x00010000 0x0 0x00010000   /* downstream I/O */
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <8>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        ranges = <0x81000000 0x0 0x00000000 0x30 0x00010000 0x0 0x00010000   /* downstream I/O */
index 64101c9..7a0be8e 100644 (file)
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <4>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        msi-parent = <&its>;
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <4>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        msi-parent = <&its>;
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <8>;
                        num-viewport = <256>;
                        bus-range = <0x0 0xff>;
                        msi-parent = <&its>;
                        #size-cells = <2>;
                        device_type = "pci";
                        dma-coherent;
-                       num-lanes = <4>;
                        num-viewport = <6>;
                        bus-range = <0x0 0xff>;
                        msi-parent = <&its>;
index caed433..243338c 100644 (file)
@@ -2,6 +2,7 @@
 # Mvebu SoC Family
 dtb-$(CONFIG_ARCH_MVEBU) += armada-3720-db.dtb
 dtb-$(CONFIG_ARCH_MVEBU) += armada-3720-espressobin.dtb
+dtb-$(CONFIG_ARCH_MVEBU) += armada-3720-turris-mox.dtb
 dtb-$(CONFIG_ARCH_MVEBU) += armada-3720-uDPU.dtb
 dtb-$(CONFIG_ARCH_MVEBU) += armada-7040-db.dtb
 dtb-$(CONFIG_ARCH_MVEBU) += armada-8040-clearfog-gt-8k.dtb
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
new file mode 100644 (file)
index 0000000..d105986
--- /dev/null
@@ -0,0 +1,840 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Device Tree file for CZ.NIC Turris Mox Board
+ * 2019 by Marek Behun <marek.behun@nic.cz>
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/bus/moxtet.h>
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include "armada-372x.dtsi"
+
+/ {
+       model = "CZ.NIC Turris Mox Board";
+       compatible = "cznic,turris-mox", "marvell,armada3720",
+                    "marvell,armada3710";
+
+       aliases {
+               spi0 = &spi0;
+               ethernet1 = &eth1;
+       };
+
+       chosen {
+               stdout-path = "serial0:115200n8";
+       };
+
+       memory@0 {
+               device_type = "memory";
+               reg = <0x00000000 0x00000000 0x00000000 0x20000000>;
+       };
+
+       leds {
+               compatible = "gpio-leds";
+               red {
+                       label = "mox:red:activity";
+                       gpios = <&gpiosb 21 GPIO_ACTIVE_LOW>;
+                       linux,default-trigger = "default-on";
+               };
+       };
+
+       gpio-keys {
+               compatible = "gpio-keys";
+
+               reset {
+                       label = "reset";
+                       linux,code = <KEY_RESTART>;
+                       gpios = <&gpiosb 20 GPIO_ACTIVE_LOW>;
+                       debounce-interval = <60>;
+               };
+       };
+
+       exp_usb3_vbus: usb3-vbus {
+               compatible = "regulator-fixed";
+               regulator-name = "usb3-vbus";
+               regulator-min-microvolt = <5000000>;
+               regulator-max-microvolt = <5000000>;
+               enable-active-high;
+               regulator-always-on;
+               gpio = <&gpiosb 0 GPIO_ACTIVE_HIGH>;
+       };
+
+       usb3_phy: usb3-phy {
+               compatible = "usb-nop-xceiv";
+               vcc-supply = <&exp_usb3_vbus>;
+       };
+
+       vsdc_reg: vsdc-reg {
+               compatible = "regulator-gpio";
+               regulator-name = "vsdc";
+               regulator-min-microvolt = <1800000>;
+               regulator-max-microvolt = <3300000>;
+               regulator-boot-on;
+
+               gpios = <&gpiosb 23 GPIO_ACTIVE_HIGH>;
+               gpios-states = <0>;
+               states = <1800000 0x1
+                         3300000 0x0>;
+               enable-active-high;
+       };
+
+       vsdio_reg: vsdio-reg {
+               compatible = "regulator-gpio";
+               regulator-name = "vsdio";
+               regulator-min-microvolt = <1800000>;
+               regulator-max-microvolt = <3300000>;
+               regulator-boot-on;
+
+               gpios = <&gpiosb 22 GPIO_ACTIVE_HIGH>;
+               gpios-states = <0>;
+               states = <1800000 0x1
+                         3300000 0x0>;
+               enable-active-high;
+       };
+
+       sdhci1_pwrseq: sdhci1-pwrseq {
+               compatible = "mmc-pwrseq-simple";
+               reset-gpios = <&gpionb 19 GPIO_ACTIVE_HIGH>;
+               status = "okay";
+       };
+
+       sfp: sfp {
+               compatible = "sff,sfp+";
+               i2c-bus = <&i2c0>;
+               los-gpio = <&moxtet_sfp 0 GPIO_ACTIVE_HIGH>;
+               tx-fault-gpio = <&moxtet_sfp 1 GPIO_ACTIVE_HIGH>;
+               mod-def0-gpio = <&moxtet_sfp 2 GPIO_ACTIVE_LOW>;
+               tx-disable-gpio = <&moxtet_sfp 4 GPIO_ACTIVE_HIGH>;
+               rate-select0-gpio = <&moxtet_sfp 5 GPIO_ACTIVE_HIGH>;
+
+               /* enabled by U-Boot if SFP module is present */
+               status = "disabled";
+       };
+};
+
+&i2c0 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&i2c1_pins>;
+       clock-frequency = <100000>;
+       status = "okay";
+
+       rtc@6f {
+               compatible = "microchip,mcp7940x";
+               reg = <0x6f>;
+       };
+};
+
+&pcie_reset_pins {
+       function = "gpio";
+};
+
+&pcie0 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pcie_reset_pins &pcie_clkreq_pins>;
+       status = "okay";
+       max-link-speed = <2>;
+       reset-gpios = <&gpiosb 3 GPIO_ACTIVE_LOW>;
+       phys = <&comphy1 0>;
+
+       /* enabled by U-Boot if PCIe module is present */
+       status = "disabled";
+};
+
+&uart0 {
+       status = "okay";
+};
+
+&eth0 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&rgmii_pins>;
+       phy-mode = "rgmii-id";
+       phy = <&phy1>;
+       status = "okay";
+};
+
+&eth1 {
+       phy-mode = "2500base-x";
+       managed = "in-band-status";
+       phys = <&comphy0 1>;
+};
+
+&sdhci0 {
+       wp-inverted;
+       bus-width = <4>;
+       cd-gpios = <&gpionb 10 GPIO_ACTIVE_HIGH>;
+       vqmmc-supply = <&vsdc_reg>;
+       marvell,pad-type = "sd";
+       status = "okay";
+};
+
+&sdhci1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&sdio_pins>;
+       non-removable;
+       bus-width = <4>;
+       marvell,pad-type = "sd";
+       vqmmc-supply = <&vsdio_reg>;
+       mmc-pwrseq = <&sdhci1_pwrseq>;
+       status = "okay";
+};
+
+&spi0 {
+       status = "okay";
+       pinctrl-names = "default";
+       pinctrl-0 = <&spi_quad_pins &spi_cs1_pins>;
+       assigned-clocks = <&nb_periph_clk 7>;
+       assigned-clock-parents = <&tbg 1>;
+       assigned-clock-rates = <20000000>;
+
+       spi-flash@0 {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               compatible = "jedec,spi-nor";
+               reg = <0>;
+               spi-max-frequency = <20000000>;
+
+               partitions {
+                       compatible = "fixed-partitions";
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+
+                       partition@0 {
+                               label = "secure-firmware";
+                               reg = <0x0 0x20000>;
+                       };
+
+                       partition@20000 {
+                               label = "u-boot";
+                               reg = <0x20000 0x160000>;
+                       };
+
+                       partition@180000 {
+                               label = "u-boot-env";
+                               reg = <0x180000 0x10000>;
+                       };
+
+                       partition@190000 {
+                               label = "Rescue system";
+                               reg = <0x190000 0x660000>;
+                       };
+
+                       partition@7f0000 {
+                               label = "dtb";
+                               reg = <0x7f0000 0x10000>;
+                       };
+               };
+       };
+
+       moxtet: moxtet@1 {
+               #address-cells = <1>;
+               #size-cells = <0>;
+               compatible = "cznic,moxtet";
+               reg = <1>;
+               reset-gpios = <&gpiosb 2 GPIO_ACTIVE_LOW>;
+               spi-max-frequency = <10000000>;
+               spi-cpol;
+               spi-cpha;
+               interrupt-controller;
+               #interrupt-cells = <1>;
+               interrupt-parent = <&gpiosb>;
+               interrupts = <5 IRQ_TYPE_EDGE_FALLING>;
+               status = "okay";
+
+               moxtet_sfp: gpio@0 {
+                       compatible = "cznic,moxtet-gpio";
+                       gpio-controller;
+                       #gpio-cells = <2>;
+                       reg = <0>;
+                       status = "disabled";
+               };
+       };
+};
+
+&usb2 {
+       status = "okay";
+};
+
+&usb3 {
+       status = "okay";
+       phys = <&comphy2 0>;
+       usb-phy = <&usb3_phy>;
+};
+
+&mdio {
+       pinctrl-names = "default";
+       pinctrl-0 = <&smi_pins>;
+       status = "okay";
+
+       phy1: ethernet-phy@1 {
+               reg = <1>;
+       };
+
+       /* switch nodes are enabled by U-Boot if modules are present */
+       /*
+        * Switch 0, Peridot variant: mv88e6190 at address 0x10 on the parent
+        * bus, DSA member <0 0>, interrupt taken from the moxtet controller.
+        * Uses the same DSA member index as switch0@2; both start disabled.
+        */
+       switch0@10 {
+               compatible = "marvell,mv88e6190";
+               reg = <0x10 0>;
+               dsa,member = <0 0>;
+               interrupt-parent = <&moxtet>;
+               interrupts = <MOXTET_IRQ_PERIDOT(0)>;
+               status = "disabled";
+
+               /* PHYs 1-8 on the switch's own MDIO bus, one per lan port below */
+               mdio {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       switch0phy1: switch0phy1@1 {
+                               reg = <0x1>;
+                       };
+
+                       switch0phy2: switch0phy2@2 {
+                               reg = <0x2>;
+                       };
+
+                       switch0phy3: switch0phy3@3 {
+                               reg = <0x3>;
+                       };
+
+                       switch0phy4: switch0phy4@4 {
+                               reg = <0x4>;
+                       };
+
+                       switch0phy5: switch0phy5@5 {
+                               reg = <0x5>;
+                       };
+
+                       switch0phy6: switch0phy6@6 {
+                               reg = <0x6>;
+                       };
+
+                       switch0phy7: switch0phy7@7 {
+                               reg = <0x7>;
+                       };
+
+                       switch0phy8: switch0phy8@8 {
+                               reg = <0x8>;
+                       };
+               };
+
+               /* Ports 1-8 are user ports lan1..lan8; 9 is the CPU port; 0xa is the uplink */
+               ports {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       port@1 {
+                               reg = <0x1>;
+                               label = "lan1";
+                               phy-handle = <&switch0phy1>;
+                       };
+
+                       port@2 {
+                               reg = <0x2>;
+                               label = "lan2";
+                               phy-handle = <&switch0phy2>;
+                       };
+
+                       port@3 {
+                               reg = <0x3>;
+                               label = "lan3";
+                               phy-handle = <&switch0phy3>;
+                       };
+
+                       port@4 {
+                               reg = <0x4>;
+                               label = "lan4";
+                               phy-handle = <&switch0phy4>;
+                       };
+
+                       port@5 {
+                               reg = <0x5>;
+                               label = "lan5";
+                               phy-handle = <&switch0phy5>;
+                       };
+
+                       port@6 {
+                               reg = <0x6>;
+                               label = "lan6";
+                               phy-handle = <&switch0phy6>;
+                       };
+
+                       port@7 {
+                               reg = <0x7>;
+                               label = "lan7";
+                               phy-handle = <&switch0phy7>;
+                       };
+
+                       port@8 {
+                               reg = <0x8>;
+                               label = "lan8";
+                               phy-handle = <&switch0phy8>;
+                       };
+
+                       /* CPU port: connected to eth1 over 2500base-x */
+                       port@9 {
+                               reg = <0x9>;
+                               label = "cpu";
+                               ethernet = <&eth1>;
+                               phy-mode = "2500base-x";
+                               managed = "in-band-status";
+                       };
+
+                       /*
+                        * Port 0xa is described twice, both disabled by default:
+                        * as a DSA link towards switch 1 / switch 2, and as an
+                        * SFP port. NOTE(review): presumably at most one of the
+                        * two is enabled at boot - confirm against the U-Boot fixup.
+                        */
+                       switch0port10: port@a {
+                               reg = <0xa>;
+                               label = "dsa";
+                               phy-mode = "2500base-x";
+                               managed = "in-band-status";
+                               link = <&switch1port9 &switch2port9>;
+                               status = "disabled";
+                       };
+
+                       port-sfp@a {
+                               reg = <0xa>;
+                               label = "sfp";
+                               sfp = <&sfp>;
+                               phy-mode = "sgmii";
+                               managed = "in-band-status";
+                               status = "disabled";
+                       };
+               };
+       };
+
+       /*
+        * Switch 0, Topaz variant: mv88e6085-compatible switch at address 0x2
+        * on the parent bus, DSA member <0 0>, using the moxtet Topaz interrupt.
+        * Four user ports (lan1..lan4) plus a CPU port; disabled by default.
+        */
+       switch0@2 {
+               compatible = "marvell,mv88e6085";
+               reg = <0x2 0>;
+               dsa,member = <0 0>;
+               interrupt-parent = <&moxtet>;
+               interrupts = <MOXTET_IRQ_TOPAZ>;
+               status = "disabled";
+
+               /* PHYs sit at MDIO addresses 0x11-0x14, one per lan port below */
+               mdio {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       switch0phy1_topaz: switch0phy1@11 {
+                               reg = <0x11>;
+                       };
+
+                       switch0phy2_topaz: switch0phy2@12 {
+                               reg = <0x12>;
+                       };
+
+                       switch0phy3_topaz: switch0phy3@13 {
+                               reg = <0x13>;
+                       };
+
+                       switch0phy4_topaz: switch0phy4@14 {
+                               reg = <0x14>;
+                       };
+               };
+
+               ports {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       port@1 {
+                               reg = <0x1>;
+                               label = "lan1";
+                               phy-handle = <&switch0phy1_topaz>;
+                       };
+
+                       port@2 {
+                               reg = <0x2>;
+                               label = "lan2";
+                               phy-handle = <&switch0phy2_topaz>;
+                       };
+
+                       port@3 {
+                               reg = <0x3>;
+                               label = "lan3";
+                               phy-handle = <&switch0phy3_topaz>;
+                       };
+
+                       port@4 {
+                               reg = <0x4>;
+                               label = "lan4";
+                               phy-handle = <&switch0phy4_topaz>;
+                       };
+
+                       /* CPU port: connected to eth1 over 2500base-x */
+                       port@5 {
+                               reg = <0x5>;
+                               label = "cpu";
+                               phy-mode = "2500base-x";
+                               managed = "in-band-status";
+                               ethernet = <&eth1>;
+                       };
+               };
+       };
+
+       /*
+        * Switch 1, Peridot variant: mv88e6190 at address 0x11 on the parent
+        * bus, DSA member <0 1>, interrupt taken from the moxtet controller.
+        * Uses the same DSA member index as switch1@2; both start disabled.
+        */
+       switch1@11 {
+               compatible = "marvell,mv88e6190";
+               reg = <0x11 0>;
+               dsa,member = <0 1>;
+               interrupt-parent = <&moxtet>;
+               interrupts = <MOXTET_IRQ_PERIDOT(1)>;
+               status = "disabled";
+
+               /* PHYs 1-8 on the switch's own MDIO bus, one per lan port below */
+               mdio {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       switch1phy1: switch1phy1@1 {
+                               reg = <0x1>;
+                       };
+
+                       switch1phy2: switch1phy2@2 {
+                               reg = <0x2>;
+                       };
+
+                       switch1phy3: switch1phy3@3 {
+                               reg = <0x3>;
+                       };
+
+                       switch1phy4: switch1phy4@4 {
+                               reg = <0x4>;
+                       };
+
+                       switch1phy5: switch1phy5@5 {
+                               reg = <0x5>;
+                       };
+
+                       switch1phy6: switch1phy6@6 {
+                               reg = <0x6>;
+                       };
+
+                       switch1phy7: switch1phy7@7 {
+                               reg = <0x7>;
+                       };
+
+                       switch1phy8: switch1phy8@8 {
+                               reg = <0x8>;
+                       };
+               };
+
+               /* Ports 1-8 are user ports lan9..lan16; 9 links upstream; 0xa is the downlink */
+               ports {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       port@1 {
+                               reg = <0x1>;
+                               label = "lan9";
+                               phy-handle = <&switch1phy1>;
+                       };
+
+                       port@2 {
+                               reg = <0x2>;
+                               label = "lan10";
+                               phy-handle = <&switch1phy2>;
+                       };
+
+                       port@3 {
+                               reg = <0x3>;
+                               label = "lan11";
+                               phy-handle = <&switch1phy3>;
+                       };
+
+                       port@4 {
+                               reg = <0x4>;
+                               label = "lan12";
+                               phy-handle = <&switch1phy4>;
+                       };
+
+                       port@5 {
+                               reg = <0x5>;
+                               label = "lan13";
+                               phy-handle = <&switch1phy5>;
+                       };
+
+                       port@6 {
+                               reg = <0x6>;
+                               label = "lan14";
+                               phy-handle = <&switch1phy6>;
+                       };
+
+                       port@7 {
+                               reg = <0x7>;
+                               label = "lan15";
+                               phy-handle = <&switch1phy7>;
+                       };
+
+                       port@8 {
+                               reg = <0x8>;
+                               label = "lan16";
+                               phy-handle = <&switch1phy8>;
+                       };
+
+                       /* DSA link towards switch 0 (its port 0xa) */
+                       switch1port9: port@9 {
+                               reg = <0x9>;
+                               label = "dsa";
+                               phy-mode = "2500base-x";
+                               managed = "in-band-status";
+                               link = <&switch0port10>;
+                       };
+
+                       /*
+                        * Port 0xa is described twice, both disabled by default:
+                        * as a DSA link towards switch 2, and as an SFP port.
+                        * NOTE(review): presumably at most one of the two is
+                        * enabled at boot - confirm against the U-Boot fixup.
+                        */
+                       switch1port10: port@a {
+                               reg = <0xa>;
+                               label = "dsa";
+                               phy-mode = "2500base-x";
+                               managed = "in-band-status";
+                               link = <&switch2port9>;
+                               status = "disabled";
+                       };
+
+                       port-sfp@a {
+                               reg = <0xa>;
+                               label = "sfp";
+                               sfp = <&sfp>;
+                               phy-mode = "sgmii";
+                               managed = "in-band-status";
+                               status = "disabled";
+                       };
+               };
+       };
+
+       /*
+        * Switch 1, Topaz variant: mv88e6085-compatible switch at address 0x2
+        * on the parent bus, DSA member <0 1>, using the moxtet Topaz interrupt.
+        * Four user ports (lan9..lan12) plus a DSA uplink; disabled by default.
+        */
+       switch1@2 {
+               compatible = "marvell,mv88e6085";
+               reg = <0x2 0>;
+               dsa,member = <0 1>;
+               interrupt-parent = <&moxtet>;
+               interrupts = <MOXTET_IRQ_TOPAZ>;
+               status = "disabled";
+
+               /* PHYs sit at MDIO addresses 0x11-0x14, one per lan port below */
+               mdio {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       switch1phy1_topaz: switch1phy1@11 {
+                               reg = <0x11>;
+                       };
+
+                       switch1phy2_topaz: switch1phy2@12 {
+                               reg = <0x12>;
+                       };
+
+                       switch1phy3_topaz: switch1phy3@13 {
+                               reg = <0x13>;
+                       };
+
+                       switch1phy4_topaz: switch1phy4@14 {
+                               reg = <0x14>;
+                       };
+               };
+
+               ports {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       port@1 {
+                               reg = <0x1>;
+                               label = "lan9";
+                               phy-handle = <&switch1phy1_topaz>;
+                       };
+
+                       port@2 {
+                               reg = <0x2>;
+                               label = "lan10";
+                               phy-handle = <&switch1phy2_topaz>;
+                       };
+
+                       port@3 {
+                               reg = <0x3>;
+                               label = "lan11";
+                               phy-handle = <&switch1phy3_topaz>;
+                       };
+
+                       port@4 {
+                               reg = <0x4>;
+                               label = "lan12";
+                               phy-handle = <&switch1phy4_topaz>;
+                       };
+
+                       /* DSA link towards switch 0 (its port 0xa) */
+                       port@5 {
+                               reg = <0x5>;
+                               label = "dsa";
+                               phy-mode = "2500base-x";
+                               managed = "in-band-status";
+                               link = <&switch0port10>;
+                       };
+               };
+       };
+
+       /*
+        * Switch 2, Peridot variant: mv88e6190 at address 0x12 on the parent
+        * bus, DSA member <0 2>, interrupt taken from the moxtet controller.
+        * Uses the same DSA member index as switch2@2; both start disabled.
+        */
+       switch2@12 {
+               compatible = "marvell,mv88e6190";
+               reg = <0x12 0>;
+               dsa,member = <0 2>;
+               interrupt-parent = <&moxtet>;
+               interrupts = <MOXTET_IRQ_PERIDOT(2)>;
+               status = "disabled";
+
+               /* PHYs 1-8 on the switch's own MDIO bus, one per lan port below */
+               mdio {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       switch2phy1: switch2phy1@1 {
+                               reg = <0x1>;
+                       };
+
+                       switch2phy2: switch2phy2@2 {
+                               reg = <0x2>;
+                       };
+
+                       switch2phy3: switch2phy3@3 {
+                               reg = <0x3>;
+                       };
+
+                       switch2phy4: switch2phy4@4 {
+                               reg = <0x4>;
+                       };
+
+                       switch2phy5: switch2phy5@5 {
+                               reg = <0x5>;
+                       };
+
+                       switch2phy6: switch2phy6@6 {
+                               reg = <0x6>;
+                       };
+
+                       switch2phy7: switch2phy7@7 {
+                               reg = <0x7>;
+                       };
+
+                       switch2phy8: switch2phy8@8 {
+                               reg = <0x8>;
+                       };
+               };
+
+               /* Ports 1-8 are user ports lan17..lan24; 9 links upstream; 0xa is SFP only */
+               ports {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       port@1 {
+                               reg = <0x1>;
+                               label = "lan17";
+                               phy-handle = <&switch2phy1>;
+                       };
+
+                       port@2 {
+                               reg = <0x2>;
+                               label = "lan18";
+                               phy-handle = <&switch2phy2>;
+                       };
+
+                       port@3 {
+                               reg = <0x3>;
+                               label = "lan19";
+                               phy-handle = <&switch2phy3>;
+                       };
+
+                       port@4 {
+                               reg = <0x4>;
+                               label = "lan20";
+                               phy-handle = <&switch2phy4>;
+                       };
+
+                       port@5 {
+                               reg = <0x5>;
+                               label = "lan21";
+                               phy-handle = <&switch2phy5>;
+                       };
+
+                       port@6 {
+                               reg = <0x6>;
+                               label = "lan22";
+                               phy-handle = <&switch2phy6>;
+                       };
+
+                       port@7 {
+                               reg = <0x7>;
+                               label = "lan23";
+                               phy-handle = <&switch2phy7>;
+                       };
+
+                       port@8 {
+                               reg = <0x8>;
+                               label = "lan24";
+                               phy-handle = <&switch2phy8>;
+                       };
+
+                       /* DSA link listing port 0xa of both switch 1 and switch 0 */
+                       switch2port9: port@9 {
+                               reg = <0x9>;
+                               label = "dsa";
+                               phy-mode = "2500base-x";
+                               managed = "in-band-status";
+                               link = <&switch1port10 &switch0port10>;
+                       };
+
+                       /* Unlike switch 0/1 there is no DSA variant of port 0xa here */
+                       port-sfp@a {
+                               reg = <0xa>;
+                               label = "sfp";
+                               sfp = <&sfp>;
+                               phy-mode = "sgmii";
+                               managed = "in-band-status";
+                               status = "disabled";
+                       };
+               };
+       };
+
+       /*
+        * Switch 2, Topaz variant: mv88e6085-compatible switch at address 0x2
+        * on the parent bus, DSA member <0 2>, using the moxtet Topaz interrupt.
+        * Four user ports (lan17..lan20) plus a DSA uplink; disabled by default.
+        */
+       switch2@2 {
+               compatible = "marvell,mv88e6085";
+               reg = <0x2 0>;
+               dsa,member = <0 2>;
+               interrupt-parent = <&moxtet>;
+               interrupts = <MOXTET_IRQ_TOPAZ>;
+               status = "disabled";
+
+               /* PHYs sit at MDIO addresses 0x11-0x14, one per lan port below */
+               mdio {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       switch2phy1_topaz: switch2phy1@11 {
+                               reg = <0x11>;
+                       };
+
+                       switch2phy2_topaz: switch2phy2@12 {
+                               reg = <0x12>;
+                       };
+
+                       switch2phy3_topaz: switch2phy3@13 {
+                               reg = <0x13>;
+                       };
+
+                       switch2phy4_topaz: switch2phy4@14 {
+                               reg = <0x14>;
+                       };
+               };
+
+               ports {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       port@1 {
+                               reg = <0x1>;
+                               label = "lan17";
+                               phy-handle = <&switch2phy1_topaz>;
+                       };
+
+                       port@2 {
+                               reg = <0x2>;
+                               label = "lan18";
+                               phy-handle = <&switch2phy2_topaz>;
+                       };
+
+                       port@3 {
+                               reg = <0x3>;
+                               label = "lan19";
+                               phy-handle = <&switch2phy3_topaz>;
+                       };
+
+                       port@4 {
+                               reg = <0x4>;
+                               label = "lan20";
+                               phy-handle = <&switch2phy4_topaz>;
+                       };
+
+                       /* DSA link listing port 0xa of both switch 1 and switch 0 */
+                       port@5 {
+                               reg = <0x5>;
+                               label = "dsa";
+                               phy-mode = "2500base-x";
+                               managed = "in-band-status";
+                               link = <&switch1port10 &switch0port10>;
+                       };
+               };
+       };
+};
index 7f69e3d..000c135 100644 (file)
                                        function = "spi";
                                };
 
+                               /* Assigns the "spi_cs1" pin group to the "spi" function */
+                               spi_cs1_pins: spi-cs1-pins {
+                                       groups = "spi_cs1";
+                                       function = "spi";
+                               };
+
                                i2c1_pins: i2c1-pins {
                                        groups = "i2c1";
                                        function = "i2c";
index 62e07e1..4c38426 100644 (file)
                        gpio = <&gpio TEGRA194_MAIN_GPIO(A, 3) GPIO_ACTIVE_HIGH>;
                        enable-active-high;
                };
+
+               /*
+                * Fixed 3.3 V rail "PEX_3V3", switched by main GPIO Z.2 with an
+                * active-high enable and marked as already on at boot.
+                * Referenced as vpcie3v3-supply by pcie@141a0000.
+                */
+               vdd_3v3_pcie: regulator@2 {
+                       compatible = "regulator-fixed";
+                       reg = <2>;
+
+                       regulator-name = "PEX_3V3";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       gpio = <&gpio TEGRA194_MAIN_GPIO(Z, 2) GPIO_ACTIVE_HIGH>;
+                       regulator-boot-on;
+                       enable-active-high;
+               };
+
+               /*
+                * Fixed 12 V rail "VDD_12V", switched by main GPIO A.1 and marked
+                * as already on at boot. Referenced as vpcie12v-supply by
+                * pcie@141a0000.
+                *
+                * The enable line is active low. For regulator-fixed that is
+                * expressed by the absence of "enable-active-high"; there is no
+                * "enable-active-low" property in the binding, so none is set.
+                */
+               vdd_12v_pcie: regulator@3 {
+                       compatible = "regulator-fixed";
+                       reg = <3>;
+
+                       regulator-name = "VDD_12V";
+                       /* 12 V expressed in microvolts (12000000), not 1.2 V */
+                       regulator-min-microvolt = <12000000>;
+                       regulator-max-microvolt = <12000000>;
+                       gpio = <&gpio TEGRA194_MAIN_GPIO(A, 1) GPIO_ACTIVE_LOW>;
+                       regulator-boot-on;
+               };
        };
 };
index 23597d5..d47cd8c 100644 (file)
        };
 
        pcie@141a0000 {
-               status = "disabled";
+               status = "okay";
 
                vddio-pex-ctl-supply = <&vdd_1v8ao>;
+               vpcie3v3-supply = <&vdd_3v3_pcie>;
+               vpcie12v-supply = <&vdd_12v_pcie>;
 
                phys = <&p2u_nvhs_0>, <&p2u_nvhs_1>, <&p2u_nvhs_2>,
                       <&p2u_nvhs_3>, <&p2u_nvhs_4>, <&p2u_nvhs_5>,
index adebbbf..3c0cf54 100644 (file)
@@ -3,8 +3,9 @@
 #include <dt-bindings/gpio/tegra194-gpio.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
 #include <dt-bindings/mailbox/tegra186-hsp.h>
-#include <dt-bindings/reset/tegra194-reset.h>
+#include <dt-bindings/pinctrl/pinctrl-tegra.h>
 #include <dt-bindings/power/tegra194-powergate.h>
+#include <dt-bindings/reset/tegra194-reset.h>
 #include <dt-bindings/thermal/tegra194-bpmp-thermal.h>
 
 / {
                        };
                };
 
+               /*
+                * Tegra194 pin controller, described by two register ranges.
+                * Defines two pin configuration states that are selected together
+                * as the "default" pinctrl state of the PCIe controller with
+                * linux,pci-domain = <5>:
+                *   - pex_rst_c5_out_state:   pex_l5_rst_n_pgg1, input disabled
+                *   - clkreq_c5_bi_dir_state: pex_l5_clkreq_n_pgg0, input enabled
+                * Both otherwise share the same settings (no pull, no tristate,
+                * schmitt off, lpdr and io-high-voltage on).
+                */
+               pinmux: pinmux@2430000 {
+                       compatible = "nvidia,tegra194-pinmux";
+                       reg = <0x2430000 0x17000
+                              0xc300000 0x4000>;
+
+                       status = "okay";
+
+                       pex_rst_c5_out_state: pex_rst_c5_out {
+                               pex_rst {
+                                       nvidia,pins = "pex_l5_rst_n_pgg1";
+                                       nvidia,schmitt = <TEGRA_PIN_DISABLE>;
+                                       nvidia,lpdr = <TEGRA_PIN_ENABLE>;
+                                       nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                                       nvidia,io-high-voltage = <TEGRA_PIN_ENABLE>;
+                                       nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                                       nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               };
+                       };
+
+                       clkreq_c5_bi_dir_state: clkreq_c5_bi_dir {
+                               clkreq {
+                                       nvidia,pins = "pex_l5_clkreq_n_pgg0";
+                                       nvidia,schmitt = <TEGRA_PIN_DISABLE>;
+                                       nvidia,lpdr = <TEGRA_PIN_ENABLE>;
+                                       nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                                       nvidia,io-high-voltage = <TEGRA_PIN_ENABLE>;
+                                       nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                                       nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               };
+                       };
+               };
+
                uarta: serial@3100000 {
                        compatible = "nvidia,tegra194-uart", "nvidia,tegra20-uart";
                        reg = <0x03100000 0x40>;
                num-viewport = <8>;
                linux,pci-domain = <5>;
 
+               pinctrl-names = "default";
+               pinctrl-0 = <&pex_rst_c5_out_state>, <&clkreq_c5_bi_dir_state>;
+
                clocks = <&bpmp TEGRA194_CLK_PEX1_CORE_5>,
                        <&bpmp TEGRA194_CLK_PEX1_CORE_5M>;
                clock-names = "core", "core_m";
index 71bd717..f5a85ca 100644 (file)
 &ufs_mem_hc {
        status = "okay";
 
+       reset-gpios = <&tlmm 150 GPIO_ACTIVE_LOW>;
+
        vcc-supply = <&vreg_l20a_2p95>;
        vcc-max-microamp = <800000>;
 };
index 2e78638..c57548b 100644 (file)
 &ufs_mem_hc {
        status = "okay";
 
+       reset-gpios = <&tlmm 150 GPIO_ACTIVE_LOW>;
+
        vcc-supply = <&vreg_l20a_2p95>;
        vcc-max-microamp = <600000>;
 };
index ca70ff7..799c75f 100644 (file)
@@ -42,7 +42,7 @@
                 */
                interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
 
-               gic_its: gic-its@18200000 {
+               gic_its: gic-its@1820000 {
                        compatible = "arm,gic-v3-its";
                        reg = <0x00 0x01820000 0x00 0x10000>;
                        socionext,synquacer-pre-its = <0x1000000 0x400000>;
@@ -67,7 +67,7 @@
                reg = <0x0 0x900000 0x0 0x2000>;
                reg-names = "serdes";
                #phy-cells = <2>;
-               power-domains = <&k3_pds 153>;
+               power-domains = <&k3_pds 153 TI_SCI_PD_EXCLUSIVE>;
                clocks = <&k3_clks 153 4>, <&k3_clks 153 1>, <&serdes1 AM654_SERDES_LO_REFCLK>;
                clock-output-names = "serdes0_cmu_refclk", "serdes0_lo_refclk", "serdes0_ro_refclk";
                assigned-clocks = <&k3_clks 153 4>, <&serdes0 AM654_SERDES_CMU_REFCLK>;
@@ -82,7 +82,7 @@
                reg = <0x0 0x910000 0x0 0x2000>;
                reg-names = "serdes";
                #phy-cells = <2>;
-               power-domains = <&k3_pds 154>;
+               power-domains = <&k3_pds 154 TI_SCI_PD_EXCLUSIVE>;
                clocks = <&serdes0 AM654_SERDES_RO_REFCLK>, <&k3_clks 154 1>, <&k3_clks 154 5>;
                clock-output-names = "serdes1_cmu_refclk", "serdes1_lo_refclk", "serdes1_ro_refclk";
                assigned-clocks = <&k3_clks 154 5>, <&serdes1 AM654_SERDES_CMU_REFCLK>;
                interrupts = <GIC_SPI 192 IRQ_TYPE_LEVEL_HIGH>;
                clock-frequency = <48000000>;
                current-speed = <115200>;
-               power-domains = <&k3_pds 146>;
+               power-domains = <&k3_pds 146 TI_SCI_PD_EXCLUSIVE>;
        };
 
        main_uart1: serial@2810000 {
                reg-io-width = <4>;
                interrupts = <GIC_SPI 193 IRQ_TYPE_LEVEL_HIGH>;
                clock-frequency = <48000000>;
-               power-domains = <&k3_pds 147>;
+               power-domains = <&k3_pds 147 TI_SCI_PD_EXCLUSIVE>;
        };
 
        main_uart2: serial@2820000 {
                reg-io-width = <4>;
                interrupts = <GIC_SPI 194 IRQ_TYPE_LEVEL_HIGH>;
                clock-frequency = <48000000>;
-               power-domains = <&k3_pds 148>;
+               power-domains = <&k3_pds 148 TI_SCI_PD_EXCLUSIVE>;
        };
 
        main_pmx0: pinmux@11c000 {
                #size-cells = <0>;
                clock-names = "fck";
                clocks = <&k3_clks 110 1>;
-               power-domains = <&k3_pds 110>;
+               power-domains = <&k3_pds 110 TI_SCI_PD_EXCLUSIVE>;
        };
 
        main_i2c1: i2c@2010000 {
                #size-cells = <0>;
                clock-names = "fck";
                clocks = <&k3_clks 111 1>;
-               power-domains = <&k3_pds 111>;
+               power-domains = <&k3_pds 111 TI_SCI_PD_EXCLUSIVE>;
        };
 
        main_i2c2: i2c@2020000 {
                #size-cells = <0>;
                clock-names = "fck";
                clocks = <&k3_clks 112 1>;
-               power-domains = <&k3_pds 112>;
+               power-domains = <&k3_pds 112 TI_SCI_PD_EXCLUSIVE>;
        };
 
        main_i2c3: i2c@2030000 {
                #size-cells = <0>;
                clock-names = "fck";
                clocks = <&k3_clks 113 1>;
-               power-domains = <&k3_pds 113>;
+               power-domains = <&k3_pds 113 TI_SCI_PD_EXCLUSIVE>;
        };
 
        ecap0: pwm@3100000 {
                compatible = "ti,am654-ecap", "ti,am3352-ecap";
                #pwm-cells = <3>;
                reg = <0x0 0x03100000 0x0 0x60>;
-               power-domains = <&k3_pds 39>;
+               power-domains = <&k3_pds 39 TI_SCI_PD_EXCLUSIVE>;
                clocks = <&k3_clks 39 0>;
                clock-names = "fck";
        };
                reg = <0x0 0x2100000 0x0 0x400>;
                interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&k3_clks 137 1>;
-               power-domains = <&k3_pds 137>;
+               power-domains = <&k3_pds 137 TI_SCI_PD_EXCLUSIVE>;
                #address-cells = <1>;
                #size-cells = <0>;
        };
                reg = <0x0 0x2110000 0x0 0x400>;
                interrupts = <GIC_SPI 185 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&k3_clks 138 1>;
-               power-domains = <&k3_pds 138>;
+               power-domains = <&k3_pds 138 TI_SCI_PD_EXCLUSIVE>;
                #address-cells = <1>;
                #size-cells = <0>;
                assigned-clocks = <&k3_clks 137 1>;
                reg = <0x0 0x2120000 0x0 0x400>;
                interrupts = <GIC_SPI 186 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&k3_clks 139 1>;
-               power-domains = <&k3_pds 139>;
+               power-domains = <&k3_pds 139 TI_SCI_PD_EXCLUSIVE>;
                #address-cells = <1>;
                #size-cells = <0>;
        };
                reg = <0x0 0x2130000 0x0 0x400>;
                interrupts = <GIC_SPI 187 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&k3_clks 140 1>;
-               power-domains = <&k3_pds 140>;
+               power-domains = <&k3_pds 140 TI_SCI_PD_EXCLUSIVE>;
                #address-cells = <1>;
                #size-cells = <0>;
        };
                reg = <0x0 0x2140000 0x0 0x400>;
                interrupts = <GIC_SPI 188 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&k3_clks 141 1>;
-               power-domains = <&k3_pds 141>;
+               power-domains = <&k3_pds 141 TI_SCI_PD_EXCLUSIVE>;
                #address-cells = <1>;
                #size-cells = <0>;
        };
        sdhci0: sdhci@4f80000 {
                compatible = "ti,am654-sdhci-5.1";
                reg = <0x0 0x4f80000 0x0 0x260>, <0x0 0x4f90000 0x0 0x134>;
-               power-domains = <&k3_pds 47>;
+               power-domains = <&k3_pds 47 TI_SCI_PD_EXCLUSIVE>;
                clocks = <&k3_clks 47 0>, <&k3_clks 47 1>;
                clock-names = "clk_ahb", "clk_xin";
                interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>;
                ranges = <0x0 0x0 0x4000000 0x20000>;
                interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
                dma-coherent;
-               power-domains = <&k3_pds 151>;
+               power-domains = <&k3_pds 151 TI_SCI_PD_EXCLUSIVE>;
                assigned-clocks = <&k3_clks 151 2>, <&k3_clks 151 7>;
                assigned-clock-parents = <&k3_clks 151 4>,      /* set REF_CLK to 20MHz i.e. PER0_PLL/48 */
                                         <&k3_clks 151 9>;      /* set PIPE3_TXB_CLK to CLK_12M_RC/256 (for HS only) */
                ranges = <0x0 0x0 0x4020000 0x20000>;
                interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
                dma-coherent;
-               power-domains = <&k3_pds 152>;
+               power-domains = <&k3_pds 152 TI_SCI_PD_EXCLUSIVE>;
                assigned-clocks = <&k3_clks 152 2>;
                assigned-clock-parents = <&k3_clks 152 4>;      /* set REF_CLK to 20MHz i.e. PER0_PLL/48 */
 
                        ti,sci-rm-range-vint = <0x0>;
                        ti,sci-rm-range-global-event = <0x1>;
                };
+
+               /*
+                * AM654 hardware spinlock block: 0x1000 bytes of registers at
+                * 0x30e00000. Consumers pass one specifier cell per lock reference.
+                */
+               hwspinlock: spinlock@30e00000 {
+                       compatible = "ti,am654-hwspinlock";
+                       reg = <0x00 0x30e00000 0x00 0x1000>;
+                       #hwlock-cells = <1>;
+               };
        };
 
        main_gpio0:  main_gpio0@600000 {
                compatible = "ti,am654-pcie-rc";
                reg =  <0x0 0x5500000 0x0 0x1000>, <0x0 0x5501000 0x0 0x1000>, <0x0 0x10000000 0x0 0x2000>, <0x0 0x5506000 0x0 0x1000>;
                reg-names = "app", "dbics", "config", "atu";
-               power-domains = <&k3_pds 120>;
+               power-domains = <&k3_pds 120 TI_SCI_PD_EXCLUSIVE>;
                #address-cells = <3>;
                #size-cells = <2>;
                ranges = <0x81000000 0 0          0x0 0x10020000 0 0x00010000
                compatible = "ti,am654-pcie-ep";
                reg =  <0x0 0x5500000 0x0 0x1000>, <0x0 0x5501000 0x0 0x1000>, <0x0 0x10000000 0x0 0x8000000>, <0x0 0x5506000 0x0 0x1000>;
                reg-names = "app", "dbics", "addr_space", "atu";
-               power-domains = <&k3_pds 120>;
+               power-domains = <&k3_pds 120 TI_SCI_PD_EXCLUSIVE>;
                ti,syscon-pcie-mode = <&pcie0_mode>;
                num-ib-windows = <16>;
                num-ob-windows = <16>;
                compatible = "ti,am654-pcie-rc";
                reg =  <0x0 0x5600000 0x0 0x1000>, <0x0 0x5601000 0x0 0x1000>, <0x0 0x18000000 0x0 0x2000>, <0x0 0x5606000 0x0 0x1000>;
                reg-names = "app", "dbics", "config", "atu";
-               power-domains = <&k3_pds 121>;
+               power-domains = <&k3_pds 121 TI_SCI_PD_EXCLUSIVE>;
                #address-cells = <3>;
                #size-cells = <2>;
                ranges = <0x81000000 0 0          0x0   0x18020000 0 0x00010000
                compatible = "ti,am654-pcie-ep";
                reg =  <0x0 0x5600000 0x0 0x1000>, <0x0 0x5601000 0x0 0x1000>, <0x0 0x18000000 0x0 0x4000000>, <0x0 0x5606000 0x0 0x1000>;
                reg-names = "app", "dbics", "addr_space", "atu";
-               power-domains = <&k3_pds 121>;
+               power-domains = <&k3_pds 121 TI_SCI_PD_EXCLUSIVE>;
                ti,syscon-pcie-mode = <&pcie1_mode>;
                num-ib-windows = <16>;
                num-ob-windows = <16>;
index afc29ea..7bdf534 100644 (file)
@@ -14,7 +14,7 @@
                        interrupts = <GIC_SPI 565 IRQ_TYPE_LEVEL_HIGH>;
                        clock-frequency = <96000000>;
                        current-speed = <115200>;
-                       power-domains = <&k3_pds 149>;
+                       power-domains = <&k3_pds 149 TI_SCI_PD_EXCLUSIVE>;
        };
 
        mcu_ram: sram@41c00000 {
@@ -33,7 +33,7 @@
                #size-cells = <0>;
                clock-names = "fck";
                clocks = <&k3_clks 114 1>;
-               power-domains = <&k3_pds 114>;
+               power-domains = <&k3_pds 114 TI_SCI_PD_EXCLUSIVE>;
        };
 
        mcu_spi0: spi@40300000 {
@@ -41,7 +41,7 @@
                reg = <0x0 0x40300000 0x0 0x400>;
                interrupts = <GIC_SPI 560 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&k3_clks 142 1>;
-               power-domains = <&k3_pds 142>;
+               power-domains = <&k3_pds 142 TI_SCI_PD_EXCLUSIVE>;
                #address-cells = <1>;
                #size-cells = <0>;
        };
@@ -51,7 +51,7 @@
                reg = <0x0 0x40310000 0x0 0x400>;
                interrupts = <GIC_SPI 561 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&k3_clks 143 1>;
-               power-domains = <&k3_pds 143>;
+               power-domains = <&k3_pds 143 TI_SCI_PD_EXCLUSIVE>;
                #address-cells = <1>;
                #size-cells = <0>;
        };
@@ -61,7 +61,7 @@
                reg = <0x0 0x40320000 0x0 0x400>;
                interrupts = <GIC_SPI 562 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&k3_clks 144 1>;
-               power-domains = <&k3_pds 144>;
+               power-domains = <&k3_pds 144 TI_SCI_PD_EXCLUSIVE>;
                #address-cells = <1>;
                #size-cells = <0>;
        };
index 9cf2c08..f4227e2 100644 (file)
@@ -20,7 +20,7 @@
 
                k3_pds: power-controller {
                        compatible = "ti,sci-pm-domain";
-                       #power-domain-cells = <1>;
+                       #power-domain-cells = <2>;
                };
 
                k3_clks: clocks {
@@ -50,7 +50,7 @@
                interrupts = <GIC_SPI 697 IRQ_TYPE_LEVEL_HIGH>;
                clock-frequency = <48000000>;
                current-speed = <115200>;
-               power-domains = <&k3_pds 150>;
+               power-domains = <&k3_pds 150 TI_SCI_PD_EXCLUSIVE>;
        };
 
        wkup_i2c0: i2c@42120000 {
@@ -61,7 +61,7 @@
                #size-cells = <0>;
                clock-names = "fck";
                clocks = <&k3_clks 115 1>;
-               power-domains = <&k3_pds 115>;
+               power-domains = <&k3_pds 115 TI_SCI_PD_EXCLUSIVE>;
        };
 
        intr_wkup_gpio: interrupt-controller2 {
index 82edf10..6dfccd5 100644 (file)
@@ -9,6 +9,7 @@
 #include <dt-bindings/interrupt-controller/irq.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
 #include <dt-bindings/pinctrl/k3.h>
+#include <dt-bindings/soc/ti,sci_pm_domain.h>
 
 / {
        model = "Texas Instruments K3 AM654 SoC";
index 52c245d..1102b84 100644 (file)
 &main_uart0 {
        pinctrl-names = "default";
        pinctrl-0 = <&main_uart0_pins_default>;
+       power-domains = <&k3_pds 146 TI_SCI_PD_SHARED>;
 };
 
 &wkup_i2c0 {
index c680123..d2894d5 100644 (file)
@@ -6,12 +6,49 @@
 /dts-v1/;
 
 #include "k3-j721e-som-p0.dtsi"
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
 
 / {
        chosen {
                stdout-path = "serial2:115200n8";
                bootargs = "console=ttyS2,115200n8 earlycon=ns16550a,mmio32,0x02800000";
        };
+
+       gpio_keys: gpio-keys {
+               compatible = "gpio-keys";
+               autorepeat;
+               pinctrl-names = "default";
+               pinctrl-0 = <&sw10_button_pins_default &sw11_button_pins_default>;
+
+               sw10: sw10 {
+                       label = "GPIO Key USER1";
+                       linux,code = <BTN_0>;
+                       gpios = <&main_gpio0 0 GPIO_ACTIVE_LOW>;
+               };
+
+               sw11: sw11 {
+                       label = "GPIO Key USER2";
+                       linux,code = <BTN_1>;
+                       gpios = <&wkup_gpio0 7 GPIO_ACTIVE_LOW>;
+               };
+       };
+};
+
+&main_pmx0 {
+       sw10_button_pins_default: sw10_button_pins_default {
+               pinctrl-single,pins = <
+                       J721E_IOPAD(0x0, PIN_INPUT, 7) /* (AC18) EXTINTn.GPIO0_0 */
+               >;
+       };
+};
+
+&wkup_pmx0 {
+       sw11_button_pins_default: sw11_button_pins_default {
+               pinctrl-single,pins = <
+                       J721E_WKUP_IOPAD(0xcc, PIN_INPUT, 7) /* (G28) WKUP_GPIO0_7 */
+               >;
+       };
 };
 
 &wkup_uart0 {
        status = "disabled";
 };
 
+&main_uart0 {
+       power-domains = <&k3_pds 146 TI_SCI_PD_SHARED>;
+};
+
 &main_uart3 {
        /* UART not brought out */
        status = "disabled";
        /* UART not brought out */
        status = "disabled";
 };
+
+&main_gpio2 {
+       status = "disabled";
+};
+
+&main_gpio3 {
+       status = "disabled";
+};
+
+&main_gpio4 {
+       status = "disabled";
+};
+
+&main_gpio5 {
+       status = "disabled";
+};
+
+&main_gpio6 {
+       status = "disabled";
+};
+
+&main_gpio7 {
+       status = "disabled";
+};
+
+&wkup_gpio1 {
+       status = "disabled";
+};
index a013081..698ef9a 100644 (file)
@@ -31,7 +31,7 @@
                /* vcpumntirq: virtual CPU interface maintenance interrupt */
                interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
 
-               gic_its: gic-its@18200000 {
+               gic_its: gic-its@1820000 {
                        compatible = "arm,gic-v3-its";
                        reg = <0x00 0x01820000 0x00 0x10000>;
                        socionext,synquacer-pre-its = <0x1000000 0x400000>;
                        ti,sci-rm-range-vint = <0xa>;
                        ti,sci-rm-range-global-event = <0xd>;
                };
+
+               hwspinlock: spinlock@30e00000 {
+                       compatible = "ti,am654-hwspinlock";
+                       reg = <0x00 0x30e00000 0x00 0x1000>;
+                       #hwlock-cells = <1>;
+               };
        };
 
        secure_proxy_main: mailbox@32c00000 {
                interrupts = <GIC_SPI 192 IRQ_TYPE_LEVEL_HIGH>;
                clock-frequency = <48000000>;
                current-speed = <115200>;
-               power-domains = <&k3_pds 146>;
+               power-domains = <&k3_pds 146 TI_SCI_PD_EXCLUSIVE>;
                clocks = <&k3_clks 146 0>;
                clock-names = "fclk";
        };
                interrupts = <GIC_SPI 193 IRQ_TYPE_LEVEL_HIGH>;
                clock-frequency = <48000000>;
                current-speed = <115200>;
-               power-domains = <&k3_pds 278>;
+               power-domains = <&k3_pds 278 TI_SCI_PD_EXCLUSIVE>;
                clocks = <&k3_clks 278 0>;
                clock-names = "fclk";
        };
                interrupts = <GIC_SPI 194 IRQ_TYPE_LEVEL_HIGH>;
                clock-frequency = <48000000>;
                current-speed = <115200>;
-               power-domains = <&k3_pds 279>;
+               power-domains = <&k3_pds 279 TI_SCI_PD_EXCLUSIVE>;
                clocks = <&k3_clks 279 0>;
                clock-names = "fclk";
        };
                interrupts = <GIC_SPI 195 IRQ_TYPE_LEVEL_HIGH>;
                clock-frequency = <48000000>;
                current-speed = <115200>;
-               power-domains = <&k3_pds 280>;
+               power-domains = <&k3_pds 280 TI_SCI_PD_EXCLUSIVE>;
                clocks = <&k3_clks 280 0>;
                clock-names = "fclk";
        };
                interrupts = <GIC_SPI 196 IRQ_TYPE_LEVEL_HIGH>;
                clock-frequency = <48000000>;
                current-speed = <115200>;
-               power-domains = <&k3_pds 281>;
+               power-domains = <&k3_pds 281 TI_SCI_PD_EXCLUSIVE>;
                clocks = <&k3_clks 281 0>;
                clock-names = "fclk";
        };
                interrupts = <GIC_SPI 197 IRQ_TYPE_LEVEL_HIGH>;
                clock-frequency = <48000000>;
                current-speed = <115200>;
-               power-domains = <&k3_pds 282>;
+               power-domains = <&k3_pds 282 TI_SCI_PD_EXCLUSIVE>;
                clocks = <&k3_clks 282 0>;
                clock-names = "fclk";
        };
                interrupts = <GIC_SPI 198 IRQ_TYPE_LEVEL_HIGH>;
                clock-frequency = <48000000>;
                current-speed = <115200>;
-               power-domains = <&k3_pds 283>;
+               power-domains = <&k3_pds 283 TI_SCI_PD_EXCLUSIVE>;
                clocks = <&k3_clks 283 0>;
                clock-names = "fclk";
        };
                interrupts = <GIC_SPI 199 IRQ_TYPE_LEVEL_HIGH>;
                clock-frequency = <48000000>;
                current-speed = <115200>;
-               power-domains = <&k3_pds 284>;
+               power-domains = <&k3_pds 284 TI_SCI_PD_EXCLUSIVE>;
                clocks = <&k3_clks 284 0>;
                clock-names = "fclk";
        };
                interrupts = <GIC_SPI 248 IRQ_TYPE_LEVEL_HIGH>;
                clock-frequency = <48000000>;
                current-speed = <115200>;
-               power-domains = <&k3_pds 285>;
+               power-domains = <&k3_pds 285 TI_SCI_PD_EXCLUSIVE>;
                clocks = <&k3_clks 285 0>;
                clock-names = "fclk";
        };
                interrupts = <GIC_SPI 249 IRQ_TYPE_LEVEL_HIGH>;
                clock-frequency = <48000000>;
                current-speed = <115200>;
-               power-domains = <&k3_pds 286>;
+               power-domains = <&k3_pds 286 TI_SCI_PD_EXCLUSIVE>;
                clocks = <&k3_clks 286 0>;
                clock-names = "fclk";
        };
+
+       main_gpio0: gpio@600000 {
+               compatible = "ti,j721e-gpio", "ti,keystone-gpio";
+               reg = <0x0 0x00600000 0x0 0x100>;
+               gpio-controller;
+               #gpio-cells = <2>;
+               interrupt-parent = <&main_gpio_intr>;
+               interrupts = <105 0>, <105 1>, <105 2>, <105 3>,
+                            <105 4>, <105 5>, <105 6>, <105 7>;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               ti,ngpio = <128>;
+               ti,davinci-gpio-unbanked = <0>;
+               power-domains = <&k3_pds 105 TI_SCI_PD_EXCLUSIVE>;
+               clocks = <&k3_clks 105 0>;
+               clock-names = "gpio";
+       };
+
+       main_gpio1: gpio@601000 {
+               compatible = "ti,j721e-gpio", "ti,keystone-gpio";
+               reg = <0x0 0x00601000 0x0 0x100>;
+               gpio-controller;
+               #gpio-cells = <2>;
+               interrupt-parent = <&main_gpio_intr>;
+               interrupts = <106 0>, <106 1>, <106 2>;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               ti,ngpio = <36>;
+               ti,davinci-gpio-unbanked = <0>;
+               power-domains = <&k3_pds 106 TI_SCI_PD_EXCLUSIVE>;
+               clocks = <&k3_clks 106 0>;
+               clock-names = "gpio";
+       };
+
+       main_gpio2: gpio@610000 {
+               compatible = "ti,j721e-gpio", "ti,keystone-gpio";
+               reg = <0x0 0x00610000 0x0 0x100>;
+               gpio-controller;
+               #gpio-cells = <2>;
+               interrupt-parent = <&main_gpio_intr>;
+               interrupts = <107 0>, <107 1>, <107 2>, <107 3>,
+                            <107 4>, <107 5>, <107 6>, <107 7>;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               ti,ngpio = <128>;
+               ti,davinci-gpio-unbanked = <0>;
+               power-domains = <&k3_pds 107 TI_SCI_PD_EXCLUSIVE>;
+               clocks = <&k3_clks 107 0>;
+               clock-names = "gpio";
+       };
+
+       main_gpio3: gpio@611000 {
+               compatible = "ti,j721e-gpio", "ti,keystone-gpio";
+               reg = <0x0 0x00611000 0x0 0x100>;
+               gpio-controller;
+               #gpio-cells = <2>;
+               interrupt-parent = <&main_gpio_intr>;
+               interrupts = <108 0>, <108 1>, <108 2>;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               ti,ngpio = <36>;
+               ti,davinci-gpio-unbanked = <0>;
+               power-domains = <&k3_pds 108 TI_SCI_PD_EXCLUSIVE>;
+               clocks = <&k3_clks 108 0>;
+               clock-names = "gpio";
+       };
+
+       main_gpio4: gpio@620000 {
+               compatible = "ti,j721e-gpio", "ti,keystone-gpio";
+               reg = <0x0 0x00620000 0x0 0x100>;
+               gpio-controller;
+               #gpio-cells = <2>;
+               interrupt-parent = <&main_gpio_intr>;
+               interrupts = <109 0>, <109 1>, <109 2>, <109 3>,
+                            <109 4>, <109 5>, <109 6>, <109 7>;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               ti,ngpio = <128>;
+               ti,davinci-gpio-unbanked = <0>;
+               power-domains = <&k3_pds 109 TI_SCI_PD_EXCLUSIVE>;
+               clocks = <&k3_clks 109 0>;
+               clock-names = "gpio";
+       };
+
+       main_gpio5: gpio@621000 {
+               compatible = "ti,j721e-gpio", "ti,keystone-gpio";
+               reg = <0x0 0x00621000 0x0 0x100>;
+               gpio-controller;
+               #gpio-cells = <2>;
+               interrupt-parent = <&main_gpio_intr>;
+               interrupts = <110 0>, <110 1>, <110 2>;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               ti,ngpio = <36>;
+               ti,davinci-gpio-unbanked = <0>;
+               power-domains = <&k3_pds 110 TI_SCI_PD_EXCLUSIVE>;
+               clocks = <&k3_clks 110 0>;
+               clock-names = "gpio";
+       };
+
+       main_gpio6: gpio@630000 {
+               compatible = "ti,j721e-gpio", "ti,keystone-gpio";
+               reg = <0x0 0x00630000 0x0 0x100>;
+               gpio-controller;
+               #gpio-cells = <2>;
+               interrupt-parent = <&main_gpio_intr>;
+               interrupts = <111 0>, <111 1>, <111 2>, <111 3>,
+                            <111 4>, <111 5>, <111 6>, <111 7>;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               ti,ngpio = <128>;
+               ti,davinci-gpio-unbanked = <0>;
+               power-domains = <&k3_pds 111 TI_SCI_PD_EXCLUSIVE>;
+               clocks = <&k3_clks 111 0>;
+               clock-names = "gpio";
+       };
+
+       main_gpio7: gpio@631000 {
+               compatible = "ti,j721e-gpio", "ti,keystone-gpio";
+               reg = <0x0 0x00631000 0x0 0x100>;
+               gpio-controller;
+               #gpio-cells = <2>;
+               interrupt-parent = <&main_gpio_intr>;
+               interrupts = <112 0>, <112 1>, <112 2>;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               ti,ngpio = <36>;
+               ti,davinci-gpio-unbanked = <0>;
+               power-domains = <&k3_pds 112 TI_SCI_PD_EXCLUSIVE>;
+               clocks = <&k3_clks 112 0>;
+               clock-names = "gpio";
+       };
 };
index 07b58ee..555dc7b 100644 (file)
@@ -20,7 +20,7 @@
 
                k3_pds: power-controller {
                        compatible = "ti,sci-pm-domain";
-                       #power-domain-cells = <1>;
+                       #power-domain-cells = <2>;
                };
 
                k3_clks: clocks {
@@ -59,7 +59,7 @@
                interrupts = <GIC_SPI 897 IRQ_TYPE_LEVEL_HIGH>;
                clock-frequency = <48000000>;
                current-speed = <115200>;
-               power-domains = <&k3_pds 287>;
+               power-domains = <&k3_pds 287 TI_SCI_PD_EXCLUSIVE>;
                clocks = <&k3_clks 287 0>;
                clock-names = "fclk";
        };
@@ -72,7 +72,7 @@
                interrupts = <GIC_SPI 846 IRQ_TYPE_LEVEL_HIGH>;
                clock-frequency = <96000000>;
                current-speed = <115200>;
-               power-domains = <&k3_pds 149>;
+               power-domains = <&k3_pds 149 TI_SCI_PD_EXCLUSIVE>;
                clocks = <&k3_clks 149 0>;
                clock-names = "fclk";
        };
                ti,sci-dst-id = <14>;
                ti,sci-rm-range-girq = <0x5>;
        };
+
+       wkup_gpio0: gpio@42110000 {
+               compatible = "ti,j721e-gpio", "ti,keystone-gpio";
+               reg = <0x0 0x42110000 0x0 0x100>;
+               gpio-controller;
+               #gpio-cells = <2>;
+               interrupt-parent = <&wkup_gpio_intr>;
+               interrupts = <113 0>, <113 1>, <113 2>,
+                            <113 3>, <113 4>, <113 5>;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               ti,ngpio = <84>;
+               ti,davinci-gpio-unbanked = <0>;
+               power-domains = <&k3_pds 113 TI_SCI_PD_EXCLUSIVE>;
+               clocks = <&k3_clks 113 0>;
+               clock-names = "gpio";
+       };
+
+       wkup_gpio1: gpio@42100000 {
+               compatible = "ti,j721e-gpio", "ti,keystone-gpio";
+               reg = <0x0 0x42100000 0x0 0x100>;
+               gpio-controller;
+               #gpio-cells = <2>;
+               interrupt-parent = <&wkup_gpio_intr>;
+               interrupts = <114 0>, <114 1>, <114 2>,
+                            <114 3>, <114 4>, <114 5>;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               ti,ngpio = <84>;
+               ti,davinci-gpio-unbanked = <0>;
+               power-domains = <&k3_pds 114 TI_SCI_PD_EXCLUSIVE>;
+               clocks = <&k3_clks 114 0>;
+               clock-names = "gpio";
+       };
 };
index f8dd74b..43ea1ba 100644 (file)
@@ -8,6 +8,7 @@
 #include <dt-bindings/interrupt-controller/irq.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
 #include <dt-bindings/pinctrl/k3.h>
+#include <dt-bindings/soc/ti,sci_pm_domain.h>
 
 / {
        model = "Texas Instruments K3 J721E SoC";
index 14d0bc4..172d76f 100644 (file)
@@ -15,8 +15,6 @@
 
 #include <asm-generic/pgalloc.h>       /* for pte_{alloc,free}_one */
 
-#define check_pgt_cache()              do { } while (0)
-
 #define PGD_SIZE       (PTRS_PER_PGD * sizeof(pgd_t))
 
 #if CONFIG_PGTABLE_LEVELS > 2
index 57427d1..7576df0 100644 (file)
@@ -861,8 +861,6 @@ extern int kern_addr_valid(unsigned long addr);
 
 #include <asm-generic/pgtable.h>
 
-static inline void pgtable_cache_init(void) { }
-
 /*
  * On AArch64, the cache coherency is handled via the set_pte_at() function.
  */
index c67848c..5623685 100644 (file)
@@ -280,8 +280,6 @@ static inline void spin_lock_prefetch(const void *ptr)
                     "nop") : : "p" (ptr));
 }
 
-#define HAVE_ARCH_PICK_MMAP_LAYOUT
-
 extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
 extern void __init minsigstksz_setup(void);
 
index a95d1fc..b76df82 100644 (file)
@@ -44,7 +44,7 @@ static inline void tlb_flush(struct mmu_gather *tlb)
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
                                  unsigned long addr)
 {
-       pgtable_page_dtor(pte);
+       pgtable_pte_page_dtor(pte);
        tlb_remove_table(tlb, pte);
 }
 
index 03689c0..a47462d 100644 (file)
@@ -557,14 +557,6 @@ unsigned long arch_align_stack(unsigned long sp)
        return sp & ~0xf;
 }
 
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-       if (is_compat_task())
-               return randomize_page(mm->brk, SZ_32M);
-       else
-               return randomize_page(mm->brk, SZ_1G);
-}
-
 /*
  * Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY.
  */
index dc19300..ac48516 100644 (file)
@@ -56,8 +56,7 @@ void __sync_icache_dcache(pte_t pte)
        struct page *page = pte_page(pte);
 
        if (!test_and_set_bit(PG_dcache_clean, &page->flags))
-               sync_icache_aliases(page_address(page),
-                                   PAGE_SIZE << compound_order(page));
+               sync_icache_aliases(page_address(page), page_size(page));
 }
 EXPORT_SYMBOL_GPL(__sync_icache_dcache);
 
index b050641..3028bac 100644 (file)
 #include <asm/cputype.h>
 
 /*
- * Leave enough space between the mmap area and the stack to honour ulimit in
- * the face of randomisation.
- */
-#define MIN_GAP (SZ_128M)
-#define MAX_GAP        (STACK_TOP/6*5)
-
-static int mmap_is_legacy(struct rlimit *rlim_stack)
-{
-       if (current->personality & ADDR_COMPAT_LAYOUT)
-               return 1;
-
-       if (rlim_stack->rlim_cur == RLIM_INFINITY)
-               return 1;
-
-       return sysctl_legacy_va_layout;
-}
-
-unsigned long arch_mmap_rnd(void)
-{
-       unsigned long rnd;
-
-#ifdef CONFIG_COMPAT
-       if (test_thread_flag(TIF_32BIT))
-               rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
-       else
-#endif
-               rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
-       return rnd << PAGE_SHIFT;
-}
-
-static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
-{
-       unsigned long gap = rlim_stack->rlim_cur;
-       unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap;
-
-       /* Values close to RLIM_INFINITY can overflow. */
-       if (gap + pad > gap)
-               gap += pad;
-
-       if (gap < MIN_GAP)
-               gap = MIN_GAP;
-       else if (gap > MAX_GAP)
-               gap = MAX_GAP;
-
-       return PAGE_ALIGN(STACK_TOP - gap - rnd);
-}
-
-/*
- * This function, called very early during the creation of a new process VM
- * image, sets up which VM layout function to use:
- */
-void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
-{
-       unsigned long random_factor = 0UL;
-
-       if (current->flags & PF_RANDOMIZE)
-               random_factor = arch_mmap_rnd();
-
-       /*
-        * Fall back to the standard layout if the personality bit is set, or
-        * if the expected stack growth is unlimited:
-        */
-       if (mmap_is_legacy(rlim_stack)) {
-               mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
-               mm->get_unmapped_area = arch_get_unmapped_area;
-       } else {
-               mm->mmap_base = mmap_base(random_factor, rlim_stack);
-               mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-       }
-}
-
-/*
  * You really shouldn't be using read() or write() on /dev/mem.  This might go
  * away in the future.
  */
index 53dc6f2..60c929f 100644 (file)
@@ -384,7 +384,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift)
         * folded, and if so pgtable_pmd_page_ctor() becomes nop.
         */
        if (shift == PAGE_SHIFT)
-               BUG_ON(!pgtable_page_ctor(phys_to_page(pa)));
+               BUG_ON(!pgtable_pte_page_ctor(phys_to_page(pa)));
        else if (shift == PMD_SHIFT)
                BUG_ON(!pgtable_pmd_page_ctor(phys_to_page(pa)));
 
index 7548f9c..4a64089 100644 (file)
@@ -35,7 +35,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
                kmem_cache_free(pgd_cache, pgd);
 }
 
-void __init pgd_cache_init(void)
+void __init pgtable_cache_init(void)
 {
        if (PGD_SIZE == PAGE_SIZE)
                return;
index 0bd8059..0b6919c 100644 (file)
@@ -60,11 +60,6 @@ extern unsigned long empty_zero_page;
 #define swapper_pg_dir ((pgd_t *) 0)
 
 /*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
-/*
  * c6x is !MMU, so define the simpliest implementation
  */
 #define pgprot_writecombine pgprot_noncached
index 98c5716..c7c1ed2 100644 (file)
@@ -71,12 +71,10 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 
 #define __pte_free_tlb(tlb, pte, address)              \
 do {                                                   \
-       pgtable_page_dtor(pte);                         \
+       pgtable_pte_page_dtor(pte);                     \
        tlb_remove_page(tlb, pte);                      \
 } while (0)
 
-#define check_pgt_cache()      do {} while (0)
-
 extern void pagetable_init(void);
 extern void pre_mmu_init(void);
 extern void pre_trap_init(void);
index c429a6f..0040b3a 100644 (file)
@@ -296,11 +296,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
 #define kern_addr_valid(addr)  (1)
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do {} while (0)
-
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
        remap_pfn_range(vma, vaddr, pfn, size, prot)
 
index 8c7cc09..fdff9b8 100644 (file)
@@ -8,14 +8,14 @@
 
 #define tlb_start_vma(tlb, vma) \
        do { \
-               if (!tlb->fullmm) \
-                       flush_cache_range(vma, vma->vm_start, vma->vm_end); \
+               if (!(tlb)->fullmm) \
+                       flush_cache_range(vma, (vma)->vm_start, (vma)->vm_end); \
        }  while (0)
 
 #define tlb_end_vma(tlb, vma) \
        do { \
-               if (!tlb->fullmm) \
-                       flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
+               if (!(tlb)->fullmm) \
+                       flush_tlb_range(vma, (vma)->vm_start, (vma)->vm_end); \
        }  while (0)
 
 #define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
index a99caa4..4d00152 100644 (file)
@@ -4,7 +4,6 @@
 #define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 #include <asm-generic/pgtable.h>
-#define pgtable_cache_init()   do { } while (0)
 extern void paging_init(void);
 #define PAGE_NONE              __pgprot(0)    /* these mean nothing to NO_MM */
 #define PAGE_SHARED            __pgprot(0)    /* these mean nothing to NO_MM */
@@ -35,11 +34,6 @@ extern unsigned int kobjsize(const void *objp);
 extern int is_in_rom(unsigned long);
 
 /*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
-/*
  * All 32bit addresses are effectively valid for vmalloc...
  * Sort of meaningless for non-VM targets.
  */
index d6544dc..cc9be51 100644 (file)
@@ -13,8 +13,6 @@
 
 #include <asm-generic/pgalloc.h>       /* for pte_{alloc,free}_one */
 
-#define check_pgt_cache() do {} while (0)
-
 extern unsigned long long kmap_generation;
 
 /*
@@ -96,7 +94,7 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 
 #define __pte_free_tlb(tlb, pte, addr)         \
 do {                                           \
-       pgtable_page_dtor((pte));               \
+       pgtable_pte_page_dtor((pte));           \
        tlb_remove_page((tlb), (pte));          \
 } while (0)
 
index a3ff6d2..2fec20a 100644 (file)
@@ -431,9 +431,6 @@ static inline int pte_exec(pte_t pte)
 
 #define __pte_offset(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
-/*  I think this is in case we have page table caches; needed by init/main.c  */
-#define pgtable_cache_init()    do { } while (0)
-
 /*
  * Swap/file PTE definitions.  If _PAGE_PRESENT is zero, the rest of the PTE is
  * interpreted as swap information.  The remaining free bits are interpreted as
index 1894263..8938384 100644 (file)
@@ -3,5 +3,5 @@
 # Makefile for Hexagon memory management subsystem
 #
 
-obj-y := init.o pgalloc.o ioremap.o uaccess.o vm_fault.o cache.o
+obj-y := init.o ioremap.o uaccess.o vm_fault.o cache.o
 obj-y += copy_to_user.o copy_from_user.o strnlen_user.o vm_tlb.o
index f1f6ebd..c961773 100644 (file)
@@ -71,19 +71,6 @@ void __init mem_init(void)
        init_mm.context.ptbase = __pa(init_mm.pgd);
 }
 
-/*
- * free_initrd_mem - frees...  initrd memory.
- * @start - start of init memory
- * @end - end of init memory
- *
- * Apparently has to be passed the address of the initrd memory.
- *
- * Wrapped by #ifdef CONFIG_BLKDEV_INITRD
- */
-void free_initrd_mem(unsigned long start, unsigned long end)
-{
-}
-
 void sync_icache_dcache(pte_t pte)
 {
        unsigned long addr;
diff --git a/arch/hexagon/mm/pgalloc.c b/arch/hexagon/mm/pgalloc.c
deleted file mode 100644 (file)
index 4d43161..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
- */
-
-#include <linux/init.h>
-
-void __init pgtable_cache_init(void)
-{
-}
index 685a3df..1671447 100644 (file)
@@ -72,10 +72,6 @@ config 64BIT
 config ZONE_DMA32
        def_bool y
 
-config QUICKLIST
-       bool
-       default y
-
 config MMU
        bool
        default y
index c9e4810..f4c4910 100644 (file)
 #include <linux/mm.h>
 #include <linux/page-flags.h>
 #include <linux/threads.h>
-#include <linux/quicklist.h>
+
+#include <asm-generic/pgalloc.h>
 
 #include <asm/mmu_context.h>
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-       return quicklist_alloc(0, GFP_KERNEL, NULL);
+       return (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-       quicklist_free(0, NULL, pgd);
+       free_page((unsigned long)pgd);
 }
 
 #if CONFIG_PGTABLE_LEVELS == 4
@@ -42,12 +43,12 @@ pgd_populate(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-       return quicklist_alloc(0, GFP_KERNEL, NULL);
+       return (pud_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
-       quicklist_free(0, NULL, pud);
+       free_page((unsigned long)pud);
 }
 #define __pud_free_tlb(tlb, pud, address)      pud_free((tlb)->mm, pud)
 #endif /* CONFIG_PGTABLE_LEVELS == 4 */
@@ -60,12 +61,12 @@ pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-       return quicklist_alloc(0, GFP_KERNEL, NULL);
+       return (pmd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
-       quicklist_free(0, NULL, pmd);
+       free_page((unsigned long)pmd);
 }
 
 #define __pmd_free_tlb(tlb, pmd, address)      pmd_free((tlb)->mm, pmd)
@@ -83,43 +84,6 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
        pmd_val(*pmd_entry) = __pa(pte);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
-       struct page *page;
-       void *pg;
-
-       pg = quicklist_alloc(0, GFP_KERNEL, NULL);
-       if (!pg)
-               return NULL;
-       page = virt_to_page(pg);
-       if (!pgtable_page_ctor(page)) {
-               quicklist_free(0, NULL, pg);
-               return NULL;
-       }
-       return page;
-}
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-       return quicklist_alloc(0, GFP_KERNEL, NULL);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-       pgtable_page_dtor(pte);
-       quicklist_free_page(0, NULL, pte);
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-       quicklist_free(0, NULL, pte);
-}
-
-static inline void check_pgt_cache(void)
-{
-       quicklist_trim(0, NULL, 25, 16);
-}
-
 #define __pte_free_tlb(tlb, pte, address)      pte_free((tlb)->mm, pte)
 
 #endif                         /* _ASM_IA64_PGALLOC_H */
index b1e7468..d602e7c 100644 (file)
@@ -566,11 +566,6 @@ extern struct page *zero_page_memmap_ptr;
 #define KERNEL_TR_PAGE_SHIFT   _PAGE_SIZE_64M
 #define KERNEL_TR_PAGE_SIZE    (1 << KERNEL_TR_PAGE_SHIFT)
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
 /* These tell get_user_pages() that the first gate page is accessible from user-level.  */
 #define FIXADDR_USER_START     GATE_ADDR
 #ifdef HAVE_BUGGY_SEGREL
index f102084..8e91c86 100644 (file)
@@ -583,6 +583,7 @@ void ia64_process_pending_intr(void)
 static irqreturn_t dummy_handler (int irq, void *dev_id)
 {
        BUG();
+       return IRQ_NONE;
 }
 
 static struct irqaction ipi_irqaction = {
index db09a69..5b00dc3 100644 (file)
@@ -108,7 +108,6 @@ setup_per_cpu_areas(void)
        struct pcpu_group_info *gi;
        unsigned int cpu;
        ssize_t static_size, reserved_size, dyn_size;
-       int rc;
 
        ai = pcpu_alloc_alloc_info(1, num_possible_cpus());
        if (!ai)
index 219fc64..4f33f6e 100644 (file)
@@ -186,7 +186,7 @@ void __init setup_per_cpu_areas(void)
        unsigned long base_offset;
        unsigned int cpu;
        ssize_t static_size, reserved_size, dyn_size;
-       int node, prev_node, unit, nr_units, rc;
+       int node, prev_node, unit, nr_units;
 
        ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids);
        if (!ai)
index 678b98a..bf9df26 100644 (file)
@@ -64,7 +64,7 @@ __ia64_sync_icache_dcache (pte_t pte)
        if (test_bit(PG_arch_1, &page->flags))
                return;                         /* i-cache is already coherent with d-cache */
 
-       flush_icache_range(addr, addr + (PAGE_SIZE << compound_order(page)));
+       flush_icache_range(addr, addr + page_size(page));
        set_bit(PG_arch_1, &page->flags);       /* mark page as clean */
 }
 
index 0af20f4..b53008b 100644 (file)
@@ -1,3 +1,2 @@
-#define KSYM_ALIGN 2
 #define KCRC_ALIGN 2
 #include <asm-generic/export.h>
index 4399d71..b34d44d 100644 (file)
@@ -41,7 +41,7 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
                                  unsigned long address)
 {
-       pgtable_page_dtor(page);
+       pgtable_pte_page_dtor(page);
        __free_page(page);
 }
 
@@ -54,7 +54,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm)
 
        if (!page)
                return NULL;
-       if (!pgtable_page_ctor(page)) {
+       if (!pgtable_pte_page_ctor(page)) {
                __free_page(page);
                return NULL;
        }
@@ -73,7 +73,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm)
 
 static inline void pte_free(struct mm_struct *mm, struct page *page)
 {
-       pgtable_page_dtor(page);
+       pgtable_pte_page_dtor(page);
        __free_page(page);
 }
 
index d04d9ba..acab315 100644 (file)
@@ -36,7 +36,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
        page = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0);
        if(!page)
                return NULL;
-       if (!pgtable_page_ctor(page)) {
+       if (!pgtable_pte_page_ctor(page)) {
                __free_page(page);
                return NULL;
        }
@@ -51,7 +51,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t page)
 {
-       pgtable_page_dtor(page);
+       pgtable_pte_page_dtor(page);
        cache_page(kmap(page));
        kunmap(page);
        __free_page(page);
@@ -60,7 +60,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t page)
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
                                  unsigned long address)
 {
-       pgtable_page_dtor(page);
+       pgtable_pte_page_dtor(page);
        cache_page(kmap(page));
        kunmap(page);
        __free_page(page);
index fde4534..646c174 100644 (file)
@@ -176,11 +176,4 @@ pgprot_t pgprot_dmacoherent(pgprot_t prot);
 #include <asm-generic/pgtable.h>
 #endif /* !__ASSEMBLY__ */
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
-#define check_pgt_cache()      do { } while (0)
-
 #endif /* _M68K_PGTABLE_H */
index fc3a96c..c18165b 100644 (file)
@@ -45,11 +45,6 @@ extern void paging_init(void);
 #define ZERO_PAGE(vaddr)       (virt_to_page(0))
 
 /*
- * No page table caches to initialise.
- */
-#define pgtable_cache_init()   do { } while (0)
-
-/*
  * All 32bit addresses are effectively valid for vmalloc...
  * Sort of meaningless for non-VM targets.
  */
@@ -60,6 +55,4 @@ extern void paging_init(void);
 
 #include <asm-generic/pgtable.h>
 
-#define check_pgt_cache()      do { } while (0)
-
 #endif /* _M68KNOMMU_PGTABLE_H */
index 1a8ddbd..8561211 100644 (file)
@@ -21,7 +21,7 @@ extern const char bad_pmd_string[];
 
 #define __pte_free_tlb(tlb,pte,addr)                   \
 do {                                                   \
-       pgtable_page_dtor(pte);                         \
+       pgtable_pte_page_dtor(pte);                     \
        tlb_remove_page((tlb), pte);                    \
 } while (0)
 
index 632c947..c9c4be8 100644 (file)
@@ -5,15 +5,18 @@ config MICROBLAZE
        select ARCH_NO_SWAP
        select ARCH_HAS_BINFMT_FLAT if !MMU
        select ARCH_HAS_DMA_COHERENT_TO_PFN if MMU
+       select ARCH_HAS_DMA_PREP_COHERENT
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_SYNC_DMA_FOR_CPU
        select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+       select ARCH_HAS_UNCACHED_SEGMENT if !MMU
        select ARCH_MIGHT_HAVE_PC_PARPORT
        select ARCH_WANT_IPC_PARSE_VERSION
        select BUILDTIME_EXTABLE_SORT
        select TIMER_OF
        select CLONE_BACKWARDS3
        select COMMON_CLK
+       select DMA_DIRECT_REMAP if MMU
        select GENERIC_ATOMIC64
        select GENERIC_CLOCKEVENTS
        select GENERIC_CPU_DEVICES
index 5a8a9d0..5b23652 100644 (file)
@@ -18,7 +18,6 @@
        #address-cells = <1>;
        #size-cells = <1>;
        compatible = "xlnx,microblaze";
-       hard-reset-gpios = <&LEDs_8Bit 2 1>;
        model = "testing";
        DDR2_SDRAM: memory@90000000 {
                device_type = "memory";
                                gpios = <&LEDs_8Bit 7 1>;
                        };
                } ;
+
+               gpio-restart {
+                       compatible = "gpio-restart";
+                       /*
+                        * FIXME: is this active low or active high?
+                        * the current flag (1) indicates active low.
+                        * The delay values are placeholders and should be adjusted
+                        * per the datasheet or by trial and error on real hardware.
+                        */
+                       gpios = <&LEDs_8Bit 2 1>;
+                       active-delay = <100>;
+                       inactive-delay = <10>;
+                       wait-delay = <100>;
+               };
+
                RS232_Uart_1: serial@84000000 {
                        clock-frequency = <125000000>;
                        compatible = "xlnx,xps-uartlite-1.00.a";
index 92fd4e9..654edfd 100644 (file)
@@ -5,15 +5,10 @@ CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_SYSFS_DEPRECATED_V2=y
-CONFIG_KALLSYMS_ALL=y
 # CONFIG_BASE_FULL is not set
+CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_EFI_PARTITION is not set
 CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR=1
 CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR=1
 CONFIG_XILINX_MICROBLAZE0_USE_BARREL=1
@@ -25,14 +20,19 @@ CONFIG_MMU=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE_FORCE=y
 CONFIG_HIGHMEM=y
-CONFIG_PCI=y
 CONFIG_PCI_XILINX=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_EFI_PARTITION is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
 CONFIG_BRIDGE=m
+CONFIG_PCI=y
 CONFIG_MTD=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
@@ -41,6 +41,7 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
 CONFIG_NETDEVICES=y
 CONFIG_XILINX_EMACLITE=y
+CONFIG_XILINX_AXI_EMAC=y
 CONFIG_XILINX_LL_TEMAC=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
@@ -59,6 +60,8 @@ CONFIG_SPI_XILINX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_GPIO_XILINX=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_GPIO_RESTART=y
 # CONFIG_HWMON is not set
 CONFIG_WATCHDOG=y
 CONFIG_XILINX_WATCHDOG=y
@@ -74,8 +77,8 @@ CONFIG_CRAMFS=y
 CONFIG_ROMFS_FS=y
 CONFIG_NFS_FS=y
 CONFIG_CIFS=y
-CONFIG_CIFS_STATS=y
 CONFIG_CIFS_STATS2=y
+CONFIG_ENCRYPTED_KEYS=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_SLAB=y
 CONFIG_DETECT_HUNG_TASK=y
@@ -83,6 +86,3 @@ CONFIG_DEBUG_SPINLOCK=y
 CONFIG_KGDB=y
 CONFIG_KGDB_TESTS=y
 CONFIG_KGDB_KDB=y
-CONFIG_EARLY_PRINTK=y
-CONFIG_KEYS=y
-CONFIG_ENCRYPTED_KEYS=y
index 06d69a6..377de39 100644 (file)
@@ -7,15 +7,10 @@ CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_SYSFS_DEPRECATED_V2=y
-CONFIG_KALLSYMS_ALL=y
 # CONFIG_BASE_FULL is not set
+CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_EFI_PARTITION is not set
 CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR=1
 CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR=1
 CONFIG_XILINX_MICROBLAZE0_USE_BARREL=1
@@ -25,13 +20,18 @@ CONFIG_XILINX_MICROBLAZE0_USE_FPU=2
 CONFIG_HZ_100=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE_FORCE=y
-CONFIG_PCI=y
 CONFIG_PCI_XILINX=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_EFI_PARTITION is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
+CONFIG_PCI=y
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_BLOCK=y
@@ -62,6 +62,8 @@ CONFIG_SPI_XILINX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_GPIO_XILINX=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_GPIO_RESTART=y
 # CONFIG_HWMON is not set
 CONFIG_WATCHDOG=y
 CONFIG_XILINX_WATCHDOG=y
@@ -75,11 +77,6 @@ CONFIG_ROMFS_FS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NLS=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_SLAB=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_SPINLOCK=y
-CONFIG_EARLY_PRINTK=y
 CONFIG_KEYS=y
 CONFIG_ENCRYPTED_KEYS=y
 CONFIG_CRYPTO_ECB=y
@@ -87,3 +84,7 @@ CONFIG_CRYPTO_MD4=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_ARC4=y
 CONFIG_CRYPTO_DES=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_SLAB=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEBUG_SPINLOCK=y
index c796813..86c95b2 100644 (file)
@@ -40,7 +40,6 @@ extern void iounmap(volatile void __iomem *addr);
 
 extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
 #define ioremap_nocache(addr, size)            ioremap((addr), (size))
-#define ioremap_fullcache(addr, size)          ioremap((addr), (size))
 #define ioremap_wc(addr, size)                 ioremap((addr), (size))
 #define ioremap_wt(addr, size)                 ioremap((addr), (size))
 
index 21ddba9..7c4dc5d 100644 (file)
@@ -66,8 +66,6 @@ extern pgprot_t       pci_phys_mem_access_prot(struct file *file,
                                         unsigned long size,
                                         pgprot_t prot);
 
-#define HAVE_ARCH_PCI_RESOURCE_TO_USER
-
 /* This part of code was originally in xilinx-pci.h */
 #ifdef CONFIG_PCI_XILINX
 extern void __init xilinx_pci_init(void);
index f4cc9ff..7ecb05b 100644 (file)
 #include <asm/cache.h>
 #include <asm/pgtable.h>
 
-#define PGDIR_ORDER    0
-
-/*
- * This is handled very differently on MicroBlaze since out page tables
- * are all 0's and I want to be able to use these zero'd pages elsewhere
- * as well - it gives us quite a speedup.
- * -- Cort
- */
-extern struct pgtable_cache_struct {
-       unsigned long *pgd_cache;
-       unsigned long *pte_cache;
-       unsigned long pgtable_cache_sz;
-} quicklists;
-
-#define pgd_quicklist          (quicklists.pgd_cache)
-#define pmd_quicklist          ((unsigned long *)0)
-#define pte_quicklist          (quicklists.pte_cache)
-#define pgtable_cache_size     (quicklists.pgtable_cache_sz)
-
-extern unsigned long *zero_cache; /* head linked list of pre-zero'd pages */
-extern atomic_t zero_sz; /* # currently pre-zero'd pages */
-extern atomic_t zeropage_hits; /* # zero'd pages request that we've done */
-extern atomic_t zeropage_calls; /* # zero'd pages request that've been made */
-extern atomic_t zerototal; /* # pages zero'd over time */
-
-#define zero_quicklist         (zero_cache)
-#define zero_cache_sz          (zero_sz)
-#define zero_cache_calls       (zeropage_calls)
-#define zero_cache_hits                (zeropage_hits)
-#define zero_cache_total       (zerototal)
-
-/*
- * return a pre-zero'd page from the list,
- * return NULL if none available -- Cort
- */
-extern unsigned long get_zero_page_fast(void);
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#include <asm-generic/pgalloc.h>
 
 extern void __bad_pte(pmd_t *pmd);
 
-static inline pgd_t *get_pgd_slow(void)
+static inline pgd_t *get_pgd(void)
 {
-       pgd_t *ret;
-
-       ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGDIR_ORDER);
-       if (ret != NULL)
-               clear_page(ret);
-       return ret;
+       return (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 0);
 }
 
-static inline pgd_t *get_pgd_fast(void)
-{
-       unsigned long *ret;
-
-       ret = pgd_quicklist;
-       if (ret != NULL) {
-               pgd_quicklist = (unsigned long *)(*ret);
-               ret[0] = 0;
-               pgtable_cache_size--;
-       } else
-               ret = (unsigned long *)get_pgd_slow();
-       return (pgd_t *)ret;
-}
-
-static inline void free_pgd_fast(pgd_t *pgd)
-{
-       *(unsigned long **)pgd = pgd_quicklist;
-       pgd_quicklist = (unsigned long *) pgd;
-       pgtable_cache_size++;
-}
-
-static inline void free_pgd_slow(pgd_t *pgd)
+static inline void free_pgd(pgd_t *pgd)
 {
        free_page((unsigned long)pgd);
 }
 
-#define pgd_free(mm, pgd)        free_pgd_fast(pgd)
-#define pgd_alloc(mm)          get_pgd_fast()
+#define pgd_free(mm, pgd)      free_pgd(pgd)
+#define pgd_alloc(mm)          get_pgd()
 
 #define pmd_pgtable(pmd)       pmd_page(pmd)
 
@@ -110,50 +50,6 @@ static inline void free_pgd_slow(pgd_t *pgd)
 
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
-       struct page *ptepage;
-
-#ifdef CONFIG_HIGHPTE
-       int flags = GFP_KERNEL | __GFP_HIGHMEM;
-#else
-       int flags = GFP_KERNEL;
-#endif
-
-       ptepage = alloc_pages(flags, 0);
-       if (!ptepage)
-               return NULL;
-       clear_highpage(ptepage);
-       if (!pgtable_page_ctor(ptepage)) {
-               __free_page(ptepage);
-               return NULL;
-       }
-       return ptepage;
-}
-
-static inline void pte_free_fast(pte_t *pte)
-{
-       *(unsigned long **)pte = pte_quicklist;
-       pte_quicklist = (unsigned long *) pte;
-       pgtable_cache_size++;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-       free_page((unsigned long)pte);
-}
-
-static inline void pte_free_slow(struct page *ptepage)
-{
-       __free_page(ptepage);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *ptepage)
-{
-       pgtable_page_dtor(ptepage);
-       __free_page(ptepage);
-}
-
 #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, (pte))
 
 #define pmd_populate(mm, pmd, pte) \
@@ -171,10 +67,6 @@ static inline void pte_free(struct mm_struct *mm, struct page *ptepage)
 #define __pmd_free_tlb(tlb, x, addr)   pmd_free((tlb)->mm, x)
 #define pgd_populate(mm, pmd, pte)     BUG()
 
-extern int do_check_pgt_cache(int, int);
-
 #endif /* CONFIG_MMU */
 
-#define check_pgt_cache()              do { } while (0)
-
 #endif /* _ASM_MICROBLAZE_PGALLOC_H */
index 142d3f0..954b69a 100644 (file)
@@ -46,8 +46,6 @@ extern int mem_init_done;
 
 #define swapper_pg_dir ((pgd_t *) NULL)
 
-#define pgtable_cache_init()   do {} while (0)
-
 #define arch_enter_lazy_cpu_mode()     do {} while (0)
 
 #define pgprot_noncached_wc(prot)      prot
@@ -526,11 +524,6 @@ extern unsigned long iopa(unsigned long addr);
 /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
 #define kern_addr_valid(addr)  (1)
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
 void do_page_fault(struct pt_regs *regs, unsigned long address,
                   unsigned long error_code);
 
index bff2a71..a1f206b 100644 (file)
@@ -163,44 +163,15 @@ extern long __user_bad(void);
  * Returns zero on success, or -EFAULT on error.
  * On error, the variable @x is set to zero.
  */
-#define get_user(x, ptr)                                               \
-       __get_user_check((x), (ptr), sizeof(*(ptr)))
-
-#define __get_user_check(x, ptr, size)                                 \
-({                                                                     \
-       unsigned long __gu_val = 0;                                     \
-       const typeof(*(ptr)) __user *__gu_addr = (ptr);                 \
-       int __gu_err = 0;                                               \
-                                                                       \
-       if (access_ok(__gu_addr, size)) {                       \
-               switch (size) {                                         \
-               case 1:                                                 \
-                       __get_user_asm("lbu", __gu_addr, __gu_val,      \
-                                      __gu_err);                       \
-                       break;                                          \
-               case 2:                                                 \
-                       __get_user_asm("lhu", __gu_addr, __gu_val,      \
-                                      __gu_err);                       \
-                       break;                                          \
-               case 4:                                                 \
-                       __get_user_asm("lw", __gu_addr, __gu_val,       \
-                                      __gu_err);                       \
-                       break;                                          \
-               default:                                                \
-                       __gu_err = __user_bad();                        \
-                       break;                                          \
-               }                                                       \
-       } else {                                                        \
-               __gu_err = -EFAULT;                                     \
-       }                                                               \
-       x = (__force typeof(*(ptr)))__gu_val;                           \
-       __gu_err;                                                       \
+#define get_user(x, ptr) ({                            \
+       const typeof(*(ptr)) __user *__gu_ptr = (ptr);  \
+       access_ok(__gu_ptr, sizeof(*__gu_ptr)) ?        \
+               __get_user(x, __gu_ptr) : -EFAULT;      \
 })
 
 #define __get_user(x, ptr)                                             \
 ({                                                                     \
        unsigned long __gu_val = 0;                                     \
-       /*unsigned long __gu_ptr = (unsigned long)(ptr);*/              \
        long __gu_err;                                                  \
        switch (sizeof(*(ptr))) {                                       \
        case 1:                                                         \
@@ -212,6 +183,11 @@ extern long __user_bad(void);
        case 4:                                                         \
                __get_user_asm("lw", (ptr), __gu_val, __gu_err);        \
                break;                                                  \
+       case 8:                                                         \
+               __gu_err = __copy_from_user(&__gu_val, ptr, 8);         \
+               if (__gu_err)                                           \
+                       __gu_err = -EFAULT;                             \
+               break;                                                  \
        default:                                                        \
                /* __gu_val = 0; __gu_err = -EINVAL;*/ __gu_err = __user_bad();\
        }                                                               \
index fcbe1da..5f47229 100644 (file)
@@ -8,83 +8,9 @@
  */
 
 #include <linux/init.h>
+#include <linux/delay.h>
 #include <linux/of_platform.h>
-
-/* Trigger specific functions */
-#ifdef CONFIG_GPIOLIB
-
-#include <linux/of_gpio.h>
-
-static int handle; /* reset pin handle */
-static unsigned int reset_val;
-
-static int of_platform_reset_gpio_probe(void)
-{
-       int ret;
-       handle = of_get_named_gpio(of_find_node_by_path("/"),
-                                  "hard-reset-gpios", 0);
-
-       if (!gpio_is_valid(handle)) {
-               pr_info("Skipping unavailable RESET gpio %d (%s)\n",
-                               handle, "reset");
-               return -ENODEV;
-       }
-
-       ret = gpio_request(handle, "reset");
-       if (ret < 0) {
-               pr_info("GPIO pin is already allocated\n");
-               return ret;
-       }
-
-       /* get current setup value */
-       reset_val = gpio_get_value(handle);
-       /* FIXME maybe worth to perform any action */
-       pr_debug("Reset: Gpio output state: 0x%x\n", reset_val);
-
-       /* Setup GPIO as output */
-       ret = gpio_direction_output(handle, 0);
-       if (ret < 0)
-               goto err;
-
-       /* Setup output direction */
-       gpio_set_value(handle, 0);
-
-       pr_info("RESET: Registered gpio device: %d, current val: %d\n",
-                                                       handle, reset_val);
-       return 0;
-err:
-       gpio_free(handle);
-       return ret;
-}
-device_initcall(of_platform_reset_gpio_probe);
-
-
-static void gpio_system_reset(void)
-{
-       if (gpio_is_valid(handle))
-               gpio_set_value(handle, 1 - reset_val);
-       else
-               pr_notice("Reset GPIO unavailable - halting!\n");
-}
-#else
-static void gpio_system_reset(void)
-{
-       pr_notice("No reset GPIO present - halting!\n");
-}
-
-void of_platform_reset_gpio_probe(void)
-{
-       return;
-}
-#endif
-
-void machine_restart(char *cmd)
-{
-       pr_notice("Machine restart...\n");
-       gpio_system_reset();
-       while (1)
-               ;
-}
+#include <linux/reboot.h>
 
 void machine_shutdown(void)
 {
@@ -106,3 +32,12 @@ void machine_power_off(void)
        while (1)
                ;
 }
+
+void machine_restart(char *cmd)
+{
+       do_kernel_restart(cmd);
+       /* Give the restart hook 1 s to take us down */
+       mdelay(1000);
+       pr_emerg("Reboot failed -- System halted\n");
+       while (1);
+}
index bc70422..8c5f0c3 100644 (file)
  * Copyright (C) 2010 Michal Simek <monstr@monstr.eu>
  * Copyright (C) 2010 PetaLogix
  * Copyright (C) 2005 John Williams <jwilliams@itee.uq.edu.au>
- *
- * Based on PowerPC version derived from arch/arm/mm/consistent.c
- * Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
- * Copyright (C) 2000 Russell King
  */
 
-#include <linux/export.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
 #include <linux/kernel.h>
-#include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
 #include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/stddef.h>
-#include <linux/vmalloc.h>
 #include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/memblock.h>
-#include <linux/highmem.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/gfp.h>
 #include <linux/dma-noncoherent.h>
-
-#include <asm/pgalloc.h>
-#include <linux/io.h>
-#include <linux/hardirq.h>
-#include <linux/mmu_context.h>
-#include <asm/mmu.h>
-#include <linux/uaccess.h>
-#include <asm/pgtable.h>
 #include <asm/cpuinfo.h>
-#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
 
-#ifndef CONFIG_MMU
-/* I have to use dcache values because I can't relate on ram size */
-# define UNCACHED_SHADOW_MASK (cpuinfo.dcache_high - cpuinfo.dcache_base + 1)
-#endif
-
-/*
- * Consistent memory allocators. Used for DMA devices that want to
- * share uncached memory with the processor core.
- * My crufty no-MMU approach is simple. In the HW platform we can optionally
- * mirror the DDR up above the processor cacheable region.  So, memory accessed
- * in this mirror region will not be cached.  It's alloced from the same
- * pool as normal memory, but the handle we return is shifted up into the
- * uncached region.  This will no doubt cause big problems if memory allocated
- * here is not also freed properly. -- JW
- */
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-               gfp_t gfp, unsigned long attrs)
+void arch_dma_prep_coherent(struct page *page, size_t size)
 {
-       unsigned long order, vaddr;
-       void *ret;
-       unsigned int i, err = 0;
-       struct page *page, *end;
-
-#ifdef CONFIG_MMU
-       phys_addr_t pa;
-       struct vm_struct *area;
-       unsigned long va;
-#endif
-
-       if (in_interrupt())
-               BUG();
-
-       /* Only allocate page size areas. */
-       size = PAGE_ALIGN(size);
-       order = get_order(size);
-
-       vaddr = __get_free_pages(gfp | __GFP_ZERO, order);
-       if (!vaddr)
-               return NULL;
+       phys_addr_t paddr = page_to_phys(page);
 
-       /*
-        * we need to ensure that there are no cachelines in use,
-        * or worse dirty in this area.
-        */
-       flush_dcache_range(virt_to_phys((void *)vaddr),
-                                       virt_to_phys((void *)vaddr) + size);
+       flush_dcache_range(paddr, paddr + size);
+}
 
 #ifndef CONFIG_MMU
-       ret = (void *)vaddr;
-       /*
-        * Here's the magic!  Note if the uncached shadow is not implemented,
-        * it's up to the calling code to also test that condition and make
-        * other arranegments, such as manually flushing the cache and so on.
-        */
-# ifdef CONFIG_XILINX_UNCACHED_SHADOW
-       ret = (void *)((unsigned) ret | UNCACHED_SHADOW_MASK);
-# endif
-       if ((unsigned int)ret > cpuinfo.dcache_base &&
-                               (unsigned int)ret < cpuinfo.dcache_high)
-               pr_warn("ERROR: Your cache coherent area is CACHED!!!\n");
-
-       /* dma_handle is same as physical (shadowed) address */
-       *dma_handle = (dma_addr_t)ret;
+/*
+ * Consistent memory allocators. Used for DMA devices that want to share
+ * uncached memory with the processor core.  My crufty no-MMU approach is
+ * simple.  In the HW platform we can optionally mirror the DDR up above the
+ * processor cacheable region.  So, memory accessed in this mirror region will
+ * not be cached.  It's alloced from the same pool as normal memory, but the
+ * handle we return is shifted up into the uncached region.  This will no doubt
+ * cause big problems if memory allocated here is not also freed properly. -- JW
+ *
+ * I have to use dcache values because I can't rely on RAM size:
+ */
+#ifdef CONFIG_XILINX_UNCACHED_SHADOW
+#define UNCACHED_SHADOW_MASK (cpuinfo.dcache_high - cpuinfo.dcache_base + 1)
 #else
-       /* Allocate some common virtual space to map the new pages. */
-       area = get_vm_area(size, VM_ALLOC);
-       if (!area) {
-               free_pages(vaddr, order);
-               return NULL;
-       }
-       va = (unsigned long) area->addr;
-       ret = (void *)va;
-
-       /* This gives us the real physical address of the first page. */
-       *dma_handle = pa = __virt_to_phys(vaddr);
-#endif
-
-       /*
-        * free wasted pages.  We skip the first page since we know
-        * that it will have count = 1 and won't require freeing.
-        * We also mark the pages in use as reserved so that
-        * remap_page_range works.
-        */
-       page = virt_to_page(vaddr);
-       end = page + (1 << order);
-
-       split_page(page, order);
-
-       for (i = 0; i < size && err == 0; i += PAGE_SIZE) {
-#ifdef CONFIG_MMU
-               /* MS: This is the whole magic - use cache inhibit pages */
-               err = map_page(va + i, pa + i, _PAGE_KERNEL | _PAGE_NO_CACHE);
-#endif
+#define UNCACHED_SHADOW_MASK 0
+#endif /* CONFIG_XILINX_UNCACHED_SHADOW */
 
-               SetPageReserved(page);
-               page++;
-       }
-
-       /* Free the otherwise unused pages. */
-       while (page < end) {
-               __free_page(page);
-               page++;
-       }
-
-       if (err) {
-               free_pages(vaddr, order);
-               return NULL;
-       }
-
-       return ret;
-}
-
-#ifdef CONFIG_MMU
-static pte_t *consistent_virt_to_pte(void *vaddr)
+void *uncached_kernel_address(void *ptr)
 {
-       unsigned long addr = (unsigned long)vaddr;
-
-       return pte_offset_kernel(pmd_offset(pgd_offset_k(addr), addr), addr);
-}
-
-long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr,
-               dma_addr_t dma_addr)
-{
-       pte_t *ptep = consistent_virt_to_pte(vaddr);
-
-       if (pte_none(*ptep) || !pte_present(*ptep))
-               return 0;
+       unsigned long addr = (unsigned long)ptr;
 
-       return pte_pfn(*ptep);
+       addr |= UNCACHED_SHADOW_MASK;
+       if (addr > cpuinfo.dcache_base && addr < cpuinfo.dcache_high)
+               pr_warn("ERROR: Your cache coherent area is CACHED!!!\n");
+       return (void *)addr;
 }
-#endif
 
-/*
- * free page(s) as defined by the above mapping.
- */
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
-               dma_addr_t dma_addr, unsigned long attrs)
+void *cached_kernel_address(void *ptr)
 {
-       struct page *page;
-
-       if (in_interrupt())
-               BUG();
-
-       size = PAGE_ALIGN(size);
-
-#ifndef CONFIG_MMU
-       /* Clear SHADOW_MASK bit in address, and free as per usual */
-# ifdef CONFIG_XILINX_UNCACHED_SHADOW
-       vaddr = (void *)((unsigned)vaddr & ~UNCACHED_SHADOW_MASK);
-# endif
-       page = virt_to_page(vaddr);
-
-       do {
-               __free_reserved_page(page);
-               page++;
-       } while (size -= PAGE_SIZE);
-#else
-       do {
-               pte_t *ptep = consistent_virt_to_pte(vaddr);
-               unsigned long pfn;
-
-               if (!pte_none(*ptep) && pte_present(*ptep)) {
-                       pfn = pte_pfn(*ptep);
-                       pte_clear(&init_mm, (unsigned int)vaddr, ptep);
-                       if (pfn_valid(pfn)) {
-                               page = pfn_to_page(pfn);
-                               __free_reserved_page(page);
-                       }
-               }
-               vaddr += PAGE_SIZE;
-       } while (size -= PAGE_SIZE);
+       unsigned long addr = (unsigned long)ptr;
 
-       /* flush tlb */
-       flush_tlb_all();
-#endif
+       return (void *)(addr & ~UNCACHED_SHADOW_MASK);
 }
+#endif /* !CONFIG_MMU */
index 8fe54fd..010bb9c 100644 (file)
@@ -44,10 +44,6 @@ unsigned long ioremap_base;
 unsigned long ioremap_bot;
 EXPORT_SYMBOL(ioremap_bot);
 
-#ifndef CONFIG_SMP
-struct pgtable_cache_struct quicklists;
-#endif
-
 static void __iomem *__ioremap(phys_addr_t addr, unsigned long size,
                unsigned long flags)
 {
index 904c096..a0bd9bd 100644 (file)
@@ -5,7 +5,6 @@ config MIPS
        select ARCH_32BIT_OFF_T if !64BIT
        select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT
        select ARCH_CLOCKSOURCE_DATA
-       select ARCH_HAS_ELF_RANDOMIZE
        select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
        select ARCH_HAS_UBSAN_SANITIZE_ALL
        select ARCH_SUPPORTS_UPROBES
@@ -13,6 +12,7 @@ config MIPS
        select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
        select ARCH_USE_QUEUED_RWLOCKS
        select ARCH_USE_QUEUED_SPINLOCKS
+       select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
        select ARCH_WANT_IPC_PARSE_VERSION
        select BUILDTIME_EXTABLE_SORT
        select CLONE_BACKWARDS
@@ -22,6 +22,7 @@ config MIPS
        select GENERIC_CLOCKEVENTS
        select GENERIC_CMOS_UPDATE
        select GENERIC_CPU_AUTOPROBE
+       select GENERIC_GETTIMEOFDAY
        select GENERIC_IOMAP
        select GENERIC_IRQ_PROBE
        select GENERIC_IRQ_SHOW
@@ -43,7 +44,7 @@ config MIPS
        select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_TRACEHOOK
-       select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES && 64BIT
+       select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES
        select HAVE_ASM_MODVERSIONS
        select HAVE_EBPF_JIT if (!CPU_MICROMIPS)
        select HAVE_CONTEXT_TRACKING
@@ -75,6 +76,7 @@ config MIPS
        select HAVE_STACKPROTECTOR
        select HAVE_SYSCALL_TRACEPOINTS
        select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP
+       select HAVE_GENERIC_VDSO
        select IRQ_FORCED_THREADING
        select ISA if EISA
        select MODULES_USE_ELF_RELA if MODULES && 64BIT
@@ -83,6 +85,7 @@ config MIPS
        select RTC_LIB
        select SYSCTL_EXCEPTION_TRACE
        select VIRT_TO_BUS
+       select ARCH_HAS_PTE_SPECIAL if !(32BIT && CPU_HAS_RIXI)
 
 menu "Machine selection"
 
@@ -385,6 +388,7 @@ config MACH_INGENIC
        select SYS_SUPPORTS_32BIT_KERNEL
        select SYS_SUPPORTS_LITTLE_ENDIAN
        select SYS_SUPPORTS_ZBOOT_UART16550
+       select CPU_SUPPORTS_HUGEPAGES
        select DMA_NONCOHERENT
        select IRQ_MIPS_CPU
        select PINCTRL
@@ -1231,7 +1235,7 @@ config SYS_SUPPORTS_LITTLE_ENDIAN
 
 config SYS_SUPPORTS_HUGETLBFS
        bool
-       depends on CPU_SUPPORTS_HUGEPAGES && 64BIT
+       depends on CPU_SUPPORTS_HUGEPAGES
        default y
 
 config MIPS_HUGE_TLB_SUPPORT
@@ -1579,6 +1583,7 @@ config CPU_R3000
        depends on SYS_HAS_CPU_R3000
        select CPU_HAS_WB
        select CPU_HAS_LOAD_STORE_LR
+       select CPU_R3K_TLB
        select CPU_SUPPORTS_32BIT_KERNEL
        select CPU_SUPPORTS_HIGHMEM
        help
@@ -1594,6 +1599,7 @@ config CPU_TX39XX
        depends on SYS_HAS_CPU_TX39XX
        select CPU_SUPPORTS_32BIT_KERNEL
        select CPU_HAS_LOAD_STORE_LR
+       select CPU_R3K_TLB
 
 config CPU_VR41XX
        bool "R41xx"
@@ -1607,15 +1613,6 @@ config CPU_VR41XX
          kernel built with this option will not run on any other type of
          processor or vice versa.
 
-config CPU_R4300
-       bool "R4300"
-       depends on SYS_HAS_CPU_R4300
-       select CPU_SUPPORTS_32BIT_KERNEL
-       select CPU_SUPPORTS_64BIT_KERNEL
-       select CPU_HAS_LOAD_STORE_LR
-       help
-         MIPS Technologies R4300-series processors.
-
 config CPU_R4X00
        bool "R4x00"
        depends on SYS_HAS_CPU_R4X00
@@ -1646,14 +1643,6 @@ config CPU_R5000
        help
          MIPS Technologies R5000-series processors other than the Nevada.
 
-config CPU_R5432
-       bool "R5432"
-       depends on SYS_HAS_CPU_R5432
-       select CPU_SUPPORTS_32BIT_KERNEL
-       select CPU_SUPPORTS_64BIT_KERNEL
-       select CPU_SUPPORTS_HUGEPAGES
-       select CPU_HAS_LOAD_STORE_LR
-
 config CPU_R5500
        bool "R5500"
        depends on SYS_HAS_CPU_R5500
@@ -1675,16 +1664,6 @@ config CPU_NEVADA
        help
          QED / PMC-Sierra RM52xx-series ("Nevada") processors.
 
-config CPU_R8000
-       bool "R8000"
-       depends on SYS_HAS_CPU_R8000
-       select CPU_HAS_PREFETCH
-       select CPU_HAS_LOAD_STORE_LR
-       select CPU_SUPPORTS_64BIT_KERNEL
-       help
-         MIPS Technologies R8000 processors.  Note these processors are
-         uncommon and the support for them is incomplete.
-
 config CPU_R10000
        bool "R10000"
        depends on SYS_HAS_CPU_R10000
@@ -1977,9 +1956,6 @@ config SYS_HAS_CPU_TX39XX
 config SYS_HAS_CPU_VR41XX
        bool
 
-config SYS_HAS_CPU_R4300
-       bool
-
 config SYS_HAS_CPU_R4X00
        bool
 
@@ -1989,18 +1965,12 @@ config SYS_HAS_CPU_TX49XX
 config SYS_HAS_CPU_R5000
        bool
 
-config SYS_HAS_CPU_R5432
-       bool
-
 config SYS_HAS_CPU_R5500
        bool
 
 config SYS_HAS_CPU_NEVADA
        bool
 
-config SYS_HAS_CPU_R8000
-       bool
-
 config SYS_HAS_CPU_R10000
        bool
        select ARCH_HAS_SYNC_DMA_FOR_CPU if DMA_NONCOHERENT
@@ -2118,6 +2088,7 @@ config CPU_SUPPORTS_ADDRWINCFG
        bool
 config CPU_SUPPORTS_HUGEPAGES
        bool
+       depends on !(32BIT && (ARCH_PHYS_ADDR_T_64BIT || EVA))
 config CPU_SUPPORTS_UNCACHED_ACCELERATED
        bool
 config MIPS_PGD_C0_CONTEXT
@@ -2200,13 +2171,13 @@ config PAGE_SIZE_4KB
 
 config PAGE_SIZE_8KB
        bool "8kB"
-       depends on CPU_R8000 || CPU_CAVIUM_OCTEON
+       depends on CPU_CAVIUM_OCTEON
        depends on !MIPS_VA_BITS_48
        help
          Using 8kB page size will result in higher performance kernel at
          the price of higher memory consumption.  This option is available
-         only on R8000 and cnMIPS processors.  Note that you will need a
-         suitable Linux distribution to support this.
+         only on cnMIPS processors.  Note that you will need a suitable Linux
+         distribution to support this.
 
 config PAGE_SIZE_16KB
        bool "16kB"
@@ -2297,7 +2268,7 @@ config CPU_HAS_PREFETCH
 
 config CPU_GENERIC_DUMP_TLB
        bool
-       default y if !(CPU_R3000 || CPU_R8000 || CPU_TX39XX)
+       default y if !(CPU_R3000 || CPU_TX39XX)
 
 config MIPS_FP_SUPPORT
        bool "Floating Point support" if EXPERT
@@ -2319,6 +2290,9 @@ config CPU_R2300_FPU
        depends on MIPS_FP_SUPPORT
        default y if CPU_R3000 || CPU_TX39XX
 
+config CPU_R3K_TLB
+       bool
+
 config CPU_R4K_FPU
        bool
        depends on MIPS_FP_SUPPORT
@@ -2326,7 +2300,7 @@ config CPU_R4K_FPU
 
 config CPU_R4K_CACHE_TLB
        bool
-       default y if !(CPU_R3000 || CPU_R8000 || CPU_SB1 || CPU_TX39XX || CPU_CAVIUM_OCTEON)
+       default y if !(CPU_R3K_TLB || CPU_SB1 || CPU_CAVIUM_OCTEON)
 
 config MIPS_MT_SMP
        bool "MIPS MT SMP support (1 TC on each available VPE)"
@@ -2583,7 +2557,6 @@ config CPU_R4400_WORKAROUNDS
 config MIPS_ASID_SHIFT
        int
        default 6 if CPU_R3000 || CPU_TX39XX
-       default 4 if CPU_R8000
        default 0
 
 config MIPS_ASID_BITS
@@ -3077,10 +3050,6 @@ config STACKTRACE_SUPPORT
        bool
        default y
 
-config HAVE_LATENCYTOP_SUPPORT
-       bool
-       default y
-
 config PGTABLE_LEVELS
        int
        default 4 if PAGE_SIZE_4KB && MIPS_VA_BITS_48
index eceff9b..cdc09b7 100644 (file)
@@ -163,7 +163,6 @@ cflags-y += -fno-stack-check
 #
 cflags-$(CONFIG_CPU_R3000)     += -march=r3000
 cflags-$(CONFIG_CPU_TX39XX)    += -march=r3900
-cflags-$(CONFIG_CPU_R4300)     += -march=r4300 -Wa,--trap
 cflags-$(CONFIG_CPU_VR41XX)    += -march=r4100 -Wa,--trap
 cflags-$(CONFIG_CPU_R4X00)     += -march=r4600 -Wa,--trap
 cflags-$(CONFIG_CPU_TX49XX)    += -march=r4600 -Wa,--trap
@@ -174,8 +173,6 @@ cflags-$(CONFIG_CPU_MIPS64_R1)      += -march=mips64 -Wa,--trap
 cflags-$(CONFIG_CPU_MIPS64_R2) += -march=mips64r2 -Wa,--trap
 cflags-$(CONFIG_CPU_MIPS64_R6) += -march=mips64r6 -Wa,--trap
 cflags-$(CONFIG_CPU_R5000)     += -march=r5000 -Wa,--trap
-cflags-$(CONFIG_CPU_R5432)     += $(call cc-option,-march=r5400,-march=r5000) \
-                       -Wa,--trap
 cflags-$(CONFIG_CPU_R5500)     += $(call cc-option,-march=r5500,-march=r5000) \
                        -Wa,--trap
 cflags-$(CONFIG_CPU_NEVADA)    += $(call cc-option,-march=rm5200,-march=r5000) \
@@ -186,7 +183,6 @@ cflags-$(CONFIG_CPU_SB1)    += $(call cc-option,-march=sb1,-march=r5000) \
                        -Wa,--trap
 cflags-$(CONFIG_CPU_SB1)       += $(call cc-option,-mno-mdmx)
 cflags-$(CONFIG_CPU_SB1)       += $(call cc-option,-mno-mips3d)
-cflags-$(CONFIG_CPU_R8000)     += -march=r8000 -Wa,--trap
 cflags-$(CONFIG_CPU_R10000)    += $(call cc-option,-march=r10000,-march=r8000) \
                        -Wa,--trap
 cflags-$(CONFIG_CPU_CAVIUM_OCTEON) += $(call cc-option,-march=octeon) -Wa,--trap
index a80910d..35266a7 100644 (file)
@@ -160,6 +160,7 @@ struct bcm47xx_board_type_list1 bcm47xx_board_list_board_id[] __initconst = {
        {{BCM47XX_BOARD_LUXUL_XVW_P30_V1, "Luxul XVW-P30 V1"}, "luxul_xvwp30_v1"},
        {{BCM47XX_BOARD_LUXUL_XWR_600_V1, "Luxul XWR-600 V1"}, "luxul_xwr600_v1"},
        {{BCM47XX_BOARD_LUXUL_XWR_1750_V1, "Luxul XWR-1750 V1"}, "luxul_xwr1750_v1"},
+       {{BCM47XX_BOARD_NETGEAR_R6200_V1, "Netgear R6200 V1"}, "U12H192T00_NETGEAR"},
        {{BCM47XX_BOARD_NETGEAR_WGR614V8, "Netgear WGR614 V8"}, "U12H072T00_NETGEAR"},
        {{BCM47XX_BOARD_NETGEAR_WGR614V9, "Netgear WGR614 V9"}, "U12H094T00_NETGEAR"},
        {{BCM47XX_BOARD_NETGEAR_WGR614_V10, "Netgear WGR614 V10"}, "U12H139T01_NETGEAR"},
index 67b6a78..535d84a 100644 (file)
@@ -385,6 +385,13 @@ bcm47xx_buttons_motorola_wr850gv2v3[] __initconst = {
 /* Netgear */
 
 static const struct gpio_keys_button
+bcm47xx_buttons_netgear_r6200_v1[] __initconst = {
+       BCM47XX_GPIO_KEY(2, KEY_RFKILL),
+       BCM47XX_GPIO_KEY(3, KEY_RESTART),
+       BCM47XX_GPIO_KEY(4, KEY_WPS_BUTTON),
+};
+
+static const struct gpio_keys_button
 bcm47xx_buttons_netgear_wndr3400v1[] __initconst = {
        BCM47XX_GPIO_KEY(4, KEY_RESTART),
        BCM47XX_GPIO_KEY(6, KEY_WPS_BUTTON),
@@ -664,6 +671,9 @@ int __init bcm47xx_buttons_register(void)
                err = bcm47xx_copy_bdata(bcm47xx_buttons_motorola_wr850gv2v3);
                break;
 
+       case BCM47XX_BOARD_NETGEAR_R6200_V1:
+               err = bcm47xx_copy_bdata(bcm47xx_buttons_netgear_r6200_v1);
+               break;
        case BCM47XX_BOARD_NETGEAR_WNDR3400V1:
                err = bcm47xx_copy_bdata(bcm47xx_buttons_netgear_wndr3400v1);
                break;
index 7a3e5c8..69cbef4 100644 (file)
                compatible = "simple-bus";
                ranges;
 
-               periph_cntl: syscon@fff8c000 {
+               clkctl: clock-controller@fff8c004 {
+                       compatible = "brcm,bcm3368-clocks";
+                       reg = <0xfff8c004 0x4>;
+                       #clock-cells = <1>;
+               };
+
+               periph_cntl: syscon@fff8c008 {
                        compatible = "syscon";
-                       reg = <0xfff8c000 0xc>;
+                       reg = <0xfff8c000 0x4>;
                        native-endian;
                };
 
                reboot: syscon-reboot@fff8c008 {
                        compatible = "syscon-reboot";
                        regmap = <&periph_cntl>;
-                       offset = <0x8>;
+                       offset = <0x0>;
                        mask = <0x1>;
                };
 
index 58790b1..beec241 100644 (file)
                compatible = "simple-bus";
                ranges;
 
-               periph_cntl: syscon@10000000 {
+               clkctl: clock-controller@10000004 {
+                       compatible = "brcm,bcm63268-clocks";
+                       reg = <0x10000004 0x4>;
+                       #clock-cells = <1>;
+               };
+
+               periph_cntl: syscon@10000008 {
                        compatible = "syscon";
-                       reg = <0x10000000 0x14>;
+                       reg = <0x10000000 0xc>;
                        native-endian;
                };
 
                reboot: syscon-reboot@10000008 {
                        compatible = "syscon-reboot";
                        regmap = <&periph_cntl>;
-                       offset = <0x8>;
+                       offset = <0x0>;
                        mask = <0x1>;
                };
 
index bf6716a..af860d0 100644 (file)
                compatible = "simple-bus";
                ranges;
 
+               clkctl: clock-controller@10000004 {
+                       compatible = "brcm,bcm6328-clocks";
+                       reg = <0x10000004 0x4>;
+                       #clock-cells = <1>;
+               };
+
                periph_intc: interrupt-controller@10000020 {
                        compatible = "brcm,bcm6345-l1-intc";
                        reg = <0x10000020 0x10>,
index 26ddae5..f21176c 100644 (file)
                compatible = "simple-bus";
                ranges;
 
-               periph_cntl: syscon@fffe0000 {
+               clkctl: clock-controller@fffe0004 {
+                       compatible = "brcm,bcm6358-clocks";
+                       reg = <0xfffe0004 0x4>;
+                       #clock-cells = <1>;
+               };
+
+               periph_cntl: syscon@fffe0008 {
                        compatible = "syscon";
-                       reg = <0xfffe0000 0xc>;
+                       reg = <0xfffe0000 0x4>;
                        native-endian;
                };
 
                reboot: syscon-reboot@fffe0008 {
                        compatible = "syscon-reboot";
                        regmap = <&periph_cntl>;
-                       offset = <0x8>;
+                       offset = <0x0>;
                        mask = <0x1>;
                };
 
index c387793..8ae6981 100644 (file)
                compatible = "simple-bus";
                ranges;
 
-               periph_cntl: syscon@10000000 {
+               clkctl: clock-controller@10000004 {
+                       compatible = "brcm,bcm6362-clocks";
+                       reg = <0x10000004 0x4>;
+                       #clock-cells = <1>;
+               };
+
+               periph_cntl: syscon@10000008 {
                        compatible = "syscon";
-                       reg = <0x10000000 0x14>;
+                       reg = <0x10000000 0xc>;
                        native-endian;
                };
 
                reboot: syscon-reboot@10000008 {
                        compatible = "syscon-reboot";
                        regmap = <&periph_cntl>;
-                       offset = <0x8>;
+                       offset = <0x0>;
                        mask = <0x1>;
                };
 
index e116a38..449c167 100644 (file)
                compatible = "simple-bus";
                ranges;
 
-               periph_cntl: syscon@10000000 {
+               clkctl: clock-controller@10000004 {
+                       compatible = "brcm,bcm6368-clocks";
+                       reg = <0x10000004 0x4>;
+                       #clock-cells = <1>;
+               };
+
+               periph_cntl: syscon@10000008 {
                        compatible = "syscon";
-                       reg = <0x10000000 0x14>;
+                       reg = <0x10000000 0xc>;
                        native-endian;
                };
 
                reboot: syscon-reboot@10000008 {
                        compatible = "syscon-reboot";
                        regmap = <&periph_cntl>;
-                       offset = <0x8>;
+                       offset = <0x0>;
                        mask = <0x1>;
                };
 
index 4f7b1fa..2e99523 100644 (file)
@@ -2,6 +2,7 @@
 /dts-v1/;
 
 #include "jz4780.dtsi"
+#include <dt-bindings/clock/ingenic,tcu.h>
 #include <dt-bindings/gpio/gpio.h>
 
 / {
                bias-disable;
        };
 };
+
+&tcu {
+       /* 3 MHz for the system timer and clocksource */
+       assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER1>;
+       assigned-clock-rates = <3000000>, <3000000>;
+};
index 35f0291..f58d239 100644 (file)
@@ -2,6 +2,7 @@
 /dts-v1/;
 
 #include "jz4770.dtsi"
+#include <dt-bindings/clock/ingenic,tcu.h>
 
 / {
        compatible = "gcw,zero", "ingenic,jz4770";
        /* The WiFi module is connected to the UHC. */
        status = "okay";
 };
+
+&tcu {
+       /* 750 kHz for the system timer and clocksource */
+       assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER2>;
+       assigned-clock-rates = <750000>, <750000>;
+
+       /* PWM1 is in use, so reserve channel #2 for the clocksource */
+       ingenic,pwm-channels-mask = <0xfa>;
+};
index 2beb78a..5accda2 100644 (file)
                clock-names = "rtc";
        };
 
+       tcu: timer@10002000 {
+               compatible = "ingenic,jz4740-tcu", "simple-mfd";
+               reg = <0x10002000 0x1000>;
+               #address-cells = <1>;
+               #size-cells = <1>;
+               ranges = <0x0 0x10002000 0x1000>;
+
+               #clock-cells = <1>;
+
+               clocks = <&cgu JZ4740_CLK_RTC
+                         &cgu JZ4740_CLK_EXT
+                         &cgu JZ4740_CLK_PCLK
+                         &cgu JZ4740_CLK_TCU>;
+               clock-names = "rtc", "ext", "pclk", "tcu";
+
+               interrupt-controller;
+               #interrupt-cells = <1>;
+
+               interrupt-parent = <&intc>;
+               interrupts = <23 22 21>;
+       };
+
        rtc_dev: rtc@10003000 {
                compatible = "ingenic,jz4740-rtc";
                reg = <0x10003000 0x40>;
                };
        };
 
+       aic: audio-controller@10020000 {
+               compatible = "ingenic,jz4740-i2s";
+               reg = <0x10020000 0x38>;
+
+               #sound-dai-cells = <0>;
+
+               interrupt-parent = <&intc>;
+               interrupts = <18>;
+
+               clocks = <&cgu JZ4740_CLK_AIC>,
+                        <&cgu JZ4740_CLK_I2S>,
+                        <&cgu JZ4740_CLK_EXT>,
+                        <&cgu JZ4740_CLK_PLL_HALF>;
+               clock-names = "aic", "i2s", "ext", "pll half";
+
+               dmas = <&dmac 25 0xffffffff>, <&dmac 24 0xffffffff>;
+               dma-names = "rx", "tx";
+       };
+
+       codec: audio-codec@10020080 {
+               compatible = "ingenic,jz4740-codec";
+               reg = <0x10020080 0x8>;
+
+               #sound-dai-cells = <0>;
+
+               clocks = <&cgu JZ4740_CLK_AIC>;
+               clock-names = "aic";
+       };
+
+       mmc: mmc@10021000 {
+               compatible = "ingenic,jz4740-mmc";
+               reg = <0x10021000 0x1000>;
+
+               clocks = <&cgu JZ4740_CLK_MMC>;
+               clock-names = "mmc";
+
+               interrupt-parent = <&intc>;
+               interrupts = <14>;
+
+               dmas = <&dmac 27 0xffffffff>, <&dmac 26 0xffffffff>;
+               dma-names = "rx", "tx";
+
+               cap-sd-highspeed;
+               cap-mmc-highspeed;
+               cap-sdio-irq;
+       };
+
        uart0: serial@10030000 {
                compatible = "ingenic,jz4740-uart";
                reg = <0x10030000 0x100>;
                clock-names = "baud", "module";
        };
 
+       adc: adc@10070000 {
+               compatible = "ingenic,jz4740-adc";
+               reg = <0x10070000 0x30>;
+               #io-channel-cells = <1>;
+
+               clocks = <&cgu JZ4740_CLK_ADC>;
+               clock-names = "adc";
+
+               interrupt-parent = <&intc>;
+               interrupts = <12>;
+       };
+
+       nemc: memory-controller@13010000 {
+               compatible = "ingenic,jz4740-nemc";
+               reg = <0x13010000 0x54>;
+               #address-cells = <2>;
+               #size-cells = <1>;
+               ranges = <1 0 0x18000000 0x4000000
+                         2 0 0x14000000 0x4000000
+                         3 0 0x0c000000 0x4000000
+                         4 0 0x08000000 0x4000000>;
+
+               clocks = <&cgu JZ4740_CLK_MCLK>;
+       };
+
+       ecc: ecc-controller@13010100 {
+               compatible = "ingenic,jz4740-ecc";
+               reg = <0x13010100 0x2C>;
+
+               clocks = <&cgu JZ4740_CLK_MCLK>;
+       };
+
        dmac: dma-controller@13020000 {
                compatible = "ingenic,jz4740-dma";
                reg = <0x13020000 0xbc
                interrupts = <20>;
 
                clocks = <&cgu JZ4740_CLK_DMA>;
-
-               /* Disable dmac until we have something that uses it */
-               status = "disabled";
        };
 
        uhc: uhc@13030000 {
 
                status = "disabled";
        };
+
+       udc: usb@13040000 {
+               compatible = "ingenic,jz4740-musb";
+               reg = <0x13040000 0x10000>;
+
+               interrupt-parent = <&intc>;
+               interrupts = <24>;
+               interrupt-names = "mc";
+
+               clocks = <&cgu JZ4740_CLK_UDC>;
+               clock-names = "udc";
+       };
+
+       lcd: lcd-controller@13050000 {
+               compatible = "ingenic,jz4740-lcd";
+               reg = <0x13050000 0x1000>;
+
+               interrupt-parent = <&intc>;
+               interrupts = <30>;
+
+               clocks = <&cgu JZ4740_CLK_LCD_PCLK>, <&cgu JZ4740_CLK_LCD>;
+               clock-names = "lcd_pclk", "lcd";
+       };
 };
index 49ede6c..0bfb9ed 100644 (file)
                #clock-cells = <1>;
        };
 
+       tcu: timer@10002000 {
+               compatible = "ingenic,jz4770-tcu", "simple-mfd";
+               reg = <0x10002000 0x1000>;
+               #address-cells = <1>;
+               #size-cells = <1>;
+               ranges = <0x0 0x10002000 0x1000>;
+
+               #clock-cells = <1>;
+
+               clocks = <&cgu JZ4770_CLK_RTC
+                         &cgu JZ4770_CLK_EXT
+                         &cgu JZ4770_CLK_PCLK>;
+               clock-names = "rtc", "ext", "pclk";
+
+               interrupt-controller;
+               #interrupt-cells = <1>;
+
+               interrupt-parent = <&intc>;
+               interrupts = <27 26 25>;
+       };
+
        pinctrl: pin-controller@10010000 {
                compatible = "ingenic,jz4770-pinctrl";
                reg = <0x10010000 0x600>;
index b03cdec..c54bd7c 100644 (file)
                #clock-cells = <1>;
        };
 
+       tcu: timer@10002000 {
+               compatible = "ingenic,jz4780-tcu",
+                            "ingenic,jz4770-tcu",
+                            "simple-mfd";
+               reg = <0x10002000 0x1000>;
+               #address-cells = <1>;
+               #size-cells = <1>;
+               ranges = <0x0 0x10002000 0x1000>;
+
+               #clock-cells = <1>;
+
+               clocks = <&cgu JZ4780_CLK_RTCLK
+                         &cgu JZ4780_CLK_EXCLK
+                         &cgu JZ4780_CLK_PCLK>;
+               clock-names = "rtc", "ext", "pclk";
+
+               interrupt-controller;
+               #interrupt-cells = <1>;
+
+               interrupt-parent = <&intc>;
+               interrupts = <27 26 25>;
+       };
+
        rtc_dev: rtc@10003000 {
                compatible = "ingenic,jz4780-rtc";
                reg = <0x10003000 0x4c>;
index 76aaf89..7a371d9 100644 (file)
 
 #include "jz4740.dtsi"
 
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/iio/adc/ingenic,adc.h>
+#include <dt-bindings/clock/ingenic,tcu.h>
+#include <dt-bindings/input/input.h>
+
+#define KEY_QI_QI      KEY_F13
+#define KEY_QI_UPRED   KEY_RIGHTALT
+#define KEY_QI_VOLUP   KEY_VOLUMEUP
+#define KEY_QI_VOLDOWN KEY_VOLUMEDOWN
+#define KEY_QI_FN      KEY_LEFTCTRL
+
 / {
        compatible = "qi,lb60", "ingenic,jz4740";
 
        chosen {
                stdout-path = &uart0;
        };
+
+       vcc: regulator@0 {
+               compatible = "regulator-fixed";
+               regulator-name = "vcc";
+
+               regulator-min-microvolt = <3300000>;
+               regulator-max-microvolt = <3300000>;
+               regulator-always-on;
+       };
+
+       mmc_power: regulator@1 {
+               compatible = "regulator-fixed";
+               regulator-name = "mmc_vcc";
+               gpio = <&gpd 2 0>;
+
+               regulator-min-microvolt = <3300000>;
+               regulator-max-microvolt = <3300000>;
+       };
+
+       amp_supply: regulator@2 {
+               compatible = "regulator-fixed";
+               regulator-name = "amp_supply";
+               gpio = <&gpd 4 0>;
+               enable-active-high;
+
+               regulator-min-microvolt = <3300000>;
+               regulator-max-microvolt = <3300000>;
+       };
+
+       amp: analog-amplifier {
+               compatible = "simple-audio-amplifier";
+               enable-gpios = <&gpb 29 GPIO_ACTIVE_HIGH>;
+               VCC-supply = <&amp_supply>;
+       };
+
+       sound {
+               compatible = "simple-audio-card";
+
+               simple-audio-card,name = "QI LB60";
+               simple-audio-card,format = "i2s";
+
+               simple-audio-card,widgets =
+                       "Speaker", "Speaker",
+                       "Microphone", "Mic";
+               simple-audio-card,routing =
+                       "MIC", "Mic",
+                       "Speaker", "OUTL",
+                       "Speaker", "OUTR",
+                       "INL", "LOUT",
+                       "INL", "ROUT";
+
+               simple-audio-card,aux-devs = <&amp>;
+
+               simple-audio-card,bitclock-master = <&dai_codec>;
+               simple-audio-card,frame-master = <&dai_codec>;
+
+               dai_cpu: simple-audio-card,cpu {
+                       sound-dai = <&aic>;
+               };
+
+               dai_codec: simple-audio-card,codec {
+                       sound-dai = <&codec>;
+               };
+       };
+
+       keys {
+               compatible = "gpio-keys";
+
+               key {
+                       label = "Power";
+                       wakeup-source;
+                       linux,code = <KEY_POWER>;
+                       gpios = <&gpd 29 GPIO_ACTIVE_LOW>;
+               };
+       };
+
+       keyboard {
+               compatible = "gpio-matrix-keypad";
+
+               col-scan-delay-us = <10>;
+               debounce-delay-ms = <10>;
+               wakeup-source;
+
+               row-gpios = <&gpd 18 0 &gpd 19 0 &gpd 20 0 &gpd 21 0
+                            &gpd 22 0 &gpd 23 0 &gpd 24 0 &gpd 26 0>;
+               col-gpios = <&gpc 10 0 &gpc 11 0 &gpc 12 0 &gpc 13 0
+                            &gpc 14 0 &gpc 15 0 &gpc 16 0 &gpc 17 0>;
+               gpio-activelow;
+
+               linux,keymap = <
+                       MATRIX_KEY(0, 0, KEY_F1)        /* S2 */
+                       MATRIX_KEY(0, 1, KEY_F2)        /* S3 */
+                       MATRIX_KEY(0, 2, KEY_F3)        /* S4 */
+                       MATRIX_KEY(0, 3, KEY_F4)        /* S5 */
+                       MATRIX_KEY(0, 4, KEY_F5)        /* S6 */
+                       MATRIX_KEY(0, 5, KEY_F6)        /* S7 */
+                       MATRIX_KEY(0, 6, KEY_F7)        /* S8 */
+
+                       MATRIX_KEY(1, 0, KEY_Q) /* S10 */
+                       MATRIX_KEY(1, 1, KEY_W) /* S11 */
+                       MATRIX_KEY(1, 2, KEY_E) /* S12 */
+                       MATRIX_KEY(1, 3, KEY_R) /* S13 */
+                       MATRIX_KEY(1, 4, KEY_T) /* S14 */
+                       MATRIX_KEY(1, 5, KEY_Y) /* S15 */
+                       MATRIX_KEY(1, 6, KEY_U) /* S16 */
+                       MATRIX_KEY(1, 7, KEY_I) /* S17 */
+                       MATRIX_KEY(2, 0, KEY_A) /* S18 */
+                       MATRIX_KEY(2, 1, KEY_S) /* S19 */
+                       MATRIX_KEY(2, 2, KEY_D) /* S20 */
+                       MATRIX_KEY(2, 3, KEY_F) /* S21 */
+                       MATRIX_KEY(2, 4, KEY_G) /* S22 */
+                       MATRIX_KEY(2, 5, KEY_H) /* S23 */
+                       MATRIX_KEY(2, 6, KEY_J) /* S24 */
+                       MATRIX_KEY(2, 7, KEY_K) /* S25 */
+                       MATRIX_KEY(3, 0, KEY_ESC)       /* S26 */
+                       MATRIX_KEY(3, 1, KEY_Z) /* S27 */
+                       MATRIX_KEY(3, 2, KEY_X) /* S28 */
+                       MATRIX_KEY(3, 3, KEY_C) /* S29 */
+                       MATRIX_KEY(3, 4, KEY_V) /* S30 */
+                       MATRIX_KEY(3, 5, KEY_B) /* S31 */
+                       MATRIX_KEY(3, 6, KEY_N) /* S32 */
+                       MATRIX_KEY(3, 7, KEY_M) /* S33 */
+                       MATRIX_KEY(4, 0, KEY_TAB)       /* S34 */
+                       MATRIX_KEY(4, 1, KEY_CAPSLOCK)  /* S35 */
+                       MATRIX_KEY(4, 2, KEY_BACKSLASH) /* S36 */
+                       MATRIX_KEY(4, 3, KEY_APOSTROPHE)        /* S37 */
+                       MATRIX_KEY(4, 4, KEY_COMMA)     /* S38 */
+                       MATRIX_KEY(4, 5, KEY_DOT)       /* S39 */
+                       MATRIX_KEY(4, 6, KEY_SLASH)     /* S40 */
+                       MATRIX_KEY(4, 7, KEY_UP)        /* S41 */
+                       MATRIX_KEY(5, 0, KEY_O) /* S42 */
+                       MATRIX_KEY(5, 1, KEY_L) /* S43 */
+                       MATRIX_KEY(5, 2, KEY_EQUAL)     /* S44 */
+                       MATRIX_KEY(5, 3, KEY_QI_UPRED)  /* S45 */
+                       MATRIX_KEY(5, 4, KEY_SPACE)     /* S46 */
+                       MATRIX_KEY(5, 5, KEY_QI_QI)     /* S47 */
+                       MATRIX_KEY(5, 6, KEY_RIGHTCTRL) /* S48 */
+                       MATRIX_KEY(5, 7, KEY_LEFT)      /* S49 */
+                       MATRIX_KEY(6, 0, KEY_F8)        /* S50 */
+                       MATRIX_KEY(6, 1, KEY_P) /* S51 */
+                       MATRIX_KEY(6, 2, KEY_BACKSPACE)/* S52 */
+                       MATRIX_KEY(6, 3, KEY_ENTER)     /* S53 */
+                       MATRIX_KEY(6, 4, KEY_QI_VOLUP)  /* S54 */
+                       MATRIX_KEY(6, 5, KEY_QI_VOLDOWN)        /* S55 */
+                       MATRIX_KEY(6, 6, KEY_DOWN)      /* S56 */
+                       MATRIX_KEY(6, 7, KEY_RIGHT)     /* S57 */
+
+                       MATRIX_KEY(7, 0, KEY_LEFTSHIFT) /* S58 */
+                       MATRIX_KEY(7, 1, KEY_LEFTALT) /* S59 */
+                       MATRIX_KEY(7, 2, KEY_QI_FN)     /* S60 */
+                       >;
+       };
+
+       spi {
+               compatible = "spi-gpio";
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               sck-gpios = <&gpc 23 GPIO_ACTIVE_HIGH>;
+               mosi-gpios = <&gpc 22 GPIO_ACTIVE_HIGH>;
+               cs-gpios = <&gpc 21 GPIO_ACTIVE_LOW>;
+               num-chipselects = <1>;
+       };
+
+       usb_charger: charger {
+               compatible = "gpio-charger";
+               charger-type = "usb-sdp";
+               gpios = <&gpd 28 GPIO_ACTIVE_LOW>;
+               status-gpios = <&gpc 27 GPIO_ACTIVE_LOW>;
+       };
+
+       simple_battery: battery {
+               compatible = "simple-battery";
+               voltage-min-design-microvolt = <3600000>;
+               voltage-max-design-microvolt = <4200000>;
+       };
+
+       pmu {
+               compatible = "ingenic,jz4740-battery";
+               io-channels = <&adc INGENIC_ADC_BATTERY>;
+               io-channel-names = "battery";
+               power-supplies = <&usb_charger>;
+               monitored-battery = <&simple_battery>;
+       };
+
+       hwmon {
+               compatible = "iio-hwmon";
+               io-channels = <&adc INGENIC_ADC_AUX>;
+       };
+
+       panel: panel {
+               compatible = "giantplus,gpm940b0";
+
+               power-supply = <&vcc>;
+
+               port {
+                       panel_input: endpoint {
+                               remote-endpoint = <&panel_output>;
+                       };
+               };
+       };
+
+       usb_phy: usb-phy {
+               compatible = "usb-nop-xceiv";
+               #phy-cells = <0>;
+
+               vcc-supply = <&vcc>;
+       };
 };
 
 &ext {
        pinctrl-0 = <&pins_uart0>;
 };
 
+&uart1 {
+       status = "disabled";
+};
+
+&nemc {
+       nandc: nand-controller@1 {
+               compatible = "ingenic,jz4740-nand";
+               reg = <1 0 0x4000000>;
+
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               ingenic,bch-controller = <&ecc>;
+
+               pinctrl-names = "default";
+               pinctrl-0 = <&pins_nemc>;
+
+               rb-gpios = <&gpc 30 GPIO_ACTIVE_LOW>;
+
+               nand@1 {
+                       reg = <1>;
+
+                       nand-ecc-step-size = <512>;
+                       nand-ecc-strength = <4>;
+                       nand-ecc-mode = "hw";
+                       nand-is-boot-medium;
+                       nand-on-flash-bbt;
+
+                       partitions {
+                               compatible = "fixed-partitions";
+                               #address-cells = <1>;
+                               #size-cells = <1>;
+
+                               partition@0 {
+                                       label = "boot";
+                                       reg = <0x0 0x400000>;
+                               };
+
+                               partition@400000 {
+                                       label = "kernel";
+                                       reg = <0x400000 0x400000>;
+                               };
+
+                               partition@800000 {
+                                       label = "rootfs";
+                                       reg = <0x800000 0x0>;
+                               };
+                       };
+               };
+       };
+};
+
+&lcd {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pins_lcd>;
+
+       port {
+               panel_output: endpoint {
+                       remote-endpoint = <&panel_input>;
+               };
+       };
+};
+
+&udc {
+       phys = <&usb_phy>;
+};
+
 &pinctrl {
+       pins_lcd: lcd {
+               function = "lcd";
+               groups = "lcd-8bit";
+       };
+
+       pins_nemc: nemc {
+               function = "nand";
+               groups = "nand-cs1";
+       };
+
        pins_uart0: uart0 {
                function = "uart0";
                groups = "uart0-data";
                bias-disable;
        };
+
+       pins_mmc: mmc {
+               mmc {
+                       function = "mmc";
+                       groups = "mmc-1bit", "mmc-4bit";
+                       bias-disable;
+               };
+
+               mmc-gpios {
+                       pins = "PD0", "PD2";
+                       bias-disable;
+               };
+       };
+};
+
+&mmc {
+       bus-width = <4>;
+       max-frequency = <24000000>;
+       cd-gpios = <&gpd 0 GPIO_ACTIVE_HIGH>;
+       vmmc-supply = <&mmc_power>;
+
+       pinctrl-names = "default";
+       pinctrl-0 = <&pins_mmc>;
+};
+
+&tcu {
+       /* 750 kHz for the system timer and clocksource */
+       assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER1>;
+       assigned-clock-rates = <750000>, <750000>;
 };
index 33ae74a..797d336 100644 (file)
                        reg = <0x1010000 0x10000>,
                              <0x1030000 0x10000>,
                              <0x1080000 0x100>,
+                             <0x10e0000 0x10000>,
                              <0x11e0000 0x100>,
                              <0x11f0000 0x100>,
                              <0x1200000 0x100>,
                              <0x1800000 0x80000>,
                              <0x1880000 0x10000>,
                              <0x1060000 0x10000>;
-                       reg-names = "sys", "rew", "qs", "port0", "port1",
+                       reg-names = "sys", "rew", "qs", "ptp", "port0", "port1",
                                    "port2", "port3", "port4", "port5", "port6",
                                    "port7", "port8", "port9", "port10", "qsys",
                                    "ana", "s2";
-                       interrupts = <21 22>;
-                       interrupt-names = "xtr", "inj";
+                       interrupts = <18 21 22>;
+                       interrupt-names = "ptp_rdy", "xtr", "inj";
 
                        ethernet-ports {
                                #address-cells = <1>;
index 11d5a4e..14ea680 100644 (file)
@@ -190,7 +190,7 @@ char *octeon_swiotlb;
 
 void __init plat_swiotlb_setup(void)
 {
-       int i;
+       struct memblock_region *mem;
        phys_addr_t max_addr;
        phys_addr_t addr_size;
        size_t swiotlbsize;
@@ -199,19 +199,15 @@ void __init plat_swiotlb_setup(void)
        max_addr = 0;
        addr_size = 0;
 
-       for (i = 0 ; i < boot_mem_map.nr_map; i++) {
-               struct boot_mem_map_entry *e = &boot_mem_map.map[i];
-               if (e->type != BOOT_MEM_RAM && e->type != BOOT_MEM_INIT_RAM)
-                       continue;
-
+       for_each_memblock(memory, mem) {
                /* These addresses map low for PCI. */
-               if (e->addr > 0x410000000ull && !OCTEON_IS_OCTEON2())
+               if (mem->base > 0x410000000ull && !OCTEON_IS_OCTEON2())
                        continue;
 
-               addr_size += e->size;
+               addr_size += mem->size;
 
-               if (max_addr < e->addr + e->size)
-                       max_addr = e->addr + e->size;
+               if (max_addr < mem->base + mem->size)
+                       max_addr = mem->base + mem->size;
 
        }
 
index 8bf43c5..95034bf 100644 (file)
@@ -1007,8 +1007,7 @@ void __init plat_mem_setup(void)
         * regions next to each other.
         */
        cvmx_bootmem_lock();
-       while ((boot_mem_map.nr_map < BOOT_MEM_MAP_MAX)
-               && (total < max_memory)) {
+       while (total < max_memory) {
                memory = cvmx_bootmem_phy_alloc(mem_alloc_size,
                                                __pa_symbol(&_end), -1,
                                                0x100000,
index 208da8a..d3f4d52 100644 (file)
@@ -1,7 +1,6 @@
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 # CONFIG_CROSS_MEMORY_ATTACH is not set
-CONFIG_PREEMPT=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS_ALL=y
@@ -17,9 +16,8 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_PARTITION_ADVANCED=y
 # CONFIG_EFI_PARTITION is not set
-# CONFIG_IOSCHED_CFQ is not set
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-# CONFIG_COMPACTION is not set
+CONFIG_CMA=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -31,9 +29,6 @@ CONFIG_IP_ROUTE_MULTIPATH=y
 CONFIG_IP_ROUTE_VERBOSE=y
 CONFIG_IP_MROUTE=y
 CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 CONFIG_TCP_CONG_ADVANCED=y
 # CONFIG_TCP_CONG_BIC is not set
@@ -44,7 +39,8 @@ CONFIG_TCP_CONG_WESTWOOD=y
 CONFIG_MTD=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_RAW_NAND=y
-CONFIG_MTD_NAND_JZ4740=y
+CONFIG_MTD_NAND_JZ4780=y
+CONFIG_MTD_NAND_JZ4740_ECC=y
 CONFIG_MTD_UBI=y
 CONFIG_NETDEVICES=y
 # CONFIG_WLAN is not set
@@ -66,18 +62,20 @@ CONFIG_SERIAL_8250_INGENIC=y
 CONFIG_SPI=y
 CONFIG_SPI_GPIO=y
 CONFIG_POWER_SUPPLY=y
-CONFIG_BATTERY_JZ4740=y
+CONFIG_BATTERY_INGENIC=y
 CONFIG_CHARGER_GPIO=y
-# CONFIG_HWMON is not set
+CONFIG_SENSORS_IIO_HWMON=y
 CONFIG_WATCHDOG=y
 CONFIG_JZ4740_WDT=y
-CONFIG_MFD_JZ4740_ADC=y
 CONFIG_REGULATOR=y
 CONFIG_REGULATOR_FIXED_VOLTAGE=y
-CONFIG_FB=y
-CONFIG_FB_JZ4740=y
-CONFIG_LCD_CLASS_DEVICE=y
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
+CONFIG_DRM=y
+CONFIG_DRM_FBDEV_OVERALLOC=200
+CONFIG_DRM_PANEL_SIMPLE=y
+CONFIG_DRM_INGENIC=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
+# CONFIG_BACKLIGHT_GENERIC is not set
 # CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
@@ -92,13 +90,13 @@ CONFIG_SND=y
 # CONFIG_SND_SPI is not set
 # CONFIG_SND_MIPS is not set
 CONFIG_SND_SOC=y
-CONFIG_SND_JZ4740_SOC=y
-CONFIG_SND_JZ4740_SOC_QI_LB60=y
-CONFIG_USB=y
-CONFIG_USB_OTG_BLACKLIST_HUB=y
+CONFIG_SND_JZ4740_SOC_I2S=y
+CONFIG_SND_SOC_JZ4740_CODEC=y
+CONFIG_SND_SOC_SIMPLE_AMPLIFIER=y
+CONFIG_SND_SIMPLE_CARD=y
 CONFIG_USB_MUSB_HDRC=y
-CONFIG_USB_MUSB_GADGET=y
 CONFIG_USB_MUSB_JZ4740=y
+CONFIG_USB_INVENTRA_DMA=y
 CONFIG_NOP_USB_XCEIV=y
 CONFIG_USB_GADGET=y
 CONFIG_USB_GADGET_DEBUG=y
@@ -109,11 +107,13 @@ CONFIG_MMC_JZ4740=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_JZ4740=y
 CONFIG_DMADEVICES=y
-CONFIG_DMA_JZ4740=y
+CONFIG_DMA_JZ4780=y
+CONFIG_MEMORY=y
+CONFIG_IIO=y
+CONFIG_INGENIC_ADC=y
 CONFIG_PWM=y
 CONFIG_PWM_JZ4740=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
 # CONFIG_DNOTIFY is not set
 CONFIG_VFAT_FS=y
 CONFIG_PROC_KCORE=y
index 429b7f8..af44b35 100644 (file)
 
 #undef DEBUG
 
+#define MAX_PROM_MEM 5
+static phys_addr_t prom_mem_base[MAX_PROM_MEM] __initdata;
+static phys_addr_t prom_mem_size[MAX_PROM_MEM] __initdata;
+static unsigned int nr_prom_mem __initdata;
+
 /*
  * For ARC firmware memory functions the unit of meassuring memory is always
  * a 4k page of memory
@@ -129,6 +134,7 @@ void __init prom_meminit(void)
        }
 #endif
 
+       nr_prom_mem = 0;
        p = PROM_NULL_MDESC;
        while ((p = ArcGetMemoryDescriptor(p))) {
                unsigned long base, size;
@@ -139,6 +145,16 @@ void __init prom_meminit(void)
                type = prom_memtype_classify(p->type);
 
                add_memory_region(base, size, type);
+
+               if (type == BOOT_MEM_ROM_DATA) {
+                       if (nr_prom_mem >= MAX_PROM_MEM) {
+                               pr_err("Too many ROM DATA regions\n");
+                               continue;
+                       }
+                       prom_mem_base[nr_prom_mem] = base;
+                       prom_mem_size[nr_prom_mem] = size;
+                       nr_prom_mem++;
+               }
        }
 }
 
@@ -150,12 +166,8 @@ void __init prom_free_prom_memory(void)
        if (prom_flags & PROM_FLAG_DONT_FREE_TEMP)
                return;
 
-       for (i = 0; i < boot_mem_map.nr_map; i++) {
-               if (boot_mem_map.map[i].type != BOOT_MEM_ROM_DATA)
-                       continue;
-
-               addr = boot_mem_map.map[i].addr;
+       for (i = 0; i < nr_prom_mem; i++) {
                free_init_pages("prom memory",
-                               addr, addr + boot_mem_map.map[i].size);
+                       prom_mem_base[i], prom_mem_base[i] + prom_mem_size[i]);
        }
 }
index 4856adc..59a48c6 100644 (file)
  */
 #define TO_PHYS_MASK   _CONST64_(0x07ffffffffffffff)   /* 2^^59 - 1 */
 
-#ifndef CONFIG_CPU_R8000
-
-/*
- * The R8000 doesn't have the 32-bit compat spaces so we don't define them
- * in order to catch bugs in the source code.
- */
-
 #define COMPAT_K1BASE32                _CONST64_(0xffffffffa0000000)
 #define PHYS_TO_COMPATK1(x)    ((x) | COMPAT_K1BASE32) /* 32-bit compat k1 */
 
-#endif
-
 #define KDM_TO_PHYS(x)         (_ACAST64_ (x) & TO_PHYS_MASK)
 #define PHYS_TO_K0(x)          (_ACAST64_ (x) | CAC_BASE)
 
index 9a82dd1..bb8658c 100644 (file)
@@ -68,7 +68,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v)                             \
                "\t" __scbeqz " %0, 1b                                  \n"   \
                "       .set    pop                                     \n"   \
                : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)          \
-               : "Ir" (i));                                                  \
+               : "Ir" (i) : __LLSC_CLOBBER);                                 \
        } else {                                                              \
                unsigned long flags;                                          \
                                                                              \
@@ -98,7 +98,7 @@ static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v)             \
                "       .set    pop                                     \n"   \
                : "=&r" (result), "=&r" (temp),                               \
                  "+" GCC_OFF_SMALL_ASM() (v->counter)                        \
-               : "Ir" (i));                                                  \
+               : "Ir" (i) : __LLSC_CLOBBER);                                 \
        } else {                                                              \
                unsigned long flags;                                          \
                                                                              \
@@ -132,7 +132,7 @@ static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v)            \
                "       move    %0, %1                                  \n"   \
                : "=&r" (result), "=&r" (temp),                               \
                  "+" GCC_OFF_SMALL_ASM() (v->counter)                        \
-               : "Ir" (i));                                                  \
+               : "Ir" (i) : __LLSC_CLOBBER);                                 \
        } else {                                                              \
                unsigned long flags;                                          \
                                                                              \
@@ -193,6 +193,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
        if (kernel_uses_llsc) {
                int temp;
 
+               loongson_llsc_mb();
                __asm__ __volatile__(
                "       .set    push                                    \n"
                "       .set    "MIPS_ISA_LEVEL"                        \n"
@@ -200,16 +201,16 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
                "       .set    pop                                     \n"
                "       subu    %0, %1, %3                              \n"
                "       move    %1, %0                                  \n"
-               "       bltz    %0, 1f                                  \n"
+               "       bltz    %0, 2f                                  \n"
                "       .set    push                                    \n"
                "       .set    "MIPS_ISA_LEVEL"                        \n"
                "       sc      %1, %2                                  \n"
                "\t" __scbeqz " %1, 1b                                  \n"
-               "1:                                                     \n"
+               "2:                                                     \n"
                "       .set    pop                                     \n"
                : "=&r" (result), "=&r" (temp),
                  "+" GCC_OFF_SMALL_ASM() (v->counter)
-               : "Ir" (i));
+               : "Ir" (i) : __LLSC_CLOBBER);
        } else {
                unsigned long flags;
 
@@ -269,7 +270,7 @@ static __inline__ void atomic64_##op(s64 i, atomic64_t * v)               \
                "\t" __scbeqz " %0, 1b                                  \n"   \
                "       .set    pop                                     \n"   \
                : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)          \
-               : "Ir" (i));                                                  \
+               : "Ir" (i) : __LLSC_CLOBBER);                                 \
        } else {                                                              \
                unsigned long flags;                                          \
                                                                              \
@@ -299,7 +300,7 @@ static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v)   \
                "       .set    pop                                     \n"   \
                : "=&r" (result), "=&r" (temp),                               \
                  "+" GCC_OFF_SMALL_ASM() (v->counter)                        \
-               : "Ir" (i));                                                  \
+               : "Ir" (i) : __LLSC_CLOBBER);                                 \
        } else {                                                              \
                unsigned long flags;                                          \
                                                                              \
@@ -333,7 +334,7 @@ static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v)    \
                "       .set    pop                                     \n"   \
                : "=&r" (result), "=&r" (temp),                               \
                  "+" GCC_OFF_SMALL_ASM() (v->counter)                        \
-               : "Ir" (i));                                                  \
+               : "Ir" (i) : __LLSC_CLOBBER);                                 \
        } else {                                                              \
                unsigned long flags;                                          \
                                                                              \
index b865e31..9228f73 100644 (file)
 #define __smp_wmb()    barrier()
 #endif
 
+/*
+ * When LL/SC does imply order, it must also be a compiler barrier to prevent
+ * the compiler from reordering where the CPU will not. When it does not imply
+ * order, the compiler is also free to reorder across the LL/SC loop and
+ * ordering will be done by smp_llsc_mb() and friends.
+ */
 #if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)
 #define __WEAK_LLSC_MB         "       sync    \n"
+#define smp_llsc_mb()          __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
+#define __LLSC_CLOBBER
 #else
 #define __WEAK_LLSC_MB         "               \n"
+#define smp_llsc_mb()          do { } while (0)
+#define __LLSC_CLOBBER         "memory"
 #endif
 
-#define smp_llsc_mb()  __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
-
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
 #define smp_mb__before_llsc() smp_wmb()
 #define __smp_mb__before_llsc() __smp_wmb()
 
 /*
  * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
- * store or pref) in between an ll & sc can cause the sc instruction to
+ * store or prefetch) in between an LL & SC can cause the SC instruction to
  * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
  * containing such sequences, this bug bites harder than we might otherwise
  * expect due to reordering & speculation:
  *
- * 1) A memory access appearing prior to the ll in program order may actually
- *    be executed after the ll - this is the reordering case.
+ * 1) A memory access appearing prior to the LL in program order may actually
+ *    be executed after the LL - this is the reordering case.
  *
- *    In order to avoid this we need to place a memory barrier (ie. a sync
- *    instruction) prior to every ll instruction, in between it & any earlier
- *    memory access instructions. Many of these cases are already covered by
- *    smp_mb__before_llsc() but for the remaining cases, typically ones in
- *    which multiple CPUs may operate on a memory location but ordering is not
- *    usually guaranteed, we use loongson_llsc_mb() below.
+ *    In order to avoid this we need to place a memory barrier (ie. a SYNC
+ *    instruction) prior to every LL instruction, in between it and any earlier
+ *    memory access instructions.
  *
  *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
  *
- * 2) If a conditional branch exists between an ll & sc with a target outside
- *    of the ll-sc loop, for example an exit upon value mismatch in cmpxchg()
+ * 2) If a conditional branch exists between an LL & SC with a target outside
+ *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
  *    or similar, then misprediction of the branch may allow speculative
- *    execution of memory accesses from outside of the ll-sc loop.
+ *    execution of memory accesses from outside of the LL-SC loop.
  *
- *    In order to avoid this we need a memory barrier (ie. a sync instruction)
+ *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
  *    at each affected branch target, for which we also use loongson_llsc_mb()
  *    defined below.
  *
  *    This case affects all current Loongson 3 CPUs.
+ *
+ * The cases described above cause an error in the cache coherence protocol,
+ * such that the Invalidate of a competing LL-SC goes 'missing' and the SC
+ * erroneously observes that its core still has Exclusive state, so the SC
+ * is allowed to proceed.
+ *
+ * Therefore the error only occurs on SMP systems.
  */
 #ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
-#define loongson_llsc_mb()     __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
+#define loongson_llsc_mb()     __asm__ __volatile__("sync" : : :"memory")
 #else
 #define loongson_llsc_mb()     do { } while (0)
 #endif
index 9a466dd..985d6a0 100644 (file)
@@ -66,7 +66,8 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
                "       beqzl   %0, 1b                                  \n"
                "       .set    pop                                     \n"
                : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*m)
-               : "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m));
+               : "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m)
+               : __LLSC_CLOBBER);
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
        } else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
                loongson_llsc_mb();
@@ -76,7 +77,8 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
                        "       " __INS "%0, %3, %2, 1                  \n"
                        "       " __SC "%0, %1                          \n"
                        : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-                       : "ir" (bit), "r" (~0));
+                       : "ir" (bit), "r" (~0)
+                       : __LLSC_CLOBBER);
                } while (unlikely(!temp));
 #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
        } else if (kernel_uses_llsc) {
@@ -90,7 +92,8 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
                        "       " __SC  "%0, %1                         \n"
                        "       .set    pop                             \n"
                        : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-                       : "ir" (1UL << bit));
+                       : "ir" (1UL << bit)
+                       : __LLSC_CLOBBER);
                } while (unlikely(!temp));
        } else
                __mips_set_bit(nr, addr);
@@ -122,7 +125,8 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
                "       beqzl   %0, 1b                                  \n"
                "       .set    pop                                     \n"
                : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-               : "ir" (~(1UL << bit)));
+               : "ir" (~(1UL << bit))
+               : __LLSC_CLOBBER);
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
        } else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
                loongson_llsc_mb();
@@ -132,7 +136,8 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
                        "       " __INS "%0, $0, %2, 1                  \n"
                        "       " __SC "%0, %1                          \n"
                        : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-                       : "ir" (bit));
+                       : "ir" (bit)
+                       : __LLSC_CLOBBER);
                } while (unlikely(!temp));
 #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
        } else if (kernel_uses_llsc) {
@@ -146,7 +151,8 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
                        "       " __SC "%0, %1                          \n"
                        "       .set    pop                             \n"
                        : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-                       : "ir" (~(1UL << bit)));
+                       : "ir" (~(1UL << bit))
+                       : __LLSC_CLOBBER);
                } while (unlikely(!temp));
        } else
                __mips_clear_bit(nr, addr);
@@ -192,7 +198,8 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
                "       beqzl   %0, 1b                          \n"
                "       .set    pop                             \n"
                : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-               : "ir" (1UL << bit));
+               : "ir" (1UL << bit)
+               : __LLSC_CLOBBER);
        } else if (kernel_uses_llsc) {
                unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
                unsigned long temp;
@@ -207,7 +214,8 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
                        "       " __SC  "%0, %1                         \n"
                        "       .set    pop                             \n"
                        : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
-                       : "ir" (1UL << bit));
+                       : "ir" (1UL << bit)
+                       : __LLSC_CLOBBER);
                } while (unlikely(!temp));
        } else
                __mips_change_bit(nr, addr);
@@ -244,11 +252,12 @@ static inline int test_and_set_bit(unsigned long nr,
                "       .set    pop                                     \n"
                : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
                : "r" (1UL << bit)
-               : "memory");
+               : __LLSC_CLOBBER);
        } else if (kernel_uses_llsc) {
                unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
                unsigned long temp;
 
+               loongson_llsc_mb();
                do {
                        __asm__ __volatile__(
                        "       .set    push                            \n"
@@ -259,7 +268,7 @@ static inline int test_and_set_bit(unsigned long nr,
                        "       .set    pop                             \n"
                        : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
                        : "r" (1UL << bit)
-                       : "memory");
+                       : __LLSC_CLOBBER);
                } while (unlikely(!res));
 
                res = temp & (1UL << bit);
@@ -300,11 +309,12 @@ static inline int test_and_set_bit_lock(unsigned long nr,
                "       .set    pop                                     \n"
                : "=&r" (temp), "+m" (*m), "=&r" (res)
                : "r" (1UL << bit)
-               : "memory");
+               : __LLSC_CLOBBER);
        } else if (kernel_uses_llsc) {
                unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
                unsigned long temp;
 
+               loongson_llsc_mb();
                do {
                        __asm__ __volatile__(
                        "       .set    push                            \n"
@@ -315,7 +325,7 @@ static inline int test_and_set_bit_lock(unsigned long nr,
                        "       .set    pop                             \n"
                        : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
                        : "r" (1UL << bit)
-                       : "memory");
+                       : __LLSC_CLOBBER);
                } while (unlikely(!res));
 
                res = temp & (1UL << bit);
@@ -358,12 +368,13 @@ static inline int test_and_clear_bit(unsigned long nr,
                "       .set    pop                                     \n"
                : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
                : "r" (1UL << bit)
-               : "memory");
+               : __LLSC_CLOBBER);
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
        } else if (kernel_uses_llsc && __builtin_constant_p(nr)) {
                unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
                unsigned long temp;
 
+               loongson_llsc_mb();
                do {
                        __asm__ __volatile__(
                        "       " __LL  "%0, %1 # test_and_clear_bit    \n"
@@ -372,13 +383,14 @@ static inline int test_and_clear_bit(unsigned long nr,
                        "       " __SC  "%0, %1                         \n"
                        : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
                        : "ir" (bit)
-                       : "memory");
+                       : __LLSC_CLOBBER);
                } while (unlikely(!temp));
 #endif
        } else if (kernel_uses_llsc) {
                unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
                unsigned long temp;
 
+               loongson_llsc_mb();
                do {
                        __asm__ __volatile__(
                        "       .set    push                            \n"
@@ -390,7 +402,7 @@ static inline int test_and_clear_bit(unsigned long nr,
                        "       .set    pop                             \n"
                        : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
                        : "r" (1UL << bit)
-                       : "memory");
+                       : __LLSC_CLOBBER);
                } while (unlikely(!res));
 
                res = temp & (1UL << bit);
@@ -433,11 +445,12 @@ static inline int test_and_change_bit(unsigned long nr,
                "       .set    pop                                     \n"
                : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
                : "r" (1UL << bit)
-               : "memory");
+               : __LLSC_CLOBBER);
        } else if (kernel_uses_llsc) {
                unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
                unsigned long temp;
 
+               loongson_llsc_mb();
                do {
                        __asm__ __volatile__(
                        "       .set    push                            \n"
@@ -448,7 +461,7 @@ static inline int test_and_change_bit(unsigned long nr,
                        "       .set    pop                             \n"
                        : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
                        : "r" (1UL << bit)
-                       : "memory");
+                       : __LLSC_CLOBBER);
                } while (unlikely(!res));
 
                res = temp & (1UL << bit);
index 235bc2f..34d6222 100644 (file)
@@ -81,34 +81,19 @@ enum loongson_machine_type {
 #define  MACH_INGENIC_JZ4740   1       /* JZ4740 SOC           */
 #define  MACH_INGENIC_JZ4770   2       /* JZ4770 SOC           */
 #define  MACH_INGENIC_JZ4780   3       /* JZ4780 SOC           */
+#define  MACH_INGENIC_X1000            4       /* X1000 SOC            */
 
 extern char *system_type;
 const char *get_system_type(void);
 
 extern unsigned long mips_machtype;
 
-#define BOOT_MEM_MAP_MAX       32
 #define BOOT_MEM_RAM           1
 #define BOOT_MEM_ROM_DATA      2
 #define BOOT_MEM_RESERVED      3
 #define BOOT_MEM_INIT_RAM      4
 #define BOOT_MEM_NOMAP         5
 
-/*
- * A memory map that's built upon what was determined
- * or specified on the command line.
- */
-struct boot_mem_map {
-       int nr_map;
-       struct boot_mem_map_entry {
-               phys_addr_t addr;       /* start of memory segment */
-               phys_addr_t size;       /* size of memory segment */
-               long type;              /* type of memory segment */
-       } map[BOOT_MEM_MAP_MAX];
-};
-
-extern struct boot_mem_map boot_mem_map;
-
 extern void add_memory_region(phys_addr_t start, phys_addr_t size, long type);
 extern void detect_memory_region(phys_addr_t start, phys_addr_t sz_min,  phys_addr_t sz_max);
 
index f345a87..79bf34e 100644 (file)
@@ -46,6 +46,7 @@ extern unsigned long __xchg_called_with_bad_pointer(void)
        __typeof(*(m)) __ret;                                           \
                                                                        \
        if (kernel_uses_llsc) {                                         \
+               loongson_llsc_mb();                                     \
                __asm__ __volatile__(                                   \
                "       .set    push                            \n"     \
                "       .set    noat                            \n"     \
@@ -60,7 +61,7 @@ extern unsigned long __xchg_called_with_bad_pointer(void)
                "       .set    pop                             \n"     \
                : "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m)           \
                : GCC_OFF_SMALL_ASM() (*m), "Jr" (val)                  \
-               : "memory");                                            \
+               : __LLSC_CLOBBER);                                      \
        } else {                                                        \
                unsigned long __flags;                                  \
                                                                        \
@@ -117,6 +118,7 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
        __typeof(*(m)) __ret;                                           \
                                                                        \
        if (kernel_uses_llsc) {                                         \
+               loongson_llsc_mb();                                     \
                __asm__ __volatile__(                                   \
                "       .set    push                            \n"     \
                "       .set    noat                            \n"     \
@@ -132,8 +134,9 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
                "       .set    pop                             \n"     \
                "2:                                             \n"     \
                : "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m)           \
-               : GCC_OFF_SMALL_ASM() (*m), "Jr" (old), "Jr" (new)              \
-               : "memory");                                            \
+               : GCC_OFF_SMALL_ASM() (*m), "Jr" (old), "Jr" (new)      \
+               : __LLSC_CLOBBER);                                      \
+               loongson_llsc_mb();                                     \
        } else {                                                        \
                unsigned long __flags;                                  \
                                                                        \
@@ -229,6 +232,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
         */
        local_irq_save(flags);
 
+       loongson_llsc_mb();
        asm volatile(
        "       .set    push                            \n"
        "       .set    " MIPS_ISA_ARCH_LEVEL "         \n"
@@ -274,6 +278,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
          "r" (old),
          "r" (new)
        : "memory");
+       loongson_llsc_mb();
 
        local_irq_restore(flags);
        return ret;
@@ -290,10 +295,13 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
         * will cause a build error unless cpu_has_64bits is a          \
         * compile-time constant 1.                                     \
         */                                                             \
-       if (cpu_has_64bits && kernel_uses_llsc)                         \
+       if (cpu_has_64bits && kernel_uses_llsc) {                       \
+               smp_mb__before_llsc();                                  \
                __res = __cmpxchg64((ptr), __old, __new);               \
-       else                                                            \
+               smp_llsc_mb();                                          \
+       } else {                                                        \
                __res = __cmpxchg64_unsupported();                      \
+       }                                                               \
                                                                        \
        __res;                                                          \
 })
index 6998a97..983a6a7 100644 (file)
 #ifndef cpu_has_pindexed_dcache
 #define cpu_has_pindexed_dcache        (cpu_data[0].dcache.flags & MIPS_CACHE_PINDEX)
 #endif
-#ifndef cpu_has_local_ebase
-#define cpu_has_local_ebase    1
-#endif
 
 /*
  * I-Cache snoops remote store.         This only matters on SMP.  Some multiprocessors
 #define cpu_has_dsp3           __ase(MIPS_ASE_DSP3)
 #endif
 
+#ifndef cpu_has_loongson_mmi
+#define cpu_has_loongson_mmi           __ase(MIPS_ASE_LOONGSON_MMI)
+#endif
+
+#ifndef cpu_has_loongson_cam
+#define cpu_has_loongson_cam           __ase(MIPS_ASE_LOONGSON_CAM)
+#endif
+
+#ifndef cpu_has_loongson_ext
+#define cpu_has_loongson_ext           __ase(MIPS_ASE_LOONGSON_EXT)
+#endif
+
+#ifndef cpu_has_loongson_ext2
+#define cpu_has_loongson_ext2          __ase(MIPS_ASE_LOONGSON_EXT2)
+#endif
+
 #ifndef cpu_has_mipsmt
 #define cpu_has_mipsmt         __isa_lt_and_ase(6, MIPS_ASE_MIPSMT)
 #endif
index a45af3d..7bbb667 100644 (file)
@@ -38,7 +38,7 @@ static inline int __pure __get_cpu_type(const int cpu_type)
 #if defined(CONFIG_SYS_HAS_CPU_MIPS32_R1) || \
     defined(CONFIG_SYS_HAS_CPU_MIPS32_R2)
        case CPU_4KEC:
-       case CPU_JZRISC:
+       case CPU_XBURST:
 #endif
 
 #ifdef CONFIG_SYS_HAS_CPU_MIPS32_R2
@@ -116,11 +116,6 @@ static inline int __pure __get_cpu_type(const int cpu_type)
        case CPU_VR4181A:
 #endif
 
-#ifdef CONFIG_SYS_HAS_CPU_R4300
-       case CPU_R4300:
-       case CPU_R4310:
-#endif
-
 #ifdef CONFIG_SYS_HAS_CPU_R4X00
        case CPU_R4000PC:
        case CPU_R4000SC:
@@ -143,10 +138,6 @@ static inline int __pure __get_cpu_type(const int cpu_type)
        case CPU_R5000:
 #endif
 
-#ifdef CONFIG_SYS_HAS_CPU_R5432
-       case CPU_R5432:
-#endif
-
 #ifdef CONFIG_SYS_HAS_CPU_R5500
        case CPU_R5500:
 #endif
@@ -155,10 +146,6 @@ static inline int __pure __get_cpu_type(const int cpu_type)
        case CPU_NEVADA:
 #endif
 
-#ifdef CONFIG_SYS_HAS_CPU_R8000
-       case CPU_R8000:
-#endif
-
 #ifdef CONFIG_SYS_HAS_CPU_R10000
        case CPU_R10000:
        case CPU_R12000:
index 290369f..7fddcb8 100644 (file)
@@ -47,7 +47,7 @@
 #define PRID_COMP_CAVIUM       0x0d0000
 #define PRID_COMP_LOONGSON     0x140000
 #define PRID_COMP_INGENIC_D0   0xd00000        /* JZ4740, JZ4750 */
-#define PRID_COMP_INGENIC_D1   0xd10000        /* JZ4770, JZ4775 */
+#define PRID_COMP_INGENIC_D1   0xd10000        /* JZ4770, JZ4775, X1000 */
 #define PRID_COMP_INGENIC_E1   0xe10000        /* JZ4780 */
 
 /*
  * These are the PRID's for when 23:16 == PRID_COMP_INGENIC_*
  */
 
-#define PRID_IMP_JZRISC               0x0200
+#define PRID_IMP_XBURST               0x0200
 
 /*
  * These are the PRID's for when 23:16 == PRID_COMP_NETLOGIC
@@ -293,19 +293,14 @@ enum cpu_type_enum {
        /*
         * R4000 class processors
         */
-       CPU_R4000PC, CPU_R4000SC, CPU_R4000MC, CPU_R4200, CPU_R4300, CPU_R4310,
+       CPU_R4000PC, CPU_R4000SC, CPU_R4000MC, CPU_R4200,
        CPU_R4400PC, CPU_R4400SC, CPU_R4400MC, CPU_R4600, CPU_R4640, CPU_R4650,
-       CPU_R4700, CPU_R5000, CPU_R5500, CPU_NEVADA, CPU_R5432, CPU_R10000,
+       CPU_R4700, CPU_R5000, CPU_R5500, CPU_NEVADA, CPU_R10000,
        CPU_R12000, CPU_R14000, CPU_R16000, CPU_VR41XX, CPU_VR4111, CPU_VR4121,
        CPU_VR4122, CPU_VR4131, CPU_VR4133, CPU_VR4181, CPU_VR4181A, CPU_RM7000,
        CPU_SR71000, CPU_TX49XX,
 
        /*
-        * R8000 class processors
-        */
-       CPU_R8000,
-
-       /*
         * TX3900 class processors
         */
        CPU_TX3912, CPU_TX3922, CPU_TX3927,
@@ -315,7 +310,7 @@ enum cpu_type_enum {
         */
        CPU_4KC, CPU_4KEC, CPU_4KSC, CPU_24K, CPU_34K, CPU_1004K, CPU_74K,
        CPU_ALCHEMY, CPU_PR4450, CPU_BMIPS32, CPU_BMIPS3300, CPU_BMIPS4350,
-       CPU_BMIPS4380, CPU_BMIPS5000, CPU_JZRISC, CPU_LOONGSON1, CPU_M14KC,
+       CPU_BMIPS4380, CPU_BMIPS5000, CPU_XBURST, CPU_LOONGSON1, CPU_M14KC,
        CPU_M14KEC, CPU_INTERAPTIV, CPU_P5600, CPU_PROAPTIV, CPU_1074K,
        CPU_M5150, CPU_I6400, CPU_P6600, CPU_M6250,
 
@@ -433,5 +428,9 @@ enum cpu_type_enum {
 #define MIPS_ASE_MSA           0x00000100 /* MIPS SIMD Architecture */
 #define MIPS_ASE_DSP3          0x00000200 /* Signal Processing ASE Rev 3*/
 #define MIPS_ASE_MIPS16E2      0x00000400 /* MIPS16e2 */
+#define MIPS_ASE_LOONGSON_MMI  0x00000800 /* Loongson MultiMedia extensions Instructions */
+#define MIPS_ASE_LOONGSON_CAM  0x00001000 /* Loongson CAM */
+#define MIPS_ASE_LOONGSON_EXT  0x00002000 /* Loongson EXTensions */
+#define MIPS_ASE_LOONGSON_EXT2 0x00004000 /* Loongson EXTensions R2 */
 
 #endif /* _ASM_CPU_H */
index 97a2806..2b7b567 100644 (file)
  * instruction, so the lower 16 bits must be zero.  Should be true on
  * any sane architecture; generic code does not use this assumption.
  */
-extern const unsigned long mips_io_port_base;
+extern unsigned long mips_io_port_base;
 
-/*
- * Gcc will generate code to load the value of mips_io_port_base after each
- * function call which may be fairly wasteful in some cases.  So we don't
- * play quite by the book.  We tell gcc mips_io_port_base is a long variable
- * which solves the code generation issue.  Now we need to violate the
- * aliasing rules a little to make initialization possible and finally we
- * will need the barrier() to fight side effects of the aliasing chat.
- * This trickery will eventually collapse under gcc's optimizer.  Oh well.
- */
 static inline void set_io_port_base(unsigned long base)
 {
-       * (unsigned long *) &mips_io_port_base = base;
-       barrier();
+       mips_io_port_base = base;
 }
 
 /*
@@ -262,11 +252,11 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset,
 #define ioremap_uc ioremap_nocache
 
 /*
- * ioremap_cachable -  map bus memory into CPU space
+ * ioremap_cache -     map bus memory into CPU space
  * @offset:        bus address of the memory
  * @size:          size of the resource to map
  *
- * ioremap_nocache performs a platform specific sequence of operations to
+ * ioremap_cache performs a platform specific sequence of operations to
  * make bus memory CPU accessible via the readb/readw/readl/writeb/
  * writew/writel functions and the other mmio helpers. The returned
  * address is not guaranteed to be usable directly as a virtual
@@ -276,9 +266,8 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset,
  * the CPU.  Also enables full write-combining.         Useful for some
  * memory-like regions on I/O busses.
  */
-#define ioremap_cachable(offset, size)                                 \
+#define ioremap_cache(offset, size)                                    \
        __ioremap_mode((offset), (size), _page_cachable_default)
-#define ioremap_cache ioremap_cachable
 
 /*
  * ioremap_wc     -   map bus memory into CPU space
index 0ef8893..f879be3 100644 (file)
@@ -98,6 +98,7 @@ enum bcm47xx_board {
        BCM47XX_BOARD_MOTOROLA_WR850GP,
        BCM47XX_BOARD_MOTOROLA_WR850GV2V3,
 
+       BCM47XX_BOARD_NETGEAR_R6200_V1,
        BCM47XX_BOARD_NETGEAR_WGR614V8,
        BCM47XX_BOARD_NETGEAR_WGR614V9,
        BCM47XX_BOARD_NETGEAR_WGR614_V10,
index a4f7986..513270c 100644 (file)
@@ -45,7 +45,6 @@
 #define cpu_has_ic_fills_f_dc  0
 #define cpu_has_64bits         1
 #define cpu_has_octeon_cache   1
-#define cpu_has_saa            octeon_has_saa()
 #define cpu_has_mips32r1       1
 #define cpu_has_mips32r2       1
 #define cpu_has_mips64r1       1
@@ -60,7 +59,6 @@
 
 #define cpu_has_rixi           (cpu_data[0].cputype != CPU_CAVIUM_OCTEON)
 
-#define ARCH_HAS_IRQ_PER_CPU   1
 #define ARCH_HAS_SPINLOCK_PREFETCH 1
 #define spin_lock_prefetch(x) prefetch(x)
 #define PREFETCH_STRIDE 128
 #define ARCH_HAS_USABLE_BUILTIN_POPCOUNT 1
 #endif
 
-static inline int octeon_has_saa(void)
-{
-       int id;
-       asm volatile ("mfc0 %0, $15,0" : "=r" (id));
-       return id >= 0x000d0300;
-}
-
 /*
  * The last 256MB are reserved for device to device mappings and the
  * BAR1 hole.
index 35c80be..2421411 100644 (file)
@@ -12,7 +12,6 @@
 #define R4600_V1_INDEX_ICACHEOP_WAR    0
 #define R4600_V1_HIT_CACHEOP_WAR       0
 #define R4600_V2_HIT_CACHEOP_WAR       0
-#define R5432_CP0_INTERRUPT_WAR                0
 #define BCM1250_M3_WAR                 0
 #define SIBYTE_1956_WAR                        0
 #define MIPS4K_ICACHE_REFILL_WAR       0
index 1c11310..00beb69 100644 (file)
@@ -32,7 +32,6 @@
 #define cpu_has_vtag_icache            0
 #define cpu_has_ic_fills_f_dc          0
 #define cpu_has_pindexed_dcache                0
-#define cpu_has_local_ebase            0
 #define cpu_icache_snoops_remote_store 1
 #define cpu_has_mips_4                 0
 #define cpu_has_mips_5                 0
index a1bc2e7..f0f4a35 100644 (file)
@@ -11,7 +11,6 @@
 #define R4600_V1_INDEX_ICACHEOP_WAR    0
 #define R4600_V1_HIT_CACHEOP_WAR       0
 #define R4600_V2_HIT_CACHEOP_WAR       0
-#define R5432_CP0_INTERRUPT_WAR                0
 #define BCM1250_M3_WAR                 0
 #define SIBYTE_1956_WAR                        0
 #define MIPS4K_ICACHE_REFILL_WAR       0
index fba6405..b48eb4a 100644 (file)
@@ -15,7 +15,6 @@
 #define R4600_V1_INDEX_ICACHEOP_WAR    1
 #define R4600_V1_HIT_CACHEOP_WAR       1
 #define R4600_V2_HIT_CACHEOP_WAR       1
-#define R5432_CP0_INTERRUPT_WAR                0
 #define BCM1250_M3_WAR                 0
 #define SIBYTE_1956_WAR                        0
 #define MIPS4K_ICACHE_REFILL_WAR       0
index 4ee0e4b..ef3efce 100644 (file)
@@ -11,7 +11,6 @@
 #define R4600_V1_INDEX_ICACHEOP_WAR    0
 #define R4600_V1_HIT_CACHEOP_WAR       0
 #define R4600_V2_HIT_CACHEOP_WAR       0
-#define R5432_CP0_INTERRUPT_WAR                0
 #define BCM1250_M3_WAR                 0
 #define SIBYTE_1956_WAR                        0
 #define MIPS4K_ICACHE_REFILL_WAR       0
index 4821c7b..61cd673 100644 (file)
@@ -11,7 +11,6 @@
 #define R4600_V1_INDEX_ICACHEOP_WAR    0
 #define R4600_V1_HIT_CACHEOP_WAR       0
 #define R4600_V2_HIT_CACHEOP_WAR       0
-#define R5432_CP0_INTERRUPT_WAR                0
 #define BCM1250_M3_WAR                 0
 #define SIBYTE_1956_WAR                        0
 #define MIPS4K_ICACHE_REFILL_WAR       0
index 9807ecd..e77b9d1 100644 (file)
@@ -11,7 +11,6 @@
 #define R4600_V1_INDEX_ICACHEOP_WAR    0
 #define R4600_V1_HIT_CACHEOP_WAR       0
 #define R4600_V2_HIT_CACHEOP_WAR       0
-#define R5432_CP0_INTERRUPT_WAR                0
 #define BCM1250_M3_WAR                 0
 #define SIBYTE_1956_WAR                        0
 #define MIPS4K_ICACHE_REFILL_WAR       0
diff --git a/arch/mips/include/asm/mach-jz4740/gpio.h b/arch/mips/include/asm/mach-jz4740/gpio.h
deleted file mode 100644 (file)
index 2092a35..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- *  Copyright (C) 2009, Lars-Peter Clausen <lars@metafoo.de>
- *  JZ4740 GPIO pin definitions
- */
-
-#ifndef _JZ_GPIO_H
-#define _JZ_GPIO_H
-
-#define JZ_GPIO_PORTA(x) ((x) + 32 * 0)
-#define JZ_GPIO_PORTB(x) ((x) + 32 * 1)
-#define JZ_GPIO_PORTC(x) ((x) + 32 * 2)
-#define JZ_GPIO_PORTD(x) ((x) + 32 * 3)
-
-#endif
diff --git a/arch/mips/include/asm/mach-jz4740/jz4740_fb.h b/arch/mips/include/asm/mach-jz4740/jz4740_fb.h
deleted file mode 100644 (file)
index e84a48f..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- *  Copyright (C) 2009, Lars-Peter Clausen <lars@metafoo.de>
- */
-
-#ifndef __ASM_MACH_JZ4740_JZ4740_FB_H__
-#define __ASM_MACH_JZ4740_JZ4740_FB_H__
-
-#include <linux/fb.h>
-
-enum jz4740_fb_lcd_type {
-       JZ_LCD_TYPE_GENERIC_16_BIT = 0,
-       JZ_LCD_TYPE_GENERIC_18_BIT = 0 | (1 << 4),
-       JZ_LCD_TYPE_SPECIAL_TFT_1 = 1,
-       JZ_LCD_TYPE_SPECIAL_TFT_2 = 2,
-       JZ_LCD_TYPE_SPECIAL_TFT_3 = 3,
-       JZ_LCD_TYPE_NON_INTERLACED_CCIR656 = 5,
-       JZ_LCD_TYPE_INTERLACED_CCIR656 = 7,
-       JZ_LCD_TYPE_SINGLE_COLOR_STN = 8,
-       JZ_LCD_TYPE_SINGLE_MONOCHROME_STN = 9,
-       JZ_LCD_TYPE_DUAL_COLOR_STN = 10,
-       JZ_LCD_TYPE_DUAL_MONOCHROME_STN = 11,
-       JZ_LCD_TYPE_8BIT_SERIAL = 12,
-};
-
-#define JZ4740_FB_SPECIAL_TFT_CONFIG(start, stop) (((start) << 16) | (stop))
-
-/*
-* width: width of the lcd display in mm
-* height: height of the lcd display in mm
-* num_modes: size of modes
-* modes: list of valid video modes
-* bpp: bits per pixel for the lcd
-* lcd_type: lcd type
-*/
-
-struct jz4740_fb_platform_data {
-       unsigned int width;
-       unsigned int height;
-
-       size_t num_modes;
-       struct fb_videomode *modes;
-
-       unsigned int bpp;
-       enum jz4740_fb_lcd_type lcd_type;
-
-       struct {
-               uint32_t spl;
-               uint32_t cls;
-               uint32_t ps;
-               uint32_t rev;
-       } special_tft_config;
-
-       unsigned pixclk_falling_edge:1;
-       unsigned date_enable_active_low:1;
-};
-
-#endif
diff --git a/arch/mips/include/asm/mach-jz4740/jz4740_mmc.h b/arch/mips/include/asm/mach-jz4740/jz4740_mmc.h
deleted file mode 100644 (file)
index 9a7de47..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LINUX_MMC_JZ4740_MMC
-#define __LINUX_MMC_JZ4740_MMC
-
-struct jz4740_mmc_platform_data {
-       unsigned card_detect_active_low:1;
-       unsigned read_only_active_low:1;
-
-       unsigned data_1bit:1;
-};
-
-#endif
diff --git a/arch/mips/include/asm/mach-jz4740/platform.h b/arch/mips/include/asm/mach-jz4740/platform.h
deleted file mode 100644 (file)
index 241270d..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- *  Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
- *  JZ4740 platform device definitions
- */
-
-
-#ifndef __JZ4740_PLATFORM_H
-#define __JZ4740_PLATFORM_H
-
-#include <linux/platform_device.h>
-
-extern struct platform_device jz4740_udc_device;
-extern struct platform_device jz4740_udc_xceiv_device;
-extern struct platform_device jz4740_mmc_device;
-extern struct platform_device jz4740_i2c_device;
-extern struct platform_device jz4740_nand_device;
-extern struct platform_device jz4740_framebuffer_device;
-extern struct platform_device jz4740_i2s_device;
-extern struct platform_device jz4740_pcm_device;
-extern struct platform_device jz4740_codec_device;
-extern struct platform_device jz4740_adc_device;
-extern struct platform_device jz4740_pwm_device;
-extern struct platform_device jz4740_dma_device;
-
-#endif
index 581915c..4aca25f 100644 (file)
@@ -43,7 +43,6 @@
 #define cpu_has_vint           0
 #define cpu_has_vtag_icache    0
 #define cpu_has_watch          1
-#define cpu_has_local_ebase    0
 
 #ifdef CONFIG_CPU_LOONGSON3
 #define cpu_has_wsbh           1
index d068fc4..d62d2ff 100644 (file)
@@ -11,7 +11,6 @@
 #define R4600_V1_INDEX_ICACHEOP_WAR    0
 #define R4600_V1_HIT_CACHEOP_WAR       0
 #define R4600_V2_HIT_CACHEOP_WAR       0
-#define R5432_CP0_INTERRUPT_WAR                0
 #define BCM1250_M3_WAR                 0
 #define SIBYTE_1956_WAR                        0
 #define MIPS4K_ICACHE_REFILL_WAR       1
index a60bf9d..31c546f 100644 (file)
@@ -11,7 +11,6 @@
 #define R4600_V1_INDEX_ICACHEOP_WAR    0
 #define R4600_V1_HIT_CACHEOP_WAR       0
 #define R4600_V2_HIT_CACHEOP_WAR       0
-#define R5432_CP0_INTERRUPT_WAR                0
 #define BCM1250_M3_WAR                 0
 #define SIBYTE_1956_WAR                        0
 #define MIPS4K_ICACHE_REFILL_WAR       0
index 1bfd489..af430d2 100644 (file)
@@ -11,7 +11,6 @@
 #define R4600_V1_INDEX_ICACHEOP_WAR    0
 #define R4600_V1_HIT_CACHEOP_WAR       0
 #define R4600_V2_HIT_CACHEOP_WAR       0
-#define R5432_CP0_INTERRUPT_WAR                0
 #define BCM1250_M3_WAR                 0
 #define SIBYTE_1956_WAR                        0
 #define MIPS4K_ICACHE_REFILL_WAR       1
index a3dde98..eca16d1 100644 (file)
@@ -15,7 +15,6 @@
 #define R4600_V1_INDEX_ICACHEOP_WAR    0
 #define R4600_V1_HIT_CACHEOP_WAR       0
 #define R4600_V2_HIT_CACHEOP_WAR       1
-#define R5432_CP0_INTERRUPT_WAR                0
 #define BCM1250_M3_WAR                 0
 #define SIBYTE_1956_WAR                        0
 #define MIPS4K_ICACHE_REFILL_WAR       0
index 520f8fc..4755b61 100644 (file)
@@ -11,7 +11,6 @@
 #define R4600_V1_INDEX_ICACHEOP_WAR    0
 #define R4600_V1_HIT_CACHEOP_WAR       0
 #define R4600_V2_HIT_CACHEOP_WAR       0
-#define R5432_CP0_INTERRUPT_WAR                0
 
 #if defined(CONFIG_SB1_PASS_2_WORKAROUNDS)
 
index a8e2c58..445abb4 100644 (file)
@@ -11,7 +11,6 @@
 #define R4600_V1_INDEX_ICACHEOP_WAR    0
 #define R4600_V1_HIT_CACHEOP_WAR       0
 #define R4600_V2_HIT_CACHEOP_WAR       0
-#define R5432_CP0_INTERRUPT_WAR                0
 #define BCM1250_M3_WAR                 0
 #define SIBYTE_1956_WAR                        0
 #define MIPS4K_ICACHE_REFILL_WAR       0
index 1e6966e..bdbdc19 100644 (file)
 #define MIPS_CONF7_IAR         (_ULCAST_(1) << 10)
 #define MIPS_CONF7_AR          (_ULCAST_(1) << 16)
 
+/* Ingenic Config7 bits */
+#define MIPS_CONF7_BTB_LOOP_EN (_ULCAST_(1) << 4)
+
 /* Config7 Bits specific to MIPS Technologies. */
 
 /* Performance counters implemented Per TC */
@@ -2813,6 +2816,7 @@ __BUILD_SET_C0(status)
 __BUILD_SET_C0(cause)
 __BUILD_SET_C0(config)
 __BUILD_SET_C0(config5)
+__BUILD_SET_C0(config7)
 __BUILD_SET_C0(intcontrol)
 __BUILD_SET_C0(intctl)
 __BUILD_SET_C0(srsmap)
index 6dc0b21..ed70994 100644 (file)
@@ -103,22 +103,16 @@ search_module_dbetables(unsigned long addr)
 #define MODULE_PROC_FAMILY "TX39XX "
 #elif defined CONFIG_CPU_VR41XX
 #define MODULE_PROC_FAMILY "VR41XX "
-#elif defined CONFIG_CPU_R4300
-#define MODULE_PROC_FAMILY "R4300 "
 #elif defined CONFIG_CPU_R4X00
 #define MODULE_PROC_FAMILY "R4X00 "
 #elif defined CONFIG_CPU_TX49XX
 #define MODULE_PROC_FAMILY "TX49XX "
 #elif defined CONFIG_CPU_R5000
 #define MODULE_PROC_FAMILY "R5000 "
-#elif defined CONFIG_CPU_R5432
-#define MODULE_PROC_FAMILY "R5432 "
 #elif defined CONFIG_CPU_R5500
 #define MODULE_PROC_FAMILY "R5500 "
 #elif defined CONFIG_CPU_NEVADA
 #define MODULE_PROC_FAMILY "NEVADA "
-#elif defined CONFIG_CPU_R8000
-#define MODULE_PROC_FAMILY "R8000 "
 #elif defined CONFIG_CPU_R10000
 #define MODULE_PROC_FAMILY "R10000 "
 #elif defined CONFIG_CPU_RM7000
index 6048150..a2e2876 100644 (file)
@@ -51,7 +51,7 @@ extern void octeon_setup_delays(void);
 extern void octeon_io_clk_delay(unsigned long);
 
 #define OCTEON_ARGV_MAX_ARGS   64
-#define OCTOEN_SERIAL_LEN      20
+#define OCTEON_SERIAL_LEN      20
 
 struct octeon_boot_descriptor {
 #ifdef __BIG_ENDIAN_BITFIELD
@@ -102,7 +102,7 @@ struct octeon_boot_descriptor {
        uint16_t chip_type;
        uint8_t chip_rev_major;
        uint8_t chip_rev_minor;
-       char board_serial_number[OCTOEN_SERIAL_LEN];
+       char board_serial_number[OCTEON_SERIAL_LEN];
        uint8_t mac_addr_base[6];
        uint8_t mac_addr_count;
        uint64_t cvmx_desc_vaddr;
index 4360998..6f48649 100644 (file)
@@ -108,7 +108,6 @@ extern unsigned long PCIBIOS_MIN_MEM;
 
 #define HAVE_PCI_MMAP
 #define ARCH_GENERIC_PCI_MMAP_RESOURCE
-#define HAVE_ARCH_PCI_RESOURCE_TO_USER
 
 /*
  * Dynamic DMA mapping stuff.
index aa16b85..1668423 100644 (file)
@@ -54,7 +54,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 #define __pte_free_tlb(tlb,pte,address)                        \
 do {                                                   \
-       pgtable_page_dtor(pte);                         \
+       pgtable_pte_page_dtor(pte);                     \
        tlb_remove_page((tlb), pte);                    \
 } while (0)
 
@@ -105,8 +105,6 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 
 #endif /* __PAGETABLE_PUD_FOLDED */
 
-#define check_pgt_cache()      do { } while (0)
-
 extern void pagetable_init(void);
 
 #endif /* _ASM_PGALLOC_H */
index 74afe8c..ba96714 100644 (file)
 #include <asm/highmem.h>
 #endif
 
+/*
+ * Regarding 32-bit MIPS huge page support (and the tradeoff it entails):
+ *
+ *  We use the same huge page sizes as 64-bit MIPS. Assuming a 4KB page size,
+ * our 2-level table layout would normally have a PGD entry cover a contiguous
+ * 4MB virtual address region (pointing to a 4KB PTE page of 1,024 32-bit pte_t
+ * pointers, each pointing to a 4KB physical page). The problem is that 4MB,
+ * spanning both halves of a TLB EntryLo0,1 pair, requires 2MB hardware page
+ * support, not one of the standard supported sizes (1MB,4MB,16MB,...).
+ *  To correct for this, when huge pages are enabled, we halve the number of
+ * pointers a PTE page holds, making its last half go to waste. Correspondingly,
+ * we double the number of PGD pages. Overall, page table memory overhead
+ * increases to match 64-bit MIPS, but PTE lookups remain CPU cache-friendly.
+ *
+ * NOTE: We don't yet support huge pages if extended-addressing is enabled
+ *       (i.e. EVA, XPA, 36-bit Alchemy/Netlogic).
+ */
+
 extern int temp_tlb_entry;
 
 /*
@@ -44,7 +62,12 @@ extern int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1,
  */
 
 /* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT    (2 * PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2)
+#if defined(CONFIG_MIPS_HUGE_TLB_SUPPORT) && !defined(CONFIG_PHYS_ADDR_T_64BIT)
+# define PGDIR_SHIFT   (2 * PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2 - 1)
+#else
+# define PGDIR_SHIFT   (2 * PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2)
+#endif
+
 #define PGDIR_SIZE     (1UL << PGDIR_SHIFT)
 #define PGDIR_MASK     (~(PGDIR_SIZE-1))
 
@@ -52,14 +75,23 @@ extern int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1,
  * Entries per page directory level: we use two-level, so
  * we don't really have any PUD/PMD directory physically.
  */
-#define __PGD_ORDER    (32 - 3 * PAGE_SHIFT + PGD_T_LOG2 + PTE_T_LOG2)
+#if defined(CONFIG_MIPS_HUGE_TLB_SUPPORT) && !defined(CONFIG_PHYS_ADDR_T_64BIT)
+# define __PGD_ORDER   (32 - 3 * PAGE_SHIFT + PGD_T_LOG2 + PTE_T_LOG2 + 1)
+#else
+# define __PGD_ORDER   (32 - 3 * PAGE_SHIFT + PGD_T_LOG2 + PTE_T_LOG2)
+#endif
+
 #define PGD_ORDER      (__PGD_ORDER >= 0 ? __PGD_ORDER : 0)
 #define PUD_ORDER      aieeee_attempt_to_allocate_pud
-#define PMD_ORDER      1
+#define PMD_ORDER      aieeee_attempt_to_allocate_pmd
 #define PTE_ORDER      0
 
 #define PTRS_PER_PGD   (USER_PTRS_PER_PGD * 2)
-#define PTRS_PER_PTE   ((PAGE_SIZE << PTE_ORDER) / sizeof(pte_t))
+#if defined(CONFIG_MIPS_HUGE_TLB_SUPPORT) && !defined(CONFIG_PHYS_ADDR_T_64BIT)
+# define PTRS_PER_PTE  ((PAGE_SIZE << PTE_ORDER) / sizeof(pte_t) / 2)
+#else
+# define PTRS_PER_PTE  ((PAGE_SIZE << PTE_ORDER) / sizeof(pte_t))
+#endif
 
 #define USER_PTRS_PER_PGD      (0x80000000UL/PGDIR_SIZE)
 #define FIRST_USER_ADDRESS     0UL
@@ -87,7 +119,7 @@ extern int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1,
 
 extern void load_pgd(unsigned long pg_dir);
 
-extern pte_t invalid_pte_table[PAGE_SIZE/sizeof(pte_t)];
+extern pte_t invalid_pte_table[PTRS_PER_PTE];
 
 /*
  * Empty pgd/pmd entries point to the invalid_pte_table.
@@ -97,7 +129,19 @@ static inline int pmd_none(pmd_t pmd)
        return pmd_val(pmd) == (unsigned long) invalid_pte_table;
 }
 
-#define pmd_bad(pmd)           (pmd_val(pmd) & ~PAGE_MASK)
+static inline int pmd_bad(pmd_t pmd)
+{
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+       /* pmd_huge(pmd) but inline */
+       if (unlikely(pmd_val(pmd) & _PAGE_HUGE))
+               return 0;
+#endif
+
+       if (unlikely(pmd_val(pmd) & ~PAGE_MASK))
+               return 1;
+
+       return 0;
+}
 
 static inline int pmd_present(pmd_t pmd)
 {
@@ -146,6 +190,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
 #else
 #define pte_pfn(x)             ((unsigned long)((x).pte >> _PFN_SHIFT))
 #define pfn_pte(pfn, prot)     __pte(((unsigned long long)(pfn) << _PFN_SHIFT) | pgprot_val(prot))
+#define pfn_pmd(pfn, prot)     __pmd(((unsigned long long)(pfn) << _PFN_SHIFT) | pgprot_val(prot))
 #endif
 #endif /* defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) */
 
@@ -159,6 +204,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
 #define pgd_index(address)     (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#define pmd_index(address)     (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
 
 /* to find an entry in a page-table-directory */
 #define pgd_offset(mm, addr)   ((mm)->pgd + pgd_index(addr))
@@ -175,7 +221,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
        ((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address))
 #define pte_unmap(pte) ((void)(pte))
 
-#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
+#if defined(CONFIG_CPU_R3K_TLB)
 
 /* Swap entries must have VALID bit cleared. */
 #define __swp_type(x)                  (((x).val >> 10) & 0x1f)
@@ -220,6 +266,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
 
 #endif /* defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) */
 
-#endif /* defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) */
+#endif /* defined(CONFIG_CPU_R3K_TLB) */
 
 #endif /* _ASM_PGTABLE_32_H */
index f88a48c..4da79b8 100644 (file)
@@ -52,6 +52,9 @@ enum pgtable_bits {
        _PAGE_WRITE_SHIFT,
        _PAGE_ACCESSED_SHIFT,
        _PAGE_MODIFIED_SHIFT,
+#if defined(CONFIG_ARCH_HAS_PTE_SPECIAL)
+       _PAGE_SPECIAL_SHIFT,
+#endif
 };
 
 /*
@@ -78,9 +81,12 @@ enum pgtable_bits {
        _PAGE_WRITE_SHIFT,
        _PAGE_ACCESSED_SHIFT,
        _PAGE_MODIFIED_SHIFT,
+#if defined(CONFIG_ARCH_HAS_PTE_SPECIAL)
+       _PAGE_SPECIAL_SHIFT,
+#endif
 };
 
-#elif defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
+#elif defined(CONFIG_CPU_R3K_TLB)
 
 /* Page table bits used for r3k systems */
 enum pgtable_bits {
@@ -90,6 +96,9 @@ enum pgtable_bits {
        _PAGE_WRITE_SHIFT,
        _PAGE_ACCESSED_SHIFT,
        _PAGE_MODIFIED_SHIFT,
+#if defined(CONFIG_ARCH_HAS_PTE_SPECIAL)
+       _PAGE_SPECIAL_SHIFT,
+#endif
 
        /* Used by TLB hardware (placed in EntryLo) */
        _PAGE_GLOBAL_SHIFT = 8,
@@ -110,9 +119,12 @@ enum pgtable_bits {
        _PAGE_WRITE_SHIFT,
        _PAGE_ACCESSED_SHIFT,
        _PAGE_MODIFIED_SHIFT,
-#if defined(CONFIG_64BIT) && defined(CONFIG_MIPS_HUGE_TLB_SUPPORT)
+#if defined(CONFIG_MIPS_HUGE_TLB_SUPPORT)
        _PAGE_HUGE_SHIFT,
 #endif
+#if defined(CONFIG_ARCH_HAS_PTE_SPECIAL)
+       _PAGE_SPECIAL_SHIFT,
+#endif
 
        /* Used by TLB hardware (placed in EntryLo*) */
 #if defined(CONFIG_CPU_HAS_RIXI)
@@ -132,9 +144,14 @@ enum pgtable_bits {
 #define _PAGE_WRITE            (1 << _PAGE_WRITE_SHIFT)
 #define _PAGE_ACCESSED         (1 << _PAGE_ACCESSED_SHIFT)
 #define _PAGE_MODIFIED         (1 << _PAGE_MODIFIED_SHIFT)
-#if defined(CONFIG_64BIT) && defined(CONFIG_MIPS_HUGE_TLB_SUPPORT)
+#if defined(CONFIG_MIPS_HUGE_TLB_SUPPORT)
 # define _PAGE_HUGE            (1 << _PAGE_HUGE_SHIFT)
 #endif
+#if defined(CONFIG_ARCH_HAS_PTE_SPECIAL)
+# define _PAGE_SPECIAL         (1 << _PAGE_SPECIAL_SHIFT)
+#else
+# define _PAGE_SPECIAL         0
+#endif
 
 /* Used by TLB hardware (placed in EntryLo*) */
 #if defined(CONFIG_XPA)
@@ -146,7 +163,7 @@ enum pgtable_bits {
 #define _PAGE_GLOBAL           (1 << _PAGE_GLOBAL_SHIFT)
 #define _PAGE_VALID            (1 << _PAGE_VALID_SHIFT)
 #define _PAGE_DIRTY            (1 << _PAGE_DIRTY_SHIFT)
-#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
+#if defined(CONFIG_CPU_R3K_TLB)
 # define _CACHE_UNCACHED       (1 << _CACHE_UNCACHED_SHIFT)
 # define _CACHE_MASK           _CACHE_UNCACHED
 # define _PFN_SHIFT            PAGE_SHIFT
@@ -204,7 +221,7 @@ static inline uint64_t pte_to_entrylo(unsigned long pte_val)
 /*
  * Cache attributes
  */
-#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
+#if defined(CONFIG_CPU_R3K_TLB)
 
 #define _CACHE_CACHABLE_NONCOHERENT 0
 #define _CACHE_UNCACHED_ACCELERATED _CACHE_UNCACHED
@@ -216,13 +233,6 @@ static inline uint64_t pte_to_entrylo(unsigned long pte_val)
 
 #define _CACHE_CACHABLE_NONCOHERENT (5<<_CACHE_SHIFT)
 
-#elif defined(CONFIG_CPU_LOONGSON3)
-
-/* Using COHERENT flag for NONCOHERENT doesn't hurt. */
-
-#define _CACHE_CACHABLE_NONCOHERENT (3<<_CACHE_SHIFT)  /* LOONGSON       */
-#define _CACHE_CACHABLE_COHERENT    (3<<_CACHE_SHIFT)  /* LOONGSON-3     */
-
 #elif defined(CONFIG_MACH_INGENIC)
 
 /* Ingenic uses the WA bit to achieve write-combine memory writes */
index 7d27194..f85bd5b 100644 (file)
@@ -199,7 +199,7 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
 static inline void set_pte(pte_t *ptep, pte_t pteval)
 {
        *ptep = pteval;
-#if !defined(CONFIG_CPU_R3000) && !defined(CONFIG_CPU_TX39XX)
+#if !defined(CONFIG_CPU_R3K_TLB)
        if (pte_val(pteval) & _PAGE_GLOBAL) {
                pte_t *buddy = ptep_buddy(ptep);
                /*
@@ -218,7 +218,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
        htw_stop();
-#if !defined(CONFIG_CPU_R3000) && !defined(CONFIG_CPU_TX39XX)
+#if !defined(CONFIG_CPU_R3K_TLB)
        /* Preserve global status for the pair */
        if (pte_val(*ptep_buddy(ptep)) & _PAGE_GLOBAL)
                set_pte_at(mm, addr, ptep, __pte(_PAGE_GLOBAL));
@@ -277,6 +277,7 @@ extern pgd_t swapper_pg_dir[];
 static inline int pte_write(pte_t pte) { return pte.pte_low & _PAGE_WRITE; }
 static inline int pte_dirty(pte_t pte) { return pte.pte_low & _PAGE_MODIFIED; }
 static inline int pte_young(pte_t pte) { return pte.pte_low & _PAGE_ACCESSED; }
+static inline int pte_special(pte_t pte) { return pte.pte_low & _PAGE_SPECIAL; }
 
 static inline pte_t pte_wrprotect(pte_t pte)
 {
@@ -337,10 +338,17 @@ static inline pte_t pte_mkyoung(pte_t pte)
        }
        return pte;
 }
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+       pte.pte_low |= _PAGE_SPECIAL;
+       return pte;
+}
 #else
 static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; }
 static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; }
 static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
+static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; }
 
 static inline pte_t pte_wrprotect(pte_t pte)
 {
@@ -384,6 +392,12 @@ static inline pte_t pte_mkyoung(pte_t pte)
        return pte;
 }
 
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+       pte_val(pte) |= _PAGE_SPECIAL;
+       return pte;
+}
+
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
 static inline int pte_huge(pte_t pte)  { return pte_val(pte) & _PAGE_HUGE; }
 
@@ -394,8 +408,6 @@ static inline pte_t pte_mkhuge(pte_t pte)
 }
 #endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */
 #endif
-static inline int pte_special(pte_t pte)       { return 0; }
-static inline pte_t pte_mkspecial(pte_t pte)   { return pte; }
 
 /*
  * Macro to mark a page protection value as "uncacheable".         Note
@@ -649,9 +661,4 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
 #endif /* _ASM_PGTABLE_H */
index aca909b..fba18d4 100644 (file)
 
 extern unsigned int vced_count, vcei_count;
 
-/*
- * MIPS does have an arch_pick_mmap_layout()
- */
-#define HAVE_ARCH_PICK_MMAP_LAYOUT 1
-
 #ifdef CONFIG_32BIT
 #ifdef CONFIG_KVM_GUEST
 /* User space process size is limited to 1GB in KVM Guest Mode */
index 83bb439..25fa651 100644 (file)
@@ -54,7 +54,7 @@ static inline void mips_syscall_update_nr(struct task_struct *task,
                task_thread_info(task)->syscall = regs->regs[2];
 }
 
-static inline unsigned long mips_get_syscall_arg(unsigned long *arg,
+static inline void mips_get_syscall_arg(unsigned long *arg,
        struct task_struct *task, struct pt_regs *regs, unsigned int n)
 {
        unsigned long usp __maybe_unused = regs->regs[29];
@@ -63,23 +63,24 @@ static inline unsigned long mips_get_syscall_arg(unsigned long *arg,
        case 0: case 1: case 2: case 3:
                *arg = regs->regs[4 + n];
 
-               return 0;
+               return;
 
 #ifdef CONFIG_32BIT
        case 4: case 5: case 6: case 7:
-               return get_user(*arg, (int *)usp + n);
+               get_user(*arg, (int *)usp + n);
+               return;
 #endif
 
 #ifdef CONFIG_64BIT
        case 4: case 5: case 6: case 7:
 #ifdef CONFIG_MIPS32_O32
                if (test_tsk_thread_flag(task, TIF_32BIT_REGS))
-                       return get_user(*arg, (int *)usp + n);
+                       get_user(*arg, (int *)usp + n);
                else
 #endif
                        *arg = regs->regs[4 + n];
 
-               return 0;
+               return;
 #endif
 
        default:
@@ -126,21 +127,13 @@ static inline void syscall_get_arguments(struct task_struct *task,
 {
        unsigned int i = 0;
        unsigned int n = 6;
-       int ret;
 
        /* O32 ABI syscall() */
        if (mips_syscall_is_indirect(task, regs))
                i++;
 
        while (n--)
-               ret |= mips_get_syscall_arg(args++, task, regs, i++);
-
-       /*
-        * No way to communicate an error because this is a void function.
-        */
-#if 0
-       return ret;
-#endif
+               mips_get_syscall_arg(args++, task, regs, i++);
 }
 
 extern const unsigned long sys_call_table[];
index a013fa4..cc7b516 100644 (file)
@@ -8,6 +8,7 @@
 #define __ASM_VDSO_H
 
 #include <linux/mm_types.h>
+#include <vdso/datapage.h>
 
 #include <asm/barrier.h>
 
@@ -49,84 +50,9 @@ extern struct mips_vdso_image vdso_image_o32;
 extern struct mips_vdso_image vdso_image_n32;
 #endif
 
-/**
- * union mips_vdso_data - Data provided by the kernel for the VDSO.
- * @xtime_sec:         Current real time (seconds part).
- * @xtime_nsec:                Current real time (nanoseconds part, shifted).
- * @wall_to_mono_sec:  Wall-to-monotonic offset (seconds part).
- * @wall_to_mono_nsec: Wall-to-monotonic offset (nanoseconds part).
- * @seq_count:         Counter to synchronise updates (odd = updating).
- * @cs_shift:          Clocksource shift value.
- * @clock_mode:                Clocksource to use for time functions.
- * @cs_mult:           Clocksource multiplier value.
- * @cs_cycle_last:     Clock cycle value at last update.
- * @cs_mask:           Clocksource mask value.
- * @tz_minuteswest:    Minutes west of Greenwich (from timezone).
- * @tz_dsttime:                Type of DST correction (from timezone).
- *
- * This structure contains data needed by functions within the VDSO. It is
- * populated by the kernel and mapped read-only into user memory. The time
- * fields are mirrors of internal data from the timekeeping infrastructure.
- *
- * Note: Care should be taken when modifying as the layout must remain the same
- * for both 64- and 32-bit (for 32-bit userland on 64-bit kernel).
- */
 union mips_vdso_data {
-       struct {
-               u64 xtime_sec;
-               u64 xtime_nsec;
-               u64 wall_to_mono_sec;
-               u64 wall_to_mono_nsec;
-               u32 seq_count;
-               u32 cs_shift;
-               u8 clock_mode;
-               u32 cs_mult;
-               u64 cs_cycle_last;
-               u64 cs_mask;
-               s32 tz_minuteswest;
-               s32 tz_dsttime;
-       };
-
+       struct vdso_data data[CS_BASES];
        u8 page[PAGE_SIZE];
 };
 
-static inline u32 vdso_data_read_begin(const union mips_vdso_data *data)
-{
-       u32 seq;
-
-       while (true) {
-               seq = READ_ONCE(data->seq_count);
-               if (likely(!(seq & 1))) {
-                       /* Paired with smp_wmb() in vdso_data_write_*(). */
-                       smp_rmb();
-                       return seq;
-               }
-
-               cpu_relax();
-       }
-}
-
-static inline bool vdso_data_read_retry(const union mips_vdso_data *data,
-                                       u32 start_seq)
-{
-       /* Paired with smp_wmb() in vdso_data_write_*(). */
-       smp_rmb();
-       return unlikely(data->seq_count != start_seq);
-}
-
-static inline void vdso_data_write_begin(union mips_vdso_data *data)
-{
-       ++data->seq_count;
-
-       /* Ensure sequence update is written before other data page values. */
-       smp_wmb();
-}
-
-static inline void vdso_data_write_end(union mips_vdso_data *data)
-{
-       /* Ensure data values are written before updating sequence again. */
-       smp_wmb();
-       ++data->seq_count;
-}
-
 #endif /* __ASM_VDSO_H */
diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h
new file mode 100644 (file)
index 0000000..e78462e
--- /dev/null
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2018 ARM Limited
+ * Copyright (C) 2015 Imagination Technologies
+ * Author: Alex Smith <alex.smith@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+#include <linux/time.h>
+
+#include <asm/vdso/vdso.h>
+#include <asm/clocksource.h>
+#include <asm/io.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+#define VDSO_HAS_CLOCK_GETRES          1
+
+#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
+
+static __always_inline long gettimeofday_fallback(
+                               struct __kernel_old_timeval *_tv,
+                               struct timezone *_tz)
+{
+       register struct timezone *tz asm("a1") = _tz;
+       register struct __kernel_old_timeval *tv asm("a0") = _tv;
+       register long ret asm("v0");
+       register long nr asm("v0") = __NR_gettimeofday;
+       register long error asm("a3");
+
+       asm volatile(
+       "       syscall\n"
+       : "=r" (ret), "=r" (error)
+       : "r" (tv), "r" (tz), "r" (nr)
+       : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
+         "$14", "$15", "$24", "$25", "hi", "lo", "memory");
+
+       return error ? -ret : ret;
+}
+
+#else
+
+static __always_inline long gettimeofday_fallback(
+                               struct __kernel_old_timeval *_tv,
+                               struct timezone *_tz)
+{
+       return -1;
+}
+
+#endif
+
+static __always_inline long clock_gettime_fallback(
+                                       clockid_t _clkid,
+                                       struct __kernel_timespec *_ts)
+{
+       register struct __kernel_timespec *ts asm("a1") = _ts;
+       register clockid_t clkid asm("a0") = _clkid;
+       register long ret asm("v0");
+#if _MIPS_SIM == _MIPS_SIM_ABI64
+       register long nr asm("v0") = __NR_clock_gettime;
+#else
+       register long nr asm("v0") = __NR_clock_gettime64;
+#endif
+       register long error asm("a3");
+
+       asm volatile(
+       "       syscall\n"
+       : "=r" (ret), "=r" (error)
+       : "r" (clkid), "r" (ts), "r" (nr)
+       : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
+         "$14", "$15", "$24", "$25", "hi", "lo", "memory");
+
+       return error ? -ret : ret;
+}
+
+static __always_inline int clock_getres_fallback(
+                                       clockid_t _clkid,
+                                       struct __kernel_timespec *_ts)
+{
+       register struct __kernel_timespec *ts asm("a1") = _ts;
+       register clockid_t clkid asm("a0") = _clkid;
+       register long ret asm("v0");
+#if _MIPS_SIM == _MIPS_SIM_ABI64
+       register long nr asm("v0") = __NR_clock_getres;
+#else
+       register long nr asm("v0") = __NR_clock_getres_time64;
+#endif
+       register long error asm("a3");
+
+       asm volatile(
+       "       syscall\n"
+       : "=r" (ret), "=r" (error)
+       : "r" (clkid), "r" (ts), "r" (nr)
+       : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
+         "$14", "$15", "$24", "$25", "hi", "lo", "memory");
+
+       return error ? -ret : ret;
+}
+
+#if _MIPS_SIM != _MIPS_SIM_ABI64
+
+#define VDSO_HAS_32BIT_FALLBACK        1
+
+static __always_inline long clock_gettime32_fallback(
+                                       clockid_t _clkid,
+                                       struct old_timespec32 *_ts)
+{
+       register struct old_timespec32 *ts asm("a1") = _ts;
+       register clockid_t clkid asm("a0") = _clkid;
+       register long ret asm("v0");
+       register long nr asm("v0") = __NR_clock_gettime;
+       register long error asm("a3");
+
+       asm volatile(
+       "       syscall\n"
+       : "=r" (ret), "=r" (error)
+       : "r" (clkid), "r" (ts), "r" (nr)
+       : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
+         "$14", "$15", "$24", "$25", "hi", "lo", "memory");
+
+       return error ? -ret : ret;
+}
+
+static __always_inline int clock_getres32_fallback(
+                                       clockid_t _clkid,
+                                       struct old_timespec32 *_ts)
+{
+       register struct old_timespec32 *ts asm("a1") = _ts;
+       register clockid_t clkid asm("a0") = _clkid;
+       register long ret asm("v0");
+       register long nr asm("v0") = __NR_clock_getres;
+       register long error asm("a3");
+
+       asm volatile(
+       "       syscall\n"
+       : "=r" (ret), "=r" (error)
+       : "r" (clkid), "r" (ts), "r" (nr)
+       : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
+         "$14", "$15", "$24", "$25", "hi", "lo", "memory");
+
+       return error ? -ret : ret;
+}
+#endif
+
+#ifdef CONFIG_CSRC_R4K
+
+static __always_inline u64 read_r4k_count(void)
+{
+       unsigned int count;
+
+       __asm__ __volatile__(
+       "       .set push\n"
+       "       .set mips32r2\n"
+       "       rdhwr   %0, $2\n"
+       "       .set pop\n"
+       : "=r" (count));
+
+       return count;
+}
+
+#endif
+
+#ifdef CONFIG_CLKSRC_MIPS_GIC
+
+static __always_inline u64 read_gic_count(const struct vdso_data *data)
+{
+       void __iomem *gic = get_gic(data);
+       u32 hi, hi2, lo;
+
+       do {
+               hi = __raw_readl(gic + sizeof(lo));
+               lo = __raw_readl(gic);
+               hi2 = __raw_readl(gic + sizeof(lo));
+       } while (hi2 != hi);
+
+       return (((u64)hi) << 32) + lo;
+}
+
+#endif
+
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+{
+#ifdef CONFIG_CLKSRC_MIPS_GIC
+       const struct vdso_data *data = get_vdso_data();
+#endif
+       u64 cycle_now;
+
+       switch (clock_mode) {
+#ifdef CONFIG_CSRC_R4K
+       case VDSO_CLOCK_R4K:
+               cycle_now = read_r4k_count();
+               break;
+#endif
+#ifdef CONFIG_CLKSRC_MIPS_GIC
+       case VDSO_CLOCK_GIC:
+               cycle_now = read_gic_count(data);
+               break;
+#endif
+       default:
+               cycle_now = 0;
+               break;
+       }
+
+       return cycle_now;
+}
+
+static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+{
+       return get_vdso_data();
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
similarity index 76%
rename from arch/mips/vdso/vdso.h
rename to arch/mips/include/asm/vdso/vdso.h
index b65b169..737ddfc 100644 (file)
@@ -6,17 +6,6 @@
 
 #include <asm/sgidefs.h>
 
-#if _MIPS_SIM != _MIPS_SIM_ABI64 && defined(CONFIG_64BIT)
-
-/* Building 32-bit VDSO for the 64-bit kernel. Fake a 32-bit Kconfig. */
-#define BUILD_VDSO32_64
-#undef CONFIG_64BIT
-#define CONFIG_32BIT 1
-#ifndef __ASSEMBLY__
-#include <asm-generic/atomic64.h>
-#endif
-#endif
-
 #ifndef __ASSEMBLY__
 
 #include <asm/asm.h>
@@ -69,14 +58,14 @@ static inline unsigned long get_vdso_base(void)
        return addr;
 }
 
-static inline const union mips_vdso_data *get_vdso_data(void)
+static inline const struct vdso_data *get_vdso_data(void)
 {
-       return (const union mips_vdso_data *)(get_vdso_base() - PAGE_SIZE);
+       return (const struct vdso_data *)(get_vdso_base() - PAGE_SIZE);
 }
 
 #ifdef CONFIG_CLKSRC_MIPS_GIC
 
-static inline void __iomem *get_gic(const union mips_vdso_data *data)
+static inline void __iomem *get_gic(const struct vdso_data *data)
 {
        return (void __iomem *)data - PAGE_SIZE;
 }
diff --git a/arch/mips/include/asm/vdso/vsyscall.h b/arch/mips/include/asm/vdso/vsyscall.h
new file mode 100644 (file)
index 0000000..1953147
--- /dev/null
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+
+extern struct vdso_data *vdso_data;
+
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+static __always_inline
+struct vdso_data *__mips_get_k_vdso_data(void)
+{
+       return vdso_data;
+}
+#define __arch_get_k_vdso_data __mips_get_k_vdso_data
+
+static __always_inline
+int __mips_get_clock_mode(struct timekeeper *tk)
+{
+       u32 clock_mode = tk->tkr_mono.clock->archdata.vdso_clock_mode;
+
+       return clock_mode;
+}
+#define __arch_get_clock_mode __mips_get_clock_mode
+
+static __always_inline
+int __mips_use_vsyscall(struct vdso_data *vdata)
+{
+       return (vdata[CS_HRES_COARSE].clock_mode != VDSO_CLOCK_NONE);
+}
+#define __arch_use_vsyscall __mips_use_vsyscall
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
index 9344e24..1eedd59 100644 (file)
 #endif
 
 /*
- * When an interrupt happens on a CP0 register read instruction, CPU may
- * lock up or read corrupted values of CP0 registers after it enters
- * the exception handler.
- *
- * This workaround makes sure that we read a "safe" CP0 register as the
- * first thing in the exception handler, which breaks one of the
- * pre-conditions for this problem.
- */
-#ifndef R5432_CP0_INTERRUPT_WAR
-#error Check setting of R5432_CP0_INTERRUPT_WAR for your platform
-#endif
-
-/*
  * Workaround for the Sibyte M3 errata the text of which can be found at
  *
  *   http://sibyte.broadcom.com/hw/bcm1250/docs/pass2errata.txt
index c2b4096..57dc2ac 100644 (file)
@@ -95,6 +95,9 @@
 #define MADV_WIPEONFORK 18             /* Zero memory on fork, child only */
 #define MADV_KEEPONFORK 19             /* Undo MADV_WIPEONFORK */
 
+#define MADV_COLD      20              /* deactivate these pages */
+#define MADV_PAGEOUT   21              /* reclaim these pages */
+
 /* compatibility flags */
 #define MAP_FILE       0
 
index 88d6aa7..6de14c0 100644 (file)
@@ -5,15 +5,10 @@
 
 # Object file lists.
 
-obj-y += prom.o time.o reset.o setup.o \
-       platform.o timer.o
+obj-y += prom.o time.o reset.o setup.o timer.o
 
 CFLAGS_setup.o = -I$(src)/../../../scripts/dtc/libfdt
 
-# board specific support
-
-obj-$(CONFIG_JZ4740_QI_LB60)   += board-qi_lb60.o
-
 # PM support
 
 obj-$(CONFIG_PM) += pm.o
diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c
deleted file mode 100644 (file)
index 4a7a80c..0000000
+++ /dev/null
@@ -1,523 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * linux/arch/mips/jz4740/board-qi_lb60.c
- *
- * QI_LB60 board support
- *
- * Copyright (c) 2009 Qi Hardware inc.,
- * Author: Xiangfu Liu <xiangfu@qi-hardware.com>
- * Copyright 2010, Lars-Peter Clausen <lars@metafoo.de>
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/gpio.h>
-#include <linux/gpio/machine.h>
-
-#include <linux/input.h>
-#include <linux/gpio_keys.h>
-#include <linux/input/matrix_keypad.h>
-#include <linux/spi/spi.h>
-#include <linux/spi/spi_gpio.h>
-#include <linux/pinctrl/machine.h>
-#include <linux/pinctrl/pinconf-generic.h>
-#include <linux/power_supply.h>
-#include <linux/power/jz4740-battery.h>
-#include <linux/power/gpio-charger.h>
-#include <linux/pwm.h>
-
-#include <linux/platform_data/jz4740/jz4740_nand.h>
-
-#include <asm/mach-jz4740/gpio.h>
-#include <asm/mach-jz4740/jz4740_fb.h>
-#include <asm/mach-jz4740/jz4740_mmc.h>
-
-#include <linux/regulator/fixed.h>
-#include <linux/regulator/machine.h>
-
-#include <asm/mach-jz4740/platform.h>
-
-/* GPIOs */
-#define QI_LB60_GPIO_KEYOUT(x)         (JZ_GPIO_PORTC(10) + (x))
-#define QI_LB60_GPIO_KEYIN(x)          (JZ_GPIO_PORTD(18) + (x))
-#define QI_LB60_GPIO_KEYIN8            JZ_GPIO_PORTD(26)
-
-/* NAND */
-
-/* Early prototypes of the QI LB60 had only 1GB of NAND.
- * In order to support these devices as well the partition and ecc layout is
- * initialized depending on the NAND size */
-static struct mtd_partition qi_lb60_partitions_1gb[] = {
-       {
-               .name = "NAND BOOT partition",
-               .offset = 0 * 0x100000,
-               .size = 4 * 0x100000,
-       },
-       {
-               .name = "NAND KERNEL partition",
-               .offset = 4 * 0x100000,
-               .size = 4 * 0x100000,
-       },
-       {
-               .name = "NAND ROOTFS partition",
-               .offset = 8 * 0x100000,
-               .size = (504 + 512) * 0x100000,
-       },
-};
-
-static struct mtd_partition qi_lb60_partitions_2gb[] = {
-       {
-               .name = "NAND BOOT partition",
-               .offset = 0 * 0x100000,
-               .size = 4 * 0x100000,
-       },
-       {
-               .name = "NAND KERNEL partition",
-               .offset = 4 * 0x100000,
-               .size = 4 * 0x100000,
-       },
-       {
-               .name = "NAND ROOTFS partition",
-               .offset = 8 * 0x100000,
-               .size = (504 + 512 + 1024) * 0x100000,
-       },
-};
-
-static int qi_lb60_ooblayout_ecc(struct mtd_info *mtd, int section,
-                                struct mtd_oob_region *oobregion)
-{
-       if (section)
-               return -ERANGE;
-
-       oobregion->length = 36;
-       oobregion->offset = 6;
-
-       if (mtd->oobsize == 128) {
-               oobregion->length *= 2;
-               oobregion->offset *= 2;
-       }
-
-       return 0;
-}
-
-static int qi_lb60_ooblayout_free(struct mtd_info *mtd, int section,
-                                 struct mtd_oob_region *oobregion)
-{
-       int eccbytes = 36, eccoff = 6;
-
-       if (section > 1)
-               return -ERANGE;
-
-       if (mtd->oobsize == 128) {
-               eccbytes *= 2;
-               eccoff *= 2;
-       }
-
-       if (!section) {
-               oobregion->offset = 2;
-               oobregion->length = eccoff - 2;
-       } else {
-               oobregion->offset = eccoff + eccbytes;
-               oobregion->length = mtd->oobsize - oobregion->offset;
-       }
-
-       return 0;
-}
-
-static const struct mtd_ooblayout_ops qi_lb60_ooblayout_ops = {
-       .ecc = qi_lb60_ooblayout_ecc,
-       .free = qi_lb60_ooblayout_free,
-};
-
-static void qi_lb60_nand_ident(struct platform_device *pdev,
-               struct mtd_info *mtd, struct mtd_partition **partitions,
-               int *num_partitions)
-{
-       struct nand_chip *chip = mtd_to_nand(mtd);
-
-       if (chip->page_shift == 12) {
-               *partitions = qi_lb60_partitions_2gb;
-               *num_partitions = ARRAY_SIZE(qi_lb60_partitions_2gb);
-       } else {
-               *partitions = qi_lb60_partitions_1gb;
-               *num_partitions = ARRAY_SIZE(qi_lb60_partitions_1gb);
-       }
-
-       mtd_set_ooblayout(mtd, &qi_lb60_ooblayout_ops);
-}
-
-static struct jz_nand_platform_data qi_lb60_nand_pdata = {
-       .ident_callback = qi_lb60_nand_ident,
-       .banks = { 1 },
-};
-
-static struct gpiod_lookup_table qi_lb60_nand_gpio_table = {
-       .dev_id = "jz4740-nand.0",
-       .table = {
-               GPIO_LOOKUP("GPIOC", 30, "busy", 0),
-               { },
-       },
-};
-
-
-/* Keyboard*/
-
-#define KEY_QI_QI      KEY_F13
-#define KEY_QI_UPRED   KEY_RIGHTALT
-#define KEY_QI_VOLUP   KEY_VOLUMEUP
-#define KEY_QI_VOLDOWN KEY_VOLUMEDOWN
-#define KEY_QI_FN      KEY_LEFTCTRL
-
-static const uint32_t qi_lb60_keymap[] = {
-       KEY(0, 0, KEY_F1),      /* S2 */
-       KEY(0, 1, KEY_F2),      /* S3 */
-       KEY(0, 2, KEY_F3),      /* S4 */
-       KEY(0, 3, KEY_F4),      /* S5 */
-       KEY(0, 4, KEY_F5),      /* S6 */
-       KEY(0, 5, KEY_F6),      /* S7 */
-       KEY(0, 6, KEY_F7),      /* S8 */
-
-       KEY(1, 0, KEY_Q),       /* S10 */
-       KEY(1, 1, KEY_W),       /* S11 */
-       KEY(1, 2, KEY_E),       /* S12 */
-       KEY(1, 3, KEY_R),       /* S13 */
-       KEY(1, 4, KEY_T),       /* S14 */
-       KEY(1, 5, KEY_Y),       /* S15 */
-       KEY(1, 6, KEY_U),       /* S16 */
-       KEY(1, 7, KEY_I),       /* S17 */
-       KEY(2, 0, KEY_A),       /* S18 */
-       KEY(2, 1, KEY_S),       /* S19 */
-       KEY(2, 2, KEY_D),       /* S20 */
-       KEY(2, 3, KEY_F),       /* S21 */
-       KEY(2, 4, KEY_G),       /* S22 */
-       KEY(2, 5, KEY_H),       /* S23 */
-       KEY(2, 6, KEY_J),       /* S24 */
-       KEY(2, 7, KEY_K),       /* S25 */
-       KEY(3, 0, KEY_ESC),     /* S26 */
-       KEY(3, 1, KEY_Z),       /* S27 */
-       KEY(3, 2, KEY_X),       /* S28 */
-       KEY(3, 3, KEY_C),       /* S29 */
-       KEY(3, 4, KEY_V),       /* S30 */
-       KEY(3, 5, KEY_B),       /* S31 */
-       KEY(3, 6, KEY_N),       /* S32 */
-       KEY(3, 7, KEY_M),       /* S33 */
-       KEY(4, 0, KEY_TAB),     /* S34 */
-       KEY(4, 1, KEY_CAPSLOCK),        /* S35 */
-       KEY(4, 2, KEY_BACKSLASH),       /* S36 */
-       KEY(4, 3, KEY_APOSTROPHE),      /* S37 */
-       KEY(4, 4, KEY_COMMA),   /* S38 */
-       KEY(4, 5, KEY_DOT),     /* S39 */
-       KEY(4, 6, KEY_SLASH),   /* S40 */
-       KEY(4, 7, KEY_UP),      /* S41 */
-       KEY(5, 0, KEY_O),       /* S42 */
-       KEY(5, 1, KEY_L),       /* S43 */
-       KEY(5, 2, KEY_EQUAL),   /* S44 */
-       KEY(5, 3, KEY_QI_UPRED),        /* S45 */
-       KEY(5, 4, KEY_SPACE),   /* S46 */
-       KEY(5, 5, KEY_QI_QI),   /* S47 */
-       KEY(5, 6, KEY_RIGHTCTRL),       /* S48 */
-       KEY(5, 7, KEY_LEFT),    /* S49 */
-       KEY(6, 0, KEY_F8),      /* S50 */
-       KEY(6, 1, KEY_P),       /* S51 */
-       KEY(6, 2, KEY_BACKSPACE),/* S52 */
-       KEY(6, 3, KEY_ENTER),   /* S53 */
-       KEY(6, 4, KEY_QI_VOLUP),        /* S54 */
-       KEY(6, 5, KEY_QI_VOLDOWN),      /* S55 */
-       KEY(6, 6, KEY_DOWN),    /* S56 */
-       KEY(6, 7, KEY_RIGHT),   /* S57 */
-
-       KEY(7, 0, KEY_LEFTSHIFT),       /* S58 */
-       KEY(7, 1, KEY_LEFTALT), /* S59 */
-       KEY(7, 2, KEY_QI_FN),   /* S60 */
-};
-
-static const struct matrix_keymap_data qi_lb60_keymap_data = {
-       .keymap         = qi_lb60_keymap,
-       .keymap_size    = ARRAY_SIZE(qi_lb60_keymap),
-};
-
-static const unsigned int qi_lb60_keypad_cols[] = {
-       QI_LB60_GPIO_KEYOUT(0),
-       QI_LB60_GPIO_KEYOUT(1),
-       QI_LB60_GPIO_KEYOUT(2),
-       QI_LB60_GPIO_KEYOUT(3),
-       QI_LB60_GPIO_KEYOUT(4),
-       QI_LB60_GPIO_KEYOUT(5),
-       QI_LB60_GPIO_KEYOUT(6),
-       QI_LB60_GPIO_KEYOUT(7),
-};
-
-static const unsigned int qi_lb60_keypad_rows[] = {
-       QI_LB60_GPIO_KEYIN(0),
-       QI_LB60_GPIO_KEYIN(1),
-       QI_LB60_GPIO_KEYIN(2),
-       QI_LB60_GPIO_KEYIN(3),
-       QI_LB60_GPIO_KEYIN(4),
-       QI_LB60_GPIO_KEYIN(5),
-       QI_LB60_GPIO_KEYIN(6),
-       QI_LB60_GPIO_KEYIN8,
-};
-
-static struct matrix_keypad_platform_data qi_lb60_pdata = {
-       .keymap_data = &qi_lb60_keymap_data,
-       .col_gpios      = qi_lb60_keypad_cols,
-       .row_gpios      = qi_lb60_keypad_rows,
-       .num_col_gpios  = ARRAY_SIZE(qi_lb60_keypad_cols),
-       .num_row_gpios  = ARRAY_SIZE(qi_lb60_keypad_rows),
-       .col_scan_delay_us      = 10,
-       .debounce_ms            = 10,
-       .wakeup                 = 1,
-       .active_low             = 1,
-};
-
-static struct platform_device qi_lb60_keypad = {
-       .name           = "matrix-keypad",
-       .id             = -1,
-       .dev            = {
-               .platform_data = &qi_lb60_pdata,
-       },
-};
-
-/* Display */
-static struct fb_videomode qi_lb60_video_modes[] = {
-       {
-               .name = "320x240",
-               .xres = 320,
-               .yres = 240,
-               .refresh = 30,
-               .left_margin = 140,
-               .right_margin = 273,
-               .upper_margin = 20,
-               .lower_margin = 2,
-               .hsync_len = 1,
-               .vsync_len = 1,
-               .sync = 0,
-               .vmode = FB_VMODE_NONINTERLACED,
-       },
-};
-
-static struct jz4740_fb_platform_data qi_lb60_fb_pdata = {
-       .width          = 60,
-       .height         = 45,
-       .num_modes      = ARRAY_SIZE(qi_lb60_video_modes),
-       .modes          = qi_lb60_video_modes,
-       .bpp            = 24,
-       .lcd_type       = JZ_LCD_TYPE_8BIT_SERIAL,
-       .pixclk_falling_edge = 1,
-};
-
-struct spi_gpio_platform_data qi_lb60_spigpio_platform_data = {
-       .num_chipselect = 1,
-};
-
-static struct platform_device qi_lb60_spigpio_device = {
-       .name = "spi_gpio",
-       .id   = 1,
-       .dev = {
-               .platform_data = &qi_lb60_spigpio_platform_data,
-       },
-};
-
-static struct gpiod_lookup_table qi_lb60_spigpio_gpio_table = {
-       .dev_id         = "spi_gpio",
-       .table          = {
-               GPIO_LOOKUP("GPIOC", 23,
-                           "sck", GPIO_ACTIVE_HIGH),
-               GPIO_LOOKUP("GPIOC", 22,
-                           "mosi", GPIO_ACTIVE_HIGH),
-               GPIO_LOOKUP("GPIOC", 21,
-                           "cs", GPIO_ACTIVE_HIGH),
-               { },
-       },
-};
-
-static struct spi_board_info qi_lb60_spi_board_info[] = {
-       {
-               .modalias = "ili8960",
-               .chip_select = 0,
-               .bus_num = 1,
-               .max_speed_hz = 30 * 1000,
-               .mode = SPI_3WIRE,
-       },
-};
-
-/* Battery */
-static struct jz_battery_platform_data qi_lb60_battery_pdata = {
-       .gpio_charge =  JZ_GPIO_PORTC(27),
-       .gpio_charge_active_low = 1,
-       .info = {
-               .name = "battery",
-               .technology = POWER_SUPPLY_TECHNOLOGY_LIPO,
-               .voltage_max_design = 4200000,
-               .voltage_min_design = 3600000,
-       },
-};
-
-/* GPIO Key: power */
-static struct gpio_keys_button qi_lb60_gpio_keys_buttons[] = {
-       [0] = {
-               .code           = KEY_POWER,
-               .gpio           = JZ_GPIO_PORTD(29),
-               .active_low     = 1,
-               .desc           = "Power",
-               .wakeup         = 1,
-       },
-};
-
-static struct gpio_keys_platform_data qi_lb60_gpio_keys_data = {
-       .nbuttons = ARRAY_SIZE(qi_lb60_gpio_keys_buttons),
-       .buttons = qi_lb60_gpio_keys_buttons,
-};
-
-static struct platform_device qi_lb60_gpio_keys = {
-       .name = "gpio-keys",
-       .id =   -1,
-       .dev = {
-               .platform_data = &qi_lb60_gpio_keys_data,
-       }
-};
-
-static struct jz4740_mmc_platform_data qi_lb60_mmc_pdata = {
-       /* Intentionally left blank */
-};
-
-static struct gpiod_lookup_table qi_lb60_mmc_gpio_table = {
-       .dev_id = "jz4740-mmc.0",
-       .table = {
-               GPIO_LOOKUP("GPIOD", 0, "cd", GPIO_ACTIVE_HIGH),
-               GPIO_LOOKUP("GPIOD", 2, "power", GPIO_ACTIVE_LOW),
-               { },
-       },
-};
-
-/* beeper */
-static struct pwm_lookup qi_lb60_pwm_lookup[] = {
-       PWM_LOOKUP("jz4740-pwm", 4, "pwm-beeper", NULL, 0,
-                  PWM_POLARITY_NORMAL),
-};
-
-static struct platform_device qi_lb60_pwm_beeper = {
-       .name = "pwm-beeper",
-       .id = -1,
-};
-
-/* charger */
-static char *qi_lb60_batteries[] = {
-       "battery",
-};
-
-static struct gpio_charger_platform_data qi_lb60_charger_pdata = {
-       .name = "usb",
-       .type = POWER_SUPPLY_TYPE_USB,
-       .gpio = JZ_GPIO_PORTD(28),
-       .gpio_active_low = 1,
-       .supplied_to = qi_lb60_batteries,
-       .num_supplicants = ARRAY_SIZE(qi_lb60_batteries),
-};
-
-static struct platform_device qi_lb60_charger_device = {
-       .name = "gpio-charger",
-       .dev = {
-               .platform_data = &qi_lb60_charger_pdata,
-       },
-};
-
-/* audio */
-static struct platform_device qi_lb60_audio_device = {
-       .name = "qi-lb60-audio",
-       .id = -1,
-};
-
-static struct gpiod_lookup_table qi_lb60_audio_gpio_table = {
-       .dev_id = "qi-lb60-audio",
-       .table = {
-               GPIO_LOOKUP("GPIOB", 29, "snd", 0),
-               GPIO_LOOKUP("GPIOD", 4, "amp", 0),
-               { },
-       },
-};
-
-static struct platform_device *jz_platform_devices[] __initdata = {
-       &jz4740_udc_device,
-       &jz4740_udc_xceiv_device,
-       &jz4740_mmc_device,
-       &jz4740_nand_device,
-       &qi_lb60_keypad,
-       &qi_lb60_spigpio_device,
-       &jz4740_framebuffer_device,
-       &jz4740_pcm_device,
-       &jz4740_i2s_device,
-       &jz4740_codec_device,
-       &jz4740_adc_device,
-       &jz4740_pwm_device,
-       &jz4740_dma_device,
-       &qi_lb60_gpio_keys,
-       &qi_lb60_pwm_beeper,
-       &qi_lb60_charger_device,
-       &qi_lb60_audio_device,
-};
-
-static unsigned long pin_cfg_bias_disable[] = {
-           PIN_CONFIG_BIAS_DISABLE,
-};
-
-static struct pinctrl_map pin_map[] __initdata = {
-       /* NAND pin configuration */
-       PIN_MAP_MUX_GROUP_DEFAULT("jz4740-nand",
-                       "10010000.pin-controller", "nand-cs1", "nand"),
-
-       /* fbdev pin configuration */
-       PIN_MAP_MUX_GROUP("jz4740-fb", PINCTRL_STATE_DEFAULT,
-                       "10010000.pin-controller", "lcd-8bit", "lcd"),
-       PIN_MAP_MUX_GROUP("jz4740-fb", PINCTRL_STATE_SLEEP,
-                       "10010000.pin-controller", "lcd-no-pins", "lcd"),
-
-       /* MMC pin configuration */
-       PIN_MAP_MUX_GROUP_DEFAULT("jz4740-mmc.0",
-                       "10010000.pin-controller", "mmc-1bit", "mmc"),
-       PIN_MAP_MUX_GROUP_DEFAULT("jz4740-mmc.0",
-                       "10010000.pin-controller", "mmc-4bit", "mmc"),
-       PIN_MAP_CONFIGS_PIN_DEFAULT("jz4740-mmc.0",
-                       "10010000.pin-controller", "PD0", pin_cfg_bias_disable),
-       PIN_MAP_CONFIGS_PIN_DEFAULT("jz4740-mmc.0",
-                       "10010000.pin-controller", "PD2", pin_cfg_bias_disable),
-
-       /* PWM pin configuration */
-       PIN_MAP_MUX_GROUP_DEFAULT("jz4740-pwm",
-                       "10010000.pin-controller", "pwm4", "pwm4"),
-};
-
-
-static int __init qi_lb60_init_platform_devices(void)
-{
-       jz4740_framebuffer_device.dev.platform_data = &qi_lb60_fb_pdata;
-       jz4740_nand_device.dev.platform_data = &qi_lb60_nand_pdata;
-       jz4740_adc_device.dev.platform_data = &qi_lb60_battery_pdata;
-       jz4740_mmc_device.dev.platform_data = &qi_lb60_mmc_pdata;
-
-       gpiod_add_lookup_table(&qi_lb60_audio_gpio_table);
-       gpiod_add_lookup_table(&qi_lb60_nand_gpio_table);
-       gpiod_add_lookup_table(&qi_lb60_spigpio_gpio_table);
-       gpiod_add_lookup_table(&qi_lb60_mmc_gpio_table);
-
-       spi_register_board_info(qi_lb60_spi_board_info,
-                               ARRAY_SIZE(qi_lb60_spi_board_info));
-
-       pwm_add_table(qi_lb60_pwm_lookup, ARRAY_SIZE(qi_lb60_pwm_lookup));
-       pinctrl_register_mappings(pin_map, ARRAY_SIZE(pin_map));
-
-       return platform_add_devices(jz_platform_devices,
-                                       ARRAY_SIZE(jz_platform_devices));
-
-}
-
-static int __init qi_lb60_board_setup(void)
-{
-       printk(KERN_INFO "Qi Hardware JZ4740 QI LB60 setup\n");
-
-       if (qi_lb60_init_platform_devices())
-               panic("Failed to initialize platform devices");
-
-       return 0;
-}
-arch_initcall(qi_lb60_board_setup);
diff --git a/arch/mips/jz4740/platform.c b/arch/mips/jz4740/platform.c
deleted file mode 100644 (file)
index c74c99f..0000000
+++ /dev/null
@@ -1,250 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *  Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
- *  JZ4740 platform devices
- */
-
-#include <linux/clk.h>
-#include <linux/device.h>
-#include <linux/kernel.h>
-#include <linux/platform_device.h>
-#include <linux/resource.h>
-
-#include <linux/dma-mapping.h>
-
-#include <linux/usb/musb.h>
-
-#include <asm/mach-jz4740/platform.h>
-#include <asm/mach-jz4740/base.h>
-#include <asm/mach-jz4740/irq.h>
-
-#include <linux/serial_core.h>
-#include <linux/serial_8250.h>
-
-/* USB Device Controller */
-struct platform_device jz4740_udc_xceiv_device = {
-       .name = "usb_phy_generic",
-       .id   = 0,
-};
-
-static struct resource jz4740_udc_resources[] = {
-       [0] = {
-               .start = JZ4740_UDC_BASE_ADDR,
-               .end   = JZ4740_UDC_BASE_ADDR + 0x10000 - 1,
-               .flags = IORESOURCE_MEM,
-       },
-       [1] = {
-               .start = JZ4740_IRQ_UDC,
-               .end   = JZ4740_IRQ_UDC,
-               .flags = IORESOURCE_IRQ,
-               .name  = "mc",
-       },
-};
-
-struct platform_device jz4740_udc_device = {
-       .name = "musb-jz4740",
-       .id   = -1,
-       .dev  = {
-               .dma_mask          = &jz4740_udc_device.dev.coherent_dma_mask,
-               .coherent_dma_mask = DMA_BIT_MASK(32),
-       },
-       .num_resources = ARRAY_SIZE(jz4740_udc_resources),
-       .resource      = jz4740_udc_resources,
-};
-
-/* MMC/SD controller */
-static struct resource jz4740_mmc_resources[] = {
-       {
-               .start  = JZ4740_MSC_BASE_ADDR,
-               .end    = JZ4740_MSC_BASE_ADDR + 0x1000 - 1,
-               .flags  = IORESOURCE_MEM,
-       },
-       {
-               .start  = JZ4740_IRQ_MSC,
-               .end    = JZ4740_IRQ_MSC,
-               .flags  = IORESOURCE_IRQ,
-       }
-};
-
-struct platform_device jz4740_mmc_device = {
-       .name           = "jz4740-mmc",
-       .id             = 0,
-       .dev = {
-               .dma_mask = &jz4740_mmc_device.dev.coherent_dma_mask,
-               .coherent_dma_mask = DMA_BIT_MASK(32),
-       },
-       .num_resources  = ARRAY_SIZE(jz4740_mmc_resources),
-       .resource       = jz4740_mmc_resources,
-};
-
-/* I2C controller */
-static struct resource jz4740_i2c_resources[] = {
-       {
-               .start  = JZ4740_I2C_BASE_ADDR,
-               .end    = JZ4740_I2C_BASE_ADDR + 0x1000 - 1,
-               .flags  = IORESOURCE_MEM,
-       },
-       {
-               .start  = JZ4740_IRQ_I2C,
-               .end    = JZ4740_IRQ_I2C,
-               .flags  = IORESOURCE_IRQ,
-       }
-};
-
-struct platform_device jz4740_i2c_device = {
-       .name           = "jz4740-i2c",
-       .id             = 0,
-       .num_resources  = ARRAY_SIZE(jz4740_i2c_resources),
-       .resource       = jz4740_i2c_resources,
-};
-
-/* NAND controller */
-static struct resource jz4740_nand_resources[] = {
-       {
-               .name   = "mmio",
-               .start  = JZ4740_EMC_BASE_ADDR,
-               .end    = JZ4740_EMC_BASE_ADDR + 0x1000 - 1,
-               .flags  = IORESOURCE_MEM,
-       },
-       {
-               .name   = "bank1",
-               .start  = 0x18000000,
-               .end    = 0x180C0000 - 1,
-               .flags = IORESOURCE_MEM,
-       },
-       {
-               .name   = "bank2",
-               .start  = 0x14000000,
-               .end    = 0x140C0000 - 1,
-               .flags = IORESOURCE_MEM,
-       },
-       {
-               .name   = "bank3",
-               .start  = 0x0C000000,
-               .end    = 0x0C0C0000 - 1,
-               .flags = IORESOURCE_MEM,
-       },
-       {
-               .name   = "bank4",
-               .start  = 0x08000000,
-               .end    = 0x080C0000 - 1,
-               .flags = IORESOURCE_MEM,
-       },
-};
-
-struct platform_device jz4740_nand_device = {
-       .name = "jz4740-nand",
-       .num_resources = ARRAY_SIZE(jz4740_nand_resources),
-       .resource = jz4740_nand_resources,
-};
-
-/* LCD controller */
-static struct resource jz4740_framebuffer_resources[] = {
-       {
-               .start  = JZ4740_LCD_BASE_ADDR,
-               .end    = JZ4740_LCD_BASE_ADDR + 0x1000 - 1,
-               .flags  = IORESOURCE_MEM,
-       },
-};
-
-struct platform_device jz4740_framebuffer_device = {
-       .name           = "jz4740-fb",
-       .id             = -1,
-       .num_resources  = ARRAY_SIZE(jz4740_framebuffer_resources),
-       .resource       = jz4740_framebuffer_resources,
-       .dev = {
-               .dma_mask = &jz4740_framebuffer_device.dev.coherent_dma_mask,
-               .coherent_dma_mask = DMA_BIT_MASK(32),
-       },
-};
-
-/* I2S controller */
-static struct resource jz4740_i2s_resources[] = {
-       {
-               .start  = JZ4740_AIC_BASE_ADDR,
-               .end    = JZ4740_AIC_BASE_ADDR + 0x38 - 1,
-               .flags  = IORESOURCE_MEM,
-       },
-};
-
-struct platform_device jz4740_i2s_device = {
-       .name           = "jz4740-i2s",
-       .id             = -1,
-       .num_resources  = ARRAY_SIZE(jz4740_i2s_resources),
-       .resource       = jz4740_i2s_resources,
-};
-
-/* PCM */
-struct platform_device jz4740_pcm_device = {
-       .name           = "jz4740-pcm-audio",
-       .id             = -1,
-};
-
-/* Codec */
-static struct resource jz4740_codec_resources[] = {
-       {
-               .start  = JZ4740_AIC_BASE_ADDR + 0x80,
-               .end    = JZ4740_AIC_BASE_ADDR + 0x88 - 1,
-               .flags  = IORESOURCE_MEM,
-       },
-};
-
-struct platform_device jz4740_codec_device = {
-       .name           = "jz4740-codec",
-       .id             = -1,
-       .num_resources  = ARRAY_SIZE(jz4740_codec_resources),
-       .resource       = jz4740_codec_resources,
-};
-
-/* ADC controller */
-static struct resource jz4740_adc_resources[] = {
-       {
-               .start  = JZ4740_SADC_BASE_ADDR,
-               .end    = JZ4740_SADC_BASE_ADDR + 0x30,
-               .flags  = IORESOURCE_MEM,
-       },
-       {
-               .start  = JZ4740_IRQ_SADC,
-               .end    = JZ4740_IRQ_SADC,
-               .flags  = IORESOURCE_IRQ,
-       },
-       {
-               .start  = JZ4740_IRQ_ADC_BASE,
-               .end    = JZ4740_IRQ_ADC_BASE,
-               .flags  = IORESOURCE_IRQ,
-       },
-};
-
-struct platform_device jz4740_adc_device = {
-       .name           = "jz4740-adc",
-       .id             = -1,
-       .num_resources  = ARRAY_SIZE(jz4740_adc_resources),
-       .resource       = jz4740_adc_resources,
-};
-
-/* PWM */
-struct platform_device jz4740_pwm_device = {
-       .name = "jz4740-pwm",
-       .id   = -1,
-};
-
-/* DMA */
-static struct resource jz4740_dma_resources[] = {
-       {
-               .start  = JZ4740_DMAC_BASE_ADDR,
-               .end    = JZ4740_DMAC_BASE_ADDR + 0x400 - 1,
-               .flags  = IORESOURCE_MEM,
-       },
-       {
-               .start  = JZ4740_IRQ_DMAC,
-               .end    = JZ4740_IRQ_DMAC,
-               .flags  = IORESOURCE_IRQ,
-       },
-};
-
-struct platform_device jz4740_dma_device = {
-       .name           = "jz4740-dma",
-       .id             = -1,
-       .num_resources  = ARRAY_SIZE(jz4740_dma_resources),
-       .resource       = jz4740_dma_resources,
-};
index 88f33af..ff4555c 100644 (file)
@@ -4,15 +4,10 @@
  *  JZ4740 SoC prom code
  */
 
-#include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/string.h>
-
-#include <linux/serial_reg.h>
 
 #include <asm/bootinfo.h>
 #include <asm/fw/fw.h>
-#include <asm/mach-jz4740/base.h>
 
 void __init prom_init(void)
 {
index 4264eaf..dc8ee21 100644 (file)
 #include <asm/bootinfo.h>
 #include <asm/prom.h>
 
-#include <asm/mach-jz4740/base.h>
-
 #include "reset.h"
 
+#define JZ4740_EMC_BASE_ADDR 0x13010000
 
 #define JZ4740_EMC_SDRAM_CTRL 0x80
 
@@ -45,6 +44,8 @@ static void __init jz4740_detect_mem(void)
 
 static unsigned long __init get_board_mach_type(const void *fdt)
 {
+       if (!fdt_node_check_compatible(fdt, 0, "ingenic,x1000"))
+               return MACH_INGENIC_X1000;
        if (!fdt_node_check_compatible(fdt, 0, "ingenic,jz4780"))
                return MACH_INGENIC_JZ4780;
        if (!fdt_node_check_compatible(fdt, 0, "ingenic,jz4770"))
@@ -85,6 +86,8 @@ void __init device_tree_init(void)
 const char *get_system_type(void)
 {
        switch (mips_machtype) {
+       case MACH_INGENIC_X1000:
+               return "X1000";
        case MACH_INGENIC_JZ4780:
                return "JZ4780";
        case MACH_INGENIC_JZ4770:
index cb768e5..5476899 100644 (file)
  *  JZ4740 platform time support
  */
 
-#include <linux/clk.h>
 #include <linux/clk-provider.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/time.h>
+#include <linux/clocksource.h>
 
-#include <linux/clockchips.h>
-#include <linux/sched_clock.h>
-
-#include <asm/mach-jz4740/irq.h>
 #include <asm/mach-jz4740/timer.h>
-#include <asm/time.h>
-
-#define TIMER_CLOCKEVENT 0
-#define TIMER_CLOCKSOURCE 1
-
-static uint16_t jz4740_jiffies_per_tick;
-
-static u64 jz4740_clocksource_read(struct clocksource *cs)
-{
-       return jz4740_timer_get_count(TIMER_CLOCKSOURCE);
-}
-
-static struct clocksource jz4740_clocksource = {
-       .name = "jz4740-timer",
-       .rating = 200,
-       .read = jz4740_clocksource_read,
-       .mask = CLOCKSOURCE_MASK(16),
-       .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
-static u64 notrace jz4740_read_sched_clock(void)
-{
-       return jz4740_timer_get_count(TIMER_CLOCKSOURCE);
-}
-
-static irqreturn_t jz4740_clockevent_irq(int irq, void *devid)
-{
-       struct clock_event_device *cd = devid;
-
-       jz4740_timer_ack_full(TIMER_CLOCKEVENT);
-
-       if (!clockevent_state_periodic(cd))
-               jz4740_timer_disable(TIMER_CLOCKEVENT);
-
-       cd->event_handler(cd);
-
-       return IRQ_HANDLED;
-}
-
-static int jz4740_clockevent_set_periodic(struct clock_event_device *evt)
-{
-       jz4740_timer_set_count(TIMER_CLOCKEVENT, 0);
-       jz4740_timer_set_period(TIMER_CLOCKEVENT, jz4740_jiffies_per_tick);
-       jz4740_timer_irq_full_enable(TIMER_CLOCKEVENT);
-       jz4740_timer_enable(TIMER_CLOCKEVENT);
-
-       return 0;
-}
-
-static int jz4740_clockevent_resume(struct clock_event_device *evt)
-{
-       jz4740_timer_irq_full_enable(TIMER_CLOCKEVENT);
-       jz4740_timer_enable(TIMER_CLOCKEVENT);
-
-       return 0;
-}
-
-static int jz4740_clockevent_shutdown(struct clock_event_device *evt)
-{
-       jz4740_timer_disable(TIMER_CLOCKEVENT);
-
-       return 0;
-}
-
-static int jz4740_clockevent_set_next(unsigned long evt,
-       struct clock_event_device *cd)
-{
-       jz4740_timer_set_count(TIMER_CLOCKEVENT, 0);
-       jz4740_timer_set_period(TIMER_CLOCKEVENT, evt);
-       jz4740_timer_enable(TIMER_CLOCKEVENT);
-
-       return 0;
-}
-
-static struct clock_event_device jz4740_clockevent = {
-       .name = "jz4740-timer",
-       .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
-       .set_next_event = jz4740_clockevent_set_next,
-       .set_state_shutdown = jz4740_clockevent_shutdown,
-       .set_state_periodic = jz4740_clockevent_set_periodic,
-       .set_state_oneshot = jz4740_clockevent_shutdown,
-       .tick_resume = jz4740_clockevent_resume,
-       .rating = 200,
-#ifdef CONFIG_MACH_JZ4740
-       .irq = JZ4740_IRQ_TCU0,
-#endif
-#if defined(CONFIG_MACH_JZ4770) || defined(CONFIG_MACH_JZ4780)
-       .irq = JZ4780_IRQ_TCU2,
-#endif
-};
-
-static struct irqaction timer_irqaction = {
-       .handler        = jz4740_clockevent_irq,
-       .flags          = IRQF_PERCPU | IRQF_TIMER,
-       .name           = "jz4740-timerirq",
-       .dev_id         = &jz4740_clockevent,
-};
 
 void __init plat_time_init(void)
 {
-       int ret;
-       uint32_t clk_rate;
-       uint16_t ctrl;
-       struct clk *ext_clk;
-
        of_clk_init(NULL);
        jz4740_timer_init();
-
-       ext_clk = clk_get(NULL, "ext");
-       if (IS_ERR(ext_clk))
-               panic("unable to get ext clock");
-       clk_rate = clk_get_rate(ext_clk) >> 4;
-       clk_put(ext_clk);
-
-       jz4740_jiffies_per_tick = DIV_ROUND_CLOSEST(clk_rate, HZ);
-
-       clockevent_set_clock(&jz4740_clockevent, clk_rate);
-       jz4740_clockevent.min_delta_ns = clockevent_delta2ns(100, &jz4740_clockevent);
-       jz4740_clockevent.min_delta_ticks = 100;
-       jz4740_clockevent.max_delta_ns = clockevent_delta2ns(0xffff, &jz4740_clockevent);
-       jz4740_clockevent.max_delta_ticks = 0xffff;
-       jz4740_clockevent.cpumask = cpumask_of(0);
-
-       clockevents_register_device(&jz4740_clockevent);
-
-       ret = clocksource_register_hz(&jz4740_clocksource, clk_rate);
-
-       if (ret)
-               printk(KERN_ERR "Failed to register clocksource: %d\n", ret);
-
-       sched_clock_register(jz4740_read_sched_clock, 16, clk_rate);
-
-       setup_irq(jz4740_clockevent.irq, &timer_irqaction);
-
-       ctrl = JZ_TIMER_CTRL_PRESCALE_16 | JZ_TIMER_CTRL_SRC_EXT;
-
-       jz4740_timer_set_ctrl(TIMER_CLOCKEVENT, ctrl);
-       jz4740_timer_set_ctrl(TIMER_CLOCKSOURCE, ctrl);
-
-       jz4740_timer_set_period(TIMER_CLOCKEVENT, jz4740_jiffies_per_tick);
-       jz4740_timer_irq_full_enable(TIMER_CLOCKEVENT);
-
-       jz4740_timer_set_period(TIMER_CLOCKSOURCE, 0xffff);
-
-       jz4740_timer_enable(TIMER_CLOCKEVENT);
-       jz4740_timer_enable(TIMER_CLOCKSOURCE);
+       timer_probe();
 }
index 1db2995..2c38f75 100644 (file)
@@ -58,6 +58,7 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                       unsigned long *contpc)
 {
        union mips_instruction insn = (union mips_instruction)dec_insn.insn;
+       int __maybe_unused bc_false = 0;
 
        if (!cpu_has_mmips)
                return 0;
@@ -139,7 +140,6 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
 #ifdef CONFIG_MIPS_FP_SUPPORT
                case mm_bc2f_op:
                case mm_bc1f_op: {
-                       int bc_false = 0;
                        unsigned int fcr31;
                        unsigned int bit;
 
index 9635c1d..c2eb392 100644 (file)
@@ -1384,15 +1384,6 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
                        break;
                }
                break;
-       case PRID_IMP_R4300:
-               c->cputype = CPU_R4300;
-               __cpu_name[cpu] = "R4300";
-               set_isa(c, MIPS_CPU_ISA_III);
-               c->fpu_msk31 |= FPU_CSR_CONDX;
-               c->options = R4K_OPTS | MIPS_CPU_FPU | MIPS_CPU_32FPR |
-                            MIPS_CPU_LLSC;
-               c->tlbsize = 32;
-               break;
        case PRID_IMP_R4600:
                c->cputype = CPU_R4600;
                __cpu_name[cpu] = "R4600";
@@ -1468,14 +1459,6 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
                             MIPS_CPU_LLSC;
                c->tlbsize = 48;
                break;
-       case PRID_IMP_R5432:
-               c->cputype = CPU_R5432;
-               __cpu_name[cpu] = "R5432";
-               set_isa(c, MIPS_CPU_ISA_IV);
-               c->options = R4K_OPTS | MIPS_CPU_FPU | MIPS_CPU_32FPR |
-                            MIPS_CPU_WATCH | MIPS_CPU_LLSC;
-               c->tlbsize = 48;
-               break;
        case PRID_IMP_R5500:
                c->cputype = CPU_R5500;
                __cpu_name[cpu] = "R5500";
@@ -1508,15 +1491,6 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
                 */
                c->tlbsize = (read_c0_info() & (1 << 29)) ? 64 : 48;
                break;
-       case PRID_IMP_R8000:
-               c->cputype = CPU_R8000;
-               __cpu_name[cpu] = "RM8000";
-               set_isa(c, MIPS_CPU_ISA_IV);
-               c->options = MIPS_CPU_TLB | MIPS_CPU_4KEX |
-                            MIPS_CPU_FPU | MIPS_CPU_32FPR |
-                            MIPS_CPU_LLSC;
-               c->tlbsize = 384;      /* has weird TLB: 3-way x 128 */
-               break;
        case PRID_IMP_R10000:
                c->cputype = CPU_R10000;
                __cpu_name[cpu] = "R10000";
@@ -1573,6 +1547,8 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
                        __cpu_name[cpu] = "ICT Loongson-3";
                        set_elf_platform(cpu, "loongson3a");
                        set_isa(c, MIPS_CPU_ISA_M64R1);
+                       c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM |
+                               MIPS_ASE_LOONGSON_EXT);
                        break;
                case PRID_REV_LOONGSON3B_R1:
                case PRID_REV_LOONGSON3B_R2:
@@ -1580,6 +1556,8 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
                        __cpu_name[cpu] = "ICT Loongson-3";
                        set_elf_platform(cpu, "loongson3b");
                        set_isa(c, MIPS_CPU_ISA_M64R1);
+                       c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM |
+                               MIPS_ASE_LOONGSON_EXT);
                        break;
                }
 
@@ -1946,6 +1924,8 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
                decode_configs(c);
                c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE;
                c->writecombine = _CACHE_UNCACHED_ACCELERATED;
+               c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM |
+                       MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2);
                break;
        default:
                panic("Unknown Loongson Processor ID!");
@@ -1956,14 +1936,29 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
 static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu)
 {
        decode_configs(c);
-       /* JZRISC does not implement the CP0 counter. */
+
+       /*
+        * XBurst lacks a config2 register, so config3 decode was skipped in
+        * decode_configs().
+        */
+       decode_config3(c);
+
+       /* XBurst does not implement the CP0 counter. */
        c->options &= ~MIPS_CPU_COUNTER;
        BUG_ON(!__builtin_constant_p(cpu_has_counter) || cpu_has_counter);
+
        switch (c->processor_id & PRID_IMP_MASK) {
-       case PRID_IMP_JZRISC:
-               c->cputype = CPU_JZRISC;
+       case PRID_IMP_XBURST:
+               c->cputype = CPU_XBURST;
                c->writecombine = _CACHE_UNCACHED_ACCELERATED;
                __cpu_name[cpu] = "Ingenic JZRISC";
+               /*
+                * The XBurst core by default attempts to avoid branch target
+                * buffer lookups by detecting & special casing loops. This
+                * feature causes BogoMIPS and lpj to be calculated incorrectly.
+                * Set cp0 config7 bit 4 to disable this feature.
+                */
+               set_c0_config7(MIPS_CONF7_BTB_LOOP_EN);
                break;
        default:
                panic("Unknown Ingenic Processor ID!");
index 398b905..efde27c 100644 (file)
@@ -32,9 +32,6 @@
 NESTED(except_vec3_generic, 0, sp)
        .set    push
        .set    noat
-#if R5432_CP0_INTERRUPT_WAR
-       mfc0    k0, CP0_INDEX
-#endif
        mfc0    k1, CP0_CAUSE
        andi    k1, k1, 0x7c
 #ifdef CONFIG_64BIT
index 7388f13..eb2afc0 100644 (file)
@@ -151,7 +151,6 @@ void __init check_wait(void)
                cpu_wait = r39xx_wait;
                break;
        case CPU_R4200:
-/*     case CPU_R4300: */
        case CPU_R4600:
        case CPU_R4640:
        case CPU_R4650:
@@ -173,7 +172,7 @@ void __init check_wait(void)
        case CPU_CAVIUM_OCTEON_PLUS:
        case CPU_CAVIUM_OCTEON2:
        case CPU_CAVIUM_OCTEON3:
-       case CPU_JZRISC:
+       case CPU_XBURST:
        case CPU_LOONGSON1:
        case CPU_XLR:
        case CPU_XLP:
index b2de408..f8d3671 100644 (file)
@@ -124,6 +124,10 @@ static int show_cpuinfo(struct seq_file *m, void *v)
        if (cpu_has_eva)        seq_printf(m, "%s", " eva");
        if (cpu_has_htw)        seq_printf(m, "%s", " htw");
        if (cpu_has_xpa)        seq_printf(m, "%s", " xpa");
+       if (cpu_has_loongson_mmi)       seq_printf(m, "%s", " loongson-mmi");
+       if (cpu_has_loongson_cam)       seq_printf(m, "%s", " loongson-cam");
+       if (cpu_has_loongson_ext)       seq_printf(m, "%s", " loongson-ext");
+       if (cpu_has_loongson_ext2)      seq_printf(m, "%s", " loongson-ext2");
        seq_printf(m, "\n");
 
        if (cpu_has_mmips) {
index d9434cd..b449b68 100644 (file)
@@ -217,7 +217,7 @@ einval: li  v0, -ENOSYS
 #define sys_sched_getaffinity  mipsmt_sys_sched_getaffinity
 #endif /* CONFIG_MIPS_MT_FPAFF */
 
-#define __SYSCALL(nr, entry, nargs)    PTR entry
+#define __SYSCALL(nr, entry)   PTR entry
        .align  2
        .type   sys_call_table, @object
 EXPORT(sys_call_table)
index c761ddf..35d8c86 100644 (file)
@@ -101,7 +101,7 @@ not_n32_scall:
 
        END(handle_sysn32)
 
-#define __SYSCALL(nr, entry, nargs) PTR entry
+#define __SYSCALL(nr, entry)   PTR entry
        .type   sysn32_call_table, @object
 EXPORT(sysn32_call_table)
 #include <asm/syscall_table_64_n32.h>
index 727fb8a..23b2e2b 100644 (file)
@@ -109,7 +109,7 @@ illegal_syscall:
        j       n64_syscall_exit
        END(handle_sys64)
 
-#define __SYSCALL(nr, entry, nargs) PTR entry
+#define __SYSCALL(nr, entry)   PTR entry
        .align  3
        .type   sys_call_table, @object
 EXPORT(sys_call_table)
index feb2653..41df822 100644 (file)
@@ -213,7 +213,7 @@ einval: li  v0, -ENOSYS
        jr      ra
        END(sys32_syscall)
 
-#define __SYSCALL(nr, entry, nargs) PTR entry
+#define __SYSCALL(nr, entry)   PTR entry
        .align  3
        .type   sys32_call_table,@object
 EXPORT(sys32_call_table)
index ab349d2..b8249c2 100644 (file)
@@ -63,8 +63,6 @@ unsigned long mips_machtype __read_mostly = MACH_UNKNOWN;
 
 EXPORT_SYMBOL(mips_machtype);
 
-struct boot_mem_map boot_mem_map;
-
 static char __initdata command_line[COMMAND_LINE_SIZE];
 char __initdata arcs_cmdline[COMMAND_LINE_SIZE];
 
@@ -76,7 +74,7 @@ static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
  * mips_io_port_base is the begin of the address space to which x86 style
  * I/O ports are mapped.
  */
-const unsigned long mips_io_port_base = -1;
+unsigned long mips_io_port_base = -1;
 EXPORT_SYMBOL(mips_io_port_base);
 
 static struct resource code_resource = { .name = "Kernel code", };
@@ -92,8 +90,10 @@ EXPORT_SYMBOL(ARCH_PFN_OFFSET);
 
 void __init add_memory_region(phys_addr_t start, phys_addr_t size, long type)
 {
-       int x = boot_mem_map.nr_map;
-       int i;
+       /*
+        * Note: This function only exists for historical reasons;
+        * new code should use memblock_add or memblock_add_node instead.
+        */
 
        /*
         * If the region reaches the top of the physical address space, adjust
@@ -108,38 +108,20 @@ void __init add_memory_region(phys_addr_t start, phys_addr_t size, long type)
                return;
        }
 
-       /*
-        * Try to merge with existing entry, if any.
-        */
-       for (i = 0; i < boot_mem_map.nr_map; i++) {
-               struct boot_mem_map_entry *entry = boot_mem_map.map + i;
-               unsigned long top;
-
-               if (entry->type != type)
-                       continue;
-
-               if (start + size < entry->addr)
-                       continue;                       /* no overlap */
+       memblock_add(start, size);
+       /* Reserve any memory except the ordinary RAM ranges. */
+       switch (type) {
+       case BOOT_MEM_RAM:
+               break;
 
-               if (entry->addr + entry->size < start)
-                       continue;                       /* no overlap */
+       case BOOT_MEM_NOMAP: /* Discard the range from the system. */
+               memblock_remove(start, size);
+               break;
 
-               top = max(entry->addr + entry->size, start + size);
-               entry->addr = min(entry->addr, start);
-               entry->size = top - entry->addr;
-
-               return;
+       default: /* Reserve the rest of the memory types at boot time */
+               memblock_reserve(start, size);
+               break;
        }
-
-       if (boot_mem_map.nr_map == BOOT_MEM_MAP_MAX) {
-               pr_err("Ooops! Too many entries in the memory map!\n");
-               return;
-       }
-
-       boot_mem_map.map[x].addr = start;
-       boot_mem_map.map[x].size = size;
-       boot_mem_map.map[x].type = type;
-       boot_mem_map.nr_map++;
 }
 
 void __init detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_addr_t sz_max)
@@ -161,70 +143,6 @@ void __init detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_add
        add_memory_region(start, size, BOOT_MEM_RAM);
 }
 
-static bool __init __maybe_unused memory_region_available(phys_addr_t start,
-                                                         phys_addr_t size)
-{
-       int i;
-       bool in_ram = false, free = true;
-
-       for (i = 0; i < boot_mem_map.nr_map; i++) {
-               phys_addr_t start_, end_;
-
-               start_ = boot_mem_map.map[i].addr;
-               end_ = boot_mem_map.map[i].addr + boot_mem_map.map[i].size;
-
-               switch (boot_mem_map.map[i].type) {
-               case BOOT_MEM_RAM:
-                       if (start >= start_ && start + size <= end_)
-                               in_ram = true;
-                       break;
-               case BOOT_MEM_RESERVED:
-               case BOOT_MEM_NOMAP:
-                       if ((start >= start_ && start < end_) ||
-                           (start < start_ && start + size >= start_))
-                               free = false;
-                       break;
-               default:
-                       continue;
-               }
-       }
-
-       return in_ram && free;
-}
-
-static void __init print_memory_map(void)
-{
-       int i;
-       const int field = 2 * sizeof(unsigned long);
-
-       for (i = 0; i < boot_mem_map.nr_map; i++) {
-               printk(KERN_INFO " memory: %0*Lx @ %0*Lx ",
-                      field, (unsigned long long) boot_mem_map.map[i].size,
-                      field, (unsigned long long) boot_mem_map.map[i].addr);
-
-               switch (boot_mem_map.map[i].type) {
-               case BOOT_MEM_RAM:
-                       printk(KERN_CONT "(usable)\n");
-                       break;
-               case BOOT_MEM_INIT_RAM:
-                       printk(KERN_CONT "(usable after init)\n");
-                       break;
-               case BOOT_MEM_ROM_DATA:
-                       printk(KERN_CONT "(ROM data)\n");
-                       break;
-               case BOOT_MEM_RESERVED:
-                       printk(KERN_CONT "(reserved)\n");
-                       break;
-               case BOOT_MEM_NOMAP:
-                       printk(KERN_CONT "(nomap)\n");
-                       break;
-               default:
-                       printk(KERN_CONT "type %lu\n", boot_mem_map.map[i].type);
-                       break;
-               }
-       }
-}
-
 /*
  * Manage initrd
  */
@@ -376,8 +294,11 @@ static void __init bootmem_init(void)
 
 static void __init bootmem_init(void)
 {
-       phys_addr_t ramstart = PHYS_ADDR_MAX;
-       int i;
+       struct memblock_region *mem;
+       phys_addr_t ramstart, ramend;
+
+       ramstart = memblock_start_of_DRAM();
+       ramend = memblock_end_of_DRAM();
 
        /*
         * Sanity check any INITRD first. We don't take it into account
@@ -391,122 +312,66 @@ static void __init bootmem_init(void)
        memblock_reserve(__pa_symbol(&_text),
                        __pa_symbol(&_end) - __pa_symbol(&_text));
 
+       /* max_low_pfn is not a number of pages but the end pfn of low mem */
+
+#ifdef CONFIG_MIPS_AUTO_PFN_OFFSET
+       ARCH_PFN_OFFSET = PFN_UP(ramstart);
+#else
        /*
-        * max_low_pfn is not a number of pages. The number of pages
-        * of the system is given by 'max_low_pfn - min_low_pfn'.
+        * Reserve any memory between the start of RAM and PHYS_OFFSET
         */
-       min_low_pfn = ~0UL;
-       max_low_pfn = 0;
-
-       /* Find the highest and lowest page frame numbers we have available. */
-       for (i = 0; i < boot_mem_map.nr_map; i++) {
-               unsigned long start, end;
-
-               if (boot_mem_map.map[i].type != BOOT_MEM_RAM)
-                       continue;
+       if (ramstart > PHYS_OFFSET)
+               memblock_reserve(PHYS_OFFSET, PFN_UP(ramstart) - PHYS_OFFSET);
 
-               start = PFN_UP(boot_mem_map.map[i].addr);
-               end = PFN_DOWN(boot_mem_map.map[i].addr
-                               + boot_mem_map.map[i].size);
+       if (PFN_UP(ramstart) > ARCH_PFN_OFFSET) {
+               pr_info("Wasting %lu bytes for tracking %lu unused pages\n",
+                       (unsigned long)((PFN_UP(ramstart) - ARCH_PFN_OFFSET) * sizeof(struct page)),
+                       (unsigned long)(PFN_UP(ramstart) - ARCH_PFN_OFFSET));
+       }
+#endif
 
-               ramstart = min(ramstart, boot_mem_map.map[i].addr);
+       min_low_pfn = ARCH_PFN_OFFSET;
+       max_pfn = PFN_DOWN(ramend);
+       for_each_memblock(memory, mem) {
+               unsigned long start = memblock_region_memory_base_pfn(mem);
+               unsigned long end = memblock_region_memory_end_pfn(mem);
 
-#ifndef CONFIG_HIGHMEM
                /*
                 * Skip highmem here so we get an accurate max_low_pfn if low
                 * memory stops short of high memory.
                 * If the region overlaps HIGHMEM_START, end is clipped so
                 * max_pfn excludes the highmem portion.
                 */
+               if (memblock_is_nomap(mem))
+                       continue;
                if (start >= PFN_DOWN(HIGHMEM_START))
                        continue;
                if (end > PFN_DOWN(HIGHMEM_START))
                        end = PFN_DOWN(HIGHMEM_START);
-#endif
-
                if (end > max_low_pfn)
                        max_low_pfn = end;
-               if (start < min_low_pfn)
-                       min_low_pfn = start;
        }
 
        if (min_low_pfn >= max_low_pfn)
                panic("Incorrect memory mapping !!!");
 
-#ifdef CONFIG_MIPS_AUTO_PFN_OFFSET
-       ARCH_PFN_OFFSET = PFN_UP(ramstart);
-#else
-       /*
-        * Reserve any memory between the start of RAM and PHYS_OFFSET
-        */
-       if (ramstart > PHYS_OFFSET) {
-               add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET,
-                                 BOOT_MEM_RESERVED);
-               memblock_reserve(PHYS_OFFSET, ramstart - PHYS_OFFSET);
-       }
-
-       if (min_low_pfn > ARCH_PFN_OFFSET) {
-               pr_info("Wasting %lu bytes for tracking %lu unused pages\n",
-                       (min_low_pfn - ARCH_PFN_OFFSET) * sizeof(struct page),
-                       min_low_pfn - ARCH_PFN_OFFSET);
-       } else if (ARCH_PFN_OFFSET - min_low_pfn > 0UL) {
-               pr_info("%lu free pages won't be used\n",
-                       ARCH_PFN_OFFSET - min_low_pfn);
-       }
-       min_low_pfn = ARCH_PFN_OFFSET;
-#endif
-
-       /*
-        * Determine low and high memory ranges
-        */
-       max_pfn = max_low_pfn;
-       if (max_low_pfn > PFN_DOWN(HIGHMEM_START)) {
+       if (max_pfn > PFN_DOWN(HIGHMEM_START)) {
 #ifdef CONFIG_HIGHMEM
                highstart_pfn = PFN_DOWN(HIGHMEM_START);
-               highend_pfn = max_low_pfn;
-#endif
+               highend_pfn = max_pfn;
+#else
                max_low_pfn = PFN_DOWN(HIGHMEM_START);
-       }
-
-       /* Install all valid RAM ranges to the memblock memory region */
-       for (i = 0; i < boot_mem_map.nr_map; i++) {
-               unsigned long start, end;
-
-               start = PFN_UP(boot_mem_map.map[i].addr);
-               end = PFN_DOWN(boot_mem_map.map[i].addr
-                               + boot_mem_map.map[i].size);
-
-               if (start < min_low_pfn)
-                       start = min_low_pfn;
-#ifndef CONFIG_HIGHMEM
-               /* Ignore highmem regions if highmem is unsupported */
-               if (end > max_low_pfn)
-                       end = max_low_pfn;
+               max_pfn = max_low_pfn;
 #endif
-               if (end <= start)
-                       continue;
-
-               memblock_add_node(PFN_PHYS(start), PFN_PHYS(end - start), 0);
+       }
 
-               /* Reserve any memory except the ordinary RAM ranges. */
-               switch (boot_mem_map.map[i].type) {
-               case BOOT_MEM_RAM:
-                       break;
-               case BOOT_MEM_NOMAP: /* Discard the range from the system. */
-                       memblock_remove(PFN_PHYS(start), PFN_PHYS(end - start));
-                       continue;
-               default: /* Reserve the rest of the memory types at boot time */
-                       memblock_reserve(PFN_PHYS(start), PFN_PHYS(end - start));
-                       break;
-               }
 
-               /*
-                * In any case the added to the memblock memory regions
-                * (highmem/lowmem, available/reserved, etc) are considered
-                * as present, so inform sparsemem about them.
-                */
-               memory_present(0, start, end);
-       }
+       /*
+        * In any case, the memory regions added to memblock
+        * (highmem/lowmem, available/reserved, etc.) are considered
+        * present, so inform sparsemem about them.
+        */
+       memblocks_present();
 
        /*
         * Reserve initrd memory if needed.
@@ -528,8 +393,9 @@ static int __init early_parse_mem(char *p)
         * size.
         */
        if (usermem == 0) {
-               boot_mem_map.nr_map = 0;
                usermem = 1;
+               memblock_remove(memblock_start_of_DRAM(),
+                       memblock_end_of_DRAM() - memblock_start_of_DRAM());
        }
        start = 0;
        size = memparse(p, &p);
@@ -586,14 +452,13 @@ early_param("memmap", early_parse_memmap);
 unsigned long setup_elfcorehdr, setup_elfcorehdr_size;
 static int __init early_parse_elfcorehdr(char *p)
 {
-       int i;
+       struct memblock_region *mem;
 
        setup_elfcorehdr = memparse(p, &p);
 
-       for (i = 0; i < boot_mem_map.nr_map; i++) {
-               unsigned long start = boot_mem_map.map[i].addr;
-               unsigned long end = (boot_mem_map.map[i].addr +
-                                    boot_mem_map.map[i].size);
+        for_each_memblock(memory, mem) {
+               unsigned long start = mem->base;
+               unsigned long end = start + mem->size;
                if (setup_elfcorehdr >= start && setup_elfcorehdr < end) {
                        /*
                         * Reserve from the elf core header to the end of
@@ -613,47 +478,20 @@ static int __init early_parse_elfcorehdr(char *p)
 early_param("elfcorehdr", early_parse_elfcorehdr);
 #endif
 
-static void __init arch_mem_addpart(phys_addr_t mem, phys_addr_t end, int type)
-{
-       phys_addr_t size;
-       int i;
-
-       size = end - mem;
-       if (!size)
-               return;
-
-       /* Make sure it is in the boot_mem_map */
-       for (i = 0; i < boot_mem_map.nr_map; i++) {
-               if (mem >= boot_mem_map.map[i].addr &&
-                   mem < (boot_mem_map.map[i].addr +
-                          boot_mem_map.map[i].size))
-                       return;
-       }
-       add_memory_region(mem, size, type);
-}
-
 #ifdef CONFIG_KEXEC
-static inline unsigned long long get_total_mem(void)
-{
-       unsigned long long total;
-
-       total = max_pfn - min_low_pfn;
-       return total << PAGE_SHIFT;
-}
-
 static void __init mips_parse_crashkernel(void)
 {
        unsigned long long total_mem;
        unsigned long long crash_size, crash_base;
        int ret;
 
-       total_mem = get_total_mem();
+       total_mem = memblock_phys_mem_size();
        ret = parse_crashkernel(boot_command_line, total_mem,
                                &crash_size, &crash_base);
        if (ret != 0 || crash_size <= 0)
                return;
 
-       if (!memory_region_available(crash_base, crash_size)) {
+       if (!memblock_find_in_range(crash_base, crash_base + crash_size, crash_size, 0)) {
                pr_warn("Invalid memory region reserved for crash kernel\n");
                return;
        }
@@ -686,6 +524,17 @@ static void __init request_crashkernel(struct resource *res)
 }
 #endif /* !defined(CONFIG_KEXEC)  */
 
+/*
+ * Check that memblock describes the kernel image.
+ *
+ * The span runs from _text rounded down to a page boundary up to _end
+ * rounded up to a page boundary. When memblock_is_region_memory() does not
+ * accept that span, a notice is logged and the span is handed to
+ * memblock_add().
+ */
+static void __init check_kernel_sections_mem(void)
+{
+       phys_addr_t kbase = PFN_PHYS(PFN_DOWN(__pa_symbol(&_text)));
+       phys_addr_t kend = PFN_PHYS(PFN_UP(__pa_symbol(&_end)));
+       phys_addr_t ksize = kend - kbase;
+
+       /* Nothing to do when the image is already covered */
+       if (memblock_is_region_memory(kbase, ksize))
+               return;
+
+       pr_info("Kernel sections are not in the memory maps\n");
+       memblock_add(kbase, ksize);
+}
+
 #define USE_PROM_CMDLINE       IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_BOOTLOADER)
 #define USE_DTB_CMDLINE                IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB)
 #define EXTEND_WITH_PROM       IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND)
@@ -731,25 +580,6 @@ static void __init arch_mem_init(char **cmdline_p)
        plat_mem_setup();
        memblock_set_bottom_up(true);
 
-       /*
-        * Make sure all kernel memory is in the maps.  The "UP" and
-        * "DOWN" are opposite for initdata since if it crosses over
-        * into another memory section you don't want that to be
-        * freed when the initdata is freed.
-        */
-       arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT,
-                        PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT,
-                        BOOT_MEM_RAM);
-       arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT,
-                        PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT,
-                        BOOT_MEM_INIT_RAM);
-       arch_mem_addpart(PFN_DOWN(__pa_symbol(&__bss_start)) << PAGE_SHIFT,
-                        PFN_UP(__pa_symbol(&__bss_stop)) << PAGE_SHIFT,
-                        BOOT_MEM_RAM);
-
-       pr_info("Determined physical RAM map:\n");
-       print_memory_map();
-
 #if defined(CONFIG_CMDLINE_BOOL) && defined(CONFIG_CMDLINE_OVERRIDE)
        strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
 #else
@@ -783,14 +613,17 @@ static void __init arch_mem_init(char **cmdline_p)
 
        parse_early_param();
 
-       if (usermem) {
-               pr_info("User-defined physical RAM map:\n");
-               print_memory_map();
-       }
+       if (usermem)
+               pr_info("User-defined physical RAM map overwrite\n");
+
+       check_kernel_sections_mem();
 
        early_init_fdt_reserve_self();
        early_init_fdt_scan_reserved_mem();
 
+#ifndef CONFIG_NUMA
+       memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
+#endif
        bootmem_init();
 
        /*
@@ -830,12 +663,12 @@ static void __init arch_mem_init(char **cmdline_p)
 
        memblock_dump_all();
 
-       early_memtest(PFN_PHYS(min_low_pfn), PFN_PHYS(max_low_pfn));
+       early_memtest(PFN_PHYS(ARCH_PFN_OFFSET), PFN_PHYS(max_low_pfn));
 }
 
 static void __init resource_init(void)
 {
-       int i;
+       struct memblock_region *region;
 
        if (UNCAC_BASE != IO_BASE)
                return;
@@ -847,16 +680,10 @@ static void __init resource_init(void)
        bss_resource.start = __pa_symbol(&__bss_start);
        bss_resource.end = __pa_symbol(&__bss_stop) - 1;
 
-       for (i = 0; i < boot_mem_map.nr_map; i++) {
+       for_each_memblock(memory, region) {
+               phys_addr_t start = PFN_PHYS(memblock_region_memory_base_pfn(region));
+               phys_addr_t end = PFN_PHYS(memblock_region_memory_end_pfn(region)) - 1;
                struct resource *res;
-               unsigned long start, end;
-
-               start = boot_mem_map.map[i].addr;
-               end = boot_mem_map.map[i].addr + boot_mem_map.map[i].size - 1;
-               if (start >= HIGHMEM_START)
-                       continue;
-               if (end >= HIGHMEM_START)
-                       end = HIGHMEM_START - 1;
 
                res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
                if (!res)
@@ -865,20 +692,8 @@ static void __init resource_init(void)
 
                res->start = start;
                res->end = end;
-               res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-
-               switch (boot_mem_map.map[i].type) {
-               case BOOT_MEM_RAM:
-               case BOOT_MEM_INIT_RAM:
-               case BOOT_MEM_ROM_DATA:
-                       res->name = "System RAM";
-                       res->flags |= IORESOURCE_SYSRAM;
-                       break;
-               case BOOT_MEM_RESERVED:
-               case BOOT_MEM_NOMAP:
-               default:
-                       res->name = "reserved";
-               }
+               res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+               res->name = "System RAM";
 
                request_resource(&iomem_resource, res);
 
index b6dc78a..b0e25e9 100644 (file)
@@ -132,6 +132,7 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
                  [efault] "i" (-EFAULT)
                : "memory");
        } else if (cpu_has_llsc) {
+               loongson_llsc_mb();
                __asm__ __volatile__ (
                "       .set    push                                    \n"
                "       .set    "MIPS_ISA_ARCH_LEVEL"                   \n"
index acd338d..1e25707 100644 (file)
@@ -13,10 +13,10 @@ emit() {
        t_entry="$3"
 
        while [ $t_nxt -lt $t_nr ]; do
-               printf "__SYSCALL(%s, sys_ni_syscall, )\n" "${t_nxt}"
+               printf "__SYSCALL(%s,sys_ni_syscall)\n" "${t_nxt}"
                t_nxt=$((t_nxt+1))
        done
-       printf "__SYSCALL(%s, %s, )\n" "${t_nxt}" "${t_entry}"
+       printf "__SYSCALL(%s,%s)\n" "${t_nxt}" "${t_entry}"
 }
 
 grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
index 3a37268..bc35f84 100644 (file)
 #include <asm/mips-cps.h>
 #include <asm/page.h>
 #include <asm/vdso.h>
+#include <vdso/helpers.h>
+#include <vdso/vsyscall.h>
 
 /* Kernel-provided data used by the VDSO. */
-static union mips_vdso_data vdso_data __page_aligned_data;
+static union mips_vdso_data mips_vdso_data __page_aligned_data;
+struct vdso_data *vdso_data = mips_vdso_data.data;
 
 /*
  * Mapping for the VDSO data/GIC pages. The real pages are mapped manually, as
@@ -66,34 +69,6 @@ static int __init init_vdso(void)
 }
 subsys_initcall(init_vdso);
 
-void update_vsyscall(struct timekeeper *tk)
-{
-       vdso_data_write_begin(&vdso_data);
-
-       vdso_data.xtime_sec = tk->xtime_sec;
-       vdso_data.xtime_nsec = tk->tkr_mono.xtime_nsec;
-       vdso_data.wall_to_mono_sec = tk->wall_to_monotonic.tv_sec;
-       vdso_data.wall_to_mono_nsec = tk->wall_to_monotonic.tv_nsec;
-       vdso_data.cs_shift = tk->tkr_mono.shift;
-
-       vdso_data.clock_mode = tk->tkr_mono.clock->archdata.vdso_clock_mode;
-       if (vdso_data.clock_mode != VDSO_CLOCK_NONE) {
-               vdso_data.cs_mult = tk->tkr_mono.mult;
-               vdso_data.cs_cycle_last = tk->tkr_mono.cycle_last;
-               vdso_data.cs_mask = tk->tkr_mono.mask;
-       }
-
-       vdso_data_write_end(&vdso_data);
-}
-
-void update_vsyscall_tz(void)
-{
-       if (vdso_data.clock_mode != VDSO_CLOCK_NONE) {
-               vdso_data.tz_minuteswest = sys_tz.tz_minuteswest;
-               vdso_data.tz_dsttime = sys_tz.tz_dsttime;
-       }
-}
-
 static unsigned long vdso_base(void)
 {
        unsigned long base;
@@ -163,7 +138,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
         */
        if (cpu_has_dc_aliases) {
                base = __ALIGN_MASK(base, shm_align_mask);
-               base += ((unsigned long)&vdso_data - gic_size) & shm_align_mask;
+               base += ((unsigned long)vdso_data - gic_size) & shm_align_mask;
        }
 
        data_addr = base + gic_size;
@@ -189,7 +164,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 
        /* Map data page. */
        ret = remap_pfn_range(vma, data_addr,
-                             virt_to_phys(&vdso_data) >> PAGE_SHIFT,
+                             virt_to_phys(vdso_data) >> PAGE_SHIFT,
                              PAGE_SIZE, PAGE_READONLY);
        if (ret)
                goto out;
index b4323b2..156a95a 100644 (file)
@@ -468,14 +468,14 @@ void __init ltq_soc_init(void)
                clkdev_add_pmu("1f203018.usb2-phy", "phy", 1, 2, PMU_ANALOG_USB0_P);
                clkdev_add_pmu("1f203034.usb2-phy", "phy", 1, 2, PMU_ANALOG_USB1_P);
                /* rc 0 */
-               clkdev_add_pmu("1d900000.pcie", "phy", 1, 2, PMU_ANALOG_PCIE0_P);
+               clkdev_add_pmu("1f106800.phy", "phy", 1, 2, PMU_ANALOG_PCIE0_P);
                clkdev_add_pmu("1d900000.pcie", "msi", 1, 1, PMU1_PCIE_MSI);
-               clkdev_add_pmu("1d900000.pcie", "pdi", 1, 1, PMU1_PCIE_PDI);
+               clkdev_add_pmu("1f106800.phy", "pdi", 1, 1, PMU1_PCIE_PDI);
                clkdev_add_pmu("1d900000.pcie", "ctl", 1, 1, PMU1_PCIE_CTL);
                /* rc 1 */
-               clkdev_add_pmu("19000000.pcie", "phy", 1, 2, PMU_ANALOG_PCIE1_P);
+               clkdev_add_pmu("1f700400.phy", "phy", 1, 2, PMU_ANALOG_PCIE1_P);
                clkdev_add_pmu("19000000.pcie", "msi", 1, 1, PMU1_PCIE1_MSI);
-               clkdev_add_pmu("19000000.pcie", "pdi", 1, 1, PMU1_PCIE1_PDI);
+               clkdev_add_pmu("1f700400.phy", "pdi", 1, 1, PMU1_PCIE1_PDI);
                clkdev_add_pmu("19000000.pcie", "ctl", 1, 1, PMU1_PCIE1_CTL);
        }
 
@@ -499,9 +499,9 @@ void __init ltq_soc_init(void)
                clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0);
                clkdev_add_pmu("1e106000.usb", "otg", 1, 0, PMU_USB1);
                /* rc 2 */
-               clkdev_add_pmu("1a800000.pcie", "phy", 1, 2, PMU_ANALOG_PCIE2_P);
+               clkdev_add_pmu("1f106a00.pcie", "phy", 1, 2, PMU_ANALOG_PCIE2_P);
                clkdev_add_pmu("1a800000.pcie", "msi", 1, 1, PMU1_PCIE2_MSI);
-               clkdev_add_pmu("1a800000.pcie", "pdi", 1, 1, PMU1_PCIE2_PDI);
+               clkdev_add_pmu("1f106a00.pcie", "pdi", 1, 1, PMU1_PCIE2_PDI);
                clkdev_add_pmu("1a800000.pcie", "ctl", 1, 1, PMU1_PCIE2_CTL);
                clkdev_add_pmu("1e10b308.eth", NULL, 0, 0, PMU_SWITCH | PMU_PPE_DP);
                clkdev_add_pmu("1da00000.usif", "NULL", 1, 0, PMU_USIF);
@@ -526,10 +526,10 @@ void __init ltq_soc_init(void)
                clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0 | PMU_AHBM);
                clkdev_add_pmu("1f203034.usb2-phy", "phy", 1, 0, PMU_USB1_P);
                clkdev_add_pmu("1e106000.usb", "otg", 1, 0, PMU_USB1 | PMU_AHBM);
-               clkdev_add_pmu("1d900000.pcie", "phy", 1, 1, PMU1_PCIE_PHY);
+               clkdev_add_pmu("1f106800.phy", "phy", 1, 1, PMU1_PCIE_PHY);
                clkdev_add_pmu("1d900000.pcie", "bus", 1, 0, PMU_PCIE_CLK);
                clkdev_add_pmu("1d900000.pcie", "msi", 1, 1, PMU1_PCIE_MSI);
-               clkdev_add_pmu("1d900000.pcie", "pdi", 1, 1, PMU1_PCIE_PDI);
+               clkdev_add_pmu("1f106800.phy", "pdi", 1, 1, PMU1_PCIE_PDI);
                clkdev_add_pmu("1d900000.pcie", "ctl", 1, 1, PMU1_PCIE_CTL);
                clkdev_add_pmu(NULL, "ahb", 1, 0, PMU_AHBM | PMU_AHBS);
 
index 1e8d335..46f483e 100644 (file)
@@ -28,11 +28,11 @@ obj-$(CONFIG_HIGHMEM)               += highmem.o
 obj-$(CONFIG_HUGETLB_PAGE)     += hugetlbpage.o
 obj-$(CONFIG_DMA_NONCOHERENT)  += dma-noncoherent.o
 
+obj-$(CONFIG_CPU_R3K_TLB)      += tlb-r3k.o
 obj-$(CONFIG_CPU_R4K_CACHE_TLB) += c-r4k.o cex-gen.o tlb-r4k.o
-obj-$(CONFIG_CPU_R3000)                += c-r3k.o tlb-r3k.o
-obj-$(CONFIG_CPU_R8000)                += c-r4k.o cex-gen.o tlb-r8k.o
+obj-$(CONFIG_CPU_R3000)                += c-r3k.o
 obj-$(CONFIG_CPU_SB1)          += c-r4k.o cerr-sb1.o cex-sb1.o tlb-r4k.o
-obj-$(CONFIG_CPU_TX39XX)       += c-tx39.o tlb-r3k.o
+obj-$(CONFIG_CPU_TX39XX)       += c-tx39.o
 obj-$(CONFIG_CPU_CAVIUM_OCTEON) += c-octeon.o cex-oct.o tlb-r4k.o
 
 obj-$(CONFIG_IP22_CPU_SCACHE)  += sc-ip22.o
index 5166e38..89b9c85 100644 (file)
@@ -1098,7 +1098,6 @@ static void probe_pcache(void)
                c->options |= MIPS_CPU_CACHE_CDEX_P;
                break;
 
-       case CPU_R5432:
        case CPU_R5500:
                icache_size = 1 << (12 + ((config & CONF_IC) >> 9));
                c->icache.linesz = 16 << ((config & CONF_IB) >> 5);
@@ -1134,7 +1133,6 @@ static void probe_pcache(void)
        case CPU_R4400PC:
        case CPU_R4400SC:
        case CPU_R4400MC:
-       case CPU_R4300:
                icache_size = 1 << (12 + ((config & CONF_IC) >> 9));
                c->icache.linesz = 16 << ((config & CONF_IB) >> 5);
                c->icache.ways = 1;
index 8a038b3..090fa65 100644 (file)
@@ -269,37 +269,46 @@ void __init fixrange_init(unsigned long start, unsigned long end,
 #endif
 }
 
-unsigned __weak platform_maar_init(unsigned num_pairs)
+struct maar_walk_info {
+       struct maar_config cfg[16];     /* entries written by maar_res_walk() */
+       unsigned int num_cfg;           /* how many cfg[] entries are filled */
+};
+
+/*
+ * maar_res_walk() - record one RAM range as a MAAR config entry
+ * @start_pfn: first page frame number of the range
+ * @nr_pages: number of pages in the range
+ * @data: the struct maar_walk_info being filled in
+ *
+ * Used as the callback passed to walk_system_ram_range(). The lower bound is
+ * rounded up and the upper bound rounded down to the MAAR alignment before
+ * being stored in the next free cfg[] slot. Once cfg[] is full, further
+ * ranges are dropped with a warning and nothing is written.
+ *
+ * Always returns 0.
+ */
+static int maar_res_walk(unsigned long start_pfn, unsigned long nr_pages,
+                        void *data)
 {
-       struct maar_config cfg[BOOT_MEM_MAP_MAX];
-       unsigned i, num_configured, num_cfg = 0;
-
-       for (i = 0; i < boot_mem_map.nr_map; i++) {
-               switch (boot_mem_map.map[i].type) {
-               case BOOT_MEM_RAM:
-               case BOOT_MEM_INIT_RAM:
-                       break;
-               default:
-                       continue;
-               }
+       struct maar_walk_info *wi = data;
+       struct maar_config *cfg;
+       unsigned int maar_align;
 
-               /* Round lower up */
-               cfg[num_cfg].lower = boot_mem_map.map[i].addr;
-               cfg[num_cfg].lower = (cfg[num_cfg].lower + 0xffff) & ~0xffff;
+       /*
+        * Check the bound before touching the array: slot num_cfg only
+        * exists while num_cfg < ARRAY_SIZE(wi->cfg), and a store to
+        * cfg[ARRAY_SIZE] would land on num_cfg and whatever follows it.
+        */
+       if (WARN_ON(wi->num_cfg >= ARRAY_SIZE(wi->cfg)))
+               return 0;
 
-               /* Round upper down */
-               cfg[num_cfg].upper = boot_mem_map.map[i].addr +
-                                       boot_mem_map.map[i].size;
-               cfg[num_cfg].upper = (cfg[num_cfg].upper & ~0xffff) - 1;
+       /* MAAR registers hold physical addresses right shifted by 4 bits */
+       maar_align = BIT(MIPS_MAAR_ADDR_SHIFT + 4);
+
+       /* Claim the next free MAAR config entry and fill it in */
+       cfg = &wi->cfg[wi->num_cfg++];
+       cfg->lower = ALIGN(PFN_PHYS(start_pfn), maar_align);
+       cfg->upper = ALIGN_DOWN(PFN_PHYS(start_pfn + nr_pages), maar_align) - 1;
+       cfg->attrs = MIPS_MAAR_S;
+
+       return 0;
+}
 
-               cfg[num_cfg].attrs = MIPS_MAAR_S;
-               num_cfg++;
-       }
 
-       num_configured = maar_config(cfg, num_cfg, num_pairs);
-       if (num_configured < num_cfg)
-               pr_warn("Not enough MAAR pairs (%u) for all bootmem regions (%u)\n",
-                       num_pairs, num_cfg);
+/*
+ * Weak default MAAR setup.
+ *
+ * Runs walk_system_ram_range() over page frames [0, max_pfn) with
+ * maar_res_walk() as the callback, then passes the collected entries to
+ * maar_config() together with num_pairs.
+ *
+ * Returns the value reported by maar_config(); a warning is printed when that
+ * value is smaller than the number of collected entries.
+ */
+unsigned __weak platform_maar_init(unsigned num_pairs)
+{
+       struct maar_walk_info ram;
+       unsigned int num_configured;
+
+       /* Only the count is reset here; entries are written during the walk */
+       ram.num_cfg = 0;
+       walk_system_ram_range(0, max_pfn, &ram, maar_res_walk);
+
+       num_configured = maar_config(ram.cfg, ram.num_cfg, num_pairs);
+       if (ram.num_cfg > num_configured)
+               pr_warn("Not enough MAAR pairs (%u) for all memory regions (%u)\n",
+                       num_pairs, ram.num_cfg);
 
        return num_configured;
 }
@@ -382,33 +391,6 @@ void maar_init(void)
 }
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-int page_is_ram(unsigned long pagenr)
-{
-       int i;
-
-       for (i = 0; i < boot_mem_map.nr_map; i++) {
-               unsigned long addr, end;
-
-               switch (boot_mem_map.map[i].type) {
-               case BOOT_MEM_RAM:
-               case BOOT_MEM_INIT_RAM:
-                       break;
-               default:
-                       /* not usable memory */
-                       continue;
-               }
-
-               addr = PFN_UP(boot_mem_map.map[i].addr);
-               end = PFN_DOWN(boot_mem_map.map[i].addr +
-                              boot_mem_map.map[i].size);
-
-               if (pagenr >= addr && pagenr < end)
-                       return 1;
-       }
-
-       return 0;
-}
-
 void __init paging_init(void)
 {
        unsigned long max_zone_pfns[MAX_NR_ZONES];
@@ -443,7 +425,7 @@ void __init paging_init(void)
 static struct kcore_list kcore_kseg0;
 #endif
 
-static inline void mem_init_free_highmem(void)
+static inline void __init mem_init_free_highmem(void)
 {
 #ifdef CONFIG_HIGHMEM
        unsigned long tmp;
@@ -454,7 +436,7 @@ static inline void mem_init_free_highmem(void)
        for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
                struct page *page = pfn_to_page(tmp);
 
-               if (!page_is_ram(tmp))
+               if (!memblock_is_memory(PFN_PHYS(tmp)))
                        SetPageReserved(page);
                else
                        free_highmem_page(page);
@@ -464,6 +446,12 @@ static inline void mem_init_free_highmem(void)
 
 void __init mem_init(void)
 {
+       /*
+        * When _PFN_SHIFT is greater than PAGE_SHIFT we won't have enough PTE
+        * bits to hold a full 32b physical address on MIPS32 systems.
+        */
+       BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (_PFN_SHIFT > PAGE_SHIFT));
+
 #ifdef CONFIG_HIGHMEM
 #ifdef CONFIG_DISCONTIGMEM
 #error "CONFIG_HIGHMEM and CONFIG_DISCONTIGMEM dont work together yet"
index d79f2b4..00fe90c 100644 (file)
 unsigned long shm_align_mask = PAGE_SIZE - 1;  /* Sane caches */
 EXPORT_SYMBOL(shm_align_mask);
 
-/* gap between mmap and stack */
-#define MIN_GAP (128*1024*1024UL)
-#define MAX_GAP ((TASK_SIZE)/6*5)
-
-static int mmap_is_legacy(struct rlimit *rlim_stack)
-{
-       if (current->personality & ADDR_COMPAT_LAYOUT)
-               return 1;
-
-       if (rlim_stack->rlim_cur == RLIM_INFINITY)
-               return 1;
-
-       return sysctl_legacy_va_layout;
-}
-
-static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
-{
-       unsigned long gap = rlim_stack->rlim_cur;
-
-       if (gap < MIN_GAP)
-               gap = MIN_GAP;
-       else if (gap > MAX_GAP)
-               gap = MAX_GAP;
-
-       return PAGE_ALIGN(TASK_SIZE - gap - rnd);
-}
-
 #define COLOUR_ALIGN(addr, pgoff)                              \
        ((((addr) + shm_align_mask) & ~shm_align_mask) +        \
         (((pgoff) << PAGE_SHIFT) & shm_align_mask))
@@ -144,63 +117,6 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
                        addr0, len, pgoff, flags, DOWN);
 }
 
-unsigned long arch_mmap_rnd(void)
-{
-       unsigned long rnd;
-
-#ifdef CONFIG_COMPAT
-       if (TASK_IS_32BIT_ADDR)
-               rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
-       else
-#endif /* CONFIG_COMPAT */
-               rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
-
-       return rnd << PAGE_SHIFT;
-}
-
-void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
-{
-       unsigned long random_factor = 0UL;
-
-       if (current->flags & PF_RANDOMIZE)
-               random_factor = arch_mmap_rnd();
-
-       if (mmap_is_legacy(rlim_stack)) {
-               mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
-               mm->get_unmapped_area = arch_get_unmapped_area;
-       } else {
-               mm->mmap_base = mmap_base(random_factor, rlim_stack);
-               mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-       }
-}
-
-static inline unsigned long brk_rnd(void)
-{
-       unsigned long rnd = get_random_long();
-
-       rnd = rnd << PAGE_SHIFT;
-       /* 8MB for 32bit, 256MB for 64bit */
-       if (TASK_IS_32BIT_ADDR)
-               rnd = rnd & 0x7ffffful;
-       else
-               rnd = rnd & 0xffffffful;
-
-       return rnd;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-       unsigned long base = mm->brk;
-       unsigned long ret;
-
-       ret = PAGE_ALIGN(base + brk_rnd());
-
-       if (ret < mm->brk)
-               return mm->brk;
-
-       return ret;
-}
-
 bool __virt_addr_valid(const volatile void *kaddr)
 {
        unsigned long vaddr = (unsigned long)kaddr;
index e2a33ad..6416a53 100644 (file)
@@ -12,6 +12,7 @@
 #include <asm/fixmap.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
 
 void pgd_init(unsigned long page)
 {
@@ -30,6 +31,25 @@ void pgd_init(unsigned long page)
        }
 }
 
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
+/*
+ * Build a PMD value for @page: its page frame number shifted left by
+ * _PFN_SHIFT, OR-ed with the bits of @prot.
+ */
+pmd_t mk_pmd(struct page *page, pgprot_t prot)
+{
+       unsigned long pfn = page_to_pfn(page);
+       pmd_t entry;
+
+       pmd_val(entry) = (pfn << _PFN_SHIFT) | pgprot_val(prot);
+       return entry;
+}
+
+
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+               pmd_t *pmdp, pmd_t pmd)
+{
+       *pmdp = pmd;            /* store the new entry before flushing */
+       flush_tlb_all();        /* takes no address, so @mm and @addr go unused */
+}
+#endif /* defined(CONFIG_TRANSPARENT_HUGEPAGE) */
+
 void __init pagetable_init(void)
 {
        unsigned long vaddr;
index 3946739..dbdbfe5 100644 (file)
@@ -221,13 +221,26 @@ static inline int __init mips_sc_probe(void)
        else
                return 0;
 
-       /*
-        * According to config2 it would be 5-ways, but that is contradicted
-        * by all documentation.
-        */
-       if (current_cpu_type() == CPU_JZRISC &&
-                               mips_machtype == MACH_INGENIC_JZ4770)
-               c->scache.ways = 4;
+       if (current_cpu_type() == CPU_XBURST) {
+               switch (mips_machtype) {
+               /*
+                * According to config2 it would be 5-ways, but that is
+                * contradicted by all documentation.
+                */
+               case MACH_INGENIC_JZ4770:
+                       c->scache.ways = 4;
+                       break;
+
+               /*
+                * According to config2 it would be 5-ways and 512-sets,
+                * but that is contradicted by all documentation.
+                */
+               case MACH_INGENIC_X1000:
+                       c->scache.sets = 256;
+                       c->scache.ways = 4;
+                       break;
+               }
+       }
 
        c->scache.waysize = c->scache.sets * c->scache.linesz;
        c->scache.waybit = __ffs(c->scache.waysize);
diff --git a/arch/mips/mm/tlb-r8k.c b/arch/mips/mm/tlb-r8k.c
deleted file mode 100644 (file)
index c1e9e14..0000000
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1996 David S. Miller (davem@davemloft.net)
- * Copyright (C) 1997, 1998, 1999, 2000 Ralf Baechle ralf@gnu.org
- * Carsten Langgaard, carstenl@mips.com
- * Copyright (C) 2002 MIPS Technologies, Inc.  All rights reserved.
- */
-#include <linux/sched.h>
-#include <linux/smp.h>
-#include <linux/mm.h>
-
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
-
-extern void build_tlb_refill_handler(void);
-
-#define TFP_TLB_SIZE           384
-#define TFP_TLB_SET_SHIFT      7
-
-/* CP0 hazard avoidance. */
-#define BARRIER __asm__ __volatile__(".set noreorder\n\t" \
-                                    "nop; nop; nop; nop; nop; nop;\n\t" \
-                                    ".set reorder\n\t")
-
-void local_flush_tlb_all(void)
-{
-       unsigned long flags;
-       unsigned long old_ctx;
-       int entry;
-
-       local_irq_save(flags);
-       /* Save old context and create impossible VPN2 value */
-       old_ctx = read_c0_entryhi();
-       write_c0_entrylo(0);
-
-       for (entry = 0; entry < TFP_TLB_SIZE; entry++) {
-               write_c0_tlbset(entry >> TFP_TLB_SET_SHIFT);
-               write_c0_vaddr(entry << PAGE_SHIFT);
-               write_c0_entryhi(CKSEG0 + (entry << (PAGE_SHIFT + 1)));
-               mtc0_tlbw_hazard();
-               tlb_write();
-       }
-       tlbw_use_hazard();
-       write_c0_entryhi(old_ctx);
-       local_irq_restore(flags);
-}
-
-void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-       unsigned long end)
-{
-       struct mm_struct *mm = vma->vm_mm;
-       int cpu = smp_processor_id();
-       unsigned long flags;
-       int oldpid, newpid, size;
-
-       if (!cpu_context(cpu, mm))
-               return;
-
-       size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
-       size = (size + 1) >> 1;
-
-       local_irq_save(flags);
-
-       if (size > TFP_TLB_SIZE / 2) {
-               drop_mmu_context(mm);
-               goto out_restore;
-       }
-
-       oldpid = read_c0_entryhi();
-       newpid = cpu_asid(cpu, mm);
-
-       write_c0_entrylo(0);
-
-       start &= PAGE_MASK;
-       end += (PAGE_SIZE - 1);
-       end &= PAGE_MASK;
-       while (start < end) {
-               signed long idx;
-
-               write_c0_vaddr(start);
-               write_c0_entryhi(start);
-               start += PAGE_SIZE;
-               tlb_probe();
-               idx = read_c0_tlbset();
-               if (idx < 0)
-                       continue;
-
-               write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1)));
-               tlb_write();
-       }
-       write_c0_entryhi(oldpid);
-
-out_restore:
-       local_irq_restore(flags);
-}
-
-/* Usable for KV1 addresses only! */
-void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
-       unsigned long size, flags;
-
-       size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
-       size = (size + 1) >> 1;
-
-       if (size > TFP_TLB_SIZE / 2) {
-               local_flush_tlb_all();
-               return;
-       }
-
-       local_irq_save(flags);
-
-       write_c0_entrylo(0);
-
-       start &= PAGE_MASK;
-       end += (PAGE_SIZE - 1);
-       end &= PAGE_MASK;
-       while (start < end) {
-               signed long idx;
-
-               write_c0_vaddr(start);
-               write_c0_entryhi(start);
-               start += PAGE_SIZE;
-               tlb_probe();
-               idx = read_c0_tlbset();
-               if (idx < 0)
-                       continue;
-
-               write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1)));
-               tlb_write();
-       }
-
-       local_irq_restore(flags);
-}
-
-void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
-{
-       int cpu = smp_processor_id();
-       unsigned long flags;
-       int oldpid, newpid;
-       signed long idx;
-
-       if (!cpu_context(cpu, vma->vm_mm))
-               return;
-
-       newpid = cpu_asid(cpu, vma->vm_mm);
-       page &= PAGE_MASK;
-       local_irq_save(flags);
-       oldpid = read_c0_entryhi();
-       write_c0_vaddr(page);
-       write_c0_entryhi(newpid);
-       tlb_probe();
-       idx = read_c0_tlbset();
-       if (idx < 0)
-               goto finish;
-
-       write_c0_entrylo(0);
-       write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1)));
-       tlb_write();
-
-finish:
-       write_c0_entryhi(oldpid);
-       local_irq_restore(flags);
-}
-
-/*
- * We will need multiple versions of update_mmu_cache(), one that just
- * updates the TLB with the new pte(s), and another which also checks
- * for the R4k "end of page" hardware bug and does the needy.
- */
-void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
-{
-       unsigned long flags;
-       pgd_t *pgdp;
-       pmd_t *pmdp;
-       pte_t *ptep;
-       int pid;
-
-       /*
-        * Handle debugger faulting in for debugee.
-        */
-       if (current->active_mm != vma->vm_mm)
-               return;
-
-       pid = read_c0_entryhi() & cpu_asid_mask(&current_cpu_data);
-
-       local_irq_save(flags);
-       address &= PAGE_MASK;
-       write_c0_vaddr(address);
-       write_c0_entryhi(pid);
-       pgdp = pgd_offset(vma->vm_mm, address);
-       pmdp = pmd_offset(pgdp, address);
-       ptep = pte_offset_map(pmdp, address);
-       tlb_probe();
-
-       write_c0_entrylo(pte_val(*ptep++) >> 6);
-       tlb_write();
-
-       write_c0_entryhi(pid);
-       local_irq_restore(flags);
-}
-
-static void probe_tlb(unsigned long config)
-{
-       struct cpuinfo_mips *c = &current_cpu_data;
-
-       c->tlbsize = 3 * 128;           /* 3 sets each 128 entries */
-}
-
-void tlb_init(void)
-{
-       unsigned int config = read_c0_config();
-       unsigned long status;
-
-       probe_tlb(config);
-
-       status = read_c0_status();
-       status &= ~(ST0_UPS | ST0_KPS);
-#ifdef CONFIG_PAGE_SIZE_4KB
-       status |= (TFP_PAGESIZE_4K << 32) | (TFP_PAGESIZE_4K << 36);
-#elif defined(CONFIG_PAGE_SIZE_8KB)
-       status |= (TFP_PAGESIZE_8K << 32) | (TFP_PAGESIZE_8K << 36);
-#elif defined(CONFIG_PAGE_SIZE_16KB)
-       status |= (TFP_PAGESIZE_16K << 32) | (TFP_PAGESIZE_16K << 36);
-#elif defined(CONFIG_PAGE_SIZE_64KB)
-       status |= (TFP_PAGESIZE_64K << 32) | (TFP_PAGESIZE_64K << 36);
-#endif
-       write_c0_status(status);
-
-       write_c0_wired(0);
-
-       local_flush_tlb_all();
-
-       build_tlb_refill_handler();
-}
index 144ceb0..e01cb33 100644 (file)
@@ -545,7 +545,6 @@ void build_tlb_write_entry(u32 **p, struct uasm_label **l,
                tlbw(p);
                break;
 
-       case CPU_R4300:
        case CPU_5KC:
        case CPU_TX49XX:
        case CPU_PR4450:
@@ -604,13 +603,12 @@ void build_tlb_write_entry(u32 **p, struct uasm_label **l,
 
        case CPU_VR4131:
        case CPU_VR4133:
-       case CPU_R5432:
                uasm_i_nop(p);
                uasm_i_nop(p);
                tlbw(p);
                break;
 
-       case CPU_JZRISC:
+       case CPU_XBURST:
                tlbw(p);
                uasm_i_nop(p);
                break;
@@ -631,7 +629,7 @@ static __maybe_unused void build_convert_pte_to_entrylo(u32 **p,
                return;
        }
 
-       if (cpu_has_rixi && _PAGE_NO_EXEC) {
+       if (cpu_has_rixi && !!_PAGE_NO_EXEC) {
                if (fill_includes_sw_bits) {
                        UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL));
                } else {
@@ -2609,21 +2607,11 @@ void build_tlb_refill_handler(void)
        check_for_high_segbits = current_cpu_data.vmbits > (PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);
 #endif
 
-       switch (current_cpu_type()) {
-       case CPU_R2000:
-       case CPU_R3000:
-       case CPU_R3000A:
-       case CPU_R3081E:
-       case CPU_TX3912:
-       case CPU_TX3922:
-       case CPU_TX3927:
+       if (cpu_has_3kex) {
 #ifndef CONFIG_MIPS_PGD_C0_CONTEXT
-               if (cpu_has_local_ebase)
-                       build_r3000_tlb_refill_handler();
                if (!run_once) {
-                       if (!cpu_has_local_ebase)
-                               build_r3000_tlb_refill_handler();
                        build_setup_pgd();
+                       build_r3000_tlb_refill_handler();
                        build_r3000_tlb_load_handler();
                        build_r3000_tlb_store_handler();
                        build_r3000_tlb_modify_handler();
@@ -2633,34 +2621,27 @@ void build_tlb_refill_handler(void)
 #else
                panic("No R3000 TLB refill handler");
 #endif
-               break;
+               return;
+       }
 
-       case CPU_R8000:
-               panic("No R8000 TLB refill handler yet");
-               break;
+       if (cpu_has_ldpte)
+               setup_pw();
 
-       default:
+       if (!run_once) {
+               scratch_reg = allocate_kscratch();
+               build_setup_pgd();
+               build_r4000_tlb_load_handler();
+               build_r4000_tlb_store_handler();
+               build_r4000_tlb_modify_handler();
                if (cpu_has_ldpte)
-                       setup_pw();
-
-               if (!run_once) {
-                       scratch_reg = allocate_kscratch();
-                       build_setup_pgd();
-                       build_r4000_tlb_load_handler();
-                       build_r4000_tlb_store_handler();
-                       build_r4000_tlb_modify_handler();
-                       if (cpu_has_ldpte)
-                               build_loongson3_tlb_refill_handler();
-                       else if (!cpu_has_local_ebase)
-                               build_r4000_tlb_refill_handler();
-                       flush_tlb_handlers();
-                       run_once++;
-               }
-               if (cpu_has_local_ebase)
+                       build_loongson3_tlb_refill_handler();
+               else
                        build_r4000_tlb_refill_handler();
-               if (cpu_has_xpa)
-                       config_xpa_params();
-               if (cpu_has_htw)
-                       config_htw_params();
+               flush_tlb_handlers();
+               run_once++;
        }
+       if (cpu_has_xpa)
+               config_xpa_params();
+       if (cpu_has_htw)
+               config_htw_params();
 }
index 868921a..7c25a0a 100644 (file)
@@ -39,17 +39,6 @@ void __init fw_meminit(void)
 
 void __init prom_free_prom_memory(void)
 {
-       unsigned long addr;
-       int i;
-
-       for (i = 0; i < boot_mem_map.nr_map; i++) {
-               if (boot_mem_map.map[i].type != BOOT_MEM_ROM_DATA)
-                       continue;
-
-               addr = boot_mem_map.map[i].addr;
-               free_init_pages("YAMON memory",
-                               addr, addr + boot_mem_map.map[i].size);
-       }
 }
 
 phys_addr_t mips_cdmm_phys_base(void)
index f743fd9..1a0fc5b 100644 (file)
@@ -34,6 +34,7 @@
 
 #include <linux/kernel.h>
 #include <linux/of_fdt.h>
+#include <linux/memblock.h>
 
 #include <asm/idle.h>
 #include <asm/reboot.h>
@@ -67,12 +68,11 @@ static void nlm_linux_exit(void)
 static void nlm_fixup_mem(void)
 {
        const int pref_backup = 512;
-       int i;
+       struct memblock_region *mem;
 
-       for (i = 0; i < boot_mem_map.nr_map; i++) {
-               if (boot_mem_map.map[i].type != BOOT_MEM_RAM)
-                       continue;
-               boot_mem_map.map[i].size -= pref_backup;
+       for_each_memblock(memory, mem) {
+               memblock_remove(mem->base + mem->size - pref_backup,
+                       pref_backup);
        }
 }
 
@@ -110,7 +110,7 @@ void __init plat_mem_setup(void)
        /* memory and bootargs from DT */
        xlp_early_init_devtree();
 
-       if (boot_mem_map.nr_map == 0) {
+       if (memblock_end_of_DRAM() == 0) {
                pr_info("Using DRAM BARs for memory map.\n");
                xlp_init_mem_from_bars();
        }
index bcf7f55..7b4d403 100644 (file)
  * Most of the IOC3 PCI config register aren't present
  * we emulate what is needed for a normal PCI enumeration
  */
-static u32 emulate_ioc3_cfg(int where, int size)
+static int ioc3_cfg_rd(void *addr, int where, int size, u32 *value)
 {
-       if (size == 1 && where == 0x3d)
-               return 0x01;
-       else if (size == 2 && where == 0x3c)
-               return 0x0100;
-       else if (size == 4 && where == 0x3c)
-               return 0x00000100;
+       u32 cf, shift, mask;
 
-       return 0;
+       switch (where & ~3) {   /* dispatch on the dword-aligned offset */
+       case 0x00 ... 0x10:
+       case 0x40 ... 0x44:
+               if (get_dbe(cf, (u32 *)addr))   /* read the dword at addr into cf */
+                       return PCIBIOS_DEVICE_NOT_FOUND;
+               break;
+       case 0x3c:
+               /* emulate sane interrupt pin value */
+               cf = 0x00000100;
+               break;
+       default:
+               cf = 0;         /* every other offset reads back as zero */
+               break;
+       }
+       shift = (where & 3) << 3;       /* bit offset of byte (where & 3) in the dword */
+       mask = 0xffffffffU >> ((4 - size) << 3);        /* keeps the low `size` bytes */
+       *value = (cf >> shift) & mask;
+
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static int ioc3_cfg_wr(void *addr, int where, int size, u32 value)
+{
+       u32 cf, shift, mask, smask;
+
+       if ((where >= 0x14 && where < 0x40) || (where >= 0x48))
+               return PCIBIOS_SUCCESSFUL;      /* write is dropped, success is reported */
+
+       if (get_dbe(cf, (u32 *)addr))
+               return PCIBIOS_DEVICE_NOT_FOUND;
+
+       shift = ((where & 3) << 3);     /* bit offset of byte (where & 3) in the dword */
+       mask = (0xffffffffU >> ((4 - size) << 3));      /* keeps the low `size` bytes */
+       smask = mask << shift;          /* the same mask moved onto the target bytes */
+
+       cf = (cf & ~smask) | ((value & mask) << shift); /* merge value into the dword read above */
+       if (put_dbe(cf, (u32 *)addr))
+               return PCIBIOS_DEVICE_NOT_FOUND;
+
+       return PCIBIOS_SUCCESSFUL;
 }
 
 static void bridge_disable_swapping(struct pci_dev *dev)
@@ -64,7 +98,7 @@ static int pci_conf0_read_config(struct pci_bus *bus, unsigned int devfn,
        int slot = PCI_SLOT(devfn);
        int fn = PCI_FUNC(devfn);
        void *addr;
-       u32 cf, shift, mask;
+       u32 cf;
        int res;
 
        addr = &bridge->b_type0_cfg_dev[slot].f[fn].c[PCI_VENDOR_ID];
@@ -75,8 +109,10 @@ static int pci_conf0_read_config(struct pci_bus *bus, unsigned int devfn,
         * IOC3 is broken beyond belief ...  Don't even give the
         * generic PCI code a chance to look at it for real ...
         */
-       if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16)))
-               goto is_ioc3;
+       if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16))) {
+               addr = &bridge->b_type0_cfg_dev[slot].f[fn].l[where >> 2];
+               return ioc3_cfg_rd(addr, where, size, value);
+       }
 
        addr = &bridge->b_type0_cfg_dev[slot].f[fn].c[where ^ (4 - size)];
 
@@ -88,26 +124,6 @@ static int pci_conf0_read_config(struct pci_bus *bus, unsigned int devfn,
                res = get_dbe(*value, (u32 *)addr);
 
        return res ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
-
-is_ioc3:
-
-       /*
-        * IOC3 special handling
-        */
-       if ((where >= 0x14 && where < 0x40) || (where >= 0x48)) {
-               *value = emulate_ioc3_cfg(where, size);
-               return PCIBIOS_SUCCESSFUL;
-       }
-
-       addr = &bridge->b_type0_cfg_dev[slot].f[fn].l[where >> 2];
-       if (get_dbe(cf, (u32 *)addr))
-               return PCIBIOS_DEVICE_NOT_FOUND;
-
-       shift = ((where & 3) << 3);
-       mask = (0xffffffffU >> ((4 - size) << 3));
-       *value = (cf >> shift) & mask;
-
-       return PCIBIOS_SUCCESSFUL;
 }
 
 static int pci_conf1_read_config(struct pci_bus *bus, unsigned int devfn,
@@ -119,7 +135,7 @@ static int pci_conf1_read_config(struct pci_bus *bus, unsigned int devfn,
        int slot = PCI_SLOT(devfn);
        int fn = PCI_FUNC(devfn);
        void *addr;
-       u32 cf, shift, mask;
+       u32 cf;
        int res;
 
        bridge_write(bc, b_pci_cfg, (busno << 16) | (slot << 11));
@@ -131,8 +147,10 @@ static int pci_conf1_read_config(struct pci_bus *bus, unsigned int devfn,
         * IOC3 is broken beyond belief ...  Don't even give the
         * generic PCI code a chance to look at it for real ...
         */
-       if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16)))
-               goto is_ioc3;
+       if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16))) {
+               addr = &bridge->b_type1_cfg.c[(fn << 8) | (where & ~3)];
+               return ioc3_cfg_rd(addr, where, size, value);
+       }
 
        addr = &bridge->b_type1_cfg.c[(fn << 8) | (where ^ (4 - size))];
 
@@ -144,26 +162,6 @@ static int pci_conf1_read_config(struct pci_bus *bus, unsigned int devfn,
                res = get_dbe(*value, (u32 *)addr);
 
        return res ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
-
-is_ioc3:
-
-       /*
-        * IOC3 special handling
-        */
-       if ((where >= 0x14 && where < 0x40) || (where >= 0x48)) {
-               *value = emulate_ioc3_cfg(where, size);
-               return PCIBIOS_SUCCESSFUL;
-       }
-
-       addr = &bridge->b_type1_cfg.c[(fn << 8) | where];
-       if (get_dbe(cf, (u32 *)addr))
-               return PCIBIOS_DEVICE_NOT_FOUND;
-
-       shift = ((where & 3) << 3);
-       mask = (0xffffffffU >> ((4 - size) << 3));
-       *value = (cf >> shift) & mask;
-
-       return PCIBIOS_SUCCESSFUL;
 }
 
 static int pci_read_config(struct pci_bus *bus, unsigned int devfn,
@@ -183,7 +181,7 @@ static int pci_conf0_write_config(struct pci_bus *bus, unsigned int devfn,
        int slot = PCI_SLOT(devfn);
        int fn = PCI_FUNC(devfn);
        void *addr;
-       u32 cf, shift, mask, smask;
+       u32 cf;
        int res;
 
        addr = &bridge->b_type0_cfg_dev[slot].f[fn].c[PCI_VENDOR_ID];
@@ -194,8 +192,10 @@ static int pci_conf0_write_config(struct pci_bus *bus, unsigned int devfn,
         * IOC3 is broken beyond belief ...  Don't even give the
         * generic PCI code a chance to look at it for real ...
         */
-       if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16)))
-               goto is_ioc3;
+       if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16))) {
+               addr = &bridge->b_type0_cfg_dev[slot].f[fn].l[where >> 2];
+               return ioc3_cfg_wr(addr, where, size, value);
+       }
 
        addr = &bridge->b_type0_cfg_dev[slot].f[fn].c[where ^ (4 - size)];
 
@@ -210,29 +210,6 @@ static int pci_conf0_write_config(struct pci_bus *bus, unsigned int devfn,
                return PCIBIOS_DEVICE_NOT_FOUND;
 
        return PCIBIOS_SUCCESSFUL;
-
-is_ioc3:
-
-       /*
-        * IOC3 special handling
-        */
-       if ((where >= 0x14 && where < 0x40) || (where >= 0x48))
-               return PCIBIOS_SUCCESSFUL;
-
-       addr = &bridge->b_type0_cfg_dev[slot].f[fn].l[where >> 2];
-
-       if (get_dbe(cf, (u32 *)addr))
-               return PCIBIOS_DEVICE_NOT_FOUND;
-
-       shift = ((where & 3) << 3);
-       mask = (0xffffffffU >> ((4 - size) << 3));
-       smask = mask << shift;
-
-       cf = (cf & ~smask) | ((value & mask) << shift);
-       if (put_dbe(cf, (u32 *)addr))
-               return PCIBIOS_DEVICE_NOT_FOUND;
-
-       return PCIBIOS_SUCCESSFUL;
 }
 
 static int pci_conf1_write_config(struct pci_bus *bus, unsigned int devfn,
@@ -244,7 +221,7 @@ static int pci_conf1_write_config(struct pci_bus *bus, unsigned int devfn,
        int fn = PCI_FUNC(devfn);
        int busno = bus->number;
        void *addr;
-       u32 cf, shift, mask, smask;
+       u32 cf;
        int res;
 
        bridge_write(bc, b_pci_cfg, (busno << 16) | (slot << 11));
@@ -256,8 +233,10 @@ static int pci_conf1_write_config(struct pci_bus *bus, unsigned int devfn,
         * IOC3 is broken beyond belief ...  Don't even give the
         * generic PCI code a chance to look at it for real ...
         */
-       if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16)))
-               goto is_ioc3;
+       if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16))) {
+               addr = &bridge->b_type1_cfg.c[(fn << 8) | (where & ~3)]; /* type 1 window, as in pci_conf1_read_config() */
+               return ioc3_cfg_wr(addr, where, size, value);
+       }
 
        addr = &bridge->b_type1_cfg.c[(fn << 8) | (where ^ (4 - size))];
 
@@ -272,28 +251,6 @@ static int pci_conf1_write_config(struct pci_bus *bus, unsigned int devfn,
                return PCIBIOS_DEVICE_NOT_FOUND;
 
        return PCIBIOS_SUCCESSFUL;
-
-is_ioc3:
-
-       /*
-        * IOC3 special handling
-        */
-       if ((where >= 0x14 && where < 0x40) || (where >= 0x48))
-               return PCIBIOS_SUCCESSFUL;
-
-       addr = &bridge->b_type0_cfg_dev[slot].f[fn].l[where >> 2];
-       if (get_dbe(cf, (u32 *)addr))
-               return PCIBIOS_DEVICE_NOT_FOUND;
-
-       shift = ((where & 3) << 3);
-       mask = (0xffffffffU >> ((4 - size) << 3));
-       smask = mask << shift;
-
-       cf = (cf & ~smask) | ((value & mask) << shift);
-       if (put_dbe(cf, (u32 *)addr))
-               return PCIBIOS_DEVICE_NOT_FOUND;
-
-       return PCIBIOS_SUCCESSFUL;
 }
 
 static int pci_write_config(struct pci_bus *bus, unsigned int devfn,
index 6fdcb3d..dfb5279 100644 (file)
@@ -61,6 +61,10 @@ int init_debug = 1;
 /* memory blocks */
 struct prom_pmemblock mdesc[PROM_MAX_PMEMBLOCKS];
 
+static phys_addr_t prom_mem_base[MAX_PROM_MEM] __initdata;
+static phys_addr_t prom_mem_size[MAX_PROM_MEM] __initdata;
+static unsigned int nr_prom_mem __initdata;
+
 /* default feature sets */
 static char msp_default_features[] =
 #if defined(CONFIG_PMC_MSP4200_EVAL) \
@@ -352,6 +356,16 @@ void __init prom_meminit(void)
 
                add_memory_region(base, size, type);
                p++;
+
+               if (type == BOOT_MEM_ROM_DATA) {
+                       if (nr_prom_mem >= MAX_PROM_MEM) {      /* prom_mem_base[]/prom_mem_size[] are full */
+                               pr_err("Too many ROM DATA regions\n");
+                               continue;
+                       }
+                       prom_mem_base[nr_prom_mem] = base;
+                       prom_mem_size[nr_prom_mem] = size;
+                       nr_prom_mem++;
+               }
        }
 }
 
@@ -407,13 +421,9 @@ void __init prom_free_prom_memory(void)
        envp[i] = NULL;                 /* end array with null pointer */
        prom_envp = envp;
 
-       for (i = 0; i < boot_mem_map.nr_map; i++) {
-               if (boot_mem_map.map[i].type != BOOT_MEM_ROM_DATA)
-                       continue;
-
-               addr = boot_mem_map.map[i].addr;
+       for (i = 0; i < nr_prom_mem; i++) {
                free_init_pages("prom memory",
-                               addr, addr + boot_mem_map.map[i].size);
+                       prom_mem_base[i], prom_mem_base[i] + prom_mem_size[i]);
        }
 }
 
index 49c22dd..1434fa6 100644 (file)
@@ -51,7 +51,6 @@ choice
                select MIPS_GIC
                select COMMON_CLK
                select CLKSRC_MIPS_GIC
-               select HAVE_PCI
 endchoice
 
 choice
index 0ad8ff2..652424d 100644 (file)
@@ -106,10 +106,8 @@ static int rt_timer_probe(struct platform_device *pdev)
        }
 
        rt->irq = platform_get_irq(pdev, 0);
-       if (rt->irq < 0) {
-               dev_err(&pdev->dev, "failed to load irq\n");
+       if (rt->irq < 0)
                return rt->irq;
-       }
 
        rt->membase = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(rt->membase))
index c0cf7ba..c61362d 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/signal.h>
@@ -300,23 +301,6 @@ static void print_buserr(const struct pt_regs *regs)
               field, regs->cp0_epc, field, regs->regs[31]);
 }
 
-/*
- * Check, whether MC's (virtual) DMA address caused the bus error.
- * See "Virtual DMA Specification", Draft 1.5, Feb 13 1992, SGI
- */
-
-static int addr_is_ram(unsigned long addr, unsigned sz)
-{
-       int i;
-
-       for (i = 0; i < boot_mem_map.nr_map; i++) {
-               unsigned long a = boot_mem_map.map[i].addr;
-               if (a <= addr && addr+sz <= a+boot_mem_map.map[i].size)
-                       return 1;
-       }
-       return 0;
-}
-
 static int check_microtlb(u32 hi, u32 lo, unsigned long vaddr)
 {
        /* This is likely rather similar to correct code ;-) */
@@ -331,7 +315,7 @@ static int check_microtlb(u32 hi, u32 lo, unsigned long vaddr)
                        /* PTEIndex is VPN-low (bits [22:14]/[20:12] ?) */
                        unsigned long pte = (lo >> 6) << 12; /* PTEBase */
                        pte += 8*((vaddr >> pgsz) & 0x1ff);
-                       if (addr_is_ram(pte, 8)) {
+                       if (page_is_ram(PFN_DOWN(pte))) {
                                /*
                                 * Note: Since DMA hardware does look up
                                 * translation on its own, this PTE *must*
index 7221df2..69cfa0a 100644 (file)
@@ -1,6 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0
 # Objects to go into the VDSO.
-obj-vdso-y := elf.o gettimeofday.o sigreturn.o
+
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of the generic Makefile.
+ARCH_REL_TYPE_ABS := R_MIPS_JUMP_SLOT|R_MIPS_GLOB_DAT
+include $(srctree)/lib/vdso/Makefile
+
+obj-vdso-y := elf.o vgettimeofday.o sigreturn.o
 
 # Common compiler flags between ABIs.
 ccflags-vdso := \
@@ -15,15 +21,31 @@ ifdef CONFIG_CC_IS_CLANG
 ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS))
 endif
 
+#
+# The -fno-jump-tables flag only prevents the compiler from generating
+# jump tables but does not prevent the compiler from emitting absolute
+# offsets.
 cflags-vdso := $(ccflags-vdso) \
        $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \
-       -O2 -g -fPIC -fno-strict-aliasing -fno-common -fno-builtin -G 0 \
-       -DDISABLE_BRANCH_PROFILING \
+       -O3 -g -fPIC -fno-strict-aliasing -fno-common -fno-builtin -G 0 \
+       -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \
        $(call cc-option, -fno-asynchronous-unwind-tables) \
        $(call cc-option, -fno-stack-protector)
 aflags-vdso := $(ccflags-vdso) \
        -D__ASSEMBLY__ -Wa,-gdwarf-2
 
+ifneq ($(c-gettimeofday-y),)
+CFLAGS_vgettimeofday.o = -include $(c-gettimeofday-y)
+
+# config-n32-o32-env.c prepares the environment to build a 32bit vDSO
+# library on a 64bit kernel.
+# Note: Needs to be included before the generic library.
+CFLAGS_vgettimeofday-o32.o = -include $(srctree)/$(src)/config-n32-o32-env.c -include $(c-gettimeofday-y)
+CFLAGS_vgettimeofday-n32.o = -include $(srctree)/$(src)/config-n32-o32-env.c -include $(c-gettimeofday-y)
+endif
+
+CFLAGS_REMOVE_vgettimeofday.o = -pg
+
 #
 # For the pre-R6 code in arch/mips/vdso/vdso.h for locating
 # the base address of VDSO, the linker will emit a R_MIPS_PC32
@@ -48,6 +70,8 @@ VDSO_LDFLAGS := \
        $(addprefix -Wl$(comma),$(filter -E%,$(KBUILD_CFLAGS))) \
        -nostdlib -shared -Wl,--hash-style=sysv -Wl,--build-id
 
+CFLAGS_REMOVE_vdso.o = -pg
+
 GCOV_PROFILE := n
 UBSAN_SANITIZE := n
 
@@ -55,11 +79,14 @@ UBSAN_SANITIZE := n
 # Shared build commands.
 #
 
+quiet_cmd_vdsold_and_vdso_check = LD      $@
+      cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check)
+
 quiet_cmd_vdsold = VDSO    $@
       cmd_vdsold = $(CC) $(c_flags) $(VDSO_LDFLAGS) \
                    -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
 
-quiet_cmd_vdsoas_o_S = AS       $@
+quiet_cmd_vdsoas_o_S = AS      $@
       cmd_vdsoas_o_S = $(CC) $(a_flags) -c -o $@ $<
 
 # Strip rule for the raw .so files
@@ -95,7 +122,7 @@ $(obj-vdso): KBUILD_AFLAGS := $(aflags-vdso) $(native-abi)
 $(obj)/vdso.lds: KBUILD_CPPFLAGS := $(ccflags-vdso) $(native-abi)
 
 $(obj)/vdso.so.dbg.raw: $(obj)/vdso.lds $(obj-vdso) FORCE
-       $(call if_changed,vdsold)
+       $(call if_changed,vdsold_and_vdso_check)
 
 $(obj)/vdso-image.c: $(obj)/vdso.so.dbg.raw $(obj)/vdso.so.raw \
                      $(obj)/genvdso FORCE
@@ -133,7 +160,7 @@ $(obj)/vdso-o32.lds: $(src)/vdso.lds.S FORCE
        $(call if_changed_dep,cpp_lds_S)
 
 $(obj)/vdso-o32.so.dbg.raw: $(obj)/vdso-o32.lds $(obj-vdso-o32) FORCE
-       $(call if_changed,vdsold)
+       $(call if_changed,vdsold_and_vdso_check)
 
 $(obj)/vdso-o32-image.c: VDSO_NAME := o32
 $(obj)/vdso-o32-image.c: $(obj)/vdso-o32.so.dbg.raw $(obj)/vdso-o32.so.raw \
@@ -173,7 +200,7 @@ $(obj)/vdso-n32.lds: $(src)/vdso.lds.S FORCE
        $(call if_changed_dep,cpp_lds_S)
 
 $(obj)/vdso-n32.so.dbg.raw: $(obj)/vdso-n32.lds $(obj-vdso-n32) FORCE
-       $(call if_changed,vdsold)
+       $(call if_changed,vdsold_and_vdso_check)
 
 $(obj)/vdso-n32-image.c: VDSO_NAME := n32
 $(obj)/vdso-n32-image.c: $(obj)/vdso-n32.so.dbg.raw $(obj)/vdso-n32.so.raw \
diff --git a/arch/mips/vdso/config-n32-o32-env.c b/arch/mips/vdso/config-n32-o32-env.c
new file mode 100644 (file)
index 0000000..0011a63
--- /dev/null
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Configuration file for O32 and N32 binaries.
+ * Note: To be included before lib/vdso/gettimeofday.c
+ */
+#if defined(CONFIG_MIPS32_O32) || defined(CONFIG_MIPS32_N32)
+/*
+ * In case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel
+ * configuration.
+ */
+#undef CONFIG_64BIT
+
+#define BUILD_VDSO32
+#define CONFIG_32BIT 1
+#define CONFIG_GENERIC_ATOMIC64 1
+#define BUILD_VDSO32_64
+
+#endif /* CONFIG_MIPS32_O32 || CONFIG_MIPS32_N32 */
+
index e7543e8..a25cb14 100644 (file)
@@ -4,7 +4,7 @@
  * Author: Alex Smith <alex.smith@imgtec.com>
  */
 
-#include "vdso.h"
+#include <asm/vdso/vdso.h>
 
 #include <asm/isa-rev.h>
 
index c359763..e5c0ab9 100644 (file)
@@ -4,7 +4,7 @@
  * Author: Alex Smith <alex.smith@imgtec.com>
  */
 
-#include "vdso.h"
+#include <asm/vdso/vdso.h>
 
 #include <uapi/asm/unistd.h>
 
index 94d90c4..da46274 100644 (file)
@@ -95,6 +95,10 @@ VERSION
        global:
                __vdso_clock_gettime;
                __vdso_gettimeofday;
+               __vdso_clock_getres;
+#if _MIPS_SIM != _MIPS_SIM_ABI64
+               __vdso_clock_gettime64;
+#endif
 #endif
        local: *;
        };
diff --git a/arch/mips/vdso/vgettimeofday.c b/arch/mips/vdso/vgettimeofday.c
new file mode 100644 (file)
index 0000000..6ebdc37
--- /dev/null
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MIPS64 and compat userspace implementations of gettimeofday()
+ * and similar.
+ *
+ * Copyright (C) 2015 Imagination Technologies
+ * Copyright (C) 2018 ARM Limited
+ *
+ */
+#include <linux/time.h>
+#include <linux/types.h>
+
+#if _MIPS_SIM != _MIPS_SIM_ABI64 /* old_timespec32 entry points plus clock_gettime64 */
+int __vdso_clock_gettime(clockid_t clock,
+                        struct old_timespec32 *ts)
+{
+       return __cvdso_clock_gettime32(clock, ts);
+}
+
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
+                       struct timezone *tz)
+{
+       return __cvdso_gettimeofday(tv, tz);
+}
+
+int __vdso_clock_getres(clockid_t clock_id,
+                       struct old_timespec32 *res)
+{
+       return __cvdso_clock_getres_time32(clock_id, res);
+}
+
+int __vdso_clock_gettime64(clockid_t clock,
+                          struct __kernel_timespec *ts)
+{
+       return __cvdso_clock_gettime(clock, ts);
+}
+
+#else /* _MIPS_SIM == _MIPS_SIM_ABI64: __kernel_timespec entry points only */
+
+int __vdso_clock_gettime(clockid_t clock,
+                        struct __kernel_timespec *ts)
+{
+       return __cvdso_clock_gettime(clock, ts);
+}
+
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
+                       struct timezone *tz)
+{
+       return __cvdso_gettimeofday(tv, tz);
+}
+
+int __vdso_clock_getres(clockid_t clock_id,
+                       struct __kernel_timespec *res)
+{
+       return __cvdso_clock_getres(clock_id, res);
+}
+
+#endif /* _MIPS_SIM != _MIPS_SIM_ABI64 */
index e78b43d..37125e6 100644 (file)
@@ -23,8 +23,6 @@
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
 
-#define check_pgt_cache()              do { } while (0)
-
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
        pgtable_t pte;
index c70cc56..0588ec9 100644 (file)
@@ -403,8 +403,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
  * into virtual address `from'
  */
 
-#define pgtable_cache_init()       do { } while (0)
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASMNDS32_PGTABLE_H */
index 4bc8cf7..0b146d7 100644 (file)
@@ -41,10 +41,8 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 #define __pte_free_tlb(tlb, pte, addr)                         \
        do {                                                    \
-               pgtable_page_dtor(pte);                         \
+               pgtable_pte_page_dtor(pte);                     \
                tlb_remove_page((tlb), (pte));                  \
        } while (0)
 
-#define check_pgt_cache()      do { } while (0)
-
 #endif /* _ASM_NIOS2_PGALLOC_H */
index 95237b7..99985d8 100644 (file)
@@ -291,8 +291,6 @@ static inline void pte_clear(struct mm_struct *mm,
 
 #include <asm-generic/pgtable.h>
 
-#define pgtable_cache_init()           do { } while (0)
-
 extern void __init paging_init(void);
 extern void __init mmu_init(void);
 
index 6bbd4ae..4cf35b0 100644 (file)
@@ -123,7 +123,7 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6,
                dtb_passed = r6;
 
                if (r7)
-                       strncpy(cmdline_passed, (char *)r7, COMMAND_LINE_SIZE);
+                       strlcpy(cmdline_passed, (char *)r7, COMMAND_LINE_SIZE);
        }
 #endif
 
@@ -131,10 +131,10 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6,
 
 #ifndef CONFIG_CMDLINE_FORCE
        if (cmdline_passed[0])
-               strncpy(boot_command_line, cmdline_passed, COMMAND_LINE_SIZE);
+               strlcpy(boot_command_line, cmdline_passed, COMMAND_LINE_SIZE);
 #ifdef CONFIG_NIOS2_CMDLINE_IGNORE_DTB
        else
-               strncpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+               strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
 #endif
 #endif
 
index 3d4b397..da12a4c 100644 (file)
@@ -75,7 +75,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm)
        if (!pte)
                return NULL;
        clear_page(page_address(pte));
-       if (!pgtable_page_ctor(pte)) {
+       if (!pgtable_pte_page_ctor(pte)) {
                __free_page(pte);
                return NULL;
        }
@@ -89,18 +89,16 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 
 static inline void pte_free(struct mm_struct *mm, struct page *pte)
 {
-       pgtable_page_dtor(pte);
+       pgtable_pte_page_dtor(pte);
        __free_page(pte);
 }
 
 #define __pte_free_tlb(tlb, pte, addr) \
 do {                                   \
-       pgtable_page_dtor(pte);         \
+       pgtable_pte_page_dtor(pte);     \
        tlb_remove_page((tlb), (pte));  \
 } while (0)
 
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
-#define check_pgt_cache()          do { } while (0)
-
 #endif
index 2fe9ff5..248d22d 100644 (file)
@@ -443,11 +443,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 
 #include <asm-generic/pgtable.h>
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()           do { } while (0)
-
 typedef pte_t *pte_addr_t;
 
 #endif /* __ASSEMBLY__ */
index b41a79f..4d5b8bd 100644 (file)
@@ -16,6 +16,7 @@
  */
 
 #include <linux/dma-noncoherent.h>
+#include <linux/pagewalk.h>
 
 #include <asm/cpuinfo.h>
 #include <asm/spr_defs.h>
@@ -43,6 +44,10 @@ page_set_nocache(pte_t *pte, unsigned long addr,
        return 0;
 }
 
+static const struct mm_walk_ops set_nocache_walk_ops = {
+       .pte_entry              = page_set_nocache,
+};
+
 static int
 page_clear_nocache(pte_t *pte, unsigned long addr,
                   unsigned long next, struct mm_walk *walk)
@@ -58,6 +63,10 @@ page_clear_nocache(pte_t *pte, unsigned long addr,
        return 0;
 }
 
+static const struct mm_walk_ops clear_nocache_walk_ops = {
+       .pte_entry              = page_clear_nocache,
+};
+
 /*
  * Alloc "coherent" memory, which for OpenRISC means simply uncached.
  *
@@ -80,10 +89,6 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 {
        unsigned long va;
        void *page;
-       struct mm_walk walk = {
-               .pte_entry = page_set_nocache,
-               .mm = &init_mm
-       };
 
        page = alloc_pages_exact(size, gfp | __GFP_ZERO);
        if (!page)
@@ -98,7 +103,8 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
         * We need to iterate through the pages, clearing the dcache for
         * them and setting the cache-inhibit bit.
         */
-       if (walk_page_range(va, va + size, &walk)) {
+       if (walk_page_range(&init_mm, va, va + size, &set_nocache_walk_ops,
+                       NULL)) {
                free_pages_exact(page, size);
                return NULL;
        }
@@ -111,13 +117,10 @@ arch_dma_free(struct device *dev, size_t size, void *vaddr,
                dma_addr_t dma_handle, unsigned long attrs)
 {
        unsigned long va = (unsigned long)vaddr;
-       struct mm_walk walk = {
-               .pte_entry = page_clear_nocache,
-               .mm = &init_mm
-       };
 
        /* walk_page_range shouldn't be able to fail here */
-       WARN_ON(walk_page_range(va, va + size, &walk));
+       WARN_ON(walk_page_range(&init_mm, va, va + size,
+                       &clear_nocache_walk_ops, NULL));
 
        free_pages_exact(vaddr, size);
 }
index 4f2059a..d98647c 100644 (file)
@@ -124,6 +124,4 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
        pmd_populate_kernel(mm, pmd, page_address(pte_page))
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
-#define check_pgt_cache()      do { } while (0)
-
 #endif
index 6d58c17..4ac374b 100644 (file)
@@ -132,8 +132,6 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 #define PTRS_PER_PTE    (1UL << BITS_PER_PTE)
 
 /* Definitions for 2nd level */
-#define pgtable_cache_init()   do { } while (0)
-
 #define PMD_SHIFT       (PLD_SHIFT + BITS_PER_PTE)
 #define PMD_SIZE       (1UL << PMD_SHIFT)
 #define PMD_MASK       (~(PMD_SIZE-1))
index c98162f..6fd8871 100644 (file)
@@ -48,6 +48,9 @@
 #define MADV_DONTFORK  10              /* don't inherit across fork */
 #define MADV_DOFORK    11              /* do inherit across fork */
 
+#define MADV_COLD      20              /* deactivate these pages */
+#define MADV_PAGEOUT   21              /* reclaim these pages */
+
 #define MADV_MERGEABLE   65            /* KSM may merge identical pages */
 #define MADV_UNMERGEABLE 66            /* KSM may not merge identical pages */
 
index 2372d35..327567b 100644 (file)
@@ -112,8 +112,6 @@ extern pgprot_t     pci_phys_mem_access_prot(struct file *file,
                                         unsigned long size,
                                         pgprot_t prot);
 
-#define HAVE_ARCH_PCI_RESOURCE_TO_USER
-
 extern resource_size_t pcibios_io_space_offset(struct pci_controller *hose);
 extern void pcibios_setup_bus_devices(struct pci_bus *bus);
 extern void pcibios_setup_bus_self(struct pci_bus *bus);
index 2b2c60a..6dd78a2 100644 (file)
@@ -64,8 +64,6 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 extern struct kmem_cache *pgtable_cache[];
 #define PGT_CACHE(shift) pgtable_cache[shift]
 
-static inline void check_pgt_cache(void) { }
-
 #ifdef CONFIG_PPC_BOOK3S
 #include <asm/book3s/pgalloc.h>
 #else
index 8b7865a..4053b2a 100644 (file)
@@ -87,7 +87,6 @@ extern unsigned long ioremap_bot;
 unsigned long vmalloc_to_phys(void *vmalloc_addr);
 
 void pgtable_cache_add(unsigned int shift);
-void pgtable_cache_init(void);
 
 #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32)
 void mark_initmem_nx(void);
index 3410ea9..6c12376 100644 (file)
@@ -1748,7 +1748,7 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
        /*
         * IF we try to do a HUGE PTE update after a withdraw is done.
         * we will find the below NULL. This happens when we do
-        * split_huge_page_pmd
+        * split_huge_pmd
         */
        if (!hpte_slot_array)
                return;
index b056cae..56cc845 100644 (file)
@@ -129,11 +129,8 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
                 * Allow to use larger than 64k IOMMU pages. Only do that
                 * if we are backed by hugetlb.
                 */
-               if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) {
-                       struct page *head = compound_head(page);
-
-                       pageshift = compound_order(head) + PAGE_SHIFT;
-               }
+               if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page))
+                       pageshift = page_shift(compound_head(page));
                mem->pageshift = min(mem->pageshift, pageshift);
                /*
                 * We don't need struct page reference any more, switch
index 9ba07e5..2ef24a5 100644 (file)
@@ -7,7 +7,7 @@
 #include <linux/kernel.h>
 #include <linux/gfp.h>
 #include <linux/types.h>
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/hugetlb.h>
 #include <linux/syscalls.h>
 
@@ -139,14 +139,14 @@ static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
        return 0;
 }
 
+static const struct mm_walk_ops subpage_walk_ops = {
+       .pmd_entry      = subpage_walk_pmd_entry,
+};
+
 static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
                                    unsigned long len)
 {
        struct vm_area_struct *vma;
-       struct mm_walk subpage_proto_walk = {
-               .mm = mm,
-               .pmd_entry = subpage_walk_pmd_entry,
-       };
 
        /*
         * We don't try too hard, we just mark all the vma in that range
@@ -163,7 +163,7 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
                if (vma->vm_start >= (addr + len))
                        break;
                vma->vm_flags |= VM_NOHUGEPAGE;
-               walk_page_vma(vma, &subpage_proto_walk);
+               walk_page_vma(vma, &subpage_walk_ops, NULL);
                vma = vma->vm_next;
        }
 }
index a8953f1..73d4873 100644 (file)
@@ -667,7 +667,7 @@ void flush_dcache_icache_hugepage(struct page *page)
 
        BUG_ON(!PageCompound(page));
 
-       for (i = 0; i < (1UL << compound_order(page)); i++) {
+       for (i = 0; i < compound_nr(page); i++) {
                if (!PageHighMem(page)) {
                        __flush_dcache_icache(page_address(page+i));
                } else {
index a7b0521..ee4bd6d 100644 (file)
@@ -25,7 +25,7 @@ void pte_frag_destroy(void *pte_frag)
        count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
        /* We allow PTE_FRAG_NR fragments from a PTE page */
        if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
-               pgtable_page_dtor(page);
+               pgtable_pte_page_dtor(page);
                __free_page(page);
        }
 }
@@ -61,7 +61,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
                page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
                if (!page)
                        return NULL;
-               if (!pgtable_page_ctor(page)) {
+               if (!pgtable_pte_page_ctor(page)) {
                        __free_page(page);
                        return NULL;
                }
@@ -113,7 +113,7 @@ void pte_fragment_free(unsigned long *table, int kernel)
        BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
        if (atomic_dec_and_test(&page->pt_frag_refcount)) {
                if (!kernel)
-                       pgtable_page_dtor(page);
+                       pgtable_pte_page_dtor(page);
                __free_page(page);
        }
 }
index 065ff14..1d93e55 100644 (file)
@@ -10,6 +10,8 @@
 
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
 #include <linux/fsnotify.h>
 #include <linux/backing-dev.h>
 #include <linux/init.h>
@@ -20,7 +22,6 @@
 #include <linux/pagemap.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
-#include <linux/parser.h>
 
 #include <asm/prom.h>
 #include <asm/spu.h>
@@ -30,7 +31,7 @@
 #include "spufs.h"
 
 struct spufs_sb_info {
-       int debug;
+       bool debug;
 };
 
 static struct kmem_cache *spufs_inode_cache;
@@ -574,16 +575,27 @@ long spufs_create(struct path *path, struct dentry *dentry,
 }
 
 /* File system initialization */
+struct spufs_fs_context {
+       kuid_t  uid;
+       kgid_t  gid;
+       umode_t mode;
+};
+
 enum {
-       Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err,
+       Opt_uid, Opt_gid, Opt_mode, Opt_debug,
+};
+
+static const struct fs_parameter_spec spufs_param_specs[] = {
+       fsparam_u32     ("gid",                         Opt_gid),
+       fsparam_u32oct  ("mode",                        Opt_mode),
+       fsparam_u32     ("uid",                         Opt_uid),
+       fsparam_flag    ("debug",                       Opt_debug),
+       {}
 };
 
-static const match_table_t spufs_tokens = {
-       { Opt_uid,   "uid=%d" },
-       { Opt_gid,   "gid=%d" },
-       { Opt_mode,  "mode=%o" },
-       { Opt_debug, "debug" },
-       { Opt_err,    NULL  },
+static const struct fs_parameter_description spufs_fs_parameters = {
+       .name           = "spufs",
+       .specs          = spufs_param_specs,
 };
 
 static int spufs_show_options(struct seq_file *m, struct dentry *root)
@@ -604,47 +616,41 @@ static int spufs_show_options(struct seq_file *m, struct dentry *root)
        return 0;
 }
 
-static int
-spufs_parse_options(struct super_block *sb, char *options, struct inode *root)
-{
-       char *p;
-       substring_t args[MAX_OPT_ARGS];
-
-       while ((p = strsep(&options, ",")) != NULL) {
-               int token, option;
-
-               if (!*p)
-                       continue;
-
-               token = match_token(p, spufs_tokens, args);
-               switch (token) {
-               case Opt_uid:
-                       if (match_int(&args[0], &option))
-                               return 0;
-                       root->i_uid = make_kuid(current_user_ns(), option);
-                       if (!uid_valid(root->i_uid))
-                               return 0;
-                       break;
-               case Opt_gid:
-                       if (match_int(&args[0], &option))
-                               return 0;
-                       root->i_gid = make_kgid(current_user_ns(), option);
-                       if (!gid_valid(root->i_gid))
-                               return 0;
-                       break;
-               case Opt_mode:
-                       if (match_octal(&args[0], &option))
-                               return 0;
-                       root->i_mode = option | S_IFDIR;
-                       break;
-               case Opt_debug:
-                       spufs_get_sb_info(sb)->debug = 1;
-                       break;
-               default:
-                       return 0;
-               }
+static int spufs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+       struct spufs_fs_context *ctx = fc->fs_private;
+       struct spufs_sb_info *sbi = fc->s_fs_info;
+       struct fs_parse_result result;
+       kuid_t uid;
+       kgid_t gid;
+       int opt;
+
+       opt = fs_parse(fc, &spufs_fs_parameters, param, &result);
+       if (opt < 0)
+               return opt;
+
+       switch (opt) {
+       case Opt_uid:
+               uid = make_kuid(current_user_ns(), result.uint_32);
+               if (!uid_valid(uid))
+                       return invalf(fc, "Unknown uid");
+               ctx->uid = uid;
+               break;
+       case Opt_gid:
+               gid = make_kgid(current_user_ns(), result.uint_32);
+               if (!gid_valid(gid))
+                       return invalf(fc, "Unknown gid");
+               ctx->gid = gid;
+               break;
+       case Opt_mode:
+               ctx->mode = result.uint_32 & S_IALLUGO;
+               break;
+       case Opt_debug:
+               sbi->debug = true;
+               break;
        }
-       return 1;
+
+       return 0;
 }
 
 static void spufs_exit_isolated_loader(void)
@@ -678,79 +684,107 @@ spufs_init_isolated_loader(void)
        printk(KERN_INFO "spufs: SPU isolation mode enabled\n");
 }
 
-static int
-spufs_create_root(struct super_block *sb, void *data)
+static int spufs_create_root(struct super_block *sb, struct fs_context *fc)
 {
+       struct spufs_fs_context *ctx = fc->fs_private;
        struct inode *inode;
-       int ret;
 
-       ret = -ENODEV;
        if (!spu_management_ops)
-               goto out;
+               return -ENODEV;
 
-       ret = -ENOMEM;
-       inode = spufs_new_inode(sb, S_IFDIR | 0775);
+       inode = spufs_new_inode(sb, S_IFDIR | ctx->mode);
        if (!inode)
-               goto out;
+               return -ENOMEM;
 
+       inode->i_uid = ctx->uid;
+       inode->i_gid = ctx->gid;
        inode->i_op = &simple_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
        SPUFS_I(inode)->i_ctx = NULL;
        inc_nlink(inode);
 
-       ret = -EINVAL;
-       if (!spufs_parse_options(sb, data, inode))
-               goto out_iput;
-
-       ret = -ENOMEM;
        sb->s_root = d_make_root(inode);
        if (!sb->s_root)
-               goto out;
-
+               return -ENOMEM;
        return 0;
-out_iput:
-       iput(inode);
-out:
-       return ret;
 }
 
-static int
-spufs_fill_super(struct super_block *sb, void *data, int silent)
-{
-       struct spufs_sb_info *info;
-       static const struct super_operations s_ops = {
-               .alloc_inode = spufs_alloc_inode,
-               .free_inode = spufs_free_inode,
-               .statfs = simple_statfs,
-               .evict_inode = spufs_evict_inode,
-               .show_options = spufs_show_options,
-       };
-
-       info = kzalloc(sizeof(*info), GFP_KERNEL);
-       if (!info)
-               return -ENOMEM;
+static const struct super_operations spufs_ops = {
+       .alloc_inode    = spufs_alloc_inode,
+       .free_inode     = spufs_free_inode,
+       .statfs         = simple_statfs,
+       .evict_inode    = spufs_evict_inode,
+       .show_options   = spufs_show_options,
+};
 
+static int spufs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
        sb->s_maxbytes = MAX_LFS_FILESIZE;
        sb->s_blocksize = PAGE_SIZE;
        sb->s_blocksize_bits = PAGE_SHIFT;
        sb->s_magic = SPUFS_MAGIC;
-       sb->s_op = &s_ops;
-       sb->s_fs_info = info;
+       sb->s_op = &spufs_ops;
 
-       return spufs_create_root(sb, data);
+       return spufs_create_root(sb, fc);
+}
+
+static int spufs_get_tree(struct fs_context *fc)
+{
+       return get_tree_single(fc, spufs_fill_super);
 }
 
-static struct dentry *
-spufs_mount(struct file_system_type *fstype, int flags,
-               const char *name, void *data)
+/* Free the option state and any superblock info still attached to @fc. */
+static void spufs_free_fc(struct fs_context *fc)
 {
-       return mount_single(fstype, flags, data, spufs_fill_super);
+       kfree(fc->fs_private);
+       kfree(fc->s_fs_info);
+}
+
+static const struct fs_context_operations spufs_context_ops = {
+       .free           = spufs_free_fc,
+       .parse_param    = spufs_parse_param,
+       .get_tree       = spufs_get_tree,
+};
+
+/*
+ * Allocate the mount option state (ctx) and the superblock info (sbi),
+ * fill in the default owner and mode, and attach both to @fc.  ctx has
+ * to be stored in fc->fs_private: spufs_parse_param() and
+ * spufs_create_root() fetch it from there and would see NULL otherwise.
+ */
+static int spufs_init_fs_context(struct fs_context *fc)
+{
+       struct spufs_fs_context *ctx;
+       struct spufs_sb_info *sbi;
+
+       ctx = kzalloc(sizeof(struct spufs_fs_context), GFP_KERNEL);
+       if (!ctx)
+               goto nomem;
+
+       sbi = kzalloc(sizeof(struct spufs_sb_info), GFP_KERNEL);
+       if (!sbi)
+               goto nomem_ctx;
+
+       ctx->uid = current_uid();
+       ctx->gid = current_gid();
+       ctx->mode = 0755;
+
+       fc->fs_private = ctx;
+       fc->s_fs_info = sbi;
+       fc->ops = &spufs_context_ops;
+       return 0;
+
+nomem_ctx:
+       kfree(ctx);
+nomem:
+       return -ENOMEM;
 }
 
 static struct file_system_type spufs_type = {
        .owner = THIS_MODULE,
        .name = "spufs",
-       .mount = spufs_mount,
+       .init_fs_context = spufs_init_fs_context,
+       .parameters     = &spufs_fs_parameters,
        .kill_sb = kill_litter_super,
 };
 MODULE_ALIAS_FS("spufs");
index 71d29fb..8eebbc8 100644 (file)
@@ -59,6 +59,18 @@ config RISCV
        select ARCH_HAS_GIGANTIC_PAGE
        select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
        select SPARSEMEM_STATIC if 32BIT
+       select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
+       select HAVE_ARCH_MMAP_RND_BITS
+
+config ARCH_MMAP_RND_BITS_MIN
+       default 18 if 64BIT
+       default 8
+
+# max bits determined by the following formula:
+#  VA_BITS - PAGE_SHIFT - 3
+config ARCH_MMAP_RND_BITS_MAX
+       default 24 if 64BIT # SV39 based
+       default 17
 
 config MMU
        def_bool y
index 42b5ec2..afa43c7 100644 (file)
@@ -13,6 +13,7 @@
        aliases {
                serial0 = &uart0;
                serial1 = &uart1;
+               ethernet0 = &eth0;
        };
 
        chosen {
@@ -60,7 +61,6 @@
                        };
                };
                cpu2: cpu@2 {
-                       clock-frequency = <0>;
                        compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
                        d-cache-block-size = <64>;
                        d-cache-sets = <64>;
@@ -84,7 +84,6 @@
                        };
                };
                cpu3: cpu@3 {
-                       clock-frequency = <0>;
                        compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
                        d-cache-block-size = <64>;
                        d-cache-sets = <64>;
                        };
                };
                cpu4: cpu@4 {
-                       clock-frequency = <0>;
                        compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
                        d-cache-block-size = <64>;
                        d-cache-sets = <64>;
                        #size-cells = <0>;
                        status = "disabled";
                };
+               pwm0: pwm@10020000 {
+                       compatible = "sifive,fu540-c000-pwm", "sifive,pwm0";
+                       reg = <0x0 0x10020000 0x0 0x1000>;
+                       interrupt-parent = <&plic0>;
+                       interrupts = <42 43 44 45>;
+                       clocks = <&prci PRCI_CLK_TLCLK>;
+                       #pwm-cells = <3>;
+                       status = "disabled";
+               };
+               pwm1: pwm@10021000 {
+                       compatible = "sifive,fu540-c000-pwm", "sifive,pwm0";
+                       reg = <0x0 0x10021000 0x0 0x1000>;
+                       interrupt-parent = <&plic0>;
+                       interrupts = <46 47 48 49>;
+                       clocks = <&prci PRCI_CLK_TLCLK>;
+                       #pwm-cells = <3>;
+                       status = "disabled";
+               };
 
        };
 };
index 93d68cb..104d334 100644 (file)
                reg = <0>;
        };
 };
+
+&pwm0 {
+       status = "okay";
+};
+
+&pwm1 {
+       status = "okay";
+};
index 3efff55..420a0db 100644 (file)
@@ -29,6 +29,8 @@ CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
 CONFIG_NETLINK_DIAG=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
 CONFIG_PCI=y
 CONFIG_PCIEPORTBUS=y
 CONFIG_PCI_HOST_GENERIC=y
@@ -39,6 +41,7 @@ CONFIG_BLK_DEV_LOOP=y
 CONFIG_VIRTIO_BLK=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=y
+CONFIG_SCSI_VIRTIO=y
 CONFIG_ATA=y
 CONFIG_SATA_AHCI=y
 CONFIG_SATA_AHCI_PLATFORM=y
@@ -54,6 +57,7 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
 CONFIG_HVC_RISCV_SBI=y
+CONFIG_VIRTIO_CONSOLE=y
 CONFIG_HW_RANDOM=y
 CONFIG_HW_RANDOM_VIRTIO=y
 CONFIG_SPI=y
@@ -61,6 +65,7 @@ CONFIG_SPI_SIFIVE=y
 # CONFIG_PTP_1588_CLOCK is not set
 CONFIG_DRM=y
 CONFIG_DRM_RADEON=y
+CONFIG_DRM_VIRTIO_GPU=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_USB=y
 CONFIG_USB_XHCI_HCD=y
@@ -73,7 +78,12 @@ CONFIG_USB_STORAGE=y
 CONFIG_USB_UAS=y
 CONFIG_MMC=y
 CONFIG_MMC_SPI=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_INPUT=y
 CONFIG_VIRTIO_MMIO=y
+CONFIG_RPMSG_CHAR=y
+CONFIG_RPMSG_VIRTIO=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_AUTOFS4_FS=y
@@ -86,6 +96,7 @@ CONFIG_NFS_V4=y
 CONFIG_NFS_V4_1=y
 CONFIG_NFS_V4_2=y
 CONFIG_ROOT_NFS=y
+CONFIG_9P_FS=y
 CONFIG_CRYPTO_USER_API_HASH=y
 CONFIG_CRYPTO_DEV_VIRTIO=y
 CONFIG_PRINTK_TIME=y
index 7da93e4..87ee6e6 100644 (file)
@@ -29,6 +29,8 @@ CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
 CONFIG_NETLINK_DIAG=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
 CONFIG_PCI=y
 CONFIG_PCIEPORTBUS=y
 CONFIG_PCI_HOST_GENERIC=y
@@ -39,6 +41,7 @@ CONFIG_BLK_DEV_LOOP=y
 CONFIG_VIRTIO_BLK=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=y
+CONFIG_SCSI_VIRTIO=y
 CONFIG_ATA=y
 CONFIG_SATA_AHCI=y
 CONFIG_SATA_AHCI_PLATFORM=y
@@ -54,11 +57,13 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
 CONFIG_HVC_RISCV_SBI=y
+CONFIG_VIRTIO_CONSOLE=y
 CONFIG_HW_RANDOM=y
 CONFIG_HW_RANDOM_VIRTIO=y
 # CONFIG_PTP_1588_CLOCK is not set
 CONFIG_DRM=y
 CONFIG_DRM_RADEON=y
+CONFIG_DRM_VIRTIO_GPU=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_USB=y
 CONFIG_USB_XHCI_HCD=y
@@ -69,7 +74,12 @@ CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_OHCI_HCD_PLATFORM=y
 CONFIG_USB_STORAGE=y
 CONFIG_USB_UAS=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_INPUT=y
 CONFIG_VIRTIO_MMIO=y
+CONFIG_RPMSG_CHAR=y
+CONFIG_RPMSG_VIRTIO=y
 CONFIG_SIFIVE_PLIC=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -83,6 +93,7 @@ CONFIG_NFS_V4=y
 CONFIG_NFS_V4_1=y
 CONFIG_NFS_V4_2=y
 CONFIG_ROOT_NFS=y
+CONFIG_9P_FS=y
 CONFIG_CRYPTO_USER_API_HASH=y
 CONFIG_CRYPTO_DEV_VIRTIO=y
 CONFIG_PRINTK_TIME=y
index 56a67d6..d59ea92 100644 (file)
@@ -78,12 +78,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #define __pte_free_tlb(tlb, pte, buf)   \
 do {                                    \
-       pgtable_page_dtor(pte);         \
+       pgtable_pte_page_dtor(pte);     \
        tlb_remove_page((tlb), pte);    \
 } while (0)
 
-static inline void check_pgt_cache(void)
-{
-}
-
 #endif /* _ASM_RISCV_PGALLOC_H */
index 80905b2..7255f2d 100644 (file)
@@ -83,6 +83,18 @@ extern pgd_t swapper_pg_dir[];
 #define __S110 PAGE_SHARED_EXEC
 #define __S111 PAGE_SHARED_EXEC
 
+#define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
+#define VMALLOC_END      (PAGE_OFFSET - 1)
+#define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
+
+#define FIXADDR_TOP      VMALLOC_START
+#ifdef CONFIG_64BIT
+#define FIXADDR_SIZE     PMD_SIZE
+#else
+#define FIXADDR_SIZE     PGDIR_SIZE
+#endif
+#define FIXADDR_START    (FIXADDR_TOP - FIXADDR_SIZE)
+
 /*
  * Roughly size the vmemmap space to be large enough to fit enough
  * struct pages to map half the virtual address space. Then
@@ -424,23 +436,6 @@ extern void *dtb_early_va;
 extern void setup_bootmem(void);
 extern void paging_init(void);
 
-static inline void pgtable_cache_init(void)
-{
-       /* No page table caches to initialize */
-}
-
-#define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END      (PAGE_OFFSET - 1)
-#define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
-
-#define FIXADDR_TOP      VMALLOC_START
-#ifdef CONFIG_64BIT
-#define FIXADDR_SIZE     PMD_SIZE
-#else
-#define FIXADDR_SIZE     PGDIR_SIZE
-#endif
-#define FIXADDR_START    (FIXADDR_TOP - FIXADDR_SIZE)
-
 /*
  * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
  * Note that PGDIR_SIZE must evenly divide TASK_SIZE.
index 74ccfd4..da7aa88 100644 (file)
@@ -166,9 +166,13 @@ ENTRY(handle_exception)
        move a0, sp /* pt_regs */
        tail do_IRQ
 1:
-       /* Exceptions run with interrupts enabled */
+       /* Exceptions run with interrupts enabled or disabled
+          depending on the state of sstatus.SR_SPIE */
+       andi t0, s1, SR_SPIE
+       beqz t0, 1f
        csrs CSR_SSTATUS, SR_SIE
 
+1:
        /* Handle syscalls */
        li t0, EXC_SYSCALL
        beq s4, t0, handle_syscall
index 15a9189..72f89b7 100644 (file)
@@ -63,6 +63,11 @@ _start_kernel:
        li t0, SR_FS
        csrc CSR_SSTATUS, t0
 
+#ifdef CONFIG_SMP
+       li t0, CONFIG_NR_CPUS
+       bgeu a0, t0, .Lsecondary_park
+#endif
+
        /* Pick one hart to run the main boot sequence */
        la a3, hart_lottery
        li a2, 1
@@ -154,9 +159,6 @@ relocate:
 
 .Lsecondary_start:
 #ifdef CONFIG_SMP
-       li a1, CONFIG_NR_CPUS
-       bgeu a0, a1, .Lsecondary_park
-
        /* Set trap vector to spin forever to help debug */
        la a3, .Lsecondary_park
        csrw CSR_STVEC, a3
index 3836760..b18cd6c 100644 (file)
@@ -206,3 +206,4 @@ void smp_send_reschedule(int cpu)
 {
        send_ipi_single(cpu, IPI_RESCHEDULE);
 }
+EXPORT_SYMBOL_GPL(smp_send_reschedule);
index 541a2b8..9dd1f2e 100644 (file)
@@ -9,6 +9,7 @@
 #include <asm/sbi.h>
 
 unsigned long riscv_timebase;
+EXPORT_SYMBOL_GPL(riscv_timebase);
 
 void __init time_init(void)
 {
index f933a47..43a81d0 100644 (file)
@@ -554,9 +554,9 @@ config ARCH_HAS_KEXEC_PURGATORY
        def_bool y
        depends on KEXEC_FILE
 
-config KEXEC_VERIFY_SIG
+config KEXEC_SIG
        bool "Verify kernel signature during kexec_file_load() syscall"
-       depends on KEXEC_FILE && SYSTEM_DATA_VERIFICATION
+       depends on KEXEC_FILE && MODULE_SIG_FORMAT
        help
          This option makes kernel signature verification mandatory for
          the kexec_file_load() syscall.
index a4418fc..70139d0 100644 (file)
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
 #include <linux/namei.h>
 #include <linux/vfs.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/time.h>
-#include <linux/parser.h>
 #include <linux/sysfs.h>
 #include <linux/init.h>
 #include <linux/kobject.h>
 #include <linux/seq_file.h>
-#include <linux/mount.h>
 #include <linux/uio.h>
 #include <asm/ebcdic.h>
 #include "hypfs.h"
@@ -207,52 +207,44 @@ static int hypfs_release(struct inode *inode, struct file *filp)
        return 0;
 }
 
-enum { opt_uid, opt_gid, opt_err };
+enum { Opt_uid, Opt_gid, };
 
-static const match_table_t hypfs_tokens = {
-       {opt_uid, "uid=%u"},
-       {opt_gid, "gid=%u"},
-       {opt_err, NULL}
+static const struct fs_parameter_spec hypfs_param_specs[] = {
+       fsparam_u32("gid", Opt_gid),
+       fsparam_u32("uid", Opt_uid),
+       {}
 };
 
-static int hypfs_parse_options(char *options, struct super_block *sb)
+static const struct fs_parameter_description hypfs_fs_parameters = {
+       .name           = "hypfs",
+       .specs          = hypfs_param_specs,
+};
+
+static int hypfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
-       char *str;
-       substring_t args[MAX_OPT_ARGS];
+       struct hypfs_sb_info *hypfs_info = fc->s_fs_info;
+       struct fs_parse_result result;
        kuid_t uid;
        kgid_t gid;
-
-       if (!options)
-               return 0;
-       while ((str = strsep(&options, ",")) != NULL) {
-               int token, option;
-               struct hypfs_sb_info *hypfs_info = sb->s_fs_info;
-
-               if (!*str)
-                       continue;
-               token = match_token(str, hypfs_tokens, args);
-               switch (token) {
-               case opt_uid:
-                       if (match_int(&args[0], &option))
-                               return -EINVAL;
-                       uid = make_kuid(current_user_ns(), option);
-                       if (!uid_valid(uid))
-                               return -EINVAL;
-                       hypfs_info->uid = uid;
-                       break;
-               case opt_gid:
-                       if (match_int(&args[0], &option))
-                               return -EINVAL;
-                       gid = make_kgid(current_user_ns(), option);
-                       if (!gid_valid(gid))
-                               return -EINVAL;
-                       hypfs_info->gid = gid;
-                       break;
-               case opt_err:
-               default:
-                       pr_err("%s is not a valid mount option\n", str);
-                       return -EINVAL;
-               }
+       int opt;
+
+       opt = fs_parse(fc, &hypfs_fs_parameters, param, &result);
+       if (opt < 0)
+               return opt;
+
+       switch (opt) {
+       case Opt_uid:
+               uid = make_kuid(current_user_ns(), result.uint_32);
+               if (!uid_valid(uid))
+                       return invalf(fc, "Unknown uid");
+               hypfs_info->uid = uid;
+               break;
+       case Opt_gid:
+               gid = make_kgid(current_user_ns(), result.uint_32);
+               if (!gid_valid(gid))
+                       return invalf(fc, "Unknown gid");
+               hypfs_info->gid = gid;
+               break;
        }
        return 0;
 }
@@ -266,26 +258,18 @@ static int hypfs_show_options(struct seq_file *s, struct dentry *root)
        return 0;
 }
 
-static int hypfs_fill_super(struct super_block *sb, void *data, int silent)
+static int hypfs_fill_super(struct super_block *sb, struct fs_context *fc)
 {
+       struct hypfs_sb_info *sbi = sb->s_fs_info;
        struct inode *root_inode;
-       struct dentry *root_dentry;
-       int rc = 0;
-       struct hypfs_sb_info *sbi;
+       struct dentry *root_dentry, *update_file;
+       int rc;
 
-       sbi = kzalloc(sizeof(struct hypfs_sb_info), GFP_KERNEL);
-       if (!sbi)
-               return -ENOMEM;
-       mutex_init(&sbi->lock);
-       sbi->uid = current_uid();
-       sbi->gid = current_gid();
-       sb->s_fs_info = sbi;
        sb->s_blocksize = PAGE_SIZE;
        sb->s_blocksize_bits = PAGE_SHIFT;
        sb->s_magic = HYPFS_MAGIC;
        sb->s_op = &hypfs_s_ops;
-       if (hypfs_parse_options(data, sb))
-               return -EINVAL;
+
        root_inode = hypfs_make_inode(sb, S_IFDIR | 0755);
        if (!root_inode)
                return -ENOMEM;
@@ -300,18 +284,46 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent)
                rc = hypfs_diag_create_files(root_dentry);
        if (rc)
                return rc;
-       sbi->update_file = hypfs_create_update_file(root_dentry);
-       if (IS_ERR(sbi->update_file))
-               return PTR_ERR(sbi->update_file);
+       update_file = hypfs_create_update_file(root_dentry);
+       if (IS_ERR(update_file))
+               return PTR_ERR(update_file);
+       sbi->update_file = update_file;
        hypfs_update_update(sb);
        pr_info("Hypervisor filesystem mounted\n");
        return 0;
 }
 
-static struct dentry *hypfs_mount(struct file_system_type *fst, int flags,
-                       const char *devname, void *data)
+static int hypfs_get_tree(struct fs_context *fc)
+{
+       return get_tree_single(fc, hypfs_fill_super);
+}
+
+static void hypfs_free_fc(struct fs_context *fc)
 {
-       return mount_single(fst, flags, data, hypfs_fill_super);
+       kfree(fc->s_fs_info);
+}
+
+static const struct fs_context_operations hypfs_context_ops = {
+       .free           = hypfs_free_fc,
+       .parse_param    = hypfs_parse_param,
+       .get_tree       = hypfs_get_tree,
+};
+
+static int hypfs_init_fs_context(struct fs_context *fc)
+{
+       struct hypfs_sb_info *sbi;
+
+       sbi = kzalloc(sizeof(struct hypfs_sb_info), GFP_KERNEL);
+       if (!sbi)
+               return -ENOMEM;
+
+       mutex_init(&sbi->lock);
+       sbi->uid = current_uid();
+       sbi->gid = current_gid();
+
+       fc->s_fs_info = sbi;
+       fc->ops = &hypfs_context_ops;
+       return 0;
 }
 
 static void hypfs_kill_super(struct super_block *sb)
@@ -442,7 +454,8 @@ static const struct file_operations hypfs_file_ops = {
 static struct file_system_type hypfs_type = {
        .owner          = THIS_MODULE,
        .name           = "s390_hypfs",
-       .mount          = hypfs_mount,
+       .init_fs_context = hypfs_init_fs_context,
+       .parameters     = &hypfs_fs_parameters,
        .kill_sb        = hypfs_kill_super
 };
 
index ae3e322..ceeb552 100644 (file)
@@ -70,7 +70,7 @@ struct hws_qsi_info_block {       /* Bit(s) */
        unsigned long tear;         /* 24-31: TEAR contents              */
        unsigned long dear;         /* 32-39: DEAR contents              */
        unsigned int rsvrd0;        /* 40-43: reserved                   */
-       unsigned int cpu_speed;     /* 44-47: CPU speed                  */
+       unsigned int cpu_speed;     /* 44-47: CPU speed                  */
        unsigned long long rsvrd1;  /* 48-55: reserved                   */
        unsigned long long rsvrd2;  /* 56-63: reserved                   */
 } __packed;
@@ -89,10 +89,10 @@ struct hws_lsctl_request_block {
        unsigned long tear;         /* 16-23: TEAR contents              */
        unsigned long dear;         /* 24-31: DEAR contents              */
        /* 32-63:                                                        */
-       unsigned long rsvrd1;       /* reserved                          */
-       unsigned long rsvrd2;       /* reserved                          */
-       unsigned long rsvrd3;       /* reserved                          */
-       unsigned long rsvrd4;       /* reserved                          */
+       unsigned long rsvrd1;       /* reserved                          */
+       unsigned long rsvrd2;       /* reserved                          */
+       unsigned long rsvrd3;       /* reserved                          */
+       unsigned long rsvrd4;       /* reserved                          */
 } __packed;
 
 struct hws_basic_entry {
index 560d8f7..4652fff 100644 (file)
@@ -60,6 +60,7 @@ struct perf_sf_sde_regs {
 #define PERF_CPUM_SF_MODE_MASK         (PERF_CPUM_SF_BASIC_MODE| \
                                         PERF_CPUM_SF_DIAG_MODE)
 #define PERF_CPUM_SF_FULL_BLOCKS       0x0004    /* Process full SDBs only */
+#define PERF_CPUM_SF_FREQ_MODE         0x0008    /* Sampling with frequency */
 
 #define REG_NONE               0
 #define REG_OVERFLOW           1
@@ -70,5 +71,6 @@ struct perf_sf_sde_regs {
 #define SAMPL_FLAGS(hwc)       ((hwc)->config_base)
 #define SAMPL_DIAG_MODE(hwc)   (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
 #define SDB_FULL_BLOCKS(hwc)   (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
+#define SAMPLE_FREQ_MODE(hwc)  (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE)
 
 #endif /* _ASM_S390_PERF_EVENT_H */
index 0c46007..36c578c 100644 (file)
@@ -1682,12 +1682,6 @@ extern void s390_reset_cmma(struct mm_struct *mm);
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 
-/*
- * No page table caches to initialise
- */
-static inline void pgtable_cache_init(void) { }
-static inline void check_pgt_cache(void) { }
-
 #include <asm-generic/pgtable.h>
 
 #endif /* _S390_PAGE_H */
index 8c5755f..f9e5e1f 100644 (file)
@@ -4,7 +4,7 @@
  *
  *  zcrypt 2.2.1 (user-visible header)
  *
- *  Copyright IBM Corp. 2001, 2018
+ *  Copyright IBM Corp. 2001, 2019
  *  Author(s): Robert Burroughs
  *            Eric Rossman (edrossma@us.ibm.com)
  *
@@ -286,7 +286,7 @@ struct zcrypt_device_matrix_ext {
  *      0x08: CEX3A
  *      0x0a: CEX4
  *      0x0b: CEX5
- *      0x0c: CEX6
+ *      0x0c: CEX6 and CEX7
  *      0x0d: device is disabled
  *
  *   ZCRYPT_QDEPTH_MASK
index 6d0635c..9da6fa3 100644 (file)
@@ -130,7 +130,7 @@ static int s390_elf_probe(const char *buf, unsigned long len)
 const struct kexec_file_ops s390_kexec_elf_ops = {
        .probe = s390_elf_probe,
        .load = s390_elf_load,
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC_SIG
        .verify_sig = s390_verify_sig,
-#endif /* CONFIG_KEXEC_VERIFY_SIG */
+#endif /* CONFIG_KEXEC_SIG */
 };
index 58318bf..af23eff 100644 (file)
@@ -59,7 +59,7 @@ static int s390_image_probe(const char *buf, unsigned long len)
 const struct kexec_file_ops s390_kexec_image_ops = {
        .probe = s390_image_probe,
        .load = s390_image_load,
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC_SIG
        .verify_sig = s390_verify_sig,
-#endif /* CONFIG_KEXEC_VERIFY_SIG */
+#endif /* CONFIG_KEXEC_SIG */
 };
index fbdd3ea..8415ae7 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/elf.h>
 #include <linux/errno.h>
 #include <linux/kexec.h>
-#include <linux/module.h>
+#include <linux/module_signature.h>
 #include <linux/verification.h>
 #include <asm/boot_data.h>
 #include <asm/ipl.h>
@@ -22,29 +22,7 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
        NULL,
 };
 
-#ifdef CONFIG_KEXEC_VERIFY_SIG
-/*
- * Module signature information block.
- *
- * The constituents of the signature section are, in order:
- *
- *     - Signer's name
- *     - Key identifier
- *     - Signature data
- *     - Information block
- */
-struct module_signature {
-       u8      algo;           /* Public-key crypto algorithm [0] */
-       u8      hash;           /* Digest algorithm [0] */
-       u8      id_type;        /* Key identifier type [PKEY_ID_PKCS7] */
-       u8      signer_len;     /* Length of signer's name [0] */
-       u8      key_id_len;     /* Length of key identifier [0] */
-       u8      __pad[3];
-       __be32  sig_len;        /* Length of signature data */
-};
-
-#define PKEY_ID_PKCS7 2
-
+#ifdef CONFIG_KEXEC_SIG
 int s390_verify_sig(const char *kernel, unsigned long kernel_len)
 {
        const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1;
@@ -90,7 +68,7 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len)
                                      VERIFYING_MODULE_SIGNATURE,
                                      NULL, NULL);
 }
-#endif /* CONFIG_KEXEC_VERIFY_SIG */
+#endif /* CONFIG_KEXEC_SIG */
 
 static int kexec_file_update_purgatory(struct kimage *image,
                                       struct s390_load_data *data)
index 292a452..544a02e 100644 (file)
@@ -673,13 +673,89 @@ out:
        rcu_read_unlock();
 }
 
+static unsigned long getrate(bool freq, unsigned long sample,
+                            struct hws_qsi_info_block *si)
+{
+       unsigned long rate;
+
+       if (freq) {
+               rate = freq_to_sample_rate(si, sample);
+               rate = hw_limit_rate(si, rate);
+       } else {
+               /* The min/max sampling rates specify the valid range
+                * of sample periods.  If the specified sample period is
+                * out of range, limit the period to the range boundary.
+                */
+               rate = hw_limit_rate(si, sample);
+
+               /* The perf core maintains a maximum sample rate that is
+                * configurable through the sysctl interface.  Ensure the
+                * sampling rate does not exceed this value.  This also helps
+                * to avoid throttling when pushing samples with
+                * perf_event_overflow().
+                */
+               if (sample_rate_to_freq(si, rate) >
+                   sysctl_perf_event_sample_rate) {
+                       debug_sprintf_event(sfdbg, 1,
+                                           "Sampling rate exceeds maximum "
+                                           "perf sample rate\n");
+                       rate = 0;
+               }
+       }
+       return rate;
+}
+
+/* The sampling information (si) contains information about the
+ * min/max sampling intervals and the CPU speed.  So calculate the
+ * correct sampling interval and avoid the whole period adjust
+ * feedback loop.
+ *
+ * Since the CPU Measurement sampling facility cannot handle frequency,
+ * calculate the sampling interval when frequency is specified using
+ * this formula:
+ *     interval := cpu_speed * 1000000 / sample_freq
+ *
+ * Returns errno on bad input and zero on success with parameter interval
+ * set to the correct sampling rate.
+ *
+ * Note: This function turns off freq bit to avoid calling function
+ * perf_adjust_period(), which performs frequency adjustment in the common
+ * code part and causes tremendous variations in the counter values.
+ */
+static int __hw_perf_event_init_rate(struct perf_event *event,
+                                    struct hws_qsi_info_block *si)
+{
+       struct perf_event_attr *attr = &event->attr;
+       struct hw_perf_event *hwc = &event->hw;
+       unsigned long rate;
+
+       if (attr->freq) {
+               if (!attr->sample_freq)
+                       return -EINVAL;
+               rate = getrate(attr->freq, attr->sample_freq, si);
+               attr->freq = 0;         /* Don't call  perf_adjust_period() */
+               SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FREQ_MODE;
+       } else {
+               rate = getrate(attr->freq, attr->sample_period, si);
+               if (!rate)
+                       return -EINVAL;
+       }
+       attr->sample_period = rate;
+       SAMPL_RATE(hwc) = rate;
+       hw_init_period(hwc, SAMPL_RATE(hwc));
+       debug_sprintf_event(sfdbg, 4, "__hw_perf_event_init_rate:"
+                           "cpu:%d period:%llx freq:%d,%#lx\n", event->cpu,
+                           event->attr.sample_period, event->attr.freq,
+                           SAMPLE_FREQ_MODE(hwc));
+       return 0;
+}
+
 static int __hw_perf_event_init(struct perf_event *event)
 {
        struct cpu_hw_sf *cpuhw;
        struct hws_qsi_info_block si;
        struct perf_event_attr *attr = &event->attr;
        struct hw_perf_event *hwc = &event->hw;
-       unsigned long rate;
        int cpu, err;
 
        /* Reserve CPU-measurement sampling facility */
@@ -745,43 +821,9 @@ static int __hw_perf_event_init(struct perf_event *event)
        if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
                SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
 
-       /* The sampling information (si) contains information about the
-        * min/max sampling intervals and the CPU speed.  So calculate the
-        * correct sampling interval and avoid the whole period adjust
-        * feedback loop.
-        */
-       rate = 0;
-       if (attr->freq) {
-               if (!attr->sample_freq) {
-                       err = -EINVAL;
-                       goto out;
-               }
-               rate = freq_to_sample_rate(&si, attr->sample_freq);
-               rate = hw_limit_rate(&si, rate);
-               attr->freq = 0;
-               attr->sample_period = rate;
-       } else {
-               /* The min/max sampling rates specifies the valid range
-                * of sample periods.  If the specified sample period is
-                * out of range, limit the period to the range boundary.
-                */
-               rate = hw_limit_rate(&si, hwc->sample_period);
-
-               /* The perf core maintains a maximum sample rate that is
-                * configurable through the sysctl interface.  Ensure the
-                * sampling rate does not exceed this value.  This also helps
-                * to avoid throttling when pushing samples with
-                * perf_event_overflow().
-                */
-               if (sample_rate_to_freq(&si, rate) >
-                     sysctl_perf_event_sample_rate) {
-                       err = -EINVAL;
-                       debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
-                       goto out;
-               }
-       }
-       SAMPL_RATE(hwc) = rate;
-       hw_init_period(hwc, SAMPL_RATE(hwc));
+       err =  __hw_perf_event_init_rate(event, &si);
+       if (err)
+               goto out;
 
        /* Initialize sample data overflow accounting */
        hwc->extra_reg.reg = REG_OVERFLOW;
@@ -904,6 +946,8 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
                        if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
                                extend_sampling_buffer(&cpuhw->sfb, hwc);
                }
+               /* Rate may be adjusted with ioctl() */
+               cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
        }
 
        /* (Re)enable the PMU and sampling facility */
@@ -922,8 +966,9 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
        lpp(&S390_lowcore.lpp);
 
        debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
-                           "tear=%p dear=%p\n", cpuhw->lsctl.es,
-                           cpuhw->lsctl.cs, cpuhw->lsctl.ed, cpuhw->lsctl.cd,
+                           "interval:%lx tear=%p dear=%p\n",
+                           cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
+                           cpuhw->lsctl.cd, cpuhw->lsctl.interval,
                            (void *) cpuhw->lsctl.tear,
                            (void *) cpuhw->lsctl.dear);
 }
@@ -1717,6 +1762,44 @@ static void cpumsf_pmu_read(struct perf_event *event)
        /* Nothing to do ... updates are interrupt-driven */
 }
 
+/* Check if the new sampling period/frequency is appropriate.
+ *
+ * Return non-zero on error and zero on passed checks.
+ */
+static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
+{
+       struct hws_qsi_info_block si;
+       unsigned long rate;
+       bool do_freq;
+
+       memset(&si, 0, sizeof(si));
+       if (event->cpu == -1) {
+               if (qsi(&si))
+                       return -ENODEV;
+       } else {
+               /* Event is pinned to a particular CPU, retrieve the per-CPU
+                * sampling structure for accessing the CPU-specific QSI.
+                */
+               struct cpu_hw_sf *cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
+
+               si = cpuhw->qsi;
+       }
+
+       do_freq = !!SAMPLE_FREQ_MODE(&event->hw);
+       rate = getrate(do_freq, value, &si);
+       if (!rate)
+               return -EINVAL;
+
+       event->attr.sample_period = rate;
+       SAMPL_RATE(&event->hw) = rate;
+       hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
+       debug_sprintf_event(sfdbg, 4, "cpumsf_pmu_check_period:"
+                           "cpu:%d value:%llx period:%llx freq:%d\n",
+                           event->cpu, value,
+                           event->attr.sample_period, do_freq);
+       return 0;
+}
+
 /* Activate sampling control.
  * Next call of pmu_enable() starts sampling.
  */
@@ -1908,6 +1991,8 @@ static struct pmu cpumf_sampling = {
 
        .setup_aux    = aux_buffer_setup,
        .free_aux     = aux_buffer_free,
+
+       .check_period = cpumsf_pmu_check_period,
 };
 
 static void cpumf_measurement_alert(struct ext_code ext_code,
index 2db6fb4..3627953 100644 (file)
@@ -311,7 +311,8 @@ int arch_update_cpu_topology(void)
        on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
        for_each_online_cpu(cpu) {
                dev = get_cpu_device(cpu);
-               kobject_uevent(&dev->kobj, KOBJ_CHANGE);
+               if (dev)
+                       kobject_uevent(&dev->kobj, KOBJ_CHANGE);
        }
        return rc;
 }
index cd8e03f..edcdca9 100644 (file)
@@ -9,7 +9,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/swap.h>
 #include <linux/smp.h>
 #include <linux/spinlock.h>
@@ -2521,13 +2521,9 @@ static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
        return 0;
 }
 
-static inline void zap_zero_pages(struct mm_struct *mm)
-{
-       struct mm_walk walk = { .pmd_entry = __zap_zero_pages };
-
-       walk.mm = mm;
-       walk_page_range(0, TASK_SIZE, &walk);
-}
+static const struct mm_walk_ops zap_zero_walk_ops = {
+       .pmd_entry      = __zap_zero_pages,
+};
 
 /*
  * switch on pgstes for its userspace process (for kvm)
@@ -2546,7 +2542,7 @@ int s390_enable_sie(void)
        mm->context.has_pgste = 1;
        /* split thp mappings and disable thp for future mappings */
        thp_split_mm(mm);
-       zap_zero_pages(mm);
+       walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL);
        up_write(&mm->mmap_sem);
        return 0;
 }
@@ -2589,12 +2585,13 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
        return 0;
 }
 
+static const struct mm_walk_ops enable_skey_walk_ops = {
+       .hugetlb_entry          = __s390_enable_skey_hugetlb,
+       .pte_entry              = __s390_enable_skey_pte,
+};
+
 int s390_enable_skey(void)
 {
-       struct mm_walk walk = {
-               .hugetlb_entry = __s390_enable_skey_hugetlb,
-               .pte_entry = __s390_enable_skey_pte,
-       };
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        int rc = 0;
@@ -2614,8 +2611,7 @@ int s390_enable_skey(void)
        }
        mm->def_flags &= ~VM_MERGEABLE;
 
-       walk.mm = mm;
-       walk_page_range(0, TASK_SIZE, &walk);
+       walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);
 
 out_up:
        up_write(&mm->mmap_sem);
@@ -2633,13 +2629,14 @@ static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
        return 0;
 }
 
+static const struct mm_walk_ops reset_cmma_walk_ops = {
+       .pte_entry              = __s390_reset_cmma,
+};
+
 void s390_reset_cmma(struct mm_struct *mm)
 {
-       struct mm_walk walk = { .pte_entry = __s390_reset_cmma };
-
        down_write(&mm->mmap_sem);
-       walk.mm = mm;
-       walk_page_range(0, TASK_SIZE, &walk);
+       walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
        up_write(&mm->mmap_sem);
 }
 EXPORT_SYMBOL_GPL(s390_reset_cmma);
index 54fcdf6..3dd253f 100644 (file)
@@ -210,7 +210,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
        page = alloc_page(GFP_KERNEL);
        if (!page)
                return NULL;
-       if (!pgtable_page_ctor(page)) {
+       if (!pgtable_pte_page_ctor(page)) {
                __free_page(page);
                return NULL;
        }
@@ -256,7 +256,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
                atomic_xor_bits(&page->_refcount, 3U << 24);
        }
 
-       pgtable_page_dtor(page);
+       pgtable_pte_page_dtor(page);
        __free_page(page);
 }
 
@@ -308,7 +308,7 @@ void __tlb_remove_table(void *_table)
        case 3:         /* 4K page table with pgstes */
                if (mask & 3)
                        atomic_xor_bits(&page->_refcount, 3 << 24);
-               pgtable_page_dtor(page);
+               pgtable_pte_page_dtor(page);
                __free_page(page);
                break;
        }
index b56f908..22d968b 100644 (file)
@@ -2,10 +2,8 @@
 #ifndef __ASM_SH_PGALLOC_H
 #define __ASM_SH_PGALLOC_H
 
-#include <linux/quicklist.h>
 #include <asm/page.h>
-
-#define QUICK_PT 0     /* Other page table pages that are zero on free */
+#include <asm-generic/pgalloc.h>
 
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
@@ -29,44 +27,9 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 }
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
-/*
- * Allocate and free page tables.
- */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-       return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
-       struct page *page;
-       void *pg;
-
-       pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
-       if (!pg)
-               return NULL;
-       page = virt_to_page(pg);
-       if (!pgtable_page_ctor(page)) {
-               quicklist_free(QUICK_PT, NULL, pg);
-               return NULL;
-       }
-       return page;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-       quicklist_free(QUICK_PT, NULL, pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-       pgtable_page_dtor(pte);
-       quicklist_free_page(QUICK_PT, NULL, pte);
-}
-
 #define __pte_free_tlb(tlb,pte,addr)                   \
 do {                                                   \
-       pgtable_page_dtor(pte);                         \
+       pgtable_pte_page_dtor(pte);                     \
        tlb_remove_page((tlb), (pte));                  \
 } while (0)
 
@@ -79,9 +42,4 @@ do {                                                  \
 } while (0);
 #endif
 
-static inline void check_pgt_cache(void)
-{
-       quicklist_trim(QUICK_PT, NULL, 25, 16);
-}
-
 #endif /* __ASM_SH_PGALLOC_H */
index 9085d11..cbd0f3c 100644 (file)
@@ -123,11 +123,6 @@ typedef pte_t *pte_addr_t;
 
 #define pte_pfn(x)             ((unsigned long)(((x).pte_low >> PAGE_SHIFT)))
 
-/*
- * Initialise the page table caches
- */
-extern void pgtable_cache_init(void);
-
 struct vm_area_struct;
 struct mm_struct;
 
index 02ed2df..5c8a2eb 100644 (file)
@@ -1,9 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 menu "Memory management options"
 
-config QUICKLIST
-       def_bool y
-
 config MMU
         bool "Support for memory management hardware"
        depends on !CPU_SH2
index cc779a9..dca946f 100644 (file)
@@ -97,7 +97,3 @@ void __init page_table_range_init(unsigned long start, unsigned long end,
 void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
 {
 }
-
-void pgtable_cache_init(void)
-{
-}
index cfec79b..4deddf4 100644 (file)
@@ -38,8 +38,6 @@ static inline int pci_proc_domain(struct pci_bus *bus)
 #define arch_can_pci_mmap_io() 1
 #define HAVE_ARCH_PCI_GET_UNMAPPED_AREA
 #define get_pci_unmapped_area get_fb_unmapped_area
-
-#define HAVE_ARCH_PCI_RESOURCE_TO_USER
 #endif /* CONFIG_SPARC64 */
 
 #if defined(CONFIG_SPARC64) || defined(CONFIG_LEON_PCI)
index 282be50..10538a4 100644 (file)
@@ -17,8 +17,6 @@ void srmmu_free_nocache(void *addr, int size);
 
 extern struct resource sparc_iomap;
 
-#define check_pgt_cache()      do { } while (0)
-
 pgd_t *get_pgd_fast(void);
 static inline void free_pgd_fast(pgd_t *pgd)
 {
index 48abccb..9d3e5cc 100644 (file)
@@ -69,8 +69,6 @@ void pte_free(struct mm_struct *mm, pgtable_t ptepage);
 #define pmd_populate(MM, PMD, PTE)             pmd_set(MM, PMD, PTE)
 #define pmd_pgtable(PMD)                       ((pte_t *)__pmd_page(PMD))
 
-#define check_pgt_cache()      do { } while (0)
-
 void pgtable_free(void *table, bool is_page);
 
 #ifdef CONFIG_SMP
index 4eebed6..31da448 100644 (file)
@@ -445,9 +445,4 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma,
 /* We provide our own get_unmapped_area to cope with VA holes for userland */
 #define HAVE_ARCH_UNMAPPED_AREA
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
 #endif /* !(_SPARC_PGTABLE_H) */
index 1599de7..6ae8016 100644 (file)
@@ -1078,7 +1078,7 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma,
 }
 #define io_remap_pfn_range io_remap_pfn_range 
 
-static inline unsigned long untagged_addr(unsigned long start)
+static inline unsigned long __untagged_addr(unsigned long start)
 {
        if (adi_capable()) {
                long addr = start;
@@ -1098,7 +1098,8 @@ static inline unsigned long untagged_addr(unsigned long start)
 
        return start;
 }
-#define untagged_addr untagged_addr
+#define untagged_addr(addr) \
+       ((__typeof__(addr))(__untagged_addr((unsigned long)(addr))))
 
 static inline bool pte_access_permitted(pte_t pte, bool write)
 {
@@ -1135,7 +1136,6 @@ unsigned long get_fb_unmapped_area(struct file *filp, unsigned long,
                                   unsigned long);
 #define HAVE_ARCH_FB_UNMAPPED_AREA
 
-void pgtable_cache_init(void);
 void sun4v_register_fault_status(void);
 void sun4v_ktsb_register(void);
 void __init cheetah_ecache_flush_init(void);
index 046ab11..906eda1 100644 (file)
@@ -31,7 +31,6 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/vaddrs.h>
-#include <asm/pgalloc.h>       /* bug in asm-generic/tlb.h: check_pgt_cache */
 #include <asm/setup.h>
 #include <asm/tlb.h>
 #include <asm/prom.h>
index 4b099dd..e6d9181 100644 (file)
@@ -2903,7 +2903,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm)
        struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!page)
                return NULL;
-       if (!pgtable_page_ctor(page)) {
+       if (!pgtable_pte_page_ctor(page)) {
                free_unref_page(page);
                return NULL;
        }
@@ -2919,7 +2919,7 @@ static void __pte_free(pgtable_t pte)
 {
        struct page *page = virt_to_page(pte);
 
-       pgtable_page_dtor(page);
+       pgtable_pte_page_dtor(page);
        __free_page(page);
 }
 
index aaebbc0..cc3ad64 100644 (file)
@@ -378,7 +378,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm)
        if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
                return NULL;
        page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
-       if (!pgtable_page_ctor(page)) {
+       if (!pgtable_pte_page_ctor(page)) {
                __free_page(page);
                return NULL;
        }
@@ -389,7 +389,7 @@ void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
        unsigned long p;
 
-       pgtable_page_dtor(pte);
+       pgtable_pte_page_dtor(pte);
        p = (unsigned long)page_address(pte);   /* Cached address (for test) */
        if (p == 0)
                BUG();
index 2638e46..fea5a0d 100644 (file)
@@ -335,3 +335,10 @@ config UML_NET_SLIRP
          Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
 
 endmenu
+
+config VIRTIO_UML
+       tristate "UML driver for virtio devices"
+       select VIRTIO
+       help
+         This driver provides support for virtio based paravirtual device
+         drivers over vhost-user sockets.
index 6933198..a290821 100644 (file)
@@ -1,6 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
 # 
 # Copyright (C) 2000, 2002, 2003 Jeff Dike (jdike@karaya.com)
-# Licensed under the GPL
 #
 
 # pcap is broken in 2.5 because kbuild doesn't allow pcap.a to be linked
@@ -61,6 +61,7 @@ obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o
 obj-$(CONFIG_UML_WATCHDOG) += harddog.o
 obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
 obj-$(CONFIG_UML_RANDOM) += random.o
+obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
 
 # pcap_user.o must be added explicitly.
 USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
index c512b03..c37cc4f 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __CHAN_KERN_H__
index 749d2bf..6299705 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 #include <linux/slab.h>
index 3fd7c3e..4d80526 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 #include <stdlib.h>
index 03f1b56..72222bb 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __CHAN_USER_H__
index 0ee9cc6..74b0c26 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 /*
index c2dd195..1509cc7 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __DAEMON_H__
index 7568cc2..fd24026 100644 (file)
@@ -1,9 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
  * James Leu (jleu@mindspring.net).
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright (C) 2001 by various other people who didn't put their name here.
- * Licensed under the GPL.
  */
 
 #include <linux/init.h>
index 8813c10..3695821 100644 (file)
@@ -1,9 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
  * James Leu (jleu@mindspring.net).
  * Copyright (C) 2001 by various other people who didn't put their name here.
- * Licensed under the GPL.
  */
 
 #include <stdint.h>
index a13a427..082d739 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
index 3aa8b0d..070468d 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
index 7f9dbdb..bf75b1c 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 Steve Schmidtke
- * Licensed under the GPL
  */
 
 #include <linux/fs.h>
index e0e6393..4f2a4ac 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/irqreturn.h>
index 138a145..a151ff5 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __LINE_H__
index 44af737..6356378 100644 (file)
@@ -1,7 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org)
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __MCONSOLE_H__
index ff3ab72..0117489 100644 (file)
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org)
  * Copyright (C) 2001 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/console.h>
index 7a0c6a1..56d8d6a 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __MCONSOLE_KERN_H__
index 9920982..e24298a 100644 (file)
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org)
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <errno.h>
index 6e5be5f..327b728 100644 (file)
@@ -1,9 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
  * James Leu (jleu@mindspring.net).
  * Copyright (C) 2001 by various other people who didn't put their name here.
- * Licensed under the GPL.
  */
 
 #include <linux/memblock.h>
index e9f8445..4c95764 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
index 1049574..8708776 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 #include <stddef.h>
index be0fb57..cfe4cb1 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL.
  */
 
 #include <linux/init.h>
index c07b9c7..bbd2063 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL.
  */
 
 #include <errno.h>
index 1ca7c76..216246f 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #include <net_user.h>
index 372a80c..9085b33 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __PORT_H__
index b0e9ff3..a47ca53 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 #include <linux/completion.h>
index 5f56d11..5b5b64c 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
index f1fcc2c..39c6006 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
index ed5249f..c58ccdc 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL.
  */
 
 #include <linux/if_arp.h>
index 0d6b66c..8016d32 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL.
  */
 
 #include <stdio.h>
index 4ef11ca..2d97692 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL.
  */
 
 #include <linux/if_arp.h>
index 98b6a41..8f633e2 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL.
  */
 
 #include <unistd.h>
index 7ae407d..b213201 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* 
  * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #include <linux/fs.h>
index c90817b..0021d7f 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* 
  * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #include <linux/posix_types.h>
index 6d8275f..3a409ec 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __STDIO_CONSOLE_H
index eaa201b..884a762 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 #include <errno.h>
index cc1cc85..f016fe1 100644 (file)
@@ -1,7 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
  * Copyright (C) 2001 RidgeRun, Inc (glonnon@ridgerun.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_UBD_USER_H
index 33c1cd6..612535c 100644 (file)
@@ -1,8 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2018 Cambridge Greys Ltd
  * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
  * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 /* 2001-09-28...2002-04-17
index 6f74479..a1afe41 100644 (file)
@@ -1,8 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2016 Anton Ivanov (aivanov@brocade.com)
  * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
  * Copyright (C) 2001 Ridgerun,Inc (glonnon@ridgerun.com)
- * Licensed under the GPL
  */
 
 #include <stddef.h>
index c190c64..fe39bee 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __DRIVERS_UMCAST_H
index f5ba6e3..595a54f 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * user-mode-linux networking multicast transport
  * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org>
@@ -8,7 +9,6 @@
  * James Leu (jleu@mindspring.net).
  * Copyright (C) 2001 by various other people who didn't put their name here.
  *
- * Licensed under the GPL.
  */
 
 #include <linux/init.h>
index 6074184..b50b13c 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * user-mode-linux networking multicast transport
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
@@ -8,7 +9,6 @@
  * James Leu (jleu@mindspring.net).
  * Copyright (C) 2001 by various other people who didn't put their name here.
  *
- * Licensed under the GPL.
  *
  */
 
index fc3a059..cab0379 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
- * Licensed under the GPL.
  */
 
 #ifndef __UM_VDE_H__
index 6a365fa..bc6f22c 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
- * Licensed under the GPL.
  *
  * Transport usage:
  *  ethN=vde,<vde_switch>,<mac addr>,<port>,<group>,<mode>,<description>
index 64cb630..bc7dc4e 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
- * Licensed under the GPL.
  */
 
 #include <stddef.h>
index e190e4c..769ffbd 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2017 - Cambridge Greys Limited
  * Copyright (C) 2011 - 2014 Cisco Systems Inc
@@ -5,7 +6,6 @@
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
  * James Leu (jleu@mindspring.net).
  * Copyright (C) 2001 by various other people who didn't put their name here.
- * Licensed under the GPL.
  */
 
 #include <linux/version.h>
@@ -76,6 +76,7 @@ static void vector_eth_configure(int n, struct arglist *def);
 #define DEFAULT_VECTOR_SIZE 64
 #define TX_SMALL_PACKET 128
 #define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1)
+#define MAX_ITERATIONS 64
 
 static const struct {
        const char string[ETH_GSTRING_LEN];
@@ -121,7 +122,8 @@ static int get_mtu(struct arglist *def)
 
        if (mtu != NULL) {
                if (kstrtoul(mtu, 10, &result) == 0)
-                       return result;
+                       if ((result < (1 << 16) - 1) && (result >= 576))
+                               return result;
        }
        return ETH_MAX_PACKET;
 }
@@ -186,6 +188,8 @@ static int get_transport_options(struct arglist *def)
 
 
        if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
+               return 0;
+       if (strncmp(transport, TRANS_HYBRID, TRANS_HYBRID_LEN) == 0)
                return (vec_rx | VECTOR_BPF);
        if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
                return (vec_rx | vec_tx | VECTOR_QDISC_BYPASS);
@@ -415,6 +419,7 @@ static int vector_send(struct vector_queue *qi)
                                        if (net_ratelimit())
                                                netdev_err(vp->dev, "sendmmsg err=%i\n",
                                                        result);
+                                       vp->in_error = true;
                                        result = send_len;
                                }
                                if (result > 0) {
@@ -842,6 +847,10 @@ static int vector_legacy_rx(struct vector_private *vp)
        }
 
        pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0);
+       if (pkt_len < 0) {
+               vp->in_error = true;
+               return pkt_len;
+       }
 
        if (skb != NULL) {
                if (pkt_len > vp->header_size) {
@@ -888,12 +897,16 @@ static int writev_tx(struct vector_private *vp, struct sk_buff *skb)
 
        if (iov_count < 1)
                goto drop;
+
        pkt_len = uml_vector_writev(
                vp->fds->tx_fd,
                (struct iovec *) &iov,
                iov_count
        );
 
+       if (pkt_len < 0)
+               goto drop;
+
        netif_trans_update(vp->dev);
        netif_wake_queue(vp->dev);
 
@@ -908,6 +921,8 @@ static int writev_tx(struct vector_private *vp, struct sk_buff *skb)
 drop:
        vp->dev->stats.tx_dropped++;
        consume_skb(skb);
+       if (pkt_len < 0)
+               vp->in_error = true;
        return pkt_len;
 }
 
@@ -936,6 +951,9 @@ static int vector_mmsg_rx(struct vector_private *vp)
        packet_count = uml_vector_recvmmsg(
                vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0);
 
+       if (packet_count < 0)
+               vp->in_error = true;
+
        if (packet_count <= 0)
                return packet_count;
 
@@ -1005,15 +1023,18 @@ static int vector_mmsg_rx(struct vector_private *vp)
 static void vector_rx(struct vector_private *vp)
 {
        int err;
+       int iter = 0;
 
        if ((vp->options & VECTOR_RX) > 0)
-               while ((err = vector_mmsg_rx(vp)) > 0)
-                       ;
+               while (((err = vector_mmsg_rx(vp)) > 0) && (iter < MAX_ITERATIONS))
+                       iter++;
        else
-               while ((err = vector_legacy_rx(vp)) > 0)
-                       ;
+               while (((err = vector_legacy_rx(vp)) > 0) && (iter < MAX_ITERATIONS))
+                       iter++;
        if ((err != 0) && net_ratelimit())
                netdev_err(vp->dev, "vector_rx: error(%d)\n", err);
+       if (iter == MAX_ITERATIONS)
+               netdev_err(vp->dev, "vector_rx: device stuck, remote end may have closed the connection\n");
 }
 
 static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -1021,6 +1042,13 @@ static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
        struct vector_private *vp = netdev_priv(dev);
        int queue_depth = 0;
 
+       if (vp->in_error) {
+               deactivate_fd(vp->fds->rx_fd, vp->rx_irq);
+               if ((vp->fds->rx_fd != vp->fds->tx_fd) && (vp->tx_irq != 0))
+                       deactivate_fd(vp->fds->tx_fd, vp->tx_irq);
+               return NETDEV_TX_BUSY;
+       }
+
        if ((vp->options & VECTOR_TX) == 0) {
                writev_tx(vp, skb);
                return NETDEV_TX_OK;
@@ -1131,6 +1159,7 @@ static int vector_net_close(struct net_device *dev)
        vp->fds = NULL;
        spin_lock_irqsave(&vp->lock, flags);
        vp->opened = false;
+       vp->in_error = false;
        spin_unlock_irqrestore(&vp->lock, flags);
        return 0;
 }
@@ -1498,7 +1527,8 @@ static void vector_eth_configure(
                .transport_data         = NULL,
                .in_write_poll          = false,
                .coalesce               = 2,
-               .req_size               = get_req_size(def)
+               .req_size               = get_req_size(def),
+               .in_error               = false
                });
 
        dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
index 0b0a767..4d292e6 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_VECTOR_KERN_H
@@ -116,6 +116,7 @@ struct vector_private {
        bool rexmit_scheduled;
        bool opened;
        bool in_write_poll;
+       bool in_error;
 
        /* ethtool stats */
 
index 77e4ebc..0794d23 100644 (file)
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2017 - Cambridge Greys Limited
  * Copyright (C) 2011 - 2014 Cisco Systems Inc
- * Licensed under the GPL.
  */
 
 #include <linux/etherdevice.h>
@@ -418,7 +418,7 @@ static int build_raw_transport_data(struct vector_private *vp)
        return 0;
 }
 
-static int build_tap_transport_data(struct vector_private *vp)
+static int build_hybrid_transport_data(struct vector_private *vp)
 {
        if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) {
                vp->form_header = &raw_form_header;
@@ -432,7 +432,7 @@ static int build_tap_transport_data(struct vector_private *vp)
                                NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
                netdev_info(
                        vp->dev,
-                       "tap/raw: using vnet headers for tso and tx/rx checksum"
+                       "tap/raw hybrid: using vnet headers for tso and tx/rx checksum"
                );
        } else {
                return 0; /* do not try to enable tap too if raw failed */
@@ -442,6 +442,38 @@ static int build_tap_transport_data(struct vector_private *vp)
        return -1;
 }
 
+static int build_tap_transport_data(struct vector_private *vp)
+{
+       /* "Pure" tap uses the same fd for rx and tx */
+       if (uml_tap_enable_vnet_headers(vp->fds->tx_fd)) {
+               vp->form_header = &raw_form_header;
+               vp->verify_header = &raw_verify_header;
+               vp->header_size = sizeof(struct virtio_net_hdr);
+               vp->rx_header_size = sizeof(struct virtio_net_hdr);
+               vp->dev->hw_features |=
+                       (NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
+               vp->dev->features |=
+                       (NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
+                               NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
+               netdev_info(
+                       vp->dev,
+                       "tap: using vnet headers for tso and tx/rx checksum"
+               );
+               return 0;
+       }
+       return -1;
+}
+
+
+static int build_bess_transport_data(struct vector_private *vp)
+{
+       vp->form_header = NULL;
+       vp->verify_header = NULL;
+       vp->header_size = 0;
+       vp->rx_header_size = 0;
+       return 0;
+}
+
 int build_transport_data(struct vector_private *vp)
 {
        char *transport = uml_vector_fetch_arg(vp->parsed, "transport");
@@ -454,6 +486,10 @@ int build_transport_data(struct vector_private *vp)
                return build_raw_transport_data(vp);
        if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
                return build_tap_transport_data(vp);
+       if (strncmp(transport, TRANS_HYBRID, TRANS_HYBRID_LEN) == 0)
+               return build_hybrid_transport_data(vp);
+       if (strncmp(transport, TRANS_BESS, TRANS_BESS_LEN) == 0)
+               return build_bess_transport_data(vp);
        return 0;
 }
 
index b3f7b3c..e2c969b 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
@@ -17,6 +17,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <sys/socket.h>
+#include <sys/un.h>
 #include <net/ethernet.h>
 #include <netinet/ip.h>
 #include <netinet/ether.h>
@@ -33,7 +34,8 @@
 
 #define ID_GRE 0
 #define ID_L2TPV3 1
-#define ID_MAX 1
+#define ID_BESS 2
+#define ID_MAX 2
 
 #define TOKEN_IFNAME "ifname"
 
 #define VNET_HDR_FAIL "could not enable vnet headers on fd %d"
 #define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s"
 #define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i"
+#define UNIX_BIND_FAIL "unix_open : could not bind socket err=%i"
 #define BPF_ATTACH_FAIL "Failed to attach filter size %d to %d, err %d\n"
 
+#define MAX_UN_LEN 107
+
 /* This is very ugly and brute force lookup, but it is done
  * only once at initialization so not worth doing hashes or
  * anything more intelligent
@@ -114,12 +119,76 @@ cleanup:
 
 #define PATH_NET_TUN "/dev/net/tun"
 
-static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
+
+static int create_tap_fd(char *iface)
 {
        struct ifreq ifr;
        int fd = -1;
-       struct sockaddr_ll sock;
        int err = -ENOMEM, offload;
+
+       fd = open(PATH_NET_TUN, O_RDWR);
+       if (fd < 0) {
+               printk(UM_KERN_ERR "uml_tap: failed to open tun device\n");
+               goto tap_fd_cleanup;
+       }
+       memset(&ifr, 0, sizeof(ifr));
+       ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
+       strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+
+       err = ioctl(fd, TUNSETIFF, (void *) &ifr);
+       if (err != 0) {
+               printk(UM_KERN_ERR "uml_tap: failed to select tap interface\n");
+               goto tap_fd_cleanup;
+       }
+
+       offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
+       ioctl(fd, TUNSETOFFLOAD, offload);
+       return fd;
+tap_fd_cleanup:
+       if (fd >= 0)
+               os_close_file(fd);
+       return err;
+}
+
+static int create_raw_fd(char *iface, int flags, int proto)
+{
+       struct ifreq ifr;
+       int fd = -1;
+       struct sockaddr_ll sock;
+       int err = -ENOMEM;
+
+       fd = socket(AF_PACKET, SOCK_RAW, flags);
+       if (fd == -1) {
+               err = -errno;
+               goto raw_fd_cleanup;
+       }
+       memset(&ifr, 0, sizeof(ifr));
+       strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+       if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) {
+               err = -errno;
+               goto raw_fd_cleanup;
+       }
+
+       sock.sll_family = AF_PACKET;
+       sock.sll_protocol = htons(proto);
+       sock.sll_ifindex = ifr.ifr_ifindex;
+
+       if (bind(fd,
+               (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+               err = -errno;
+               goto raw_fd_cleanup;
+       }
+       return fd;
+raw_fd_cleanup:
+       printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err);
+       if (fd >= 0)
+               os_close_file(fd);
+       return err;
+}
+
+static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
+{
+       int fd = -1;
        char *iface;
        struct vector_fds *result = NULL;
 
@@ -141,117 +210,167 @@ static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
 
        /* TAP */
 
-       fd = open(PATH_NET_TUN, O_RDWR);
+       fd = create_tap_fd(iface);
        if (fd < 0) {
-               printk(UM_KERN_ERR "uml_tap: failed to open tun device\n");
+               printk(UM_KERN_ERR "uml_tap: failed to create tun interface\n");
                goto tap_cleanup;
        }
        result->tx_fd = fd;
-       memset(&ifr, 0, sizeof(ifr));
-       ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
-       strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+       result->rx_fd = fd;
+       return result;
+tap_cleanup:
+       printk(UM_KERN_ERR "user_init_tap: init failed, error %d", fd);
+       if (result != NULL)
+               kfree(result);
+       return NULL;
+}
 
-       err = ioctl(fd, TUNSETIFF, (void *) &ifr);
-       if (err != 0) {
-               printk(UM_KERN_ERR "uml_tap: failed to select tap interface\n");
-               goto tap_cleanup;
+static struct vector_fds *user_init_hybrid_fds(struct arglist *ifspec)
+{
+       char *iface;
+       struct vector_fds *result = NULL;
+
+       iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
+       if (iface == NULL) {
+               printk(UM_KERN_ERR "uml_tap: failed to parse interface spec\n");
+               goto hybrid_cleanup;
        }
 
-       offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
-       ioctl(fd, TUNSETOFFLOAD, offload);
+       result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+       if (result == NULL) {
+               printk(UM_KERN_ERR "uml_tap: failed to allocate file descriptors\n");
+               goto hybrid_cleanup;
+       }
+       result->rx_fd = -1;
+       result->tx_fd = -1;
+       result->remote_addr = NULL;
+       result->remote_addr_size = 0;
+
+       /* TAP */
+
+       result->tx_fd = create_tap_fd(iface);
+       if (result->tx_fd < 0) {
+               printk(UM_KERN_ERR "uml_tap: failed to create tun interface: %i\n", result->tx_fd);
+               goto hybrid_cleanup;
+       }
 
        /* RAW */
 
-       fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
-       if (fd == -1) {
+       result->rx_fd = create_raw_fd(iface, ETH_P_ALL, ETH_P_ALL);
+       if (result->rx_fd == -1) {
                printk(UM_KERN_ERR
-                       "uml_tap: failed to create socket: %i\n", -errno);
-               goto tap_cleanup;
+                       "uml_tap: failed to create paired raw socket: %i\n", result->rx_fd);
+               goto hybrid_cleanup;
        }
-       result->rx_fd = fd;
-       memset(&ifr, 0, sizeof(ifr));
-       strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
-       if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) {
-               printk(UM_KERN_ERR
-                       "uml_tap: failed to set interface: %i\n", -errno);
-               goto tap_cleanup;
+       return result;
+hybrid_cleanup:
+       printk(UM_KERN_ERR "user_init_hybrid: init failed");
+       if (result != NULL)
+               kfree(result);
+       return NULL;
+}
+
+static struct vector_fds *user_init_unix_fds(struct arglist *ifspec, int id)
+{
+       int fd = -1;
+       int socktype;
+       char *src, *dst;
+       struct vector_fds *result = NULL;
+       struct sockaddr_un *local_addr = NULL, *remote_addr = NULL;
+
+       src = uml_vector_fetch_arg(ifspec, "src");
+       dst = uml_vector_fetch_arg(ifspec, "dst");
+       result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+       if (result == NULL) {
+               printk(UM_KERN_ERR "unix open:cannot allocate remote addr");
+               goto unix_cleanup;
+       }
+       remote_addr = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL);
+       if (remote_addr == NULL) {
+               printk(UM_KERN_ERR "unix open:cannot allocate remote addr");
+               goto unix_cleanup;
        }
 
-       sock.sll_family = AF_PACKET;
-       sock.sll_protocol = htons(ETH_P_ALL);
-       sock.sll_ifindex = ifr.ifr_ifindex;
+       switch (id) {
+       case ID_BESS:
+               socktype = SOCK_SEQPACKET;
+               if ((src != NULL) && (strlen(src) <= MAX_UN_LEN)) {
+                       local_addr = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL);
+                       if (local_addr == NULL) {
+                               printk(UM_KERN_ERR "bess open:cannot allocate local addr");
+                               goto unix_cleanup;
+                       }
+                       local_addr->sun_family = AF_UNIX;
+                       memcpy(local_addr->sun_path, src, strlen(src) + 1);
+               }
+               if ((dst == NULL) || (strlen(dst) > MAX_UN_LEN))
+                       goto unix_cleanup;
+               remote_addr->sun_family = AF_UNIX;
+               memcpy(remote_addr->sun_path, dst, strlen(dst) + 1);
+               break;
+       default:
+               printk(KERN_ERR "Unsupported unix socket type\n");
+               return NULL;
+       }
 
-       if (bind(fd,
-               (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+       fd = socket(AF_UNIX, socktype, 0);
+       if (fd == -1) {
                printk(UM_KERN_ERR
-                       "user_init_tap: failed to bind raw pair, err %d\n",
-                               -errno);
-               goto tap_cleanup;
+                       "unix open: could not open socket, error = %d",
+                       -errno
+               );
+               goto unix_cleanup;
        }
+       if (local_addr != NULL) {
+               if (bind(fd, (struct sockaddr *) local_addr, sizeof(struct sockaddr_un))) {
+                       printk(UM_KERN_ERR UNIX_BIND_FAIL, errno);
+                       goto unix_cleanup;
+               }
+       }
+       switch (id) {
+       case ID_BESS:
+               if (connect(fd, remote_addr, sizeof(struct sockaddr_un)) < 0) {
+                       printk(UM_KERN_ERR "bess open:cannot connect to %s %i", remote_addr->sun_path, -errno);
+                       goto unix_cleanup;
+               }
+               break;
+       }
+       result->rx_fd = fd;
+       result->tx_fd = fd;
+       result->remote_addr_size = sizeof(struct sockaddr_un);
+       result->remote_addr = remote_addr;
        return result;
-tap_cleanup:
-       printk(UM_KERN_ERR "user_init_tap: init failed, error %d", err);
-       if (result != NULL) {
-               if (result->rx_fd >= 0)
-                       os_close_file(result->rx_fd);
-               if (result->tx_fd >= 0)
-                       os_close_file(result->tx_fd);
+unix_cleanup:
+       if (fd >= 0)
+               os_close_file(fd);
+       if (remote_addr != NULL)
+               kfree(remote_addr);
+       if (result != NULL)
                kfree(result);
-       }
        return NULL;
 }
 
-
 static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
 {
-       struct ifreq ifr;
        int rxfd = -1, txfd = -1;
-       struct sockaddr_ll sock;
        int err = -ENOMEM;
        char *iface;
        struct vector_fds *result = NULL;
 
        iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
        if (iface == NULL)
-               goto cleanup;
+               goto raw_cleanup;
 
-       rxfd = socket(AF_PACKET, SOCK_RAW, ETH_P_ALL);
+       rxfd = create_raw_fd(iface, ETH_P_ALL, ETH_P_ALL);
        if (rxfd == -1) {
                err = -errno;
-               goto cleanup;
+               goto raw_cleanup;
        }
-       txfd = socket(AF_PACKET, SOCK_RAW, 0); /* Turn off RX on this fd */
+       txfd = create_raw_fd(iface, 0, ETH_P_IP); /* Turn off RX on this fd */
        if (txfd == -1) {
                err = -errno;
-               goto cleanup;
+               goto raw_cleanup;
        }
-       memset(&ifr, 0, sizeof(ifr));
-       strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
-       if (ioctl(rxfd, SIOCGIFINDEX, (void *) &ifr) < 0) {
-               err = -errno;
-               goto cleanup;
-       }
-
-       sock.sll_family = AF_PACKET;
-       sock.sll_protocol = htons(ETH_P_ALL);
-       sock.sll_ifindex = ifr.ifr_ifindex;
-
-       if (bind(rxfd,
-               (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
-               err = -errno;
-               goto cleanup;
-       }
-
-       sock.sll_family = AF_PACKET;
-       sock.sll_protocol = htons(ETH_P_IP);
-       sock.sll_ifindex = ifr.ifr_ifindex;
-
-       if (bind(txfd,
-               (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
-               err = -errno;
-               goto cleanup;
-       }
-
        result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
        if (result != NULL) {
                result->rx_fd = rxfd;
@@ -260,13 +379,10 @@ static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
                result->remote_addr_size = 0;
        }
        return result;
-cleanup:
+raw_cleanup:
        printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err);
-       if (rxfd >= 0)
-               os_close_file(rxfd);
-       if (txfd >= 0)
-               os_close_file(txfd);
-       kfree(result);
+       if (result != NULL)
+               kfree(result);
        return NULL;
 }
 
@@ -456,12 +572,16 @@ struct vector_fds *uml_vector_user_open(
        }
        if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
                return user_init_raw_fds(parsed);
+       if (strncmp(transport, TRANS_HYBRID, TRANS_HYBRID_LEN) == 0)
+               return user_init_hybrid_fds(parsed);
        if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
                return user_init_tap_fds(parsed);
        if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0)
                return user_init_socket_fds(parsed, ID_GRE);
        if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0)
                return user_init_socket_fds(parsed, ID_L2TPV3);
+       if (strncmp(transport, TRANS_BESS, TRANS_BESS_LEN) == 0)
+               return user_init_unix_fds(parsed, ID_BESS);
        return NULL;
 }
 
@@ -482,8 +602,9 @@ int uml_vector_sendmsg(int fd, void *hdr, int flags)
 int uml_vector_recvmsg(int fd, void *hdr, int flags)
 {
        int n;
+       struct msghdr *msg = (struct msghdr *) hdr;
 
-       CATCH_EINTR(n = recvmsg(fd, (struct msghdr *) hdr,  flags));
+       CATCH_EINTR(n = readv(fd, msg->msg_iov, msg->msg_iovlen));
        if ((n < 0) && (errno == EAGAIN))
                return 0;
        if (n >= 0)
@@ -497,7 +618,7 @@ int uml_vector_writev(int fd, void *hdr, int iovcount)
        int n;
 
        CATCH_EINTR(n = writev(fd, (struct iovec *) hdr,  iovcount));
-       if ((n < 0) && (errno == EAGAIN))
+       if ((n < 0) && ((errno == EAGAIN) || (errno == ENOBUFS)))
                return 0;
        if (n >= 0)
                return n;
@@ -514,7 +635,7 @@ int uml_vector_sendmmsg(
        int n;
 
        CATCH_EINTR(n = sendmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags));
-       if ((n < 0) && (errno == EAGAIN))
+       if ((n < 0) && ((errno == EAGAIN) || (errno == ENOBUFS)))
                return 0;
        if (n >= 0)
                return n;
index d7cbff7..649ec25 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_VECTOR_USER_H
 #define TRANS_TAP "tap"
 #define TRANS_TAP_LEN strlen(TRANS_TAP)
 
-
 #define TRANS_GRE "gre"
 #define TRANS_GRE_LEN strlen(TRANS_RAW)
 
 #define TRANS_L2TPV3 "l2tpv3"
 #define TRANS_L2TPV3_LEN strlen(TRANS_L2TPV3)
 
+#define TRANS_HYBRID "hybrid"
+#define TRANS_HYBRID_LEN strlen(TRANS_HYBRID)
+
+#define TRANS_BESS "bess"
+#define TRANS_BESS_LEN strlen(TRANS_BESS)
+
 #ifndef IPPROTO_GRE
 #define IPPROTO_GRE 0x2F
 #endif
diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h
new file mode 100644 (file)
index 0000000..45ff5ea
--- /dev/null
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Vhost-user protocol */
+
+#ifndef __VHOST_USER_H__
+#define __VHOST_USER_H__
+
+/* Message flags */
+#define VHOST_USER_FLAG_REPLY          BIT(2)
+#define VHOST_USER_FLAG_NEED_REPLY     BIT(3)
+/* Feature bits */
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+/* Protocol feature bits */
+#define VHOST_USER_PROTOCOL_F_REPLY_ACK                3
+#define VHOST_USER_PROTOCOL_F_SLAVE_REQ                5
+#define VHOST_USER_PROTOCOL_F_CONFIG           9
+/* Vring state index masks */
+#define VHOST_USER_VRING_INDEX_MASK    0xff
+#define VHOST_USER_VRING_POLL_MASK     BIT(8)
+
+/* Supported version */
+#define VHOST_USER_VERSION             1
+/* Supported transport features */
+#define VHOST_USER_SUPPORTED_F         BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)
+/* Supported protocol features */
+#define VHOST_USER_SUPPORTED_PROTOCOL_F        (BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
+                                        BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
+                                        BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG))
+
+enum vhost_user_request {
+       VHOST_USER_GET_FEATURES = 1,
+       VHOST_USER_SET_FEATURES = 2,
+       VHOST_USER_SET_OWNER = 3,
+       VHOST_USER_RESET_OWNER = 4,
+       VHOST_USER_SET_MEM_TABLE = 5,
+       VHOST_USER_SET_LOG_BASE = 6,
+       VHOST_USER_SET_LOG_FD = 7,
+       VHOST_USER_SET_VRING_NUM = 8,
+       VHOST_USER_SET_VRING_ADDR = 9,
+       VHOST_USER_SET_VRING_BASE = 10,
+       VHOST_USER_GET_VRING_BASE = 11,
+       VHOST_USER_SET_VRING_KICK = 12,
+       VHOST_USER_SET_VRING_CALL = 13,
+       VHOST_USER_SET_VRING_ERR = 14,
+       VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+       VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+       VHOST_USER_GET_QUEUE_NUM = 17,
+       VHOST_USER_SET_VRING_ENABLE = 18,
+       VHOST_USER_SEND_RARP = 19,
+       VHOST_USER_NET_SEND_MTU = 20,
+       VHOST_USER_SET_SLAVE_REQ_FD = 21,
+       VHOST_USER_IOTLB_MSG = 22,
+       VHOST_USER_SET_VRING_ENDIAN = 23,
+       VHOST_USER_GET_CONFIG = 24,
+       VHOST_USER_SET_CONFIG = 25,
+};
+
+enum vhost_user_slave_request {
+       VHOST_USER_SLAVE_IOTLB_MSG = 1,
+       VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
+       VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+};
+
+struct vhost_user_header {
+       /*
+        * Use enum vhost_user_request for outgoing messages,
+        * uses enum vhost_user_slave_request for incoming ones.
+        */
+       u32 request;
+       u32 flags;
+       u32 size;
+} __packed;
+
+struct vhost_user_config {
+       u32 offset;
+       u32 size;
+       u32 flags;
+       u8 payload[0]; /* Variable length */
+} __packed;
+
+struct vhost_user_vring_state {
+       u32 index;
+       u32 num;
+} __packed;
+
+struct vhost_user_vring_addr {
+       u32 index;
+       u32 flags;
+       u64 desc, used, avail, log;
+} __packed;
+
+struct vhost_user_mem_region {
+       u64 guest_addr;
+       u64 size;
+       u64 user_addr;
+       u64 mmap_offset;
+} __packed;
+
+struct vhost_user_mem_regions {
+       u32 num;
+       u32 padding;
+       struct vhost_user_mem_region regions[2]; /* Currently supporting 2 */
+} __packed;
+
+union vhost_user_payload {
+       u64 integer;
+       struct vhost_user_config config;
+       struct vhost_user_vring_state vring_state;
+       struct vhost_user_vring_addr vring_addr;
+       struct vhost_user_mem_regions mem_regions;
+};
+
+struct vhost_user_msg {
+       struct vhost_user_header header;
+       union vhost_user_payload payload;
+} __packed;
+
+#endif
diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
new file mode 100644 (file)
index 0000000..fc8c52c
--- /dev/null
@@ -0,0 +1,1177 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Virtio vhost-user driver
+ *
+ * Copyright(c) 2019 Intel Corporation
+ *
+ * This module allows virtio devices to be used over a vhost-user socket.
+ *
+ * Guest devices can be instantiated by kernel module or command line
+ * parameters. One device will be created for each parameter. Syntax:
+ *
+ *             [virtio_uml.]device=<socket>:<virtio_id>[:<platform_id>]
+ * where:
+ *             <socket>        := vhost-user socket path to connect
+ *             <virtio_id>     := virtio device id (as in virtio_ids.h)
+ *             <platform_id>   := (optional) platform device id
+ *
+ * example:
+ *             virtio_uml.device=/var/uml.socket:1
+ *
+ * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
+ */
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+#include <shared/as-layout.h>
+#include <irq_kern.h>
+#include <init.h>
+#include <os.h>
+#include "vhost_user.h"
+
+/* Workaround due to a conflict between irq_user.h and irqreturn.h */
+#ifdef IRQ_NONE
+#undef IRQ_NONE
+#endif
+
+/* Ring size requested for every virtqueue created by vu_setup_vq() */
+#define MAX_SUPPORTED_QUEUE_SIZE       256
+
+/* Map an embedded struct virtio_device back to its virtio_uml_device */
+#define to_virtio_uml_device(_vdev) \
+       container_of(_vdev, struct virtio_uml_device, vdev)
+
+/* Per-device state of one virtio device backed by a vhost-user socket. */
+struct virtio_uml_device {
+       struct virtio_device vdev;
+       struct platform_device *pdev;
+
+       /*
+        * sock: connected vhost-user socket.
+        * req_fd: read end of the slave request pipe, -1 while not set up.
+        */
+       int sock, req_fd;
+       /* Feature bits read from the slave, later the finalized set */
+       u64 features;
+       /* Protocol features, masked by VHOST_USER_SUPPORTED_PROTOCOL_F */
+       u64 protocol_features;
+       /* Virtio device status; stored locally only */
+       u8 status;
+};
+
+/* Per-virtqueue data, stored in vq->priv. */
+struct virtio_uml_vq_info {
+       /*
+        * kick_fd: eventfd written by vu_notify().
+        * call_fd: read end of the call pipe drained by vu_interrupt().
+        */
+       int kick_fd, call_fd;
+       /* "<platform device name>.<id>-<queue name>" */
+       char name[32];
+};
+
+/* Defined elsewhere; used to build the memory table sent to the slave */
+extern unsigned long long physmem_size, highmem;
+
+/* dev_err() against the platform device that backs @vu_dev */
+#define vu_err(vu_dev, ...)    dev_err(&(vu_dev)->pdev->dev, __VA_ARGS__)
+
+/* Vhost-user protocol */
+
+/*
+ * Send all @len bytes of @buf over @fd, passing the @fds_num descriptors
+ * in @fds along with the first chunk that goes out. Retries after -EINTR
+ * and after short writes.
+ *
+ * Returns 0 on success or the negative error from os_sendmsg_fds().
+ */
+static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
+                           const int *fds, unsigned int fds_num)
+{
+       int sent;
+
+       for (;;) {
+               sent = os_sendmsg_fds(fd, buf, len, fds, fds_num);
+               if (sent > 0) {
+                       buf += sent;
+                       len -= sent;
+                       /* The descriptors must not be sent a second time */
+                       fds = NULL;
+                       fds_num = 0;
+               }
+
+               if (!len)
+                       break;
+               if (sent < 0 && sent != -EINTR)
+                       break;
+       }
+
+       return sent < 0 ? sent : 0;
+}
+
+/*
+ * Read exactly @len bytes from @fd into @buf, retrying after -EINTR and
+ * after short reads.
+ *
+ * Returns 0 on success (including @len == 0), the negative error from
+ * os_read_file() on failure, or -ECONNRESET if the stream ends before
+ * @len bytes have arrived.
+ */
+static int full_read(int fd, void *buf, int len)
+{
+       int rc;
+
+       /*
+        * Messages without a payload are valid. Issuing a zero-length read
+        * would yield 0, which the code below treats as end-of-stream.
+        */
+       if (!len)
+               return 0;
+
+       do {
+               rc = os_read_file(fd, buf, len);
+               if (rc > 0) {
+                       buf += rc;
+                       len -= rc;
+               }
+       } while (len && (rc > 0 || rc == -EINTR));
+
+       if (rc < 0)
+               return rc;
+       if (rc == 0)
+               return -ECONNRESET;
+       return 0;
+}
+
+/* Read only the fixed-size header of the next message on @fd into @msg. */
+static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
+{
+       struct vhost_user_header *hdr = &msg->header;
+
+       return full_read(fd, hdr, sizeof(*hdr));
+}
+
+/*
+ * Receive one message (header, then payload) from @fd into @msg.
+ *
+ * Returns 0 on success, -EPROTO if the announced payload is larger than
+ * @max_payload_size, or the error reported by full_read().
+ */
+static int vhost_user_recv(int fd, struct vhost_user_msg *msg,
+                          size_t max_payload_size)
+{
+       int err;
+
+       err = vhost_user_recv_header(fd, msg);
+       if (err)
+               return err;
+
+       /* Refuse payloads the caller has not made room for */
+       if (msg->header.size > max_payload_size)
+               return -EPROTO;
+
+       return full_read(fd, &msg->payload, msg->header.size);
+}
+
+/*
+ * Receive a response on the main vhost-user socket. Besides the checks
+ * done by vhost_user_recv(), the flags must be exactly the protocol
+ * version plus the reply flag, otherwise -EPROTO is returned.
+ */
+static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
+                               struct vhost_user_msg *msg,
+                               size_t max_payload_size)
+{
+       int err = vhost_user_recv(vu_dev->sock, msg, max_payload_size);
+
+       if (!err &&
+           msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
+               err = -EPROTO;
+
+       return err;
+}
+
+/*
+ * Receive a response whose payload is a single u64 and store it in
+ * @value. Returns -EPROTO if the payload has any other size.
+ */
+static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
+                              u64 *value)
+{
+       struct vhost_user_msg resp;
+       int err;
+
+       err = vhost_user_recv_resp(vu_dev, &resp,
+                                  sizeof(resp.payload.integer));
+       if (err)
+               return err;
+
+       if (resp.header.size == sizeof(resp.payload.integer)) {
+               *value = resp.payload.integer;
+               return 0;
+       }
+
+       return -EPROTO;
+}
+
+/*
+ * Receive a request from the slave on the request pipe. Apart from the
+ * optional NEED_REPLY flag, the flags must equal the protocol version,
+ * otherwise -EPROTO is returned.
+ */
+static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
+                              struct vhost_user_msg *msg,
+                              size_t max_payload_size)
+{
+       int err = vhost_user_recv(vu_dev->req_fd, msg, max_payload_size);
+
+       if (!err &&
+           (msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
+                       VHOST_USER_VERSION)
+               err = -EPROTO;
+
+       return err;
+}
+
+/*
+ * Send @msg (header plus header.size payload bytes) to the slave, passing
+ * @num_fds descriptors from @fds with it.
+ *
+ * @need_response tells that the caller will read a response of its own.
+ * Otherwise, if REPLY_ACK was negotiated, an acknowledgement is requested
+ * and awaited here; a non-zero status from the slave yields -EIO.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int vhost_user_send(struct virtio_uml_device *vu_dev,
+                          bool need_response, struct vhost_user_msg *msg,
+                          int *fds, size_t num_fds)
+{
+       size_t total = sizeof(msg->header) + msg->header.size;
+       uint64_t ack;
+       bool want_ack;
+       int err;
+
+       msg->header.flags |= VHOST_USER_VERSION;
+
+       /*
+        * An ACK is pointless when a real response follows anyway, and it
+        * can only be requested if the slave supports it.
+        */
+       want_ack = !need_response &&
+                  (vu_dev->protocol_features &
+                   BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK));
+       if (want_ack)
+               msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;
+
+       err = full_sendmsg_fds(vu_dev->sock, msg, total, fds, num_fds);
+       if (err < 0)
+               return err;
+       if (!want_ack)
+               return 0;
+
+       err = vhost_user_recv_u64(vu_dev, &ack);
+       if (err)
+               return err;
+       if (!ack)
+               return 0;
+
+       vu_err(vu_dev, "slave reports error: %llu\n", ack);
+       return -EIO;
+}
+
+/* Send a header-only message carrying @request. */
+static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
+                                     bool need_response, u32 request)
+{
+       struct vhost_user_msg req = {
+               .header = { .request = request },
+       };
+
+       return vhost_user_send(vu_dev, need_response, &req, NULL, 0);
+}
+
+/* Send a header-only message carrying @request, passing @fd along with it. */
+static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
+                                        u32 request, int fd)
+{
+       struct vhost_user_msg req = {
+               .header = { .request = request },
+       };
+
+       return vhost_user_send(vu_dev, false, &req, &fd, 1);
+}
+
+/* Send @request with the single u64 @value as its payload. */
+static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
+                              u32 request, u64 value)
+{
+       struct vhost_user_msg req = {
+               .header = {
+                       .request = request,
+                       .size = sizeof(req.payload.integer),
+               },
+               .payload = { .integer = value },
+       };
+
+       return vhost_user_send(vu_dev, false, &req, NULL, 0);
+}
+
+/* Send VHOST_USER_SET_OWNER; no response of our own is read afterwards. */
+static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
+{
+       const u32 request = VHOST_USER_SET_OWNER;
+
+       return vhost_user_send_no_payload(vu_dev, false, request);
+}
+
+/* Query the slave's feature bits and store them in @features. */
+static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
+                                  u64 *features)
+{
+       int err;
+
+       err = vhost_user_send_no_payload(vu_dev, true,
+                                        VHOST_USER_GET_FEATURES);
+
+       return err ? err : vhost_user_recv_u64(vu_dev, features);
+}
+
+/* Tell the slave which feature bits are in use. */
+static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
+                                  u64 features)
+{
+       const u32 request = VHOST_USER_SET_FEATURES;
+
+       return vhost_user_send_u64(vu_dev, request, features);
+}
+
+/* Query the slave's protocol features into @protocol_features. */
+static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
+                                           u64 *protocol_features)
+{
+       int err;
+
+       err = vhost_user_send_no_payload(vu_dev, true,
+                                        VHOST_USER_GET_PROTOCOL_FEATURES);
+
+       return err ? err : vhost_user_recv_u64(vu_dev, protocol_features);
+}
+
+/* Tell the slave which protocol features are in use. */
+static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
+                                           u64 protocol_features)
+{
+       const u32 request = VHOST_USER_SET_PROTOCOL_FEATURES;
+
+       return vhost_user_send_u64(vu_dev, request, protocol_features);
+}
+
+/*
+ * Answer the slave request @msg on the request pipe with the u64 status
+ * @response. The reply reuses the request's header with NEED_REPLY
+ * cleared and REPLY set. A send failure is only logged.
+ */
+static void vhost_user_reply(struct virtio_uml_device *vu_dev,
+                            struct vhost_user_msg *msg, int response)
+{
+       struct vhost_user_msg reply = {
+               .header = msg->header,
+               .payload.integer = response,
+       };
+       size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
+       int err;
+
+       reply.header.size = sizeof(reply.payload.integer);
+       reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
+       reply.header.flags |= VHOST_USER_FLAG_REPLY;
+
+       err = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);
+       if (!err)
+               return;
+
+       vu_err(vu_dev,
+              "sending reply to slave request failed: %d (size %zu)\n",
+              err, size);
+}
+
+/*
+ * Interrupt handler for the slave request pipe (vu_dev->req_fd).
+ *
+ * Reads one request, dispatches it and, if the slave set
+ * VHOST_USER_FLAG_NEED_REPLY, replies with 0 when the request was handled
+ * and 1 otherwise. Returns IRQ_NONE if no valid request could be read.
+ */
+static irqreturn_t vu_req_interrupt(int irq, void *data)
+{
+       struct virtio_uml_device *vu_dev = data;
+       int response = 1;
+       /* The payload union plus 512 spare bytes for larger requests */
+       struct {
+               struct vhost_user_msg msg;
+               u8 extra_payload[512];
+       } msg;
+       int rc;
+
+       rc = vhost_user_recv_req(vu_dev, &msg.msg,
+                                sizeof(msg.msg.payload) +
+                                sizeof(msg.extra_payload));
+
+       if (rc)
+               return IRQ_NONE;
+
+       switch (msg.msg.header.request) {
+       case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
+               virtio_config_changed(&vu_dev->vdev);
+               response = 0;
+               break;
+       /* Everything below is reported as unexpected; response stays 1 */
+       case VHOST_USER_SLAVE_IOTLB_MSG:
+               /* not supported - VIRTIO_F_IOMMU_PLATFORM */
+       case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
+               /* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
+       default:
+               vu_err(vu_dev, "unexpected slave request %d\n",
+                      msg.msg.header.request);
+       }
+
+       if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
+               vhost_user_reply(vu_dev, &msg.msg, response);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Set up the channel on which the slave sends requests to us.
+ *
+ * Creates a pipe, keeps the read end in vu_dev->req_fd with
+ * vu_req_interrupt() attached to it, and hands the write end to the slave
+ * via VHOST_USER_SET_SLAVE_REQ_FD. Our copy of the write end is closed in
+ * every case.
+ *
+ * Returns 0 on success. On failure a negative error code is returned and
+ * vu_dev->req_fd is back at -1.
+ */
+static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
+{
+       int rc, req_fds[2];
+
+       /* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
+       rc = os_pipe(req_fds, true, true);
+       if (rc < 0)
+               return rc;
+       vu_dev->req_fd = req_fds[0];
+
+       rc = um_request_irq(VIRTIO_IRQ, vu_dev->req_fd, IRQ_READ,
+                           vu_req_interrupt, IRQF_SHARED,
+                           vu_dev->pdev->name, vu_dev);
+       if (rc)
+               goto err_close;
+
+       rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
+                                          req_fds[1]);
+       if (rc)
+               goto err_free_irq;
+
+       goto out;
+
+err_free_irq:
+       um_free_irq(VIRTIO_IRQ, vu_dev);
+err_close:
+       os_close_file(req_fds[0]);
+       /*
+        * req_fd >= 0 means "open, with the IRQ registered" to the release
+        * path, so do not leave the closed descriptor behind.
+        */
+       vu_dev->req_fd = -1;
+out:
+       /* Close unused write end of request fds */
+       os_close_file(req_fds[1]);
+       return rc;
+}
+
+/*
+ * Run the initial vhost-user handshake: claim ownership, read the feature
+ * bits, negotiate protocol features if the slave offers them, and set up
+ * the slave request channel if SLAVE_REQ was negotiated.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int vhost_user_init(struct virtio_uml_device *vu_dev)
+{
+       int err;
+
+       err = vhost_user_set_owner(vu_dev);
+       if (err)
+               return err;
+
+       err = vhost_user_get_features(vu_dev, &vu_dev->features);
+       if (err)
+               return err;
+
+       if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
+               err = vhost_user_get_protocol_features(vu_dev,
+                               &vu_dev->protocol_features);
+               if (err)
+                       return err;
+
+               /* Only keep what this driver implements */
+               vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
+               err = vhost_user_set_protocol_features(vu_dev,
+                               vu_dev->protocol_features);
+               if (err)
+                       return err;
+       }
+
+       if (!(vu_dev->protocol_features &
+             BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)))
+               return 0;
+
+       return vhost_user_init_slave_req(vu_dev);
+}
+
+/*
+ * Read @len bytes of device config space starting at @offset into @buf.
+ *
+ * The config space is requested from offset 0 up to @offset + @len and the
+ * wanted part is copied out of the reply. @buf is left untouched if the
+ * slave did not negotiate VHOST_USER_PROTOCOL_F_CONFIG, if the allocation
+ * fails, or if the exchange fails; the latter is logged.
+ */
+static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
+                                 u32 offset, void *buf, u32 len)
+{
+       u32 cfg_size = offset + len;
+       struct vhost_user_msg *msg;
+       size_t payload_size = sizeof(msg->payload.config) + cfg_size;
+       size_t msg_size = sizeof(msg->header) + payload_size;
+       int rc;
+
+       if (!(vu_dev->protocol_features &
+             BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
+               return;
+
+       msg = kzalloc(msg_size, GFP_KERNEL);
+       if (!msg)
+               return;
+       msg->header.request = VHOST_USER_GET_CONFIG;
+       msg->header.size = payload_size;
+       msg->payload.config.offset = 0;
+       msg->payload.config.size = cfg_size;
+
+       rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
+       if (rc) {
+               vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
+                      rc);
+               goto free;
+       }
+
+       /*
+        * The limit is a payload size, the header is accounted for
+        * separately. Passing msg_size here would let an oversized reply
+        * overrun the allocation by sizeof(msg->header) bytes.
+        */
+       rc = vhost_user_recv_resp(vu_dev, msg, payload_size);
+       if (rc) {
+               vu_err(vu_dev,
+                      "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
+                      rc);
+               goto free;
+       }
+
+       if (msg->header.size != payload_size ||
+           msg->payload.config.size != cfg_size) {
+               rc = -EPROTO;
+               vu_err(vu_dev,
+                      "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
+                      msg->header.size, payload_size,
+                      msg->payload.config.size, cfg_size);
+               goto free;
+       }
+       memcpy(buf, msg->payload.config.payload + offset, len);
+
+free:
+       kfree(msg);
+}
+
+/*
+ * Write @len bytes from @buf to the device config space at @offset using
+ * VHOST_USER_SET_CONFIG. Does nothing if the slave did not negotiate
+ * VHOST_USER_PROTOCOL_F_CONFIG or if the allocation fails; a send failure
+ * is only logged.
+ */
+static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
+                                 u32 offset, const void *buf, u32 len)
+{
+       struct vhost_user_msg *req;
+       size_t payload_size, total;
+       int err;
+
+       if (!(vu_dev->protocol_features &
+             BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
+               return;
+
+       payload_size = sizeof(req->payload.config) + len;
+       total = sizeof(req->header) + payload_size;
+
+       req = kzalloc(total, GFP_KERNEL);
+       if (!req)
+               return;
+
+       req->header.request = VHOST_USER_SET_CONFIG;
+       req->header.size = payload_size;
+       req->payload.config.offset = offset;
+       req->payload.config.size = len;
+       memcpy(req->payload.config.payload, buf, len);
+
+       err = vhost_user_send(vu_dev, false, req, NULL, 0);
+       if (err)
+               vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
+                      err);
+
+       kfree(req);
+}
+
+/*
+ * Describe the memory range [@addr, @addr + @size) in @region_out.
+ *
+ * Stores in @fd_out the descriptor that phys_mapping() returns for @addr.
+ * Returns 0 on success, or -EFAULT (after a WARN) if phys_mapping() fails
+ * for @addr or returns a different descriptor for the last byte of the
+ * range.
+ */
+static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
+                                     struct vhost_user_mem_region *region_out)
+{
+       unsigned long long mem_offset;
+       int rc = phys_mapping(addr, &mem_offset);
+
+       if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
+               return -EFAULT;
+       *fd_out = rc;
+       /* Guest and user address are both set to @addr */
+       region_out->guest_addr = addr;
+       region_out->user_addr = addr;
+       region_out->size = size;
+       region_out->mmap_offset = mem_offset;
+
+       /* Ensure mapping is valid for the entire region */
+       rc = phys_mapping(addr + size - 1, &mem_offset);
+       if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
+                addr + size - 1, rc, *fd_out))
+               return -EFAULT;
+       return 0;
+}
+
+/*
+ * Send VHOST_USER_SET_MEM_TABLE to the slave.
+ *
+ * The table holds one region for physmem beyond the reserved part and, if
+ * highmem is non-zero, a second one starting at __pa(end_iomem). One file
+ * descriptor per region is passed along with the message.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
+{
+       struct vhost_user_msg msg = {
+               .header.request = VHOST_USER_SET_MEM_TABLE,
+               .header.size = sizeof(msg.payload.mem_regions),
+               .payload.mem_regions.num = 1,
+       };
+       unsigned long reserved = uml_reserved - uml_physmem;
+       int fds[2];
+       int rc;
+
+       /*
+        * This is a bit tricky, see also the comment with setup_physmem().
+        *
+        * Essentially, setup_physmem() uses a file to mmap() our physmem,
+        * but the code and data we *already* have is omitted. To us, this
+        * is no difference, since they both become part of our address
+        * space and memory consumption. To somebody looking in from the
+        * outside, however, it is different because the part of our memory
+        * consumption that's already part of the binary (code/data) is not
+        * mapped from the file, so it's not visible to another mmap from
+        * the file descriptor.
+        *
+        * Thus, don't advertise this space to the vhost-user slave. This
+        * means that the slave will likely abort or similar when we give
+        * it an address from the hidden range, since it's not marked as
+        * a valid address, but at least that way we detect the issue and
+        * don't just have the slave read an all-zeroes buffer from the
+        * shared memory file, or write something there that we can never
+        * see (depending on the direction of the virtqueue traffic.)
+        *
+        * Since we usually don't want to use .text for virtio buffers,
+        * this effectively means that you cannot use
+        *  1) global variables, which are in the .bss and not in the shm
+        *     file-backed memory
+        *  2) the stack in some processes, depending on where they have
+        *     their stack (or maybe only no interrupt stack?)
+        *
+        * The stack is already not typically valid for DMA, so this isn't
+        * much of a restriction, but global variables might be encountered.
+        *
+        * It might be possible to fix it by copying around the data that's
+        * between bss_start and where we map the file now, but it's not
+        * something that you typically encounter with virtio drivers, so
+        * it didn't seem worthwhile.
+        */
+       rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
+                                       &fds[0],
+                                       &msg.payload.mem_regions.regions[0]);
+
+       if (rc < 0)
+               return rc;
+       if (highmem) {
+               msg.payload.mem_regions.num++;
+               rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
+                               &fds[1], &msg.payload.mem_regions.regions[1]);
+               if (rc < 0)
+                       return rc;
+       }
+
+       /* The number of descriptors sent equals the number of regions */
+       return vhost_user_send(vu_dev, false, &msg, fds,
+                              msg.payload.mem_regions.num);
+}
+
+/* Send @request with an (@index, @num) vring state payload. */
+static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
+                                     u32 request, u32 index, u32 num)
+{
+       struct vhost_user_msg req = {
+               .header = {
+                       .request = request,
+                       .size = sizeof(req.payload.vring_state),
+               },
+               .payload.vring_state = {
+                       .index = index,
+                       .num = num,
+               },
+       };
+
+       return vhost_user_send(vu_dev, false, &req, NULL, 0);
+}
+
+/* Tell the slave that vring @index has @num entries. */
+static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
+                                   u32 index, u32 num)
+{
+       const u32 request = VHOST_USER_SET_VRING_NUM;
+
+       return vhost_user_set_vring_state(vu_dev, request, index, num);
+}
+
+/* Set the base of vring @index to @offset. */
+static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
+                                    u32 index, u32 offset)
+{
+       const u32 request = VHOST_USER_SET_VRING_BASE;
+
+       return vhost_user_set_vring_state(vu_dev, request, index, offset);
+}
+
+/*
+ * Tell the slave the addresses of the descriptor table, used ring,
+ * available ring and log of vring @index.
+ */
+static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
+                                    u32 index, u64 desc, u64 used, u64 avail,
+                                    u64 log)
+{
+       struct vhost_user_msg req = {
+               .header = {
+                       .request = VHOST_USER_SET_VRING_ADDR,
+                       .size = sizeof(req.payload.vring_addr),
+               },
+               .payload.vring_addr = {
+                       .index = index,
+                       .desc = desc,
+                       .used = used,
+                       .avail = avail,
+                       .log = log,
+               },
+       };
+
+       return vhost_user_send(vu_dev, false, &req, NULL, 0);
+}
+
+/*
+ * Send @request for vring @index, passing @fd with the message. With a
+ * negative @fd no descriptor is passed and VHOST_USER_VRING_POLL_MASK is
+ * set in the payload instead.
+ *
+ * Returns -EINVAL if @index does not fit VHOST_USER_VRING_INDEX_MASK.
+ */
+static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
+                                  u32 request, int index, int fd)
+{
+       struct vhost_user_msg req = {
+               .header = {
+                       .request = request,
+                       .size = sizeof(req.payload.integer),
+               },
+               .payload = { .integer = index },
+       };
+
+       if (index & ~VHOST_USER_VRING_INDEX_MASK)
+               return -EINVAL;
+
+       if (fd >= 0)
+               return vhost_user_send(vu_dev, false, &req, &fd, 1);
+
+       req.payload.integer |= VHOST_USER_VRING_POLL_MASK;
+       return vhost_user_send(vu_dev, false, &req, NULL, 0);
+}
+
+/* Hand the slave the descriptor for the call side of vring @index. */
+static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
+                                    int index, int fd)
+{
+       const u32 request = VHOST_USER_SET_VRING_CALL;
+
+       return vhost_user_set_vring_fd(vu_dev, request, index, fd);
+}
+
+/* Hand the slave the descriptor for the kick side of vring @index. */
+static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
+                                    int index, int fd)
+{
+       const u32 request = VHOST_USER_SET_VRING_KICK;
+
+       return vhost_user_set_vring_fd(vu_dev, request, index, fd);
+}
+
+/*
+ * Enable or disable vring @index. This is a no-op returning 0 unless the
+ * slave offered VHOST_USER_F_PROTOCOL_FEATURES.
+ */
+static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
+                                      u32 index, bool enable)
+{
+       if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES))
+               return vhost_user_set_vring_state(vu_dev,
+                                                 VHOST_USER_SET_VRING_ENABLE,
+                                                 index, enable);
+
+       return 0;
+}
+
+
+/* Virtio interface */
+
+/*
+ * Virtqueue notify callback: write the 64-bit value 1 to the queue's kick
+ * fd, retrying after -EINTR. Returns true on success; any other outcome
+ * triggers a WARN and returns false.
+ */
+static bool vu_notify(struct virtqueue *vq)
+{
+       struct virtio_uml_vq_info *info = vq->priv;
+       const uint64_t one = 1;
+       int written;
+
+       for (;;) {
+               written = os_write_file(info->kick_fd, &one, sizeof(one));
+               if (written != -EINTR)
+                       break;
+       }
+
+       return !WARN(written != sizeof(one), "write returned %d\n", written);
+}
+
+/*
+ * Interrupt handler for a virtqueue's call fd.
+ *
+ * Drains info->call_fd, calling vring_interrupt() once for every 64-bit
+ * value read, and returns the OR of the results (IRQ_NONE if nothing was
+ * read). The loop is expected to end with -EAGAIN; anything else triggers
+ * a WARN.
+ */
+static irqreturn_t vu_interrupt(int irq, void *opaque)
+{
+       struct virtqueue *vq = opaque;
+       struct virtio_uml_vq_info *info = vq->priv;
+       uint64_t n;
+       int rc;
+       irqreturn_t ret = IRQ_NONE;
+
+       do {
+               rc = os_read_file(info->call_fd, &n, sizeof(n));
+               if (rc == sizeof(n))
+                       ret |= vring_interrupt(irq, vq);
+       } while (rc == sizeof(n) || rc == -EINTR);
+       WARN(rc != -EAGAIN, "read returned %d\n", rc);
+       return ret;
+}
+
+
+/* virtio_config_ops.get: read @len config bytes at @offset into @buf. */
+static void vu_get(struct virtio_device *vdev, unsigned offset,
+                  void *buf, unsigned len)
+{
+       vhost_user_get_config(to_virtio_uml_device(vdev), offset, buf, len);
+}
+
+/* virtio_config_ops.set: write @len config bytes from @buf at @offset. */
+static void vu_set(struct virtio_device *vdev, unsigned offset,
+                  const void *buf, unsigned len)
+{
+       vhost_user_set_config(to_virtio_uml_device(vdev), offset, buf, len);
+}
+
+/* virtio_config_ops.get_status: return the locally stored status byte. */
+static u8 vu_get_status(struct virtio_device *vdev)
+{
+       return to_virtio_uml_device(vdev)->status;
+}
+
+/* virtio_config_ops.set_status: store @status locally; nothing is sent. */
+static void vu_set_status(struct virtio_device *vdev, u8 status)
+{
+       to_virtio_uml_device(vdev)->status = status;
+}
+
+/* virtio_config_ops.reset: clear the locally stored status byte. */
+static void vu_reset(struct virtio_device *vdev)
+{
+       to_virtio_uml_device(vdev)->status = 0;
+}
+
+/*
+ * Tear down one virtqueue: free its IRQ, close the call and kick fds,
+ * delete the vring and free the per-queue info.
+ */
+static void vu_del_vq(struct virtqueue *vq)
+{
+       struct virtio_uml_vq_info *info = vq->priv;
+
+       /* Release the IRQ before closing the fd it was requested for */
+       um_free_irq(VIRTIO_IRQ, vq);
+
+       os_close_file(info->call_fd);
+       os_close_file(info->kick_fd);
+
+       vring_del_virtqueue(vq);
+       kfree(info);
+}
+
+/*
+ * virtio_config_ops.del_vqs: disable every vring on the slave, wait for
+ * the slave to catch up, then delete all virtqueues of @vdev. Failures
+ * while talking to the slave only trigger a WARN.
+ */
+static void vu_del_vqs(struct virtio_device *vdev)
+{
+       struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+       struct virtqueue *vq, *n;
+       u64 features;
+
+       /* Note: reverse order as a workaround to a decoding bug in snabb */
+       list_for_each_entry_reverse(vq, &vdev->vqs, list)
+               WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
+
+       /* Ensure previous messages have been processed */
+       WARN_ON(vhost_user_get_features(vu_dev, &features));
+
+       /* The _safe variant is used because the loop body deletes vq */
+       list_for_each_entry_safe(vq, n, &vdev->vqs, list)
+               vu_del_vq(vq);
+}
+
+/*
+ * Set up the call (slave to driver) notification path of @vq.
+ *
+ * Creates a pipe, keeps the read end in info->call_fd with vu_interrupt()
+ * attached to it, and hands the write end to the slave via
+ * VHOST_USER_SET_VRING_CALL. Our copy of the write end is closed in every
+ * case.
+ *
+ * Returns 0 on success. On failure the IRQ (if requested) is freed, the
+ * read end is closed and a negative error code is returned.
+ */
+static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
+                              struct virtqueue *vq)
+{
+       struct virtio_uml_vq_info *info = vq->priv;
+       int call_fds[2];
+       int rc;
+
+       /* Use a pipe for call fd, since SIGIO is not supported for eventfd */
+       rc = os_pipe(call_fds, true, true);
+       if (rc < 0)
+               return rc;
+
+       info->call_fd = call_fds[0];
+       rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ,
+                           vu_interrupt, IRQF_SHARED, info->name, vq);
+       if (rc)
+               goto close_both;
+
+       rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
+       if (rc)
+               goto release_irq;
+
+       /* Success: skip the error labels, only the write end is closed */
+       goto out;
+
+release_irq:
+       um_free_irq(VIRTIO_IRQ, vq);
+close_both:
+       os_close_file(call_fds[0]);
+out:
+       /* Close (unused) write end of call fds */
+       os_close_file(call_fds[1]);
+
+       return rc;
+}
+
+/*
+ * Create virtqueue @index of @vdev and describe it to the slave.
+ *
+ * Allocates the per-queue info, creates the vring, opens the kick eventfd,
+ * sets up the call path and sends the ring size, base and addresses. The
+ * kick fd itself is not sent to the slave here.
+ *
+ * Returns the new virtqueue, or an ERR_PTR() after undoing whatever had
+ * been set up.
+ */
+static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
+                                    unsigned index, vq_callback_t *callback,
+                                    const char *name, bool ctx)
+{
+       struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+       struct platform_device *pdev = vu_dev->pdev;
+       struct virtio_uml_vq_info *info;
+       struct virtqueue *vq;
+       int num = MAX_SUPPORTED_QUEUE_SIZE;
+       int rc;
+
+       info = kzalloc(sizeof(*info), GFP_KERNEL);
+       if (!info) {
+               rc = -ENOMEM;
+               goto error_kzalloc;
+       }
+       snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
+                pdev->id, name);
+
+       vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
+                                   ctx, vu_notify, callback, info->name);
+       if (!vq) {
+               rc = -ENOMEM;
+               goto error_create;
+       }
+       vq->priv = info;
+       /* Use the size the ring actually got, not the one asked for */
+       num = virtqueue_get_vring_size(vq);
+
+       rc = os_eventfd(0, 0);
+       if (rc < 0)
+               goto error_kick;
+       info->kick_fd = rc;
+
+       rc = vu_setup_vq_call_fd(vu_dev, vq);
+       if (rc)
+               goto error_call;
+
+       rc = vhost_user_set_vring_num(vu_dev, index, num);
+       if (rc)
+               goto error_setup;
+
+       rc = vhost_user_set_vring_base(vu_dev, index, 0);
+       if (rc)
+               goto error_setup;
+
+       /* (u64) -1 is passed as the log address */
+       rc = vhost_user_set_vring_addr(vu_dev, index,
+                                      virtqueue_get_desc_addr(vq),
+                                      virtqueue_get_used_addr(vq),
+                                      virtqueue_get_avail_addr(vq),
+                                      (u64) -1);
+       if (rc)
+               goto error_setup;
+
+       return vq;
+
+       /* Unwind in the reverse order of the setup above */
+error_setup:
+       um_free_irq(VIRTIO_IRQ, vq);
+       os_close_file(info->call_fd);
+error_call:
+       os_close_file(info->kick_fd);
+error_kick:
+       vring_del_virtqueue(vq);
+error_create:
+       kfree(info);
+error_kzalloc:
+       return ERR_PTR(rc);
+}
+
+/*
+ * virtio_config_ops.find_vqs: send the memory table, create a virtqueue
+ * for every entry of @names that is not NULL, then hand each queue's kick
+ * fd to the slave and enable the vring.
+ *
+ * Returns 0 on success. On failure all virtqueues created so far are
+ * deleted and a negative error code is returned.
+ */
+static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+                      struct virtqueue *vqs[], vq_callback_t *callbacks[],
+                      const char * const names[], const bool *ctx,
+                      struct irq_affinity *desc)
+{
+       struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+       int i, queue_idx = 0, rc;
+       struct virtqueue *vq;
+
+       rc = vhost_user_set_mem_table(vu_dev);
+       if (rc)
+               return rc;
+
+       for (i = 0; i < nvqs; ++i) {
+               /* Unnamed slots get no queue and use up no queue index */
+               if (!names[i]) {
+                       vqs[i] = NULL;
+                       continue;
+               }
+
+               vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
+                                    ctx ? ctx[i] : false);
+               if (IS_ERR(vqs[i])) {
+                       rc = PTR_ERR(vqs[i]);
+                       goto error_setup;
+               }
+       }
+
+       list_for_each_entry(vq, &vdev->vqs, list) {
+               struct virtio_uml_vq_info *info = vq->priv;
+
+               rc = vhost_user_set_vring_kick(vu_dev, vq->index,
+                                              info->kick_fd);
+               if (rc)
+                       goto error_setup;
+
+               rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
+               if (rc)
+                       goto error_setup;
+       }
+
+       return 0;
+
+error_setup:
+       vu_del_vqs(vdev);
+       return rc;
+}
+
+/* virtio_config_ops.get_features: return the stored feature bits. */
+static u64 vu_get_features(struct virtio_device *vdev)
+{
+       return to_virtio_uml_device(vdev)->features;
+}
+
+/*
+ * virtio_config_ops.finalize_features: let the vring code adjust the
+ * transport features, then send the resulting feature set to the slave.
+ */
+static int vu_finalize_features(struct virtio_device *vdev)
+{
+       struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+       u64 kept;
+
+       /*
+        * Sampled before vring_transport_features() runs, so that these
+        * bits are part of the result whatever that call does to
+        * vdev->features.
+        */
+       kept = vdev->features & VHOST_USER_SUPPORTED_F;
+
+       vring_transport_features(vdev);
+       vu_dev->features = vdev->features | kept;
+
+       return vhost_user_set_features(vu_dev, vu_dev->features);
+}
+
+/* virtio_config_ops.bus_name: the name of the backing platform device. */
+static const char *vu_bus_name(struct virtio_device *vdev)
+{
+       return to_virtio_uml_device(vdev)->pdev->name;
+}
+
+/* Config operations installed on every virtio device created by probe. */
+static const struct virtio_config_ops virtio_uml_config_ops = {
+       .get = vu_get,
+       .set = vu_set,
+       .get_status = vu_get_status,
+       .set_status = vu_set_status,
+       .reset = vu_reset,
+       .find_vqs = vu_find_vqs,
+       .del_vqs = vu_del_vqs,
+       .get_features = vu_get_features,
+       .finalize_features = vu_finalize_features,
+       .bus_name = vu_bus_name,
+};
+
+/*
+ * Release callback of the virtio device: free the slave request IRQ and
+ * close its fd if they were set up, then close the vhost-user socket.
+ */
+static void virtio_uml_release_dev(struct device *d)
+{
+       struct virtio_device *vdev = container_of(d, struct virtio_device, dev);
+       struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+       bool have_req_fd = vu_dev->req_fd >= 0;
+
+       /* req_fd is still -1 if the slave request feature was not negotiated */
+       if (have_req_fd) {
+               um_free_irq(VIRTIO_IRQ, vu_dev);
+               os_close_file(vu_dev->req_fd);
+       }
+
+       os_close_file(vu_dev->sock);
+}
+
+/* Platform device */
+
+/* Platform data consumed by virtio_uml_probe(). */
+struct virtio_uml_platform_data {
+       /* Virtio device id, used as vdev.id.device */
+       u32 virtio_device_id;
+       /* Path of the vhost-user socket to connect to */
+       const char *socket_path;
+};
+
+/*
+ * Platform driver probe: connect to the vhost-user socket named in the
+ * platform data, run the vhost-user handshake and register the virtio
+ * device.
+ *
+ * Returns 0 on success, -EINVAL without platform data, -ENOMEM if the
+ * allocation fails, or the error of the failing step.
+ */
+static int virtio_uml_probe(struct platform_device *pdev)
+{
+       struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
+       struct virtio_uml_device *vu_dev;
+       int rc;
+
+       if (!pdata)
+               return -EINVAL;
+
+       vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL);
+       if (!vu_dev)
+               return -ENOMEM;
+
+       vu_dev->vdev.dev.parent = &pdev->dev;
+       vu_dev->vdev.dev.release = virtio_uml_release_dev;
+       vu_dev->vdev.config = &virtio_uml_config_ops;
+       vu_dev->vdev.id.device = pdata->virtio_device_id;
+       vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
+       vu_dev->pdev = pdev;
+       /* Marks the slave request channel as not set up */
+       vu_dev->req_fd = -1;
+
+       do {
+               rc = os_connect_socket(pdata->socket_path);
+       } while (rc == -EINTR);
+       if (rc < 0)
+               return rc;
+       vu_dev->sock = rc;
+
+       rc = vhost_user_init(vu_dev);
+       if (rc)
+               goto error_init;
+
+       platform_set_drvdata(pdev, vu_dev);
+
+       rc = register_virtio_device(&vu_dev->vdev);
+       /*
+        * On failure the reference is dropped instead of closing the fds
+        * here; presumably this ends up in virtio_uml_release_dev().
+        */
+       if (rc)
+               put_device(&vu_dev->vdev.dev);
+       return rc;
+
+error_init:
+       os_close_file(vu_dev->sock);
+       return rc;
+}
+
+/*
+ * Platform driver remove callback: unregister the virtio device that
+ * probe stored as driver data. Always returns 0.
+ */
+static int virtio_uml_remove(struct platform_device *pdev)
+{
+       struct virtio_uml_device *dev_priv = platform_get_drvdata(pdev);
+       struct virtio_device *vdev = &dev_priv->vdev;
+
+       unregister_virtio_device(vdev);
+       return 0;
+}
+
+/* Command line device list */
+
+static void vu_cmdline_release_dev(struct device *d)
+{
+}
+
+static struct device vu_cmdline_parent = {
+       .init_name = "virtio-uml-cmdline",
+       .release = vu_cmdline_release_dev,      /* empty: this device is a static object, nothing to free */
+};
+
+static bool vu_cmdline_parent_registered;      /* set once vu_cmdline_parent has been registered */
+static int vu_cmdline_id;      /* platform device id used for the next command line device */
+
+/*
+ * "device" parameter setter: handle one
+ * "<socket>:<virtio_id>[:<platform_id>]" specification.
+ *
+ * Registers the shared parent device on first use, then registers a
+ * "virtio-uml" platform device carrying the socket path and virtio id
+ * as platform data. When no platform id is given, the running counter
+ * vu_cmdline_id is used; in both cases the counter is advanced past the
+ * id that was used.
+ *
+ * The socket path is duplicated here and stays referenced by the
+ * platform data; it is freed on failure, or later by
+ * vu_unregister_cmdline_device().
+ *
+ * Returns 0 on success, -EINVAL for a malformed specification, -ENOMEM
+ * if the socket path cannot be duplicated, or the error from registering
+ * the parent or platform device.
+ */
+static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
+{
+       const char *ids = strchr(device, ':');
+       unsigned int virtio_device_id;
+       int processed, consumed, err;
+       int platform_id = vu_cmdline_id;
+       char *socket_path;
+       struct virtio_uml_platform_data pdata;
+       struct platform_device *pdev;
+
+       /* need a non-empty socket path followed by ':' */
+       if (!ids || ids == device)
+               return -EINVAL;
+
+       /*
+        * Parse into a local so that a specification rejected below does
+        * not clobber the global id counter.
+        */
+       processed = sscanf(ids, ":%u%n:%d%n",
+                          &virtio_device_id, &consumed,
+                          &platform_id, &consumed);
+
+       /* reject a missing virtio id or trailing garbage */
+       if (processed < 1 || ids[consumed])
+               return -EINVAL;
+
+       /* input is valid: let an explicit platform id take effect */
+       vu_cmdline_id = platform_id;
+
+       if (!vu_cmdline_parent_registered) {
+               err = device_register(&vu_cmdline_parent);
+               if (err) {
+                       pr_err("Failed to register parent device!\n");
+                       put_device(&vu_cmdline_parent);
+                       return err;
+               }
+               vu_cmdline_parent_registered = true;
+       }
+
+       /* everything before the first ':' is the socket path */
+       socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
+       if (!socket_path)
+               return -ENOMEM;
+
+       pdata.virtio_device_id = (u32) virtio_device_id;
+       pdata.socket_path = socket_path;
+
+       pr_info("Registering device virtio-uml.%d id=%u at %s\n",
+               vu_cmdline_id, virtio_device_id, socket_path);
+
+       pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
+                                            vu_cmdline_id++, &pdata,
+                                            sizeof(pdata));
+       err = PTR_ERR_OR_ZERO(pdev);
+       if (err)
+               goto free;
+       return 0;
+
+free:
+       kfree(socket_path);
+       return err;
+}
+
+/*
+ * device_for_each_child() callback used by vu_cmdline_get(): append one
+ * "<socket>:<virtio_id>:<platform_id>" line for this platform device to
+ * the string already in the buffer passed as data.
+ *
+ * The output is bounded by PAGE_SIZE in total. Always returns 0 so the
+ * iteration continues over all children.
+ */
+static int vu_cmdline_get_device(struct device *dev, void *data)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
+       char *buffer = data;
+       unsigned int len = strlen(buffer);
+
+       /* virtio_device_id is a u32 and is parsed with %u: print it unsigned */
+       snprintf(buffer + len, PAGE_SIZE - len, "%s:%u:%d\n",
+                pdata->socket_path, pdata->virtio_device_id, pdev->id);
+       return 0;
+}
+
+/*
+ * "device" parameter getter: fill buffer with one line per registered
+ * command line device (written by vu_cmdline_get_device()).
+ *
+ * Returns the length of the resulting string plus one.
+ */
+static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
+{
+       buffer[0] = '\0';
+
+       /* no parent registered means no devices: empty string, length 0 + 1 */
+       if (!vu_cmdline_parent_registered)
+               return 1;
+
+       device_for_each_child(&vu_cmdline_parent, buffer,
+                             vu_cmdline_get_device);
+       return strlen(buffer) + 1;
+}
+
+static const struct kernel_param_ops vu_cmdline_param_ops = {
+       .set = vu_cmdline_set,  /* parses one device specification and registers it */
+       .get = vu_cmdline_get,  /* lists the devices registered so far */
+};
+
+device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
+__uml_help(vu_cmdline_param_ops,
+"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
+"    Configure a virtio device over a vhost-user socket.\n"
+"    See virtio_ids.h for a list of possible virtio device id values.\n"
+"    Optionally use a specific platform_device id.\n\n"
+);
+
+
+/*
+ * device_for_each_child() callback: free the socket path string held in
+ * the platform data, then unregister the platform device. Always
+ * returns 0 so the iteration covers every child.
+ */
+static int vu_unregister_cmdline_device(struct device *dev, void *data)
+{
+       struct platform_device *plat = to_platform_device(dev);
+       struct virtio_uml_platform_data *cfg = plat->dev.platform_data;
+
+       kfree(cfg->socket_path);
+       platform_device_unregister(plat);
+       return 0;
+}
+
+/*
+ * Tear down everything created through the "device" parameter: all
+ * child platform devices first, then the shared parent device.
+ * Does nothing if the parent was never registered.
+ */
+static void vu_unregister_cmdline_devices(void)
+{
+       if (!vu_cmdline_parent_registered)
+               return;
+
+       device_for_each_child(&vu_cmdline_parent, NULL,
+                             vu_unregister_cmdline_device);
+       device_unregister(&vu_cmdline_parent);
+       vu_cmdline_parent_registered = false;
+}
+
+/* Platform driver */
+
+static const struct of_device_id virtio_uml_match[] = {
+       { .compatible = "virtio,uml", },
+       { }     /* empty terminating entry */
+};
+MODULE_DEVICE_TABLE(of, virtio_uml_match);
+
+static struct platform_driver virtio_uml_driver = {
+       .probe = virtio_uml_probe,
+       .remove = virtio_uml_remove,
+       .driver = {
+               .name = "virtio-uml",   /* same name vu_cmdline_set() registers its platform devices under */
+               .of_match_table = virtio_uml_match,
+       },
+};
+
+static int __init virtio_uml_init(void)
+{
+       return platform_driver_register(&virtio_uml_driver);    /* init only registers the platform driver */
+}
+
+static void __exit virtio_uml_exit(void)
+{
+       platform_driver_unregister(&virtio_uml_driver);
+       vu_unregister_cmdline_devices();        /* also remove devices created via the "device" parameter */
+}
+
+module_init(virtio_uml_init);
+module_exit(virtio_uml_exit);
+__uml_exitcall(virtio_uml_exit);
+
+MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
+MODULE_LICENSE("GPL");
index 20e30be..fc7f1e7 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stddef.h>
index 56b9c4a..5968da3 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __XTERM_H__
index e8f9957..d64ef6d 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* 
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/slab.h>
index b352ed0..398006d 100644 (file)
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-generic-y += barrier.h
 generic-y += bpf_perf_event.h
 generic-y += bug.h
 generic-y += compat.h
index 4049f2c..d7086b9 100644 (file)
@@ -83,8 +83,8 @@
        __preinit_array_end = .;
   }
   .init_array : {
+        /* dummy - we call this ourselves */
        __init_array_start = .;
-       *(.init_array)
        __init_array_end = .;
   }
   .fini_array : {
index 49ed3e3..42c6205 100644 (file)
 #define TELNETD_IRQ            12
 #define XTERM_IRQ              13
 #define RANDOM_IRQ             14
+#define VIRTIO_IRQ             15
 
 #ifdef CONFIG_UML_NET_VECTOR
 
-#define VECTOR_BASE_IRQ                15
+#define VECTOR_BASE_IRQ                (VIRTIO_IRQ + 1)
 #define VECTOR_IRQ_SPACE       8
 
-#define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
+#define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ - 1)
 
 #else
 
-#define LAST_IRQ RANDOM_IRQ
+#define LAST_IRQ VIRTIO_IRQ
 
 #endif
 
index cbc6c00..0642ad9 100644 (file)
@@ -32,7 +32,6 @@ static inline void arch_local_irq_disable(void)
 }
 
 #define ARCH_IRQ_DISABLED      0
-#define ARCh_IRQ_ENABLED       (SIGIO|SIGVTALRM)
 
 #include <asm-generic/irqflags.h>
 
index 2e0a6b1..b0bd12d 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_KMAP_TYPES_H
index da70544..5b072ab 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __ARCH_UM_MMU_H
index 00cefd3..5aee062 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_MMU_CONTEXT_H
index f878bec..95af12e 100644 (file)
@@ -1,7 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
  * Copyright 2003 PathScale, Inc.
- * Licensed under the GPL
  */
 
 #ifndef __UM_PAGE_H
index d7b282e..881e76d 100644 (file)
@@ -1,8 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
  * Copyright 2003 PathScale, Inc.
  * Derived from include/asm-i386/pgalloc.h and include/asm-i386/pgtable.h
- * Licensed under the GPL
  */
 
 #ifndef __UM_PGALLOC_H
@@ -29,7 +29,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
 #define __pte_free_tlb(tlb,pte, address)               \
 do {                                                   \
-       pgtable_page_dtor(pte);                         \
+       pgtable_pte_page_dtor(pte);                     \
        tlb_remove_page((tlb),(pte));                   \
 } while (0)
 
@@ -43,7 +43,5 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 #define __pmd_free_tlb(tlb,x, address)   tlb_remove_page((tlb),virt_to_page(x))
 #endif
 
-#define check_pgt_cache()      do { } while (0)
-
 #endif
 
index 179c0ea..32b3d26 100644 (file)
@@ -1,8 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
  * Copyright 2003 PathScale, Inc.
  * Derived from include/asm-i386/pgtable.h
- * Licensed under the GPL
  */
 
 #ifndef __UM_PGTABLE_2LEVEL_H
index c4d876d..9812269 100644 (file)
@@ -1,7 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright 2003 PathScale Inc
  * Derived from include/asm-i386/pgtable.h
- * Licensed under the GPL
  */
 
 #ifndef __UM_PGTABLE_3LEVEL_H
index b377df7..36a44d5 100644 (file)
@@ -1,8 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright 2003 PathScale, Inc.
  * Derived from include/asm-i386/pgtable.h
- * Licensed under the GPL
  */
 
 #ifndef __UM_PGTABLE_H
@@ -32,8 +32,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 /* zero page used for uninitialized stuff */
 extern unsigned long *empty_zero_page;
 
-#define pgtable_cache_init() do ; while (0)
-
 /* Just any arbitrary offset to the start of the vmalloc VM area: the
  * current 8MB value just means that there will be a 8MB "hole" after the
  * physical memory until the kernel virtual memory starts.  That means that
index b58b746..afd9b26 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_PROCESSOR_GENERIC_H
index 5ab2062..81c647e 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_PTRACE_GENERIC_H
index 4eecd96..4c19ce4 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_THREAD_INFO_H
index 614f2c0..a5bda89 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_TLBFLUSH_H
index cc00fc5..fe66d65 100644 (file)
@@ -1,7 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
  * Copyright (C) 2015 Richard Weinberger (richard@nod.at)
- * Licensed under the GPL
  */
 
 #ifndef __UM_UACCESS_H
index 4f46abd..880ee42 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __ARCH_H__
index ca1843e..5f286ef 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __START_H__
index 53516b6..fd461ee 100644 (file)
@@ -1,7 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2004 Fujitsu Siemens Computers GmbH
  * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
- * Licensed under the GPL
  */
 
 #ifndef __ELF_USER_H__
index a5cde5c..ed952ac 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __FRAME_KERN_H_
index e05bd66..7cd1a10 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __IRQ_KERN_H__
index e7242a0..107751d 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __IRQ_USER_H__
index 6cd0124..3a9c75a 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __KERN_H__
index 35ab97e..ccafb62 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __KERN_UTIL_H__
index b3315c1..85a1cc2 100644 (file)
@@ -18,7 +18,7 @@ extern void longjmp(jmp_buf, int);
        enable = get_signals(); \
        n = setjmp(*buf); \
        if(n != 0) \
-               set_signals(enable); \
+               set_signals_trace(enable); \
        n; })
 
 #endif
index 5cd40e9..4862c91 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __MEM_H__
index 40442b9..a87be13 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_NET_KERN_H
index 3dabbe1..1b05317 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_NET_USER_H__
index 4a62ac4..506bcd1 100644 (file)
@@ -1,8 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
  * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __OS_H__
@@ -36,6 +36,8 @@
 #define OS_LIB_PATH    "/usr/lib/"
 #endif
 
+#define OS_SENDMSG_MAX_FDS 8
+
 /*
  * types taken from stat_file() in hostfs_user.c
  * (if they are wrong here, they are wrong there...).
@@ -176,6 +178,9 @@ extern unsigned os_major(unsigned long long dev);
 extern unsigned os_minor(unsigned long long dev);
 extern unsigned long long os_makedev(unsigned major, unsigned minor);
 extern int os_falloc_punch(int fd, unsigned long long offset, int count);
+extern int os_eventfd(unsigned int initval, int flags);
+extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len,
+                         const int *fds, unsigned int fds_num);
 
 /* start_up.c */
 extern void os_early_checks(void);
@@ -232,6 +237,7 @@ extern void block_signals(void);
 extern void unblock_signals(void);
 extern int get_signals(void);
 extern int set_signals(int enable);
+extern int set_signals_trace(int enable);
 extern int os_is_signal_stack(void);
 extern void deliver_alarm(void);
 
@@ -317,4 +323,10 @@ extern unsigned long os_get_top_address(void);
 
 long syscall(long number, ...);
 
+/* irqflags tracing */
+extern void block_signals_trace(void);
+extern void unblock_signals_trace(void);
+extern void um_trace_signals_on(void);
+extern void um_trace_signals_off(void);
+
 #endif
index 56b2f28..95455e8 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __PTRACE_USER_H__
index a74449b..0c50fa6 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2004 PathScale, Inc
- * Licensed under the GPL
  */
 
 #ifndef __REGISTERS_H
index 434f1a9..8fe8f57 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __SIGIO_H__
index 48dd098..4337b4c 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2005 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __MM_ID_H
index 911f3c4..c93d2cb 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __SKAS_H
index 13f404e..6b01d97 100644 (file)
@@ -1,8 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
 
  * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2005 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __STUB_DATA_H
index 9991ec2..2d2d13c 100644 (file)
@@ -1,7 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2012 - 2014 Cisco Systems
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __TIMER_INTERNAL_H__
@@ -43,6 +43,11 @@ static inline void time_travel_set_timer_expiry(unsigned long long expiry)
 {
        time_travel_timer_expiry = expiry;
 }
+
+static inline void time_travel_set_timer_interval(unsigned long long interval)
+{
+       time_travel_timer_interval = interval;  /* step by which a periodic timer's expiry is advanced */
+}
 #else
 #define time_travel_mode TT_MODE_OFF
 #define time_travel_time 0
@@ -61,6 +66,10 @@ static inline void time_travel_set_timer_expiry(unsigned long long expiry)
 {
 }
 
+static inline void time_travel_set_timer_interval(unsigned long long interval)
+{      /* no-op variant for the #else branch of this header */
+}
+
 #define time_travel_timer_mode TT_TMR_DISABLED
 #endif
 
index 6395fef..13da932 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2005 Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
- * Licensed under the GPL
  */
 
 #ifndef __UM_MALLOC_H__
index 4cff19f..e793e42 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __USER_H__
index 2f36d51..5aa8820 100644 (file)
@@ -1,6 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux,intel}.com)
-# Licensed under the GPL
 #
 
 # Don't instrument UML-specific code; without this, we may crash when
@@ -13,7 +13,6 @@ CPPFLAGS_vmlinux.lds := -DSTART=$(LDS_START)          \
                         -DELF_FORMAT=$(LDS_ELF_FORMAT) \
                        $(LDS_EXTRA)
 extra-y := vmlinux.lds
-clean-files :=
 
 obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
        physmem.o process.o ptrace.o reboot.o sigio.o \
index 972bf16..3ece3c3 100644 (file)
@@ -1,6 +1,6 @@
-/* 
+// SPDX-License-Identifier: GPL-2.0
+/*
  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
index 5568cf8..c69d69e 100644 (file)
@@ -70,6 +70,8 @@ SECTIONS
     SCHED_TEXT
     CPUIDLE_TEXT
     LOCK_TEXT
+    IRQENTRY_TEXT
+    SOFTIRQENTRY_TEXT
     *(.fixup)
     *(.stub .text.* .gnu.linkonce.t.*)
     /* .gnu.warning sections are handled specially by elf32.em.  */
@@ -101,7 +103,6 @@ SECTIONS
      be empty, which isn't pretty.  */
   . = ALIGN(32 / 8);
   .preinit_array     : { *(.preinit_array) }
-  .init_array     : { *(.init_array) }
   .fini_array     : { *(.fini_array) }
   .data           : {
     INIT_TASK_DATA(KERNEL_STACK_SIZE)
index 783b924..e8fd5d5 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/stddef.h>
index 546302e..369fd84 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/ctype.h>
index f138a4a..9361a8e 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/module.h>
index 74ddb44..84d5369 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* 
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/module.h>
index 1dcd310..c1981ff 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/init.h>
index efde1f1..3577118 100644 (file)
@@ -1,8 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2017 - Cambridge Greys Ltd
  * Copyright (C) 2011 - 2014 Cisco Systems Inc
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
  *     Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
  */
@@ -480,7 +480,7 @@ void __init init_IRQ(void)
        irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
 
 
-       for (i = 1; i < LAST_IRQ; i++)
+       for (i = 1; i <= LAST_IRQ; i++)
                irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
        /* Initialize EPOLL Loop */
        os_setup_epoll();
index 232b223..8ade54a 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* 
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/module.h>
@@ -38,6 +38,8 @@ EXPORT_SYMBOL(run_helper);
 EXPORT_SYMBOL(os_major);
 EXPORT_SYMBOL(os_minor);
 EXPORT_SYMBOL(os_makedev);
+EXPORT_SYMBOL(os_eventfd);
+EXPORT_SYMBOL(os_sendmsg_fds);
 
 EXPORT_SYMBOL(add_sigio_fd);
 EXPORT_SYMBOL(ignore_sigio_fd);
index de58e97..417ff64 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/stddef.h>
@@ -31,6 +31,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
 /* Initialized at boot time, and readonly after that */
 unsigned long long highmem;
+EXPORT_SYMBOL(highmem);
 int kmalloc_ok = 0;
 
 /* Used during early boot */
index 5bf56af..e7c7b53 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/module.h>
@@ -143,6 +143,7 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out)
 
        return fd;
 }
+EXPORT_SYMBOL(phys_mapping);
 
 static int __init uml_mem_setup(char *line, int *add)
 {
index 6bede78..263a8f0 100644 (file)
@@ -1,9 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
  * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright 2003 PathScale, Inc.
- * Licensed under the GPL
  */
 
 #include <linux/stddef.h>
@@ -210,15 +210,23 @@ static void time_travel_sleep(unsigned long long duration)
        if (time_travel_mode != TT_MODE_INFCPU)
                os_timer_disable();
 
-       if (time_travel_timer_mode != TT_TMR_DISABLED ||
+       while (time_travel_timer_mode == TT_TMR_PERIODIC &&
+              time_travel_timer_expiry < time_travel_time)
+               time_travel_set_timer_expiry(time_travel_timer_expiry +
+                                            time_travel_timer_interval);
+
+       if (time_travel_timer_mode != TT_TMR_DISABLED &&
            time_travel_timer_expiry < next) {
                if (time_travel_timer_mode == TT_TMR_ONESHOT)
                        time_travel_set_timer_mode(TT_TMR_DISABLED);
                /*
-                * time_travel_time will be adjusted in the timer
-                * IRQ handler so it works even when the signal
-                * comes from the OS timer
+                * In basic mode, time_travel_time will be adjusted in
+                * the timer IRQ handler so it works even when the signal
+                * comes from the OS timer, see there.
                 */
+               if (time_travel_mode != TT_MODE_BASIC)
+                       time_travel_set_time(time_travel_timer_expiry);
+
                deliver_alarm();
        } else {
                time_travel_set_time(next);
index da1e96b..b425f47 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/audit.h>
index 71f3e92..48c0610 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* 
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/sched/signal.h>
index 3fb6a40..10c99e0 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 #include <linux/interrupt.h>
index 57acbd6..3d57c71 100644 (file)
@@ -1,20 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/module.h>
 #include <linux/ptrace.h>
 #include <linux/sched.h>
+#include <linux/ftrace.h>
 #include <asm/siginfo.h>
 #include <asm/signal.h>
 #include <asm/unistd.h>
 #include <frame_kern.h>
 #include <kern_util.h>
+#include <os.h>
 
 EXPORT_SYMBOL(block_signals);
 EXPORT_SYMBOL(unblock_signals);
 
+void block_signals_trace(void)
+{
+       block_signals();        /* block first, then report the "off" state to the tracer */
+       if (current_thread_info())      /* NOTE(review): presumably NULL before a task context exists - confirm */
+               trace_hardirqs_off();
+}
+
+void unblock_signals_trace(void)
+{
+       if (current_thread_info())      /* NOTE(review): presumably NULL before a task context exists - confirm */
+               trace_hardirqs_on();    /* report the "on" state before actually unblocking */
+       unblock_signals();
+}
+
+void um_trace_signals_on(void)
+{
+       if (current_thread_info())      /* tracing only; the signal state itself is not changed here */
+               trace_hardirqs_on();
+}
+
+void um_trace_signals_off(void)
+{
+       if (current_thread_info())      /* tracing only; the signal state itself is not changed here */
+               trace_hardirqs_off();
+}
+
 /*
  * OK, we're invoking a handler
  */
index 5bd3edf..f3d494a 100644 (file)
@@ -1,6 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
-# Licensed under the GPL
 #
 
 obj-y := clone.o mmu.o process.o syscall.o uaccess.o
index 0f25d41..bfb70c4 100644 (file)
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <signal.h>
index 29e7f5f..b5e3d91 100644 (file)
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/mm.h>
@@ -63,12 +63,12 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
        if (current->mm != NULL && current->mm != &init_mm)
                from_mm = &current->mm->context;
 
-       block_signals();
+       block_signals_trace();
        if (from_mm)
                to_mm->id.u.pid = copy_context_skas0(stack,
                                                     from_mm->id.u.pid);
        else to_mm->id.u.pid = start_userspace(stack);
-       unblock_signals();
+       unblock_signals_trace();
 
        if (to_mm->id.u.pid < 0) {
                ret = to_mm->id.u.pid;
index d4dbf08..f2ac134 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/init.h>
@@ -19,7 +19,7 @@ static int __init start_kernel_proc(void *unused)
 {
        int pid;
 
-       block_signals();
+       block_signals_trace();
        pid = os_getpid();
 
        cpu_tasks[0].pid = pid;
index 44bb107..f574b18 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/kernel.h>
index bd3cb69..3236052 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/err.h>
index 35f7047..eed54c5 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/file.h>
index 2347572..94ea87b 100644 (file)
@@ -1,9 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
  * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2012-2014 Cisco Systems
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/clockchips.h>
@@ -39,7 +39,15 @@ void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
        unsigned long flags;
 
-       if (time_travel_mode != TT_MODE_OFF)
+       /*
+        * In basic time-travel mode we still get real interrupts
+        * (signals) but since we don't read time from the OS, we
+        * must update the simulated time here to the expiry when
+        * we get a signal.
+        * This is not the case in inf-cpu mode, since there we
+        * never get any real signals from the OS.
+        */
+       if (time_travel_mode == TT_MODE_BASIC)
                time_travel_set_time(time_travel_timer_expiry);
 
        local_irq_save(flags);
@@ -65,6 +73,7 @@ static int itimer_set_periodic(struct clock_event_device *evt)
        if (time_travel_mode != TT_MODE_OFF) {
                time_travel_set_timer_mode(TT_TMR_PERIODIC);
                time_travel_set_timer_expiry(time_travel_time + interval);
+               time_travel_set_timer_interval(interval);
        }
 
        if (time_travel_mode != TT_MODE_INFCPU)
index 45f739b..b7eaf65 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/mm.h>
index 58fe368..e62296c 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/mm.h>
index a818cce..0f40ecc 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/delay.h>
@@ -113,6 +113,7 @@ static int have_root __initdata = 0;
 
 /* Set in uml_mem_setup and modified in linux_main */
 long long physmem_size = 32 * 1024 * 1024;
+EXPORT_SYMBOL(physmem_size);
 
 static const char *usage_string =
 "User Mode Linux v%s\n"
index 10bf4ac..8031a03 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <asm/errno.h>
index 36b07ec..9f21443 100644 (file)
@@ -31,6 +31,8 @@ SECTIONS
     SCHED_TEXT
     CPUIDLE_TEXT
     LOCK_TEXT
+    IRQENTRY_TEXT
+    SOFTIRQENTRY_TEXT
     *(.fixup)
     /* .gnu.warning sections are handled specially by elf32.em.  */
     *(.gnu.warning)
index 455b500..839915b 100644 (file)
@@ -1,6 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
 # 
 # Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
-# Licensed under the GPL
 #
 
 # Don't instrument UML-specific code
index 6c546dc..d79e75f 100644 (file)
@@ -1,6 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
 # 
 # Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com)
-# Licensed under the GPL
 #
 
 ethertap-objs := ethertap_kern.o ethertap_user.o
index 54183a6..a475259 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __DRIVERS_ETAP_H
index f424600..3182e75 100644 (file)
@@ -1,9 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
  * James Leu (jleu@mindspring.net).
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright (C) 2001 by various other people who didn't put their name here.
- * Licensed under the GPL.
  */
 
 #include <linux/init.h>
index 6d49182..9483021 100644 (file)
@@ -1,9 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
  * James Leu (jleu@mindspring.net).
  * Copyright (C) 2001 by various other people who didn't put their name here.
- * Licensed under the GPL.
  */
 
 #include <stdio.h>
index 7367354..e364e42 100644 (file)
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_TUNTAP_H
index d9d56e5..adcb671 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/netdevice.h>
index db24ce0..53eb3d5 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* 
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
index f25b110..5133e3a 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
@@ -15,6 +15,7 @@
 #include <sys/sysmacros.h>
 #include <sys/un.h>
 #include <sys/types.h>
+#include <sys/eventfd.h>
 #include <os.h>
 
 static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
@@ -620,3 +621,46 @@ int os_falloc_punch(int fd, unsigned long long offset, int len)
        return n;
 }
 
+int os_eventfd(unsigned int initval, int flags)
+{
+       int fd = eventfd(initval, flags);
+
+       if (fd < 0)
+               return -errno;
+       return fd;
+}
+
+int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const int *fds,
+                  unsigned int fds_num)
+{
+       struct iovec iov = {
+               .iov_base = (void *) buf,
+               .iov_len = len,
+       };
+       union {
+               char control[CMSG_SPACE(sizeof(*fds) * OS_SENDMSG_MAX_FDS)];
+               struct cmsghdr align;
+       } u;
+       unsigned int fds_size = sizeof(*fds) * fds_num;
+       struct msghdr msg = {
+               .msg_iov = &iov,
+               .msg_iovlen = 1,
+               .msg_control = u.control,
+               .msg_controllen = CMSG_SPACE(fds_size),
+       };
+       struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+       int err;
+
+       if (fds_num > OS_SENDMSG_MAX_FDS)
+               return -EINVAL;
+       memset(u.control, 0, sizeof(u.control));
+       cmsg->cmsg_level = SOL_SOCKET;
+       cmsg->cmsg_type = SCM_RIGHTS;
+       cmsg->cmsg_len = CMSG_LEN(fds_size);
+       memcpy(CMSG_DATA(cmsg), fds, fds_size);
+       err = sendmsg(fd, &msg, 0);
+
+       if (err < 0)
+               return -errno;
+       return err;
+}
index 3f02d42..9fa6e41 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdlib.h>
index 3658230..d508310 100644 (file)
@@ -1,8 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2017 - Cambridge Greys Ltd
  * Copyright (C) 2011 - 2014 Cisco Systems Inc
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdlib.h>
index f1fee2b..8014dfa 100644 (file)
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
@@ -170,7 +170,7 @@ int __init main(int argc, char **argv, char **envp)
         * that they won't be delivered after the exec, when
         * they are definitely not expected.
         */
-       unblock_signals();
+       unblock_signals_trace();
 
        os_info("\n");
        /* Reboot */
index e162a95..3c1b774 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
index b3e0d40..e52dd37 100644 (file)
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
index 2ff8d4f..2d92705 100644 (file)
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2004 PathScale, Inc
  * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <errno.h>
index 46e762f..7555808 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <unistd.h>
@@ -132,7 +132,7 @@ static void update_thread(void)
        int n;
        char c;
 
-       flags = set_signals(0);
+       flags = set_signals_trace(0);
        CATCH_EINTR(n = write(sigio_private[0], &c, sizeof(c)));
        if (n != sizeof(c)) {
                printk(UM_KERN_ERR "update_thread : write failed, err = %d\n",
@@ -147,7 +147,7 @@ static void update_thread(void)
                goto fail;
        }
 
-       set_signals(flags);
+       set_signals_trace(flags);
        return;
  fail:
        /* Critical section start */
@@ -161,7 +161,7 @@ static void update_thread(void)
        close(write_sigio_fds[0]);
        close(write_sigio_fds[1]);
        /* Critical section end */
-       set_signals(flags);
+       set_signals_trace(flags);
 }
 
 int add_sigio_fd(int fd)
index 75b1023..b58bc68 100644 (file)
@@ -1,15 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
  * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2004 PathScale, Inc
  * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdlib.h>
 #include <stdarg.h>
 #include <errno.h>
 #include <signal.h>
+#include <string.h>
 #include <strings.h>
 #include <as-layout.h>
 #include <kern_util.h>
@@ -26,7 +27,6 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
        [SIGBUS]        = bus_handler,
        [SIGSEGV]       = segv_handler,
        [SIGIO]         = sigio_handler,
-       [SIGALRM]       = timer_handler
 };
 
 static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
@@ -42,8 +42,8 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
        }
 
        /* enable signals if sig isn't IRQ signal */
-       if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGALRM))
-               unblock_signals();
+       if ((sig != SIGIO) && (sig != SIGWINCH))
+               unblock_signals_trace();
 
        (*sig_info[sig])(sig, si, &r);
 
@@ -76,11 +76,11 @@ void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
                return;
        }
 
-       block_signals();
+       block_signals_trace();
 
        sig_handler_common(sig, si, mc);
 
-       set_signals(enabled);
+       set_signals_trace(enabled);
 }
 
 static void timer_real_alarm_handler(mcontext_t *mc)
@@ -89,6 +89,8 @@ static void timer_real_alarm_handler(mcontext_t *mc)
 
        if (mc != NULL)
                get_regs_from_mc(&regs, mc);
+       else
+               memset(&regs, 0, sizeof(regs));
        timer_handler(SIGALRM, NULL, &regs);
 }
 
@@ -102,7 +104,7 @@ void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc)
                return;
        }
 
-       block_signals();
+       block_signals_trace();
 
        signals_active |= SIGALRM_MASK;
 
@@ -110,7 +112,7 @@ void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc)
 
        signals_active &= ~SIGALRM_MASK;
 
-       set_signals(enabled);
+       set_signals_trace(enabled);
 }
 
 void deliver_alarm(void) {
@@ -251,6 +253,8 @@ void unblock_signals(void)
        if (signals_enabled == 1)
                return;
 
+       signals_enabled = 1;
+
        /*
         * We loop because the IRQ handler returns with interrupts off.  So,
         * interrupts may have arrived and we need to re-enable them and
@@ -260,12 +264,9 @@ void unblock_signals(void)
                /*
                 * Save and reset save_pending after enabling signals.  This
                 * way, signals_pending won't be changed while we're reading it.
-                */
-               signals_enabled = 1;
-
-               /*
+                *
                 * Setting signals_enabled and reading signals_pending must
-                * happen in this order.
+                * happen in this order, so have the barrier here.
                 */
                barrier();
 
@@ -278,10 +279,13 @@ void unblock_signals(void)
                /*
                 * We have pending interrupts, so disable signals, as the
                 * handlers expect them off when they are called.  They will
-                * be enabled again above.
+                * be enabled again above. We need to trace this, as we're
+                * expected to be enabling interrupts already, but any more
+                * tracing that happens inside the handlers we call for the
+                * pending signals will mess up the tracing state.
                 */
-
                signals_enabled = 0;
+               um_trace_signals_off();
 
                /*
                 * Deal with SIGIO first because the alarm handler might
@@ -304,6 +308,9 @@ void unblock_signals(void)
                if (!(signals_pending & SIGIO_MASK) && (signals_active & SIGALRM_MASK))
                        return;
 
+               /* Re-enable signals and trace that we're doing so. */
+               um_trace_signals_on();
+               signals_enabled = 1;
        }
 }
 
@@ -326,6 +333,21 @@ int set_signals(int enable)
        return ret;
 }
 
+int set_signals_trace(int enable)
+{
+       int ret;
+       if (signals_enabled == enable)
+               return enable;
+
+       ret = signals_enabled;
+       if (enable)
+               unblock_signals_trace();
+       else
+               block_signals_trace();
+
+       return ret;
+}
+
 int os_is_signal_stack(void)
 {
        stack_t ss;
index d2ea340..c4566e7 100644 (file)
@@ -1,6 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
-# Licensed under the GPL
 #
 
 obj-y := mem.o process.o
index 35015e3..c546d16 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stddef.h>
index df4a985..4fb877b 100644 (file)
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdlib.h>
@@ -425,9 +425,9 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
                        case SIGBUS:
                        case SIGFPE:
                        case SIGWINCH:
-                               block_signals();
+                               block_signals_trace();
                                (*sig_info[sig])(sig, (struct siginfo *)&si, regs);
-                               unblock_signals();
+                               unblock_signals_trace();
                                break;
                        default:
                                printk(UM_KERN_ERR "userspace - child stopped "
@@ -625,10 +625,10 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg)
        cb_arg = arg;
        cb_back = &here;
 
-       block_signals();
+       block_signals_trace();
        if (UML_SETJMP(&here) == 0)
                UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK);
-       unblock_signals();
+       unblock_signals_trace();
 
        cb_proc = NULL;
        cb_arg = NULL;
@@ -637,13 +637,13 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg)
 
 void halt_skas(void)
 {
-       block_signals();
+       block_signals_trace();
        UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT);
 }
 
 void reboot_skas(void)
 {
-       block_signals();
+       block_signals_trace();
        UML_LONGJMP(&initial_jmpbuf, INIT_JMP_REBOOT);
 }
 
index 82bf5f8..f79dc33 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
index 6d94ff5..432f8e1 100644 (file)
@@ -1,9 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
  * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2012-2014 Cisco Systems
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stddef.h>
index 721d8af..f784db8 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdlib.h>
index e261656..44def53 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
index 8cc8b26..ecf2f39 100644 (file)
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
index 3f0903b..ba1c9a7 100644 (file)
@@ -18,8 +18,6 @@
 #define __HAVE_ARCH_PTE_ALLOC_ONE
 #include <asm-generic/pgalloc.h>
 
-#define check_pgt_cache()              do { } while (0)
-
 #define _PAGE_USER_TABLE       (PMD_TYPE_TABLE | PMD_PRESENT)
 #define _PAGE_KERNEL_TABLE     (PMD_TYPE_TABLE | PMD_PRESENT)
 
index 126e961..c8f7ba1 100644 (file)
@@ -285,8 +285,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
 #include <asm-generic/pgtable.h>
 
-#define pgtable_cache_init() do { } while (0)
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __UNICORE_PGTABLE_H__ */
index 10d2356..4663d8c 100644 (file)
@@ -15,7 +15,7 @@
 
 #define __pte_free_tlb(tlb, pte, addr)                         \
        do {                                                    \
-               pgtable_page_dtor(pte);                         \
+               pgtable_pte_page_dtor(pte);                     \
                tlb_remove_page((tlb), (pte));                  \
        } while (0)
 
index 37ed5f5..d6e1faa 100644 (file)
@@ -2031,20 +2031,30 @@ config KEXEC_FILE
 config ARCH_HAS_KEXEC_PURGATORY
        def_bool KEXEC_FILE
 
-config KEXEC_VERIFY_SIG
+config KEXEC_SIG
        bool "Verify kernel signature during kexec_file_load() syscall"
        depends on KEXEC_FILE
        ---help---
-         This option makes kernel signature verification mandatory for
-         the kexec_file_load() syscall.
 
-         In addition to that option, you need to enable signature
+         This option makes the kexec_file_load() syscall check for a valid
+         signature of the kernel image.  The image can still be loaded without
+         a valid signature unless you also enable KEXEC_SIG_FORCE, though if
+         there's a signature that we can check, then it must be valid.
+
+         In addition to this option, you need to enable signature
          verification for the corresponding kernel image type being
          loaded in order for this to work.
 
+config KEXEC_SIG_FORCE
+       bool "Require a valid signature in kexec_file_load() syscall"
+       depends on KEXEC_SIG
+       ---help---
+         This option makes kernel signature verification mandatory for
+         the kexec_file_load() syscall.
+
 config KEXEC_BZIMAGE_VERIFY_SIG
        bool "Enable bzImage signature verification support"
-       depends on KEXEC_VERIFY_SIG
+       depends on KEXEC_SIG
        depends on SIGNED_PE_FILE_VERIFICATION
        select SYSTEM_TRUSTED_KEYRING
        ---help---
index 15255f3..149795c 100644 (file)
@@ -26,7 +26,7 @@ struct mem_vector immovable_mem[MAX_NUMNODES*2];
  */
 #define MAX_ADDR_LEN 19
 
-static acpi_physical_address get_acpi_rsdp(void)
+static acpi_physical_address get_cmdline_acpi_rsdp(void)
 {
        acpi_physical_address addr = 0;
 
@@ -278,10 +278,7 @@ acpi_physical_address get_rsdp_addr(void)
 {
        acpi_physical_address pa;
 
-       pa = get_acpi_rsdp();
-
-       if (!pa)
-               pa = boot_params->acpi_rsdp_addr;
+       pa = boot_params->acpi_rsdp_addr;
 
        /*
         * Try to get EFI data from setup_data. This can happen when we're a
@@ -311,7 +308,17 @@ static unsigned long get_acpi_srat_table(void)
        char arg[10];
        u8 *entry;
 
-       rsdp = (struct acpi_table_rsdp *)(long)boot_params->acpi_rsdp_addr;
+       /*
+        * Check whether we were given an RSDP on the command line. We don't
+        * stash this in boot params because the kernel itself may have
+        * different ideas about whether to trust a command-line parameter.
+        */
+       rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp();
+
+       if (!rsdp)
+               rsdp = (struct acpi_table_rsdp *)(long)
+                       boot_params->acpi_rsdp_addr;
+
        if (!rsdp)
                return 0;
 
index aac686e..bc9693c 100644 (file)
@@ -117,6 +117,12 @@ static inline bool acpi_has_cpu_in_madt(void)
        return !!acpi_lapic;
 }
 
+#define ACPI_HAVE_ARCH_SET_ROOT_POINTER
+static inline void acpi_arch_set_root_pointer(u64 addr)
+{
+       x86_init.acpi.set_root_pointer(addr);
+}
+
 #define ACPI_HAVE_ARCH_GET_ROOT_POINTER
 static inline u64 acpi_arch_get_root_pointer(void)
 {
@@ -125,6 +131,7 @@ static inline u64 acpi_arch_get_root_pointer(void)
 
 void acpi_generic_reduced_hw_init(void);
 
+void x86_default_set_root_pointer(u64 addr);
 u64 x86_default_get_root_pointer(void);
 
 #else /* !CONFIG_ACPI */
@@ -138,6 +145,8 @@ static inline void disable_acpi(void) { }
 
 static inline void acpi_generic_reduced_hw_init(void) { }
 
+static inline void x86_default_set_root_pointer(u64 addr) { }
+
 static inline u64 x86_default_get_root_pointer(void)
 {
        return 0;
index 094fbc9..13adca3 100644 (file)
@@ -201,10 +201,10 @@ static inline int alternatives_text_reserved(void *start, void *end)
  * without volatile and memory clobber.
  */
 #define alternative(oldinstr, newinstr, feature)                       \
-       asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
+       asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
 
 #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
-       asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
+       asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
 
 /*
  * Alternative inline assembly with input.
@@ -218,7 +218,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
  * Leaving an unused argument 0 to keep API compatibility.
  */
 #define alternative_input(oldinstr, newinstr, feature, input...)       \
-       asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)          \
+       asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature)   \
                : : "i" (0), ## input)
 
 /*
@@ -231,18 +231,18 @@ static inline int alternatives_text_reserved(void *start, void *end)
  */
 #define alternative_input_2(oldinstr, newinstr1, feature1, newinstr2,       \
                           feature2, input...)                               \
-       asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1,            \
+       asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1,     \
                newinstr2, feature2)                                         \
                : : "i" (0), ## input)
 
 /* Like alternative_input, but with a single output argument */
 #define alternative_io(oldinstr, newinstr, feature, output, input...)  \
-       asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)          \
+       asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature)   \
                : output : "i" (0), ## input)
 
 /* Like alternative_io, but for replacing a direct call with another one. */
 #define alternative_call(oldfunc, newfunc, feature, output, input...)  \
-       asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \
+       asm_inline volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \
                : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
 
 /*
@@ -253,7 +253,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
  */
 #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2,   \
                           output, input...)                                  \
-       asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
+       asm_inline volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
                "call %P[new2]", feature2)                                    \
                : output, ASM_CALL_CONSTRAINT                                 \
                : [old] "i" (oldfunc), [new1] "i" (newfunc1),                 \
index 6804d66..facba9b 100644 (file)
@@ -32,7 +32,7 @@
 
 #define _BUG_FLAGS(ins, flags)                                         \
 do {                                                                   \
-       asm volatile("1:\t" ins "\n"                                    \
+       asm_inline volatile("1:\t" ins "\n"                             \
                     ".pushsection __bug_table,\"aw\"\n"                \
                     "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"   \
                     "\t"  __BUG_REL(%c0) "\t# bug_entry::file\n"       \
@@ -49,7 +49,7 @@ do {                                                                  \
 
 #define _BUG_FLAGS(ins, flags)                                         \
 do {                                                                   \
-       asm volatile("1:\t" ins "\n"                                    \
+       asm_inline volatile("1:\t" ins "\n"                             \
                     ".pushsection __bug_table,\"aw\"\n"                \
                     "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"   \
                     "\t.word %c0"        "\t# bug_entry::flags\n"      \
index 7a27056..7741e21 100644 (file)
 /* Recommend using enlightened VMCS */
 #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED            BIT(14)
 
+/*
+ * Virtual processor will never share a physical core with another virtual
+ * processor, except for virtual processors that are reported as sibling SMT
+ * threads.
+ */
+#define HV_X64_NO_NONARCH_CORESHARING                  BIT(18)
+
 /* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */
+#define HV_X64_NESTED_DIRECT_FLUSH                     BIT(17)
 #define HV_X64_NESTED_GUEST_MAPPING_FLUSH              BIT(18)
 #define HV_X64_NESTED_MSR_BITMAP                       BIT(19)
 
@@ -524,14 +532,24 @@ struct hv_timer_message_payload {
        __u64 delivery_time;    /* When the message was delivered */
 } __packed;
 
+struct hv_nested_enlightenments_control {
+       struct {
+               __u32 directhypercall:1;
+               __u32 reserved:31;
+       } features;
+       struct {
+               __u32 reserved;
+       } hypercallControls;
+} __packed;
+
 /* Define virtual processor assist page structure. */
 struct hv_vp_assist_page {
        __u32 apic_assist;
-       __u32 reserved;
-       __u64 vtl_control[2];
-       __u64 nested_enlightenments_control[2];
-       __u32 enlighten_vmentry;
-       __u32 padding;
+       __u32 reserved1;
+       __u64 vtl_control[3];
+       struct hv_nested_enlightenments_control nested_control;
+       __u8 enlighten_vmentry;
+       __u8 reserved2[7];
        __u64 current_nested_vmcs;
 } __packed;
 
@@ -882,4 +900,7 @@ struct hv_tlb_flush_ex {
        u64 gva_list[];
 } __packed;
 
+struct hv_partition_assist_pg {
+       u32 tlb_lock_count;
+};
 #endif
index a3a3ec7..23edf56 100644 (file)
@@ -320,6 +320,7 @@ struct kvm_mmu_page {
        struct list_head link;
        struct hlist_node hash_link;
        bool unsync;
+       u8 mmu_valid_gen;
        bool mmio_cached;
 
        /*
@@ -335,7 +336,6 @@ struct kvm_mmu_page {
        int root_count;          /* Currently serving as active root */
        unsigned int unsync_children;
        struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
-       unsigned long mmu_valid_gen;
        DECLARE_BITMAP(unsync_child_bitmap, 512);
 
 #ifdef CONFIG_X86_32
@@ -844,6 +844,8 @@ struct kvm_hv {
 
        /* How many vCPUs have VP index != vCPU index */
        atomic_t num_mismatched_vp_indexes;
+
+       struct hv_partition_assist_pg *hv_pa_pg;
 };
 
 enum kvm_irqchip_mode {
@@ -857,12 +859,13 @@ struct kvm_arch {
        unsigned long n_requested_mmu_pages;
        unsigned long n_max_mmu_pages;
        unsigned int indirect_shadow_pages;
-       unsigned long mmu_valid_gen;
+       u8 mmu_valid_gen;
        struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
        /*
         * Hash table of struct kvm_mmu_page.
         */
        struct list_head active_mmu_pages;
+       struct list_head zapped_obsolete_pages;
        struct kvm_page_track_notifier_node mmu_sp_tracker;
        struct kvm_page_track_notifier_head track_notifier_head;
 
@@ -1213,6 +1216,7 @@ struct kvm_x86_ops {
        bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
 
        bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
+       int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
@@ -1312,18 +1316,42 @@ extern u64  kvm_default_tsc_scaling_ratio;
 
 extern u64 kvm_mce_cap_supported;
 
-enum emulation_result {
-       EMULATE_DONE,         /* no further processing */
-       EMULATE_USER_EXIT,    /* kvm_run ready for userspace exit */
-       EMULATE_FAIL,         /* can't emulate this instruction */
-};
-
+/*
+ * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
+ *                     userspace I/O) to indicate that the emulation context
+ *                     should be reused as is, i.e. skip initialization of
+ *                     emulation context, instruction fetch and decode.
+ *
+ * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware.
+ *                   Indicates that only select instructions (tagged with
+ *                   EmulateOnUD) should be emulated (to minimize the emulator
+ *                   attack surface).  See also EMULTYPE_TRAP_UD_FORCED.
+ *
+ * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
+ *                decode the instruction length.  For use *only* by
+ *                kvm_x86_ops->skip_emulated_instruction() implementations.
+ *
+ * EMULTYPE_ALLOW_RETRY - Set when the emulator should resume the guest to
+ *                       retry native execution under certain conditions.
+ *
+ * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was
+ *                          triggered by KVM's magic "force emulation" prefix,
+ *                          which is opt in via module param (off by default).
+ *                          Bypasses EmulateOnUD restriction despite emulating
+ *                          due to an intercepted #UD (see EMULTYPE_TRAP_UD).
+ *                          Used to test the full emulator from userspace.
+ *
+ * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware
+ *                     backdoor emulation, which is opt in via module param.
+ *                     VMware backdoor emulation handles select instructions
+ *                     and reinjects the #GP for all other cases.
+ */
 #define EMULTYPE_NO_DECODE         (1 << 0)
 #define EMULTYPE_TRAP_UD           (1 << 1)
 #define EMULTYPE_SKIP              (1 << 2)
 #define EMULTYPE_ALLOW_RETRY       (1 << 3)
-#define EMULTYPE_NO_UD_ON_FAIL     (1 << 4)
-#define EMULTYPE_VMWARE                    (1 << 5)
+#define EMULTYPE_TRAP_UD_FORCED            (1 << 4)
+#define EMULTYPE_VMWARE_GP         (1 << 5)
 int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
 int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
                                        void *insn, int insn_len);
@@ -1506,7 +1534,7 @@ enum {
 #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
 #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
 
-asmlinkage void __noreturn kvm_spurious_fault(void);
+asmlinkage void kvm_spurious_fault(void);
 
 /*
  * Hardware virtualization extension instructions may fault if a
@@ -1514,24 +1542,14 @@ asmlinkage void __noreturn kvm_spurious_fault(void);
  * Usually after catching the fault we just panic; during reboot
  * instead the instruction is ignored.
  */
-#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn)             \
+#define __kvm_handle_fault_on_reboot(insn)                             \
        "666: \n\t"                                                     \
        insn "\n\t"                                                     \
        "jmp    668f \n\t"                                              \
        "667: \n\t"                                                     \
        "call   kvm_spurious_fault \n\t"                                \
        "668: \n\t"                                                     \
-       ".pushsection .fixup, \"ax\" \n\t"                              \
-       "700: \n\t"                                                     \
-       cleanup_insn "\n\t"                                             \
-       "cmpb   $0, kvm_rebooting\n\t"                                  \
-       "je     667b \n\t"                                              \
-       "jmp    668b \n\t"                                              \
-       ".popsection \n\t"                                              \
-       _ASM_EXTABLE(666b, 700b)
-
-#define __kvm_handle_fault_on_reboot(insn)             \
-       ____kvm_handle_fault_on_reboot(insn, "")
+       _ASM_EXTABLE(666b, 667b)
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
index c78da8e..0dca7f7 100644 (file)
@@ -29,8 +29,6 @@ extern pgd_t swapper_pg_dir[1024];
 extern pgd_t initial_page_table[1024];
 extern pmd_t initial_pg_pmd[];
 
-static inline void pgtable_cache_init(void) { }
-static inline void check_pgt_cache(void) { }
 void paging_init(void);
 void sync_initial_page_table(void);
 
index 4990d26..0b6c404 100644 (file)
@@ -241,9 +241,6 @@ extern void cleanup_highmap(void);
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 
-#define pgtable_cache_init()   do { } while (0)
-#define check_pgt_cache()      do { } while (0)
-
 #define PAGE_AGP    PAGE_KERNEL_NOCACHE
 #define HAVE_PAGE_AGP 1
 
index dec9c1e..6ece856 100644 (file)
@@ -52,6 +52,7 @@ enum {
        INTERCEPT_MWAIT,
        INTERCEPT_MWAIT_COND,
        INTERCEPT_XSETBV,
+       INTERCEPT_RDPRU,
 };
 
 
index b15e646..1835767 100644 (file)
@@ -69,6 +69,7 @@
 #define SECONDARY_EXEC_PT_USE_GPA              0x01000000
 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC     0x00400000
 #define SECONDARY_EXEC_TSC_SCALING              0x02000000
+#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE   0x04000000
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
 #define PIN_BASED_NMI_EXITING                   0x00000008
 #define VMX_MISC_SAVE_EFER_LMA                 0x00000020
 #define VMX_MISC_ACTIVITY_HLT                  0x00000040
 #define VMX_MISC_ZERO_LEN_INS                  0x40000000
+#define VMX_MISC_MSR_LIST_MULTIPLIER           512
 
 /* VMFUNC functions */
 #define VMX_VMFUNC_EPTP_SWITCHING               0x00000001
index ac09341..1943585 100644 (file)
@@ -134,10 +134,12 @@ struct x86_hyper_init {
 
 /**
  * struct x86_init_acpi - x86 ACPI init functions
+ * @set_root_pointer:          set RSDP address
  * @get_root_pointer:          get RSDP address
  * @reduced_hw_early_init:     hardware reduced platform early init
  */
 struct x86_init_acpi {
+       void (*set_root_pointer)(u64 addr);
        u64 (*get_root_pointer)(void);
        void (*reduced_hw_early_init)(void);
 };
index a9731f8..2e8a30f 100644 (file)
@@ -75,6 +75,7 @@
 #define SVM_EXIT_MWAIT         0x08b
 #define SVM_EXIT_MWAIT_COND    0x08c
 #define SVM_EXIT_XSETBV        0x08d
+#define SVM_EXIT_RDPRU         0x08e
 #define SVM_EXIT_NPF           0x400
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI           0x401
 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS     0x402
index f01950a..3eb8411 100644 (file)
@@ -86,6 +86,8 @@
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
+#define EXIT_REASON_UMWAIT              67
+#define EXIT_REASON_TPAUSE              68
 
 #define VMX_EXIT_REASONS \
        { EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
        { EXIT_REASON_RDSEED,                "RDSEED" }, \
        { EXIT_REASON_PML_FULL,              "PML_FULL" }, \
        { EXIT_REASON_XSAVES,                "XSAVES" }, \
-       { EXIT_REASON_XRSTORS,               "XRSTORS" }
+       { EXIT_REASON_XRSTORS,               "XRSTORS" }, \
+       { EXIT_REASON_UMWAIT,                "UMWAIT" }, \
+       { EXIT_REASON_TPAUSE,                "TPAUSE" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL       2
index 17b33ef..04205ce 100644 (file)
@@ -1760,6 +1760,11 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
        e820__update_table_print();
 }
 
+void x86_default_set_root_pointer(u64 addr)
+{
+       boot_params.acpi_rsdp_addr = addr;
+}
+
 u64 x86_default_get_root_pointer(void)
 {
        return boot_params.acpi_rsdp_addr;
index 32b4dc9..c222f28 100644 (file)
  */
 static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);
 
+u32 get_umwait_control_msr(void)
+{
+       return umwait_control_cached;
+}
+EXPORT_SYMBOL_GPL(get_umwait_control_msr);
+
 /*
  * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by
  * hardware or BIOS before kernel boot.
index 4c40783..4d4f5d9 100644 (file)
@@ -74,9 +74,9 @@ bool arch_ima_get_secureboot(void)
 
 /* secureboot arch rules */
 static const char * const sb_arch_rules[] = {
-#if !IS_ENABLED(CONFIG_KEXEC_VERIFY_SIG)
+#if !IS_ENABLED(CONFIG_KEXEC_SIG)
        "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig",
-#endif /* CONFIG_KEXEC_VERIFY_SIG */
+#endif /* CONFIG_KEXEC_SIG */
        "measure func=KEXEC_KERNEL_CHECK",
 #if !IS_ENABLED(CONFIG_MODULE_SIG)
        "appraise func=MODULE_CHECK appraise_type=imasig",
index 0fe1c87..61a89d3 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/ioport.h>
+#include <linux/security.h>
 #include <linux/smp.h>
 #include <linux/stddef.h>
 #include <linux/slab.h>
@@ -31,7 +32,8 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
 
        if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
                return -EINVAL;
-       if (turn_on && !capable(CAP_SYS_RAWIO))
+       if (turn_on && (!capable(CAP_SYS_RAWIO) ||
+                       security_locked_down(LOCKDOWN_IOPORT)))
                return -EPERM;
 
        /*
@@ -126,7 +128,8 @@ SYSCALL_DEFINE1(iopl, unsigned int, level)
                return -EINVAL;
        /* Trying to gain more privileges? */
        if (level > old) {
-               if (!capable(CAP_SYS_RAWIO))
+               if (!capable(CAP_SYS_RAWIO) ||
+                   security_locked_down(LOCKDOWN_IOPORT))
                        return -EPERM;
        }
        regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
index 5ebcd02..d2f4e70 100644 (file)
@@ -180,6 +180,7 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr,
        if (efi_enabled(EFI_OLD_MEMMAP))
                return 0;
 
+       params->secure_boot = boot_params.secure_boot;
        ei->efi_loader_signature = current_ei->efi_loader_signature;
        ei->efi_systab = current_ei->efi_systab;
        ei->efi_systab_hi = current_ei->efi_systab_hi;
index 3db2252..1547be3 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/notifier.h>
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
+#include <linux/security.h>
 
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
@@ -79,6 +80,10 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
        int err = 0;
        ssize_t bytes = 0;
 
+       err = security_locked_down(LOCKDOWN_MSR);
+       if (err)
+               return err;
+
        if (count % 8)
                return -EINVAL; /* Invalid chunk size */
 
@@ -130,6 +135,9 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
                        err = -EFAULT;
                        break;
                }
+               err = security_locked_down(LOCKDOWN_MSR);
+               if (err)
+                       break;
                err = wrmsr_safe_regs_on_cpu(cpu, regs);
                if (err)
                        break;
index 1bef687..18a799c 100644 (file)
@@ -95,6 +95,7 @@ struct x86_init_ops x86_init __initdata = {
        },
 
        .acpi = {
+               .set_root_pointer       = x86_default_set_root_pointer,
                .get_root_pointer       = x86_default_get_root_pointer,
                .reduced_hw_early_init  = acpi_generic_reduced_hw_init,
        },
index dd5985e..6331603 100644 (file)
@@ -304,7 +304,13 @@ static void do_host_cpuid(struct kvm_cpuid_entry2 *entry, u32 function,
        case 7:
        case 0xb:
        case 0xd:
+       case 0xf:
+       case 0x10:
+       case 0x12:
        case 0x14:
+       case 0x17:
+       case 0x18:
+       case 0x1f:
        case 0x8000001d:
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                break;
@@ -360,7 +366,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
                F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
                F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
                F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
-               F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
+               F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
 
        /* cpuid 7.0.edx*/
        const u32 kvm_cpuid_7_0_edx_x86_features =
index fff790a..23ff655 100644 (file)
@@ -23,6 +23,7 @@
 #include "ioapic.h"
 #include "hyperv.h"
 
+#include <linux/cpu.h>
 #include <linux/kvm_host.h>
 #include <linux/highmem.h>
 #include <linux/sched/cputime.h>
@@ -645,7 +646,9 @@ static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer)
                .vector = stimer->config.apic_vector
        };
 
-       return !kvm_apic_set_irq(vcpu, &irq, NULL);
+       if (lapic_in_kernel(vcpu))
+               return !kvm_apic_set_irq(vcpu, &irq, NULL);
+       return 0;
 }
 
 static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
@@ -1852,7 +1855,13 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 
                        ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
                        ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
-                       ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
+
+                       /*
+                        * Direct Synthetic timers only make sense with in-kernel
+                        * LAPIC
+                        */
+                       if (lapic_in_kernel(vcpu))
+                               ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
 
                        break;
 
@@ -1864,7 +1873,8 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
                        ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
                        if (evmcs_ver)
                                ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
-
+                       if (!cpu_smt_possible())
+                               ent->eax |= HV_X64_NO_NONARCH_CORESHARING;
                        /*
                         * Default number of spinlock retry attempts, matches
                         * HyperV 2016.
index 8675458..3a3a685 100644 (file)
@@ -65,7 +65,9 @@
 #define APIC_BROADCAST                 0xFF
 #define X2APIC_BROADCAST               0xFFFFFFFFul
 
-#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
+static bool lapic_timer_advance_dynamic __read_mostly;
+#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100
+#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 5000
 #define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000
 /* step-by-step approximation to mitigate fluctuation */
 #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
@@ -1485,26 +1487,25 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
        u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
        u64 ns;
 
+       /* Do not adjust for tiny fluctuations or large random spikes. */
+       if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
+           abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
+               return;
+
        /* too early */
        if (advance_expire_delta < 0) {
                ns = -advance_expire_delta * 1000000ULL;
                do_div(ns, vcpu->arch.virtual_tsc_khz);
-               timer_advance_ns -= min((u32)ns,
-                       timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+               timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
        } else {
        /* too late */
                ns = advance_expire_delta * 1000000ULL;
                do_div(ns, vcpu->arch.virtual_tsc_khz);
-               timer_advance_ns += min((u32)ns,
-                       timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+               timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
        }
 
-       if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
-               apic->lapic_timer.timer_advance_adjust_done = true;
-       if (unlikely(timer_advance_ns > 5000)) {
+       if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_ADJUST_MAX))
                timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
-               apic->lapic_timer.timer_advance_adjust_done = false;
-       }
        apic->lapic_timer.timer_advance_ns = timer_advance_ns;
 }
 
@@ -1524,7 +1525,7 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
        if (guest_tsc < tsc_deadline)
                __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
 
-       if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
+       if (lapic_timer_advance_dynamic)
                adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
 }
 
@@ -2302,13 +2303,12 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
        apic->lapic_timer.timer.function = apic_timer_fn;
        if (timer_advance_ns == -1) {
                apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
-               apic->lapic_timer.timer_advance_adjust_done = false;
+               lapic_timer_advance_dynamic = true;
        } else {
                apic->lapic_timer.timer_advance_ns = timer_advance_ns;
-               apic->lapic_timer.timer_advance_adjust_done = true;
+               lapic_timer_advance_dynamic = false;
        }
 
-
        /*
         * APIC is created enabled. This will prevent kvm_lapic_set_base from
         * thinking that APIC state has changed.
index 50053d2..2aad7e2 100644 (file)
@@ -35,7 +35,6 @@ struct kvm_timer {
        s64 advance_expire_delta;
        atomic_t pending;                       /* accumulated triggered timers */
        bool hv_timer_in_use;
-       bool timer_advance_adjust_done;
 };
 
 struct kvm_lapic {
index a10af9c..5269aa0 100644 (file)
@@ -403,8 +403,6 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
        mask |= (gpa & shadow_nonpresent_or_rsvd_mask)
                << shadow_nonpresent_or_rsvd_mask_len;
 
-       page_header(__pa(sptep))->mmio_cached = true;
-
        trace_mark_mmio_spte(sptep, gfn, access, gen);
        mmu_spte_set(sptep, mask);
 }
@@ -2103,6 +2101,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct
         * depends on valid pages being added to the head of the list.  See
         * comments in kvm_zap_obsolete_pages().
         */
+       sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
        list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
        kvm_mod_used_mmu_pages(vcpu->kvm, +1);
        return sp;
@@ -2252,7 +2251,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 #define for_each_valid_sp(_kvm, _sp, _gfn)                             \
        hlist_for_each_entry(_sp,                                       \
          &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
-               if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) {    \
+               if (is_obsolete_sp((_kvm), (_sp))) {                    \
                } else
 
 #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn)                        \
@@ -2311,7 +2310,8 @@ static void mmu_audit_disable(void) { }
 
 static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-       return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
+       return sp->role.invalid ||
+              unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
 }
 
 static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
@@ -2538,7 +2538,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                if (level > PT_PAGE_TABLE_LEVEL && need_sync)
                        flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
        }
-       sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
        clear_page(sp->spt);
        trace_kvm_mmu_get_page(sp, true);
 
@@ -2753,7 +2752,12 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
        } else {
                list_move(&sp->link, &kvm->arch.active_mmu_pages);
 
-               if (!sp->role.invalid)
+               /*
+                * Obsolete pages cannot be used on any vCPUs, see the comment
+                * in kvm_mmu_zap_all_fast().  Note, is_obsolete_sp() also
+                * treats invalid shadow pages as being obsolete.
+                */
+               if (!is_obsolete_sp(kvm, sp))
                        kvm_reload_remote_mmus(kvm);
        }
 
@@ -5383,7 +5387,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
                       void *insn, int insn_len)
 {
        int r, emulation_type = 0;
-       enum emulation_result er;
        bool direct = vcpu->arch.mmu->direct_map;
 
        /* With shadow page tables, fault_address contains a GVA or nGPA.  */
@@ -5450,19 +5453,8 @@ emulate:
                        return 1;
        }
 
-       er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
-
-       switch (er) {
-       case EMULATE_DONE:
-               return 1;
-       case EMULATE_USER_EXIT:
-               ++vcpu->stat.mmio_exits;
-               /* fall through */
-       case EMULATE_FAIL:
-               return 0;
-       default:
-               BUG();
-       }
+       return x86_emulate_instruction(vcpu, cr2, emulation_type, insn,
+                                      insn_len);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 
@@ -5684,12 +5676,11 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
        return ret;
 }
 
-
+#define BATCH_ZAP_PAGES        10
 static void kvm_zap_obsolete_pages(struct kvm *kvm)
 {
        struct kvm_mmu_page *sp, *node;
-       LIST_HEAD(invalid_list);
-       int ign;
+       int nr_zapped, batch = 0;
 
 restart:
        list_for_each_entry_safe_reverse(sp, node,
@@ -5702,46 +5693,39 @@ restart:
                        break;
 
                /*
-                * Do not repeatedly zap a root page to avoid unnecessary
-                * KVM_REQ_MMU_RELOAD, otherwise we may not be able to
-                * progress:
-                *    vcpu 0                        vcpu 1
-                *                         call vcpu_enter_guest():
-                *                            1): handle KVM_REQ_MMU_RELOAD
-                *                                and require mmu-lock to
-                *                                load mmu
-                * repeat:
-                *    1): zap root page and
-                *        send KVM_REQ_MMU_RELOAD
-                *
-                *    2): if (cond_resched_lock(mmu-lock))
-                *
-                *                            2): hold mmu-lock and load mmu
-                *
-                *                            3): see KVM_REQ_MMU_RELOAD bit
-                *                                on vcpu->requests is set
-                *                                then return 1 to call
-                *                                vcpu_enter_guest() again.
-                *            goto repeat;
-                *
-                * Since we are reversely walking the list and the invalid
-                * list will be moved to the head, skip the invalid page
-                * can help us to avoid the infinity list walking.
+                * Skip invalid pages with a non-zero root count, zapping pages
+                * with a non-zero root count will never succeed, i.e. the page
+                * will get thrown back on active_mmu_pages and we'll get stuck
+                * in an infinite loop.
                 */
-               if (sp->role.invalid)
+               if (sp->role.invalid && sp->root_count)
                        continue;
 
-               if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
-                       kvm_mmu_commit_zap_page(kvm, &invalid_list);
-                       cond_resched_lock(&kvm->mmu_lock);
+               /*
+                * No need to flush the TLB since we're only zapping shadow
+                * pages with an obsolete generation number and all vCPUS have
+                * loaded a new root, i.e. the shadow pages being zapped cannot
+                * be in active use by the guest.
+                */
+               if (batch >= BATCH_ZAP_PAGES &&
+                   cond_resched_lock(&kvm->mmu_lock)) {
+                       batch = 0;
                        goto restart;
                }
 
-               if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
+               if (__kvm_mmu_prepare_zap_page(kvm, sp,
+                               &kvm->arch.zapped_obsolete_pages, &nr_zapped)) {
+                       batch += nr_zapped;
                        goto restart;
+               }
        }
 
-       kvm_mmu_commit_zap_page(kvm, &invalid_list);
+       /*
+        * Trigger a remote TLB flush before freeing the page tables to ensure
+        * KVM is not in the middle of a lockless shadow page table walk, which
+        * may reference the pages.
+        */
+       kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
 }
 
 /*
@@ -5755,13 +5739,39 @@ restart:
  */
 static void kvm_mmu_zap_all_fast(struct kvm *kvm)
 {
+       lockdep_assert_held(&kvm->slots_lock);
+
        spin_lock(&kvm->mmu_lock);
-       kvm->arch.mmu_valid_gen++;
+       trace_kvm_mmu_zap_all_fast(kvm);
+
+       /*
+        * Toggle mmu_valid_gen between '0' and '1'.  Because slots_lock is
+        * held for the entire duration of zapping obsolete pages, it's
+        * impossible for there to be multiple invalid generations associated
+        * with *valid* shadow pages at any given time, i.e. there is exactly
+        * one valid generation and (at most) one invalid generation.
+        */
+       kvm->arch.mmu_valid_gen = kvm->arch.mmu_valid_gen ? 0 : 1;
+
+       /*
+        * Notify all vcpus to reload their shadow page tables and flush the TLB.
+        * Then all vcpus will switch to new shadow page table with the new
+        * mmu_valid_gen.
+        *
+        * Note: we need to do this under the protection of mmu_lock,
+        * otherwise, vcpu would purge shadow page but miss tlb flush.
+        */
+       kvm_reload_remote_mmus(kvm);
 
        kvm_zap_obsolete_pages(kvm);
        spin_unlock(&kvm->mmu_lock);
 }
 
+static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
+{
+       return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
+}
+
 static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
                        struct kvm_memory_slot *slot,
                        struct kvm_page_track_notifier_node *node)
@@ -5959,7 +5969,7 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty);
 
-static void __kvm_mmu_zap_all(struct kvm *kvm, bool mmio_only)
+void kvm_mmu_zap_all(struct kvm *kvm)
 {
        struct kvm_mmu_page *sp, *node;
        LIST_HEAD(invalid_list);
@@ -5968,14 +5978,10 @@ static void __kvm_mmu_zap_all(struct kvm *kvm, bool mmio_only)
        spin_lock(&kvm->mmu_lock);
 restart:
        list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
-               if (mmio_only && !sp->mmio_cached)
-                       continue;
                if (sp->role.invalid && sp->root_count)
                        continue;
-               if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) {
-                       WARN_ON_ONCE(mmio_only);
+               if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
                        goto restart;
-               }
                if (cond_resched_lock(&kvm->mmu_lock))
                        goto restart;
        }
@@ -5984,11 +5990,6 @@ restart:
        spin_unlock(&kvm->mmu_lock);
 }
 
-void kvm_mmu_zap_all(struct kvm *kvm)
-{
-       return __kvm_mmu_zap_all(kvm, false);
-}
-
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
 {
        WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
@@ -6010,7 +6011,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
         */
        if (unlikely(gen == 0)) {
                kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n");
-               __kvm_mmu_zap_all(kvm, true);
+               kvm_mmu_zap_all_fast(kvm);
        }
 }
 
@@ -6041,16 +6042,24 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
                 * want to shrink a VM that only started to populate its MMU
                 * anyway.
                 */
-               if (!kvm->arch.n_used_mmu_pages)
+               if (!kvm->arch.n_used_mmu_pages &&
+                   !kvm_has_zapped_obsolete_pages(kvm))
                        continue;
 
                idx = srcu_read_lock(&kvm->srcu);
                spin_lock(&kvm->mmu_lock);
 
+               if (kvm_has_zapped_obsolete_pages(kvm)) {
+                       kvm_mmu_commit_zap_page(kvm,
+                             &kvm->arch.zapped_obsolete_pages);
+                       goto unlock;
+               }
+
                if (prepare_zap_oldest_mmu_page(kvm, &invalid_list))
                        freed++;
                kvm_mmu_commit_zap_page(kvm, &invalid_list);
 
+unlock:
                spin_unlock(&kvm->mmu_lock);
                srcu_read_unlock(&kvm->srcu, idx);
 
index d8001b4..7ca8831 100644 (file)
@@ -8,16 +8,18 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvmmmu
 
-#define KVM_MMU_PAGE_FIELDS \
-       __field(__u64, gfn) \
-       __field(__u32, role) \
-       __field(__u32, root_count) \
+#define KVM_MMU_PAGE_FIELDS            \
+       __field(__u8, mmu_valid_gen)    \
+       __field(__u64, gfn)             \
+       __field(__u32, role)            \
+       __field(__u32, root_count)      \
        __field(bool, unsync)
 
-#define KVM_MMU_PAGE_ASSIGN(sp)                             \
-       __entry->gfn = sp->gfn;                      \
-       __entry->role = sp->role.word;               \
-       __entry->root_count = sp->root_count;        \
+#define KVM_MMU_PAGE_ASSIGN(sp)                                \
+       __entry->mmu_valid_gen = sp->mmu_valid_gen;     \
+       __entry->gfn = sp->gfn;                         \
+       __entry->role = sp->role.word;                  \
+       __entry->root_count = sp->root_count;           \
        __entry->unsync = sp->unsync;
 
 #define KVM_MMU_PAGE_PRINTK() ({                                       \
@@ -29,8 +31,9 @@
                                                                        \
        role.word = __entry->role;                                      \
                                                                        \
-       trace_seq_printf(p, "sp gfn %llx l%u %u-byte q%u%s %s%s"        \
+       trace_seq_printf(p, "sp gen %u gfn %llx l%u %u-byte q%u%s %s%s" \
                         " %snxe %sad root %u %s%c",                    \
+                        __entry->mmu_valid_gen,                        \
                         __entry->gfn, role.level,                      \
                         role.gpte_is_8_bytes ? 8 : 4,                  \
                         role.quadrant,                                 \
@@ -280,6 +283,27 @@ TRACE_EVENT(
 );
 
 TRACE_EVENT(
+       kvm_mmu_zap_all_fast,
+       TP_PROTO(struct kvm *kvm),
+       TP_ARGS(kvm),
+
+       TP_STRUCT__entry(
+               __field(__u8, mmu_valid_gen)
+               __field(unsigned int, mmu_used_pages)
+       ),
+
+       TP_fast_assign(
+               __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen;
+               __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages;
+       ),
+
+       TP_printk("kvm-mmu-valid-gen %u used_pages %x",
+                 __entry->mmu_valid_gen, __entry->mmu_used_pages
+       )
+);
+
+
+TRACE_EVENT(
        check_mmio_spte,
        TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen),
        TP_ARGS(spte, kvm_gen, spte_gen),
index 04fe218..f8ecb6d 100644 (file)
@@ -777,17 +777,18 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
                svm->next_rip = svm->vmcb->control.next_rip;
        }
 
-       if (!svm->next_rip)
-               return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP);
-
-       if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
-               printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
-                      __func__, kvm_rip_read(vcpu), svm->next_rip);
-
-       kvm_rip_write(vcpu, svm->next_rip);
+       if (!svm->next_rip) {
+               if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
+                       return 0;
+       } else {
+               if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
+                       pr_err("%s: ip 0x%lx next 0x%llx\n",
+                              __func__, kvm_rip_read(vcpu), svm->next_rip);
+               kvm_rip_write(vcpu, svm->next_rip);
+       }
        svm_set_interrupt_shadow(vcpu, 0);
 
-       return EMULATE_DONE;
+       return 1;
 }
 
 static void svm_queue_exception(struct kvm_vcpu *vcpu)
@@ -1539,6 +1540,7 @@ static void init_vmcb(struct vcpu_svm *svm)
        set_intercept(svm, INTERCEPT_SKINIT);
        set_intercept(svm, INTERCEPT_WBINVD);
        set_intercept(svm, INTERCEPT_XSETBV);
+       set_intercept(svm, INTERCEPT_RDPRU);
        set_intercept(svm, INTERCEPT_RSM);
 
        if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
@@ -2768,17 +2770,18 @@ static int gp_interception(struct vcpu_svm *svm)
 {
        struct kvm_vcpu *vcpu = &svm->vcpu;
        u32 error_code = svm->vmcb->control.exit_info_1;
-       int er;
 
        WARN_ON_ONCE(!enable_vmware_backdoor);
 
-       er = kvm_emulate_instruction(vcpu,
-               EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
-       if (er == EMULATE_USER_EXIT)
-               return 0;
-       else if (er != EMULATE_DONE)
+       /*
+        * VMware backdoor emulation on #GP interception only handles IN{S},
+        * OUT{S}, and RDPMC, none of which generate a non-zero error code.
+        */
+       if (error_code) {
                kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
-       return 1;
+               return 1;
+       }
+       return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
 }
 
 static bool is_erratum_383(void)
@@ -2876,7 +2879,7 @@ static int io_interception(struct vcpu_svm *svm)
        string = (io_info & SVM_IOIO_STR_MASK) != 0;
        in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
        if (string)
-               return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
+               return kvm_emulate_instruction(vcpu, 0);
 
        port = io_info >> 16;
        size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
@@ -3830,6 +3833,12 @@ static int xsetbv_interception(struct vcpu_svm *svm)
        return 1;
 }
 
+static int rdpru_interception(struct vcpu_svm *svm)
+{
+       kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+       return 1;
+}
+
 static int task_switch_interception(struct vcpu_svm *svm)
 {
        u16 tss_selector;
@@ -3883,24 +3892,15 @@ static int task_switch_interception(struct vcpu_svm *svm)
            int_type == SVM_EXITINTINFO_TYPE_SOFT ||
            (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
             (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
-               if (skip_emulated_instruction(&svm->vcpu) != EMULATE_DONE)
-                       goto fail;
+               if (!skip_emulated_instruction(&svm->vcpu))
+                       return 0;
        }
 
        if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
                int_vec = -1;
 
-       if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
-                               has_error_code, error_code) == EMULATE_FAIL)
-               goto fail;
-
-       return 1;
-
-fail:
-       svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-       svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-       svm->vcpu.run->internal.ndata = 0;
-       return 0;
+       return kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
+                              has_error_code, error_code);
 }
 
 static int cpuid_interception(struct vcpu_svm *svm)
@@ -3921,7 +3921,7 @@ static int iret_interception(struct vcpu_svm *svm)
 static int invlpg_interception(struct vcpu_svm *svm)
 {
        if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
-               return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+               return kvm_emulate_instruction(&svm->vcpu, 0);
 
        kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
        return kvm_skip_emulated_instruction(&svm->vcpu);
@@ -3929,13 +3929,12 @@ static int invlpg_interception(struct vcpu_svm *svm)
 
 static int emulate_on_interception(struct vcpu_svm *svm)
 {
-       return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+       return kvm_emulate_instruction(&svm->vcpu, 0);
 }
 
 static int rsm_interception(struct vcpu_svm *svm)
 {
-       return kvm_emulate_instruction_from_buffer(&svm->vcpu,
-                                       rsm_ins_bytes, 2) == EMULATE_DONE;
+       return kvm_emulate_instruction_from_buffer(&svm->vcpu, rsm_ins_bytes, 2);
 }
 
 static int rdpmc_interception(struct vcpu_svm *svm)
@@ -4724,7 +4723,7 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
                ret = avic_unaccel_trap_write(svm);
        } else {
                /* Handling Fault */
-               ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
+               ret = kvm_emulate_instruction(&svm->vcpu, 0);
        }
 
        return ret;
@@ -4791,6 +4790,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_MONITOR]                      = monitor_interception,
        [SVM_EXIT_MWAIT]                        = mwait_interception,
        [SVM_EXIT_XSETBV]                       = xsetbv_interception,
+       [SVM_EXIT_RDPRU]                        = rdpru_interception,
        [SVM_EXIT_NPF]                          = npf_interception,
        [SVM_EXIT_RSM]                          = rsm_interception,
        [SVM_EXIT_AVIC_INCOMPLETE_IPI]          = avic_incomplete_ipi_interception,
@@ -7099,13 +7099,6 @@ failed:
        return ret;
 }
 
-static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
-                                  uint16_t *vmcs_version)
-{
-       /* Intel-only feature */
-       return -ENODEV;
-}
-
 static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
 {
        unsigned long cr4 = kvm_read_cr4(vcpu);
@@ -7311,7 +7304,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
        .mem_enc_reg_region = svm_register_enc_region,
        .mem_enc_unreg_region = svm_unregister_enc_region,
 
-       .nested_enable_evmcs = nested_enable_evmcs,
+       .nested_enable_evmcs = NULL,
        .nested_get_evmcs_version = NULL,
 
        .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
index d6664ee..7aa6971 100644 (file)
@@ -247,6 +247,12 @@ static inline bool vmx_xsaves_supported(void)
                SECONDARY_EXEC_XSAVES;
 }
 
+static inline bool vmx_waitpkg_supported(void)
+{
+       return vmcs_config.cpu_based_2nd_exec_ctrl &
+               SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+}
+
 static inline bool cpu_has_vmx_tsc_scaling(void)
 {
        return vmcs_config.cpu_based_2nd_exec_ctrl &
index 39a24ee..07ebf68 100644 (file)
@@ -178,6 +178,8 @@ static inline void evmcs_load(u64 phys_addr)
        struct hv_vp_assist_page *vp_ap =
                hv_get_vp_assist_page(smp_processor_id());
 
+       if (current_evmcs->hv_enlightenments_control.nested_flush_hypercall)
+               vp_ap->nested_control.features.directhypercall = 1;
        vp_ap->current_nested_vmcs = phys_addr;
        vp_ap->enlighten_vmentry = 1;
 }
index 1a10cd3..41abc62 100644 (file)
@@ -198,6 +198,16 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
        pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
 }
 
+static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
+{
+       return fixed_bits_valid(control, low, high);
+}
+
+static inline u64 vmx_control_msr(u32 low, u32 high)
+{
+       return low | ((u64)high << 32);
+}
+
 static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
 {
        secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
@@ -866,16 +876,34 @@ static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
        return 0;
 }
 
+static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
+                                      vmx->nested.msrs.misc_high);
+
+       return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER;
+}
+
 /*
  * Load guest's/host's msr at nested entry/exit.
  * return 0 for success, entry index for failure.
+ *
+ * One of the failure modes for MSR load/store is when a list exceeds the
+ * virtual hardware's capacity. To maintain compatibility with hardware inasmuch
+ * as possible, process all valid entries before failing rather than precheck
+ * for a capacity violation.
  */
 static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
 {
        u32 i;
        struct vmx_msr_entry e;
+       u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
 
        for (i = 0; i < count; i++) {
+               if (unlikely(i >= max_msr_list_size))
+                       goto fail;
+
                if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
                                        &e, sizeof(e))) {
                        pr_debug_ratelimited(
@@ -906,8 +934,12 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
        u64 data;
        u32 i;
        struct vmx_msr_entry e;
+       u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
 
        for (i = 0; i < count; i++) {
+               if (unlikely(i >= max_msr_list_size))
+                       return -EINVAL;
+
                if (kvm_vcpu_read_guest(vcpu,
                                        gpa + i * sizeof(e),
                                        &e, 2 * sizeof(u32))) {
@@ -1013,17 +1045,6 @@ static u16 nested_get_vpid02(struct kvm_vcpu *vcpu)
        return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid;
 }
 
-
-static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
-{
-       return fixed_bits_valid(control, low, high);
-}
-
-static inline u64 vmx_control_msr(u32 low, u32 high)
-{
-       return low | ((u64)high << 32);
-}
-
 static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
 {
        superset &= mask;
@@ -2089,6 +2110,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
                                  SECONDARY_EXEC_ENABLE_INVPCID |
                                  SECONDARY_EXEC_RDTSCP |
                                  SECONDARY_EXEC_XSAVES |
+                                 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
                                  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                                  SECONDARY_EXEC_APIC_REGISTER_VIRT |
                                  SECONDARY_EXEC_ENABLE_VMFUNC);
@@ -2642,8 +2664,23 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
            CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
                return -EINVAL;
 
-       ia32e = (vmcs12->vm_exit_controls &
-                VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
+#ifdef CONFIG_X86_64
+       ia32e = !!(vcpu->arch.efer & EFER_LMA);
+#else
+       ia32e = false;
+#endif
+
+       if (ia32e) {
+               if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) ||
+                   CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
+                       return -EINVAL;
+       } else {
+               if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ||
+                   CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
+                   CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
+                   CC((vmcs12->host_rip) >> 32))
+                       return -EINVAL;
+       }
 
        if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
            CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
@@ -2662,7 +2699,8 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
            CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
            CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
            CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) ||
-           CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)))
+           CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) ||
+           CC(is_noncanonical_address(vmcs12->host_rip, vcpu)))
                return -EINVAL;
 #endif
 
@@ -5441,6 +5479,10 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
        case EXIT_REASON_ENCLS:
                /* SGX is never exposed to L1 */
                return false;
+       case EXIT_REASON_UMWAIT:
+       case EXIT_REASON_TPAUSE:
+               return nested_cpu_has2(vmcs12,
+                       SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE);
        default:
                return true;
        }
index 2200fb6..45eaede 100644 (file)
 #include "vmcs.h"
 
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
-#define __ex_clear(x, reg) \
-       ____kvm_handle_fault_on_reboot(x, "xor " reg ", " reg)
+
+asmlinkage void vmread_error(unsigned long field, bool fault);
+void vmwrite_error(unsigned long field, unsigned long value);
+void vmclear_error(struct vmcs *vmcs, u64 phys_addr);
+void vmptrld_error(struct vmcs *vmcs, u64 phys_addr);
+void invvpid_error(unsigned long ext, u16 vpid, gva_t gva);
+void invept_error(unsigned long ext, u64 eptp, gpa_t gpa);
 
 static __always_inline void vmcs_check16(unsigned long field)
 {
@@ -62,8 +67,22 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
 {
        unsigned long value;
 
-       asm volatile (__ex_clear("vmread %1, %0", "%k0")
-                     : "=r"(value) : "r"(field));
+       asm volatile("1: vmread %2, %1\n\t"
+                    ".byte 0x3e\n\t" /* branch taken hint */
+                    "ja 3f\n\t"
+                    "mov %2, %%" _ASM_ARG1 "\n\t"
+                    "xor %%" _ASM_ARG2 ", %%" _ASM_ARG2 "\n\t"
+                    "2: call vmread_error\n\t"
+                    "xor %k1, %k1\n\t"
+                    "3:\n\t"
+
+                    ".pushsection .fixup, \"ax\"\n\t"
+                    "4: mov %2, %%" _ASM_ARG1 "\n\t"
+                    "mov $1, %%" _ASM_ARG2 "\n\t"
+                    "jmp 2b\n\t"
+                    ".popsection\n\t"
+                    _ASM_EXTABLE(1b, 4b)
+                    : ASM_CALL_CONSTRAINT, "=r"(value) : "r"(field) : "cc");
        return value;
 }
 
@@ -103,21 +122,39 @@ static __always_inline unsigned long vmcs_readl(unsigned long field)
        return __vmcs_readl(field);
 }
 
-static noinline void vmwrite_error(unsigned long field, unsigned long value)
-{
-       printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
-              field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
-       dump_stack();
-}
+#define vmx_asm1(insn, op1, error_args...)                             \
+do {                                                                   \
+       asm_volatile_goto("1: " __stringify(insn) " %0\n\t"             \
+                         ".byte 0x2e\n\t" /* branch not taken hint */  \
+                         "jna %l[error]\n\t"                           \
+                         _ASM_EXTABLE(1b, %l[fault])                   \
+                         : : op1 : "cc" : error, fault);               \
+       return;                                                         \
+error:                                                                 \
+       insn##_error(error_args);                                       \
+       return;                                                         \
+fault:                                                                 \
+       kvm_spurious_fault();                                           \
+} while (0)
+
+#define vmx_asm2(insn, op1, op2, error_args...)                                \
+do {                                                                   \
+       asm_volatile_goto("1: "  __stringify(insn) " %1, %0\n\t"        \
+                         ".byte 0x2e\n\t" /* branch not taken hint */  \
+                         "jna %l[error]\n\t"                           \
+                         _ASM_EXTABLE(1b, %l[fault])                   \
+                         : : op1, op2 : "cc" : error, fault);          \
+       return;                                                         \
+error:                                                                 \
+       insn##_error(error_args);                                       \
+       return;                                                         \
+fault:                                                                 \
+       kvm_spurious_fault();                                           \
+} while (0)
 
 static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
 {
-       bool error;
-
-       asm volatile (__ex("vmwrite %2, %1") CC_SET(na)
-                     : CC_OUT(na) (error) : "r"(field), "rm"(value));
-       if (unlikely(error))
-               vmwrite_error(field, value);
+       vmx_asm2(vmwrite, "r"(field), "rm"(value), field, value);
 }
 
 static __always_inline void vmcs_write16(unsigned long field, u16 value)
@@ -182,28 +219,18 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
 static inline void vmcs_clear(struct vmcs *vmcs)
 {
        u64 phys_addr = __pa(vmcs);
-       bool error;
 
-       asm volatile (__ex("vmclear %1") CC_SET(na)
-                     : CC_OUT(na) (error) : "m"(phys_addr));
-       if (unlikely(error))
-               printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
-                      vmcs, phys_addr);
+       vmx_asm1(vmclear, "m"(phys_addr), vmcs, phys_addr);
 }
 
 static inline void vmcs_load(struct vmcs *vmcs)
 {
        u64 phys_addr = __pa(vmcs);
-       bool error;
 
        if (static_branch_unlikely(&enable_evmcs))
                return evmcs_load(phys_addr);
 
-       asm volatile (__ex("vmptrld %1") CC_SET(na)
-                     : CC_OUT(na) (error) : "m"(phys_addr));
-       if (unlikely(error))
-               printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
-                      vmcs, phys_addr);
+       vmx_asm1(vmptrld, "m"(phys_addr), vmcs, phys_addr);
 }
 
 static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva)
@@ -213,11 +240,8 @@ static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva)
                u64 rsvd : 48;
                u64 gva;
        } operand = { vpid, 0, gva };
-       bool error;
 
-       asm volatile (__ex("invvpid %2, %1") CC_SET(na)
-                     : CC_OUT(na) (error) : "r"(ext), "m"(operand));
-       BUG_ON(error);
+       vmx_asm2(invvpid, "r"(ext), "m"(operand), ext, vpid, gva);
 }
 
 static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa)
@@ -225,11 +249,8 @@ static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa)
        struct {
                u64 eptp, gpa;
        } operand = {eptp, gpa};
-       bool error;
 
-       asm volatile (__ex("invept %2, %1") CC_SET(na)
-                     : CC_OUT(na) (error) : "r"(ext), "m"(operand));
-       BUG_ON(error);
+       vmx_asm2(invept, "r"(ext), "m"(operand), ext, eptp, gpa);
 }
 
 static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr)
index 4a99be1..d4575ff 100644 (file)
@@ -343,6 +343,48 @@ static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bit
 
 void vmx_vmexit(void);
 
+#define vmx_insn_failed(fmt...)                \
+do {                                   \
+       WARN_ONCE(1, fmt);              \
+       pr_warn_ratelimited(fmt);       \
+} while (0)
+
+asmlinkage void vmread_error(unsigned long field, bool fault)
+{
+       if (fault)
+               kvm_spurious_fault();
+       else
+               vmx_insn_failed("kvm: vmread failed: field=%lx\n", field);
+}
+
+noinline void vmwrite_error(unsigned long field, unsigned long value)
+{
+       vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n",
+                       field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
+}
+
+noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr)
+{
+       vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr);
+}
+
+noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr)
+{
+       vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr);
+}
+
+noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva)
+{
+       vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n",
+                       ext, vpid, gva);
+}
+
+noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
+{
+       vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n",
+                       ext, eptp, gpa);
+}
+
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 /*
@@ -486,6 +528,31 @@ static int hv_remote_flush_tlb(struct kvm *kvm)
        return hv_remote_flush_tlb_with_range(kvm, NULL);
 }
 
+static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
+{
+       struct hv_enlightened_vmcs *evmcs;
+       struct hv_partition_assist_pg **p_hv_pa_pg =
+                       &vcpu->kvm->arch.hyperv.hv_pa_pg;
+       /*
+        * Synthetic VM-Exit is not enabled in current code, so all
+        * evmcs in a single VM share the same assist page.
+        */
+       if (!*p_hv_pa_pg)
+               *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
+
+       if (!*p_hv_pa_pg)
+               return -ENOMEM;
+
+       evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs;
+
+       evmcs->partition_assist_page =
+               __pa(*p_hv_pa_pg);
+       evmcs->hv_vm_id = (unsigned long)vcpu->kvm;
+       evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
+
+       return 0;
+}
+
 #endif /* IS_ENABLED(CONFIG_HYPERV) */
 
 /*
@@ -1472,27 +1539,32 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
        return 0;
 }
 
-/*
- * Returns an int to be compatible with SVM implementation (which can fail).
- * Do not use directly, use skip_emulated_instruction() instead.
- */
-static int __skip_emulated_instruction(struct kvm_vcpu *vcpu)
+static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
        unsigned long rip;
 
-       rip = kvm_rip_read(vcpu);
-       rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
-       kvm_rip_write(vcpu, rip);
+       /*
+        * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on
+        * undefined behavior: Intel's SDM doesn't mandate the VMCS field be
+        * set when EPT misconfig occurs.  In practice, real hardware updates
+        * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors
+        * (namely Hyper-V) don't set it due to it being undefined behavior,
+        * i.e. we end up advancing IP with some random value.
+        */
+       if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
+           to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) {
+               rip = kvm_rip_read(vcpu);
+               rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+               kvm_rip_write(vcpu, rip);
+       } else {
+               if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
+                       return 0;
+       }
 
        /* skipping an emulated instruction also counts */
        vmx_set_interrupt_shadow(vcpu, 0);
 
-       return EMULATE_DONE;
-}
-
-static inline void skip_emulated_instruction(struct kvm_vcpu *vcpu)
-{
-       (void)__skip_emulated_instruction(vcpu);
+       return 1;
 }
 
 static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
@@ -1527,8 +1599,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
                int inc_eip = 0;
                if (kvm_exception_is_soft(nr))
                        inc_eip = vcpu->arch.event_exit_inst_len;
-               if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
-                       kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+               kvm_inject_realmode_interrupt(vcpu, nr, inc_eip);
                return;
        }
 
@@ -1700,6 +1771,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 #endif
        case MSR_EFER:
                return kvm_get_msr_common(vcpu, msr_info);
+       case MSR_IA32_UMWAIT_CONTROL:
+               if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
+                       return 1;
+
+               msr_info->data = vmx->msr_ia32_umwait_control;
+               break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr_info->host_initiated &&
                    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
@@ -1873,6 +1950,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        return 1;
                vmcs_write64(GUEST_BNDCFGS, data);
                break;
+       case MSR_IA32_UMWAIT_CONTROL:
+               if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
+                       return 1;
+
+               /* Reserved bit 1 and the upper 32 bits [63:32] must be zero */
+               if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32)))
+                       return 1;
+
+               vmx->msr_ia32_umwait_control = data;
+               break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr_info->host_initiated &&
                    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
@@ -2290,6 +2377,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
                        SECONDARY_EXEC_RDRAND_EXITING |
                        SECONDARY_EXEC_ENABLE_PML |
                        SECONDARY_EXEC_TSC_SCALING |
+                       SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
                        SECONDARY_EXEC_PT_USE_GPA |
                        SECONDARY_EXEC_PT_CONCEAL_VMX |
                        SECONDARY_EXEC_ENABLE_VMFUNC |
@@ -4026,6 +4114,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
                }
        }
 
+       if (vmx_waitpkg_supported()) {
+               bool waitpkg_enabled =
+                       guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG);
+
+               if (!waitpkg_enabled)
+                       exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+
+               if (nested) {
+                       if (waitpkg_enabled)
+                               vmx->nested.msrs.secondary_ctls_high |=
+                                       SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+                       else
+                               vmx->nested.msrs.secondary_ctls_high &=
+                                       ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+               }
+       }
+
        vmx->secondary_exec_control = exec_control;
 }
 
@@ -4160,6 +4265,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
        vmx->rmode.vm86_active = 0;
        vmx->spec_ctrl = 0;
 
+       vmx->msr_ia32_umwait_control = 0;
+
        vcpu->arch.microcode_version = 0x100000000ULL;
        vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
        vmx->hv_deadline_tsc = -1;
@@ -4277,8 +4384,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
                int inc_eip = 0;
                if (vcpu->arch.interrupt.soft)
                        inc_eip = vcpu->arch.event_exit_inst_len;
-               if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE)
-                       kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+               kvm_inject_realmode_interrupt(vcpu, irq, inc_eip);
                return;
        }
        intr = irq | INTR_INFO_VALID_MASK;
@@ -4314,8 +4420,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
        vmx->loaded_vmcs->nmi_known_unmasked = false;
 
        if (vmx->rmode.vm86_active) {
-               if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
-                       kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+               kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0);
                return;
        }
 
@@ -4442,7 +4547,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
         * Cause the #SS fault with 0 error code in VM86 mode.
         */
        if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
-               if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) {
+               if (kvm_emulate_instruction(vcpu, 0)) {
                        if (vcpu->arch.halt_request) {
                                vcpu->arch.halt_request = 0;
                                return kvm_vcpu_halt(vcpu);
@@ -4493,7 +4598,6 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
        u32 intr_info, ex_no, error_code;
        unsigned long cr2, rip, dr6;
        u32 vect_info;
-       enum emulation_result er;
 
        vect_info = vmx->idt_vectoring_info;
        intr_info = vmx->exit_intr_info;
@@ -4510,13 +4614,17 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 
        if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
                WARN_ON_ONCE(!enable_vmware_backdoor);
-               er = kvm_emulate_instruction(vcpu,
-                       EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
-               if (er == EMULATE_USER_EXIT)
-                       return 0;
-               else if (er != EMULATE_DONE)
+
+               /*
+                * VMware backdoor emulation on #GP interception only handles
+                * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero
+                * error code on #GP.
+                */
+               if (error_code) {
                        kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
-               return 1;
+                       return 1;
+               }
+               return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
        }
 
        /*
@@ -4558,7 +4666,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
                        vcpu->arch.dr6 &= ~DR_TRAP_BITS;
                        vcpu->arch.dr6 |= dr6 | DR6_RTM;
                        if (is_icebp(intr_info))
-                               skip_emulated_instruction(vcpu);
+                               WARN_ON(!skip_emulated_instruction(vcpu));
 
                        kvm_queue_exception(vcpu, DB_VECTOR);
                        return 1;
@@ -4613,7 +4721,7 @@ static int handle_io(struct kvm_vcpu *vcpu)
        ++vcpu->stat.io_exits;
 
        if (string)
-               return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
+               return kvm_emulate_instruction(vcpu, 0);
 
        port = exit_qualification >> 16;
        size = (exit_qualification & 7) + 1;
@@ -4687,7 +4795,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
 static int handle_desc(struct kvm_vcpu *vcpu)
 {
        WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
-       return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
+       return kvm_emulate_instruction(vcpu, 0);
 }
 
 static int handle_cr(struct kvm_vcpu *vcpu)
@@ -4903,7 +5011,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu)
 
 static int handle_invd(struct kvm_vcpu *vcpu)
 {
-       return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
+       return kvm_emulate_instruction(vcpu, 0);
 }
 
 static int handle_invlpg(struct kvm_vcpu *vcpu)
@@ -4937,20 +5045,6 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
        return 1;
 }
 
-static int handle_xsaves(struct kvm_vcpu *vcpu)
-{
-       kvm_skip_emulated_instruction(vcpu);
-       WARN(1, "this should never happen\n");
-       return 1;
-}
-
-static int handle_xrstors(struct kvm_vcpu *vcpu)
-{
-       kvm_skip_emulated_instruction(vcpu);
-       WARN(1, "this should never happen\n");
-       return 1;
-}
-
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
        if (likely(fasteoi)) {
@@ -4970,7 +5064,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
                        return kvm_skip_emulated_instruction(vcpu);
                }
        }
-       return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
+       return kvm_emulate_instruction(vcpu, 0);
 }
 
 static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
@@ -5039,23 +5133,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
        if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
                       type != INTR_TYPE_EXT_INTR &&
                       type != INTR_TYPE_NMI_INTR))
-               skip_emulated_instruction(vcpu);
-
-       if (kvm_task_switch(vcpu, tss_selector,
-                           type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason,
-                           has_error_code, error_code) == EMULATE_FAIL) {
-               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-               vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-               vcpu->run->internal.ndata = 0;
-               return 0;
-       }
+               WARN_ON(!skip_emulated_instruction(vcpu));
 
        /*
         * TODO: What about debug traps on tss switch?
         *       Are we supposed to inject them and update dr6?
         */
-
-       return 1;
+       return kvm_task_switch(vcpu, tss_selector,
+                              type == INTR_TYPE_SOFT_INTR ? idt_index : -1,
+                              reason, has_error_code, error_code);
 }
 
 static int handle_ept_violation(struct kvm_vcpu *vcpu)
@@ -5114,21 +5200,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
        if (!is_guest_mode(vcpu) &&
            !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
                trace_kvm_fast_mmio(gpa);
-               /*
-                * Doing kvm_skip_emulated_instruction() depends on undefined
-                * behavior: Intel's manual doesn't mandate
-                * VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG
-                * occurs and while on real hardware it was observed to be set,
-                * other hypervisors (namely Hyper-V) don't set it, we end up
-                * advancing IP with some random value. Disable fast mmio when
-                * running nested and keep it for real hardware in hope that
-                * VM_EXIT_INSTRUCTION_LEN will always be set correctly.
-                */
-               if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
-                       return kvm_skip_emulated_instruction(vcpu);
-               else
-                       return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) ==
-                                                               EMULATE_DONE;
+               return kvm_skip_emulated_instruction(vcpu);
        }
 
        return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
@@ -5147,8 +5219,6 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
 static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       enum emulation_result err = EMULATE_DONE;
-       int ret = 1;
        bool intr_window_requested;
        unsigned count = 130;
 
@@ -5169,41 +5239,35 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
                if (kvm_test_request(KVM_REQ_EVENT, vcpu))
                        return 1;
 
-               err = kvm_emulate_instruction(vcpu, 0);
-
-               if (err == EMULATE_USER_EXIT) {
-                       ++vcpu->stat.mmio_exits;
-                       ret = 0;
-                       goto out;
-               }
-
-               if (err != EMULATE_DONE)
-                       goto emulation_error;
+               if (!kvm_emulate_instruction(vcpu, 0))
+                       return 0;
 
                if (vmx->emulation_required && !vmx->rmode.vm86_active &&
-                   vcpu->arch.exception.pending)
-                       goto emulation_error;
+                   vcpu->arch.exception.pending) {
+                       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+                       vcpu->run->internal.suberror =
+                                               KVM_INTERNAL_ERROR_EMULATION;
+                       vcpu->run->internal.ndata = 0;
+                       return 0;
+               }
 
                if (vcpu->arch.halt_request) {
                        vcpu->arch.halt_request = 0;
-                       ret = kvm_vcpu_halt(vcpu);
-                       goto out;
+                       return kvm_vcpu_halt(vcpu);
                }
 
+               /*
+                * Note, return 1 and not 0, vcpu_run() is responsible for
+                * morphing the pending signal into the proper return code.
+                */
                if (signal_pending(current))
-                       goto out;
+                       return 1;
+
                if (need_resched())
                        schedule();
        }
 
-out:
-       return ret;
-
-emulation_error:
-       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-       vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-       vcpu->run->internal.ndata = 0;
-       return 0;
+       return 1;
 }
 
 static void grow_ple_window(struct kvm_vcpu *vcpu)
@@ -5474,6 +5538,14 @@ static int handle_encls(struct kvm_vcpu *vcpu)
        return 1;
 }
 
+static int handle_unexpected_vmexit(struct kvm_vcpu *vcpu)
+{
+       kvm_skip_emulated_instruction(vcpu);
+       WARN_ONCE(1, "Unexpected VM-Exit Reason = 0x%x",
+               vmcs_read32(VM_EXIT_REASON));
+       return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -5525,13 +5597,15 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_INVVPID]                 = handle_vmx_instruction,
        [EXIT_REASON_RDRAND]                  = handle_invalid_op,
        [EXIT_REASON_RDSEED]                  = handle_invalid_op,
-       [EXIT_REASON_XSAVES]                  = handle_xsaves,
-       [EXIT_REASON_XRSTORS]                 = handle_xrstors,
+       [EXIT_REASON_XSAVES]                  = handle_unexpected_vmexit,
+       [EXIT_REASON_XRSTORS]                 = handle_unexpected_vmexit,
        [EXIT_REASON_PML_FULL]                = handle_pml_full,
        [EXIT_REASON_INVPCID]                 = handle_invpcid,
        [EXIT_REASON_VMFUNC]                  = handle_vmx_instruction,
        [EXIT_REASON_PREEMPTION_TIMER]        = handle_preemption_timer,
        [EXIT_REASON_ENCLS]                   = handle_encls,
+       [EXIT_REASON_UMWAIT]                  = handle_unexpected_vmexit,
+       [EXIT_REASON_TPAUSE]                  = handle_unexpected_vmexit,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -6362,6 +6436,23 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
                                        msrs[i].host, false);
 }
 
+static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx)
+{
+       u32 host_umwait_control;
+
+       if (!vmx_has_waitpkg(vmx))
+               return;
+
+       host_umwait_control = get_umwait_control_msr();
+
+       if (vmx->msr_ia32_umwait_control != host_umwait_control)
+               add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL,
+                       vmx->msr_ia32_umwait_control,
+                       host_umwait_control, false);
+       else
+               clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL);
+}
+
 static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6456,6 +6547,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
        pt_guest_enter(vmx);
 
        atomic_switch_perf_msrs(vmx);
+       atomic_switch_umwait_control_msr(vmx);
 
        if (enable_preemption_timer)
                vmx_update_hv_timer(vcpu);
@@ -6511,6 +6603,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
                current_evmcs->hv_clean_fields |=
                        HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 
+       if (static_branch_unlikely(&enable_evmcs))
+               current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index;
+
        /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
        if (vmx->host_debugctlmsr)
                update_debugctlmsr(vmx->host_debugctlmsr);
@@ -6578,6 +6673,7 @@ static struct kvm *vmx_vm_alloc(void)
 
 static void vmx_vm_free(struct kvm *kvm)
 {
+       kfree(kvm->arch.hyperv.hv_pa_pg);
        vfree(to_kvm_vmx(kvm));
 }
 
@@ -7706,7 +7802,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 
        .run = vmx_vcpu_run,
        .handle_exit = vmx_handle_exit,
-       .skip_emulated_instruction = __skip_emulated_instruction,
+       .skip_emulated_instruction = skip_emulated_instruction,
        .set_interrupt_shadow = vmx_set_interrupt_shadow,
        .get_interrupt_shadow = vmx_get_interrupt_shadow,
        .patch_hypercall = vmx_patch_hypercall,
@@ -7837,6 +7933,7 @@ static void vmx_exit(void)
                        if (!vp_ap)
                                continue;
 
+                       vp_ap->nested_control.features.directhypercall = 0;
                        vp_ap->current_nested_vmcs = 0;
                        vp_ap->enlighten_vmentry = 0;
                }
@@ -7876,6 +7973,11 @@ static int __init vmx_init(void)
                        pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
                        static_branch_enable(&enable_evmcs);
                }
+
+               if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
+                       vmx_x86_ops.enable_direct_tlbflush
+                               = hv_enable_direct_tlbflush;
+
        } else {
                enlightened_vmcs = false;
        }
index 64d5a48..bee1668 100644 (file)
@@ -14,6 +14,8 @@
 extern const u32 vmx_msr_index[];
 extern u64 host_efer;
 
+extern u32 get_umwait_control_msr(void);
+
 #define MSR_TYPE_R     1
 #define MSR_TYPE_W     2
 #define MSR_TYPE_RW    3
@@ -211,6 +213,7 @@ struct vcpu_vmx {
 #endif
 
        u64                   spec_ctrl;
+       u32                   msr_ia32_umwait_control;
 
        u32 secondary_exec_control;
 
@@ -497,6 +500,12 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx)
        vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
 }
 
+static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
+{
+       return vmx->secondary_exec_control &
+               SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+}
+
 void dump_vmcs(void);
 
 #endif /* __KVM_X86_VMX_H */
index dfd6412..0ed07d8 100644 (file)
@@ -360,7 +360,8 @@ EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 asmlinkage __visible void kvm_spurious_fault(void)
 {
        /* Fault while not rebooting.  We want the trace. */
-       BUG();
+       if (!kvm_rebooting)
+               BUG();
 }
 EXPORT_SYMBOL_GPL(kvm_spurious_fault);
 
@@ -1145,6 +1146,44 @@ static u32 msrs_to_save[] = {
        MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
        MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
        MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
+       MSR_IA32_UMWAIT_CONTROL,
+
+       MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
+       MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3,
+       MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
+       MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
+       MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
+       MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
+       MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
+       MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
+       MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
+       MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
+       MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
+       MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
+       MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
+       MSR_ARCH_PERFMON_PERFCTR0 + 18, MSR_ARCH_PERFMON_PERFCTR0 + 19,
+       MSR_ARCH_PERFMON_PERFCTR0 + 20, MSR_ARCH_PERFMON_PERFCTR0 + 21,
+       MSR_ARCH_PERFMON_PERFCTR0 + 22, MSR_ARCH_PERFMON_PERFCTR0 + 23,
+       MSR_ARCH_PERFMON_PERFCTR0 + 24, MSR_ARCH_PERFMON_PERFCTR0 + 25,
+       MSR_ARCH_PERFMON_PERFCTR0 + 26, MSR_ARCH_PERFMON_PERFCTR0 + 27,
+       MSR_ARCH_PERFMON_PERFCTR0 + 28, MSR_ARCH_PERFMON_PERFCTR0 + 29,
+       MSR_ARCH_PERFMON_PERFCTR0 + 30, MSR_ARCH_PERFMON_PERFCTR0 + 31,
+       MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 18, MSR_ARCH_PERFMON_EVENTSEL0 + 19,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 20, MSR_ARCH_PERFMON_EVENTSEL0 + 21,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 22, MSR_ARCH_PERFMON_EVENTSEL0 + 23,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 24, MSR_ARCH_PERFMON_EVENTSEL0 + 25,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 26, MSR_ARCH_PERFMON_EVENTSEL0 + 27,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 28, MSR_ARCH_PERFMON_EVENTSEL0 + 29,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 30, MSR_ARCH_PERFMON_EVENTSEL0 + 31,
 };
 
 static unsigned num_msrs_to_save;
@@ -3169,7 +3208,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_HYPERV_EVENTFD:
        case KVM_CAP_HYPERV_TLBFLUSH:
        case KVM_CAP_HYPERV_SEND_IPI:
-       case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
        case KVM_CAP_HYPERV_CPUID:
        case KVM_CAP_PCI_SEGMENT:
        case KVM_CAP_DEBUGREGS:
@@ -3246,6 +3284,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                r = kvm_x86_ops->get_nested_state ?
                        kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
                break;
+       case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
+               r = kvm_x86_ops->enable_direct_tlbflush != NULL;
+               break;
+       case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
+               r = kvm_x86_ops->nested_enable_evmcs != NULL;
+               break;
        default:
                break;
        }
@@ -4019,6 +4063,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
                                r = -EFAULT;
                }
                return r;
+       case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
+               if (!kvm_x86_ops->enable_direct_tlbflush)
+                       return -ENOTTY;
+
+               return kvm_x86_ops->enable_direct_tlbflush(vcpu);
 
        default:
                return -EINVAL;
@@ -5051,6 +5100,11 @@ static void kvm_init_msr_list(void)
        u32 dummy[2];
        unsigned i, j;
 
+       BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
+                        "Please update the fixed PMCs in msrs_to_save[]");
+       BUILD_BUG_ON_MSG(INTEL_PMC_MAX_GENERIC != 32,
+                        "Please update the generic perfctr/eventsel MSRs in msrs_to_save[]");
+
        for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
                if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
                        continue;
@@ -5389,7 +5443,6 @@ EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
 int handle_ud(struct kvm_vcpu *vcpu)
 {
        int emul_type = EMULTYPE_TRAP_UD;
-       enum emulation_result er;
        char sig[5]; /* ud2; .ascii "kvm" */
        struct x86_exception e;
 
@@ -5398,15 +5451,10 @@ int handle_ud(struct kvm_vcpu *vcpu)
                                sig, sizeof(sig), &e) == 0 &&
            memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
                kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
-               emul_type = 0;
+               emul_type = EMULTYPE_TRAP_UD_FORCED;
        }
 
-       er = kvm_emulate_instruction(vcpu, emul_type);
-       if (er == EMULATE_USER_EXIT)
-               return 0;
-       if (er != EMULATE_DONE)
-               kvm_queue_exception(vcpu, UD_VECTOR);
-       return 1;
+       return kvm_emulate_instruction(vcpu, emul_type);
 }
 EXPORT_SYMBOL_GPL(handle_ud);
 
@@ -6228,7 +6276,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
        vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
 }
 
-int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
+void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 {
        struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
        int ret;
@@ -6240,37 +6288,43 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
        ctxt->_eip = ctxt->eip + inc_eip;
        ret = emulate_int_real(ctxt, irq);
 
-       if (ret != X86EMUL_CONTINUE)
-               return EMULATE_FAIL;
-
-       ctxt->eip = ctxt->_eip;
-       kvm_rip_write(vcpu, ctxt->eip);
-       kvm_set_rflags(vcpu, ctxt->eflags);
-
-       return EMULATE_DONE;
+       if (ret != X86EMUL_CONTINUE) {
+               kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+       } else {
+               ctxt->eip = ctxt->_eip;
+               kvm_rip_write(vcpu, ctxt->eip);
+               kvm_set_rflags(vcpu, ctxt->eflags);
+       }
 }
 EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
 
 static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
 {
-       int r = EMULATE_DONE;
-
        ++vcpu->stat.insn_emulation_fail;
        trace_kvm_emulate_insn_failed(vcpu);
 
-       if (emulation_type & EMULTYPE_NO_UD_ON_FAIL)
-               return EMULATE_FAIL;
+       if (emulation_type & EMULTYPE_VMWARE_GP) {
+               kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+               return 1;
+       }
 
-       if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
+       if (emulation_type & EMULTYPE_SKIP) {
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
                vcpu->run->internal.ndata = 0;
-               r = EMULATE_USER_EXIT;
+               return 0;
        }
 
        kvm_queue_exception(vcpu, UD_VECTOR);
 
-       return r;
+       if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
+               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+               vcpu->run->internal.ndata = 0;
+               return 0;
+       }
+
+       return 1;
 }
 
 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
@@ -6425,7 +6479,7 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
        return dr6;
 }
 
-static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
+static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *kvm_run = vcpu->run;
 
@@ -6434,10 +6488,10 @@ static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
                kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
                kvm_run->debug.arch.exception = DB_VECTOR;
                kvm_run->exit_reason = KVM_EXIT_DEBUG;
-               *r = EMULATE_USER_EXIT;
-       } else {
-               kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
+               return 0;
        }
+       kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
+       return 1;
 }
 
 int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
@@ -6446,7 +6500,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
        int r;
 
        r = kvm_x86_ops->skip_emulated_instruction(vcpu);
-       if (unlikely(r != EMULATE_DONE))
+       if (unlikely(!r))
                return 0;
 
        /*
@@ -6458,8 +6512,8 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
         * that sets the TF flag".
         */
        if (unlikely(rflags & X86_EFLAGS_TF))
-               kvm_vcpu_do_singlestep(vcpu, &r);
-       return r == EMULATE_DONE;
+               r = kvm_vcpu_do_singlestep(vcpu);
+       return r;
 }
 EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
 
@@ -6478,7 +6532,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
                        kvm_run->debug.arch.pc = eip;
                        kvm_run->debug.arch.exception = DB_VECTOR;
                        kvm_run->exit_reason = KVM_EXIT_DEBUG;
-                       *r = EMULATE_USER_EXIT;
+                       *r = 0;
                        return true;
                }
        }
@@ -6494,7 +6548,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
                        vcpu->arch.dr6 &= ~DR_TRAP_BITS;
                        vcpu->arch.dr6 |= dr6 | DR6_RTM;
                        kvm_queue_exception(vcpu, DB_VECTOR);
-                       *r = EMULATE_DONE;
+                       *r = 1;
                        return true;
                }
        }
@@ -6578,11 +6632,14 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                trace_kvm_emulate_insn_start(vcpu);
                ++vcpu->stat.insn_emulation;
                if (r != EMULATION_OK)  {
-                       if (emulation_type & EMULTYPE_TRAP_UD)
-                               return EMULATE_FAIL;
+                       if ((emulation_type & EMULTYPE_TRAP_UD) ||
+                           (emulation_type & EMULTYPE_TRAP_UD_FORCED)) {
+                               kvm_queue_exception(vcpu, UD_VECTOR);
+                               return 1;
+                       }
                        if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
                                                emulation_type))
-                               return EMULATE_DONE;
+                               return 1;
                        if (ctxt->have_exception) {
                                /*
                                 * #UD should result in just EMULATION_FAILED, and trap-like
@@ -6591,28 +6648,32 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                                WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
                                             exception_type(ctxt->exception.vector) == EXCPT_TRAP);
                                inject_emulated_exception(vcpu);
-                               return EMULATE_DONE;
+                               return 1;
                        }
-                       if (emulation_type & EMULTYPE_SKIP)
-                               return EMULATE_FAIL;
                        return handle_emulation_failure(vcpu, emulation_type);
                }
        }
 
-       if ((emulation_type & EMULTYPE_VMWARE) &&
-           !is_vmware_backdoor_opcode(ctxt))
-               return EMULATE_FAIL;
+       if ((emulation_type & EMULTYPE_VMWARE_GP) &&
+           !is_vmware_backdoor_opcode(ctxt)) {
+               kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+               return 1;
+       }
 
+       /*
+        * Note, EMULTYPE_SKIP is intended for use *only* by vendor callbacks
+        * for kvm_skip_emulated_instruction().  The caller is responsible for
+        * updating interruptibility state and injecting single-step #DBs.
+        */
        if (emulation_type & EMULTYPE_SKIP) {
                kvm_rip_write(vcpu, ctxt->_eip);
                if (ctxt->eflags & X86_EFLAGS_RF)
                        kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
-               kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
-               return EMULATE_DONE;
+               return 1;
        }
 
        if (retry_instruction(ctxt, cr2, emulation_type))
-               return EMULATE_DONE;
+               return 1;
 
        /* this is needed for vmware backdoor interface to work since it
           changes registers values  during IO operation */
@@ -6628,18 +6689,18 @@ restart:
        r = x86_emulate_insn(ctxt);
 
        if (r == EMULATION_INTERCEPTED)
-               return EMULATE_DONE;
+               return 1;
 
        if (r == EMULATION_FAILED) {
                if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
                                        emulation_type))
-                       return EMULATE_DONE;
+                       return 1;
 
                return handle_emulation_failure(vcpu, emulation_type);
        }
 
        if (ctxt->have_exception) {
-               r = EMULATE_DONE;
+               r = 1;
                if (inject_emulated_exception(vcpu))
                        return r;
        } else if (vcpu->arch.pio.count) {
@@ -6650,16 +6711,18 @@ restart:
                        writeback = false;
                        vcpu->arch.complete_userspace_io = complete_emulated_pio;
                }
-               r = EMULATE_USER_EXIT;
+               r = 0;
        } else if (vcpu->mmio_needed) {
+               ++vcpu->stat.mmio_exits;
+
                if (!vcpu->mmio_is_write)
                        writeback = false;
-               r = EMULATE_USER_EXIT;
+               r = 0;
                vcpu->arch.complete_userspace_io = complete_emulated_mmio;
        } else if (r == EMULATION_RESTART)
                goto restart;
        else
-               r = EMULATE_DONE;
+               r = 1;
 
        if (writeback) {
                unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
@@ -6668,8 +6731,8 @@ restart:
                if (!ctxt->have_exception ||
                    exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
                        kvm_rip_write(vcpu, ctxt->eip);
-                       if (r == EMULATE_DONE && ctxt->tf)
-                               kvm_vcpu_do_singlestep(vcpu, &r);
+                       if (r && ctxt->tf)
+                               r = kvm_vcpu_do_singlestep(vcpu);
                        __kvm_set_rflags(vcpu, ctxt->eflags);
                }
 
@@ -8263,12 +8326,11 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
 {
        int r;
+
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
        r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-       if (r != EMULATE_DONE)
-               return 0;
-       return 1;
+       return r;
 }
 
 static int complete_emulated_pio(struct kvm_vcpu *vcpu)
@@ -8636,14 +8698,17 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 
        ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
                                   has_error_code, error_code);
-
-       if (ret)
-               return EMULATE_FAIL;
+       if (ret) {
+               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+               vcpu->run->internal.ndata = 0;
+               return 0;
+       }
 
        kvm_rip_write(vcpu, ctxt->eip);
        kvm_set_rflags(vcpu, ctxt->eflags);
        kvm_make_request(KVM_REQ_EVENT, vcpu);
-       return EMULATE_DONE;
+       return 1;
 }
 EXPORT_SYMBOL_GPL(kvm_task_switch);
 
@@ -9361,6 +9426,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
        INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+       INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
        INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
        atomic_set(&kvm->arch.noncoherent_dma_count, 0);
 
@@ -9690,8 +9756,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
         * Scan sptes if dirty logging has been stopped, dropping those
         * which can be collapsed into a single large-page spte.  Later
         * page faults will create the large-page sptes.
+        *
+        * There is no need to do this in any of the following cases:
+        * CREATE:      No dirty mappings will already exist.
+        * MOVE/DELETE: The old mappings will already have been cleaned up by
+        *              kvm_arch_flush_shadow_memslot()
         */
-       if ((change != KVM_MR_DELETE) &&
+       if (change == KVM_MR_FLAGS_ONLY &&
                (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
                !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
                kvm_mmu_zap_collapsible_sptes(kvm, new);
index b5274e2..dbf7442 100644 (file)
@@ -261,7 +261,7 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
 }
 
 void kvm_set_pending_timer(struct kvm_vcpu *vcpu);
-int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
+void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 
 void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
 u64 get_kvmclock_ns(struct kvm *kvm);
index fa16036..65ebe4b 100644 (file)
@@ -54,23 +54,10 @@ static u64 get_subtree_max_end(struct rb_node *node)
        return ret;
 }
 
-static u64 compute_subtree_max_end(struct memtype *data)
-{
-       u64 max_end = data->end, child_max_end;
-
-       child_max_end = get_subtree_max_end(data->rb.rb_right);
-       if (child_max_end > max_end)
-               max_end = child_max_end;
-
-       child_max_end = get_subtree_max_end(data->rb.rb_left);
-       if (child_max_end > max_end)
-               max_end = child_max_end;
-
-       return max_end;
-}
+#define NODE_END(node) ((node)->end)
 
-RB_DECLARE_CALLBACKS(static, memtype_rb_augment_cb, struct memtype, rb,
-                    u64, subtree_max_end, compute_subtree_max_end)
+RB_DECLARE_CALLBACKS_MAX(static, memtype_rb_augment_cb,
+                        struct memtype, rb, u64, subtree_max_end, NODE_END)
 
 /* Find the first (lowest start addr) overlapping range from rb tree */
 static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
index 44816ff..3e4b903 100644 (file)
@@ -45,7 +45,7 @@ early_param("userpte", setup_userpte);
 
 void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 {
-       pgtable_page_dtor(pte);
+       pgtable_pte_page_dtor(pte);
        paravirt_release_pte(page_to_pfn(pte));
        paravirt_tlb_remove_table(tlb, pte);
 }
@@ -357,7 +357,7 @@ static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
 
 static struct kmem_cache *pgd_cache;
 
-void __init pgd_cache_init(void)
+void __init pgtable_cache_init(void)
 {
        /*
         * When PAE kernel is running as a Xen domain, it does not use
@@ -402,10 +402,6 @@ static inline void _pgd_free(pgd_t *pgd)
 }
 #else
 
-void __init pgd_cache_init(void)
-{
-}
-
 static inline pgd_t *_pgd_alloc(void)
 {
        return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
index 0881e1f..a8bd952 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/io.h>
 #include <linux/mmiotrace.h>
+#include <linux/security.h>
 
 static unsigned long mmio_address;
 module_param_hw(mmio_address, ulong, iomem, 0);
@@ -115,6 +116,10 @@ static void do_test_bulk_ioremapping(void)
 static int __init init(void)
 {
        unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
+       int ret = security_locked_down(LOCKDOWN_MMIOTRACE);
+
+       if (ret)
+               return ret;
 
        if (mmio_address == 0) {
                pr_err("you have to use the module argument mmio_address.\n");
index 5277490..fb4ee54 100644 (file)
@@ -25,6 +25,7 @@ KCOV_INSTRUMENT := n
 
 PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
 PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss
+PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN)
 
 # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
 # in turn leaves some undefined symbols like __fentry__ in purgatory and not
index f31e5d9..165be7f 100644 (file)
@@ -2,14 +2,7 @@
 #ifndef _ASM_UM_BARRIER_H_
 #define _ASM_UM_BARRIER_H_
 
-#include <asm/asm.h>
-#include <asm/segment.h>
-#include <asm/cpufeatures.h>
-#include <asm/cmpxchg.h>
-#include <asm/nops.h>
-
-#include <linux/kernel.h>
-#include <linux/irqflags.h>
+#include <asm/alternative.h>
 
 /*
  * Force strict CPU ordering.
@@ -30,9 +23,6 @@
 
 #endif /* CONFIG_X86_32 */
 
-#define dma_rmb()      barrier()
-#define dma_wmb()      barrier()
-
 #include <asm-generic/barrier.h>
 
 #endif
index ac9c02b..8918687 100644 (file)
@@ -47,7 +47,7 @@ time_t __vdso_time(time_t *t)
 
        return secs;
 }
-int time(time_t *t) __attribute__((weak, alias("__vdso_time")));
+time_t time(time_t *t) __attribute__((weak, alias("__vdso_time")));
 
 long
 __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
index dd744aa..1d38f0e 100644 (file)
@@ -55,7 +55,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
        if (!pte)
                return NULL;
        page = virt_to_page(pte);
-       if (!pgtable_page_ctor(page)) {
+       if (!pgtable_pte_page_ctor(page)) {
                __free_page(page);
                return NULL;
        }
@@ -69,7 +69,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
-       pgtable_page_dtor(pte);
+       pgtable_pte_page_dtor(pte);
        __free_page(pte);
 }
 #define pmd_pgtable(pmd) pmd_page(pmd)
index ce3ff5e..3f7fe5a 100644 (file)
@@ -238,7 +238,6 @@ extern void paging_init(void);
 # define swapper_pg_dir NULL
 static inline void paging_init(void) { }
 #endif
-static inline void pgtable_cache_init(void) { }
 
 /*
  * The pmd contains the kernel virtual address of the pte page.
index 06875fe..856e2da 100644 (file)
@@ -160,9 +160,6 @@ static inline void invalidate_dtlb_mapping (unsigned address)
                invalidate_dtlb_entry(tlb_entry);
 }
 
-#define check_pgt_cache()      do { } while (0)
-
-
 /*
  * DO NOT USE THESE FUNCTIONS.  These instructions aren't part of the Xtensa
  * ISA and exist only for test purposes..
index ebbb488..e5e6437 100644 (file)
 #define MADV_WIPEONFORK 18             /* Zero memory on fork, child only */
 #define MADV_KEEPONFORK 19             /* Undo MADV_WIPEONFORK */
 
+#define MADV_COLD      20              /* deactivate these pages */
+#define MADV_PAGEOUT   21              /* reclaim these pages */
+
 /* compatibility flags */
 #define MAP_FILE       0
 
index b33be92..0319d63 100644 (file)
@@ -2016,7 +2016,7 @@ static void bfq_add_request(struct request *rq)
                     (bfqq->last_serv_time_ns > 0 &&
                      bfqd->rqs_injected && bfqd->rq_in_driver > 0)) &&
                    time_is_before_eq_jiffies(bfqq->decrease_time_jif +
-                                             msecs_to_jiffies(100))) {
+                                             msecs_to_jiffies(10))) {
                        bfqd->last_empty_occupied_ns = ktime_get_ns();
                        /*
                         * Start the state machine for measuring the
@@ -2025,7 +2025,21 @@ static void bfq_add_request(struct request *rq)
                         * be set when rq will be dispatched.
                         */
                        bfqd->wait_dispatch = true;
-                       bfqd->rqs_injected = false;
+                       /*
+                        * If there is no I/O in service in the drive,
+                        * then any injection that occurred before the
+                        * arrival of rq will not affect the total
+                        * service time of rq. So the injection limit
+                        * must not be updated as a function of such
+                        * total service time, unless new injection
+                        * occurs before rq is completed. To have the
+                        * injection limit updated only in the latter
+                        * case, reset rqs_injected here (rqs_injected
+                        * will be set in case injection is performed
+                        * on bfqq before rq is completed).
+                        */
+                       if (bfqd->rq_in_driver == 0)
+                               bfqd->rqs_injected = false;
                }
        }
 
@@ -5784,14 +5798,14 @@ static void bfq_update_inject_limit(struct bfq_data *bfqd,
        u64 tot_time_ns = ktime_get_ns() - bfqd->last_empty_occupied_ns;
        unsigned int old_limit = bfqq->inject_limit;
 
-       if (bfqq->last_serv_time_ns > 0) {
+       if (bfqq->last_serv_time_ns > 0 && bfqd->rqs_injected) {
                u64 threshold = (bfqq->last_serv_time_ns * 3)>>1;
 
                if (tot_time_ns >= threshold && old_limit > 0) {
                        bfqq->inject_limit--;
                        bfqq->decrease_time_jif = jiffies;
                } else if (tot_time_ns < threshold &&
-                          old_limit < bfqd->max_rq_in_driver<<1)
+                          old_limit <= bfqd->max_rq_in_driver)
                        bfqq->inject_limit++;
        }
 
@@ -5809,12 +5823,14 @@ static void bfq_update_inject_limit(struct bfq_data *bfqd,
         */
        if ((bfqq->last_serv_time_ns == 0 && bfqd->rq_in_driver == 1) ||
            tot_time_ns < bfqq->last_serv_time_ns) {
+               if (bfqq->last_serv_time_ns == 0) {
+                       /*
+                        * Now we certainly have a base value: make sure we
+                        * start trying injection.
+                        */
+                       bfqq->inject_limit = max_t(unsigned int, 1, old_limit);
+               }
                bfqq->last_serv_time_ns = tot_time_ns;
-               /*
-                * Now we certainly have a base value: make sure we
-                * start trying injection.
-                */
-               bfqq->inject_limit = max_t(unsigned int, 1, old_limit);
        } else if (!bfqd->rqs_injected && bfqd->rq_in_driver == 1)
                /*
                 * No I/O injected and no request still in service in
@@ -5830,6 +5846,7 @@ static void bfq_update_inject_limit(struct bfq_data *bfqd,
 
        /* update complete, not waiting for any request completion any longer */
        bfqd->waited_rq = NULL;
+       bfqd->rqs_injected = false;
 }
 
 /*
index 875e8d1..d5e668e 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/ratelimit.h>
 #include <linux/pm_runtime.h>
 #include <linux/blk-cgroup.h>
+#include <linux/t10-pi.h>
 #include <linux/debugfs.h>
 #include <linux/bpf.h>
 #include <linux/psi.h>
@@ -1436,6 +1437,12 @@ bool blk_update_request(struct request *req, blk_status_t error,
        if (!req->bio)
                return false;
 
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+       if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
+           error == BLK_STS_OK)
+               req->q->integrity.profile->complete_fn(req, nr_bytes);
+#endif
+
        if (unlikely(error && !blk_rq_is_passthrough(req) &&
                     !(req->rq_flags & RQF_QUIET)))
                print_req_error(req, error, __func__);
index aedd932..1eec9cb 100644 (file)
@@ -214,6 +214,16 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
 
        /* release the tag's ownership to the req cloned from */
        spin_lock_irqsave(&fq->mq_flush_lock, flags);
+
+       if (!refcount_dec_and_test(&flush_rq->ref)) {
+               fq->rq_status = error;
+               spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
+               return;
+       }
+
+       if (fq->rq_status != BLK_STS_OK)
+               error = fq->rq_status;
+
        hctx = flush_rq->mq_hctx;
        if (!q->elevator) {
                blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
index ca39b46..ff1070e 100644 (file)
@@ -368,10 +368,21 @@ static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter)
        return BLK_STS_OK;
 }
 
+static void blk_integrity_nop_prepare(struct request *rq)
+{
+}
+
+static void blk_integrity_nop_complete(struct request *rq,
+               unsigned int nr_bytes)
+{
+}
+
 static const struct blk_integrity_profile nop_profile = {
        .name = "nop",
        .generate_fn = blk_integrity_nop_fn,
        .verify_fn = blk_integrity_nop_fn,
+       .prepare_fn = blk_integrity_nop_prepare,
+       .complete_fn = blk_integrity_nop_complete,
 };
 
 /**
index 3b39deb..2a3db80 100644 (file)
@@ -529,8 +529,8 @@ struct iocg_wake_ctx {
 static const struct ioc_params autop[] = {
        [AUTOP_HDD] = {
                .qos                            = {
-                       [QOS_RLAT]              =         50000, /* 50ms */
-                       [QOS_WLAT]              =         50000,
+                       [QOS_RLAT]              =        250000, /* 250ms */
+                       [QOS_WLAT]              =        250000,
                        [QOS_MIN]               = VRATE_MIN_PPM,
                        [QOS_MAX]               = VRATE_MAX_PPM,
                },
@@ -1343,7 +1343,7 @@ static void ioc_timer_fn(struct timer_list *timer)
        u32 ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM];
        u32 missed_ppm[2], rq_wait_pct;
        u64 period_vtime;
-       int i;
+       int prev_busy_level, i;
 
        /* how were the latencies during the period? */
        ioc_lat_stat(ioc, missed_ppm, &rq_wait_pct);
@@ -1407,7 +1407,8 @@ static void ioc_timer_fn(struct timer_list *timer)
                 * comparing vdone against period start.  If lagging behind
                 * IOs from past periods, don't increase vrate.
                 */
-               if (!atomic_read(&iocg_to_blkg(iocg)->use_delay) &&
+               if ((ppm_rthr != MILLION || ppm_wthr != MILLION) &&
+                   !atomic_read(&iocg_to_blkg(iocg)->use_delay) &&
                    time_after64(vtime, vdone) &&
                    time_after64(vtime, now.vnow -
                                 MAX_LAGGING_PERIODS * period_vtime) &&
@@ -1531,26 +1532,29 @@ skip_surplus_transfers:
         * and experiencing shortages but not surpluses, we're too stingy
         * and should increase vtime rate.
         */
+       prev_busy_level = ioc->busy_level;
        if (rq_wait_pct > RQ_WAIT_BUSY_PCT ||
            missed_ppm[READ] > ppm_rthr ||
            missed_ppm[WRITE] > ppm_wthr) {
                ioc->busy_level = max(ioc->busy_level, 0);
                ioc->busy_level++;
-       } else if (nr_lagging) {
-               ioc->busy_level = max(ioc->busy_level, 0);
-       } else if (nr_shortages && !nr_surpluses &&
-                  rq_wait_pct <= RQ_WAIT_BUSY_PCT * UNBUSY_THR_PCT / 100 &&
+       } else if (rq_wait_pct <= RQ_WAIT_BUSY_PCT * UNBUSY_THR_PCT / 100 &&
                   missed_ppm[READ] <= ppm_rthr * UNBUSY_THR_PCT / 100 &&
                   missed_ppm[WRITE] <= ppm_wthr * UNBUSY_THR_PCT / 100) {
-               ioc->busy_level = min(ioc->busy_level, 0);
-               ioc->busy_level--;
+               /* take action iff there is contention */
+               if (nr_shortages && !nr_lagging) {
+                       ioc->busy_level = min(ioc->busy_level, 0);
+                       /* redistribute surpluses first */
+                       if (!nr_surpluses)
+                               ioc->busy_level--;
+               }
        } else {
                ioc->busy_level = 0;
        }
 
        ioc->busy_level = clamp(ioc->busy_level, -1000, 1000);
 
-       if (ioc->busy_level) {
+       if (ioc->busy_level > 0 || (ioc->busy_level < 0 && !nr_lagging)) {
                u64 vrate = atomic64_read(&ioc->vtime_rate);
                u64 vrate_min = ioc->vrate_min, vrate_max = ioc->vrate_max;
 
@@ -1592,6 +1596,10 @@ skip_surplus_transfers:
                atomic64_set(&ioc->vtime_rate, vrate);
                ioc->inuse_margin_vtime = DIV64_U64_ROUND_UP(
                        ioc->period_us * vrate * INUSE_MARGIN_PCT, 100);
+       } else if (ioc->busy_level != prev_busy_level || nr_lagging) {
+               trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate),
+                                          &missed_ppm, rq_wait_pct, nr_lagging,
+                                          nr_shortages, nr_surpluses);
        }
 
        ioc_refresh_params(ioc, false);
index c9d183d..ca22afd 100644 (file)
@@ -555,8 +555,6 @@ void blk_mq_sched_free_requests(struct request_queue *q)
        struct blk_mq_hw_ctx *hctx;
        int i;
 
-       lockdep_assert_held(&q->sysfs_lock);
-
        queue_for_each_hw_ctx(q, hctx, i) {
                if (hctx->sched_tags)
                        blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i);
index 20a49be..6e3b15f 100644 (file)
@@ -30,6 +30,7 @@
 #include <trace/events/block.h>
 
 #include <linux/blk-mq.h>
+#include <linux/t10-pi.h>
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-debugfs.h"
@@ -700,6 +701,11 @@ void blk_mq_start_request(struct request *rq)
                 */
                rq->nr_phys_segments++;
        }
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+       if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
+               q->integrity.profile->prepare_fn(rq);
+#endif
 }
 EXPORT_SYMBOL(blk_mq_start_request);
 
@@ -912,7 +918,10 @@ static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
         */
        if (blk_mq_req_expired(rq, next))
                blk_mq_rq_timed_out(rq, reserved);
-       if (refcount_dec_and_test(&rq->ref))
+
+       if (is_flush_rq(rq, hctx))
+               rq->end_io(rq, 0);
+       else if (refcount_dec_and_test(&rq->ref))
                __blk_mq_free_request(rq);
 
        return true;
index b82736c..46f5198 100644 (file)
@@ -482,7 +482,6 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
        blk_mq_quiesce_queue(q);
 
        wbt_set_min_lat(q, val);
-       wbt_update_limits(q);
 
        blk_mq_unquiesce_queue(q);
        blk_mq_unfreeze_queue(q);
@@ -989,13 +988,11 @@ int blk_register_queue(struct gendisk *disk)
                blk_mq_debugfs_register(q);
        }
 
-       /*
-        * The flag of QUEUE_FLAG_REGISTERED isn't set yet, so elevator
-        * switch won't happen at all.
-        */
+       mutex_lock(&q->sysfs_lock);
        if (q->elevator) {
                ret = elv_register_queue(q, false);
                if (ret) {
+                       mutex_unlock(&q->sysfs_lock);
                        mutex_unlock(&q->sysfs_dir_lock);
                        kobject_del(&q->kobj);
                        blk_trace_remove_sysfs(dev);
@@ -1005,7 +1002,6 @@ int blk_register_queue(struct gendisk *disk)
                has_elevator = true;
        }
 
-       mutex_lock(&q->sysfs_lock);
        blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
        wbt_enable_default(q);
        blk_throtl_register_queue(q);
@@ -1062,12 +1058,10 @@ void blk_unregister_queue(struct gendisk *disk)
        kobject_del(&q->kobj);
        blk_trace_remove_sysfs(disk_to_dev(disk));
 
-       /*
-        * q->kobj has been removed, so it is safe to check if elevator
-        * exists without holding q->sysfs_lock.
-        */
+       mutex_lock(&q->sysfs_lock);
        if (q->elevator)
                elv_unregister_queue(q);
+       mutex_unlock(&q->sysfs_lock);
        mutex_unlock(&q->sysfs_dir_lock);
 
        kobject_put(&disk_to_dev(disk)->kobj);
index ed347f7..47fba93 100644 (file)
@@ -19,6 +19,7 @@ struct blk_flush_queue {
        unsigned int            flush_queue_delayed:1;
        unsigned int            flush_pending_idx:1;
        unsigned int            flush_running_idx:1;
+       blk_status_t            rq_status;
        unsigned long           flush_pending_since;
        struct list_head        flush_queue[2];
        struct list_head        flush_data_in_flight;
@@ -47,6 +48,12 @@ static inline void __blk_get_queue(struct request_queue *q)
        kobject_get(&q->kobj);
 }
 
+static inline bool
+is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx)
+{
+       return hctx->fq->flush_rq == req;
+}
+
 struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
                int node, int cmd_size, gfp_t flags);
 void blk_free_flush_queue(struct blk_flush_queue *q);
@@ -194,6 +201,8 @@ void elv_unregister_queue(struct request_queue *q);
 static inline void elevator_exit(struct request_queue *q,
                struct elevator_queue *e)
 {
+       lockdep_assert_held(&q->sysfs_lock);
+
        blk_mq_sched_free_requests(q);
        __elevator_exit(q, e);
 }
index 785dd58..347dda1 100644 (file)
@@ -266,6 +266,7 @@ static blk_status_t bsg_queue_rq(struct blk_mq_hw_ctx *hctx,
        struct request *req = bd->rq;
        struct bsg_set *bset =
                container_of(q->tag_set, struct bsg_set, tag_set);
+       int sts = BLK_STS_IOERR;
        int ret;
 
        blk_mq_start_request(req);
@@ -274,14 +275,15 @@ static blk_status_t bsg_queue_rq(struct blk_mq_hw_ctx *hctx,
                return BLK_STS_IOERR;
 
        if (!bsg_prepare_job(dev, req))
-               return BLK_STS_IOERR;
+               goto out;
 
        ret = bset->job_fn(blk_mq_rq_to_pdu(req));
-       if (ret)
-               return BLK_STS_IOERR;
+       if (!ret)
+               sts = BLK_STS_OK;
 
+out:
        put_device(dev);
-       return BLK_STS_OK;
+       return sts;
 }
 
 /* called right after the request is allocated for the request_queue */
index bba10e8..5437059 100644 (file)
@@ -503,9 +503,7 @@ int elv_register_queue(struct request_queue *q, bool uevent)
                if (uevent)
                        kobject_uevent(&e->kobj, KOBJ_ADD);
 
-               mutex_lock(&q->sysfs_lock);
                e->registered = 1;
-               mutex_unlock(&q->sysfs_lock);
        }
        return error;
 }
@@ -523,11 +521,9 @@ void elv_unregister_queue(struct request_queue *q)
                kobject_uevent(&e->kobj, KOBJ_REMOVE);
                kobject_del(&e->kobj);
 
-               mutex_lock(&q->sysfs_lock);
                e->registered = 0;
                /* Re-enable throttling in case elevator disabled it */
                wbt_enable_default(q);
-               mutex_unlock(&q->sysfs_lock);
        }
 }
 
@@ -590,32 +586,11 @@ int elevator_switch_mq(struct request_queue *q,
        lockdep_assert_held(&q->sysfs_lock);
 
        if (q->elevator) {
-               if (q->elevator->registered) {
-                       mutex_unlock(&q->sysfs_lock);
-
-                       /*
-                        * Concurrent elevator switch can't happen becasue
-                        * sysfs write is always exclusively on same file.
-                        *
-                        * Also the elevator queue won't be freed after
-                        * sysfs_lock is released becasue kobject_del() in
-                        * blk_unregister_queue() waits for completion of
-                        * .store & .show on its attributes.
-                        */
+               if (q->elevator->registered)
                        elv_unregister_queue(q);
 
-                       mutex_lock(&q->sysfs_lock);
-               }
                ioc_clear_queue(q);
                elevator_exit(q, q->elevator);
-
-               /*
-                * sysfs_lock may be dropped, so re-check if queue is
-                * unregistered. If yes, don't switch to new elevator
-                * any more
-                */
-               if (!blk_queue_registered(q))
-                       return 0;
        }
 
        ret = blk_mq_init_sched(q, new_e);
@@ -623,11 +598,7 @@ int elevator_switch_mq(struct request_queue *q,
                goto out;
 
        if (new_e) {
-               mutex_unlock(&q->sysfs_lock);
-
                ret = elv_register_queue(q, true);
-
-               mutex_lock(&q->sysfs_lock);
                if (ret) {
                        elevator_exit(q, q->elevator);
                        goto out;
index 0c00946..9803c7e 100644 (file)
@@ -27,7 +27,7 @@ static __be16 t10_pi_ip_fn(void *data, unsigned int len)
  * tag.
  */
 static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
-               csum_fn *fn, unsigned int type)
+               csum_fn *fn, enum t10_dif_type type)
 {
        unsigned int i;
 
@@ -37,7 +37,7 @@ static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
                pi->guard_tag = fn(iter->data_buf, iter->interval);
                pi->app_tag = 0;
 
-               if (type == 1)
+               if (type == T10_PI_TYPE1_PROTECTION)
                        pi->ref_tag = cpu_to_be32(lower_32_bits(iter->seed));
                else
                        pi->ref_tag = 0;
@@ -51,17 +51,18 @@ static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
 }
 
 static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
-               csum_fn *fn, unsigned int type)
+               csum_fn *fn, enum t10_dif_type type)
 {
        unsigned int i;
 
+       BUG_ON(type == T10_PI_TYPE0_PROTECTION);
+
        for (i = 0 ; i < iter->data_size ; i += iter->interval) {
                struct t10_pi_tuple *pi = iter->prot_buf;
                __be16 csum;
 
-               switch (type) {
-               case 1:
-               case 2:
+               if (type == T10_PI_TYPE1_PROTECTION ||
+                   type == T10_PI_TYPE2_PROTECTION) {
                        if (pi->app_tag == T10_PI_APP_ESCAPE)
                                goto next;
 
@@ -73,12 +74,10 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
                                       iter->seed, be32_to_cpu(pi->ref_tag));
                                return BLK_STS_PROTECTION;
                        }
-                       break;
-               case 3:
+               } else if (type == T10_PI_TYPE3_PROTECTION) {
                        if (pi->app_tag == T10_PI_APP_ESCAPE &&
                            pi->ref_tag == T10_PI_REF_ESCAPE)
                                goto next;
-                       break;
                }
 
                csum = fn(iter->data_buf, iter->interval);
@@ -102,94 +101,40 @@ next:
 
 static blk_status_t t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
 {
-       return t10_pi_generate(iter, t10_pi_crc_fn, 1);
+       return t10_pi_generate(iter, t10_pi_crc_fn, T10_PI_TYPE1_PROTECTION);
 }
 
 static blk_status_t t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
 {
-       return t10_pi_generate(iter, t10_pi_ip_fn, 1);
+       return t10_pi_generate(iter, t10_pi_ip_fn, T10_PI_TYPE1_PROTECTION);
 }
 
 static blk_status_t t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
 {
-       return t10_pi_verify(iter, t10_pi_crc_fn, 1);
+       return t10_pi_verify(iter, t10_pi_crc_fn, T10_PI_TYPE1_PROTECTION);
 }
 
 static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
 {
-       return t10_pi_verify(iter, t10_pi_ip_fn, 1);
-}
-
-static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
-{
-       return t10_pi_generate(iter, t10_pi_crc_fn, 3);
-}
-
-static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
-{
-       return t10_pi_generate(iter, t10_pi_ip_fn, 3);
-}
-
-static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
-{
-       return t10_pi_verify(iter, t10_pi_crc_fn, 3);
+       return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE1_PROTECTION);
 }
 
-static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
-{
-       return t10_pi_verify(iter, t10_pi_ip_fn, 3);
-}
-
-const struct blk_integrity_profile t10_pi_type1_crc = {
-       .name                   = "T10-DIF-TYPE1-CRC",
-       .generate_fn            = t10_pi_type1_generate_crc,
-       .verify_fn              = t10_pi_type1_verify_crc,
-};
-EXPORT_SYMBOL(t10_pi_type1_crc);
-
-const struct blk_integrity_profile t10_pi_type1_ip = {
-       .name                   = "T10-DIF-TYPE1-IP",
-       .generate_fn            = t10_pi_type1_generate_ip,
-       .verify_fn              = t10_pi_type1_verify_ip,
-};
-EXPORT_SYMBOL(t10_pi_type1_ip);
-
-const struct blk_integrity_profile t10_pi_type3_crc = {
-       .name                   = "T10-DIF-TYPE3-CRC",
-       .generate_fn            = t10_pi_type3_generate_crc,
-       .verify_fn              = t10_pi_type3_verify_crc,
-};
-EXPORT_SYMBOL(t10_pi_type3_crc);
-
-const struct blk_integrity_profile t10_pi_type3_ip = {
-       .name                   = "T10-DIF-TYPE3-IP",
-       .generate_fn            = t10_pi_type3_generate_ip,
-       .verify_fn              = t10_pi_type3_verify_ip,
-};
-EXPORT_SYMBOL(t10_pi_type3_ip);
-
 /**
- * t10_pi_prepare - prepare PI prior submitting request to device
+ * t10_pi_type1_prepare - prepare PI prior to submitting request to device
  * @rq:              request with PI that should be prepared
- * @protection_type: PI type (Type 1/Type 2/Type 3)
  *
  * For Type 1/Type 2, the virtual start sector is the one that was
  * originally submitted by the block layer for the ref_tag usage. Due to
  * partitioning, MD/DM cloning, etc. the actual physical start sector is
  * likely to be different. Remap protection information to match the
  * physical LBA.
- *
- * Type 3 does not have a reference tag so no remapping is required.
  */
-void t10_pi_prepare(struct request *rq, u8 protection_type)
+static void t10_pi_type1_prepare(struct request *rq)
 {
        const int tuple_sz = rq->q->integrity.tuple_size;
        u32 ref_tag = t10_pi_ref_tag(rq);
        struct bio *bio;
 
-       if (protection_type == T10_PI_TYPE3_PROTECTION)
-               return;
-
        __rq_for_each_bio(bio, rq) {
                struct bio_integrity_payload *bip = bio_integrity(bio);
                u32 virt = bip_get_seed(bip) & 0xffffffff;
@@ -222,13 +167,11 @@ void t10_pi_prepare(struct request *rq, u8 protection_type)
                bip->bip_flags |= BIP_MAPPED_INTEGRITY;
        }
 }
-EXPORT_SYMBOL(t10_pi_prepare);
 
 /**
- * t10_pi_complete - prepare PI prior returning request to the block layer
+ * t10_pi_type1_complete - prepare PI prior to returning request to the blk layer
  * @rq:              request with PI that should be prepared
- * @protection_type: PI type (Type 1/Type 2/Type 3)
- * @intervals:       total elements to prepare
+ * @nr_bytes:        total bytes to prepare
  *
  * For Type 1/Type 2, the virtual start sector is the one that was
  * originally submitted by the block layer for the ref_tag usage. Due to
@@ -236,19 +179,14 @@ EXPORT_SYMBOL(t10_pi_prepare);
  * likely to be different. Since the physical start sector was submitted
  * to the device, we should remap it back to virtual values expected by the
  * block layer.
- *
- * Type 3 does not have a reference tag so no remapping is required.
  */
-void t10_pi_complete(struct request *rq, u8 protection_type,
-                    unsigned int intervals)
+static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
 {
+       unsigned intervals = nr_bytes >> rq->q->integrity.interval_exp;
        const int tuple_sz = rq->q->integrity.tuple_size;
        u32 ref_tag = t10_pi_ref_tag(rq);
        struct bio *bio;
 
-       if (protection_type == T10_PI_TYPE3_PROTECTION)
-               return;
-
        __rq_for_each_bio(bio, rq) {
                struct bio_integrity_payload *bip = bio_integrity(bio);
                u32 virt = bip_get_seed(bip) & 0xffffffff;
@@ -276,4 +214,73 @@ void t10_pi_complete(struct request *rq, u8 protection_type,
                }
        }
 }
-EXPORT_SYMBOL(t10_pi_complete);
+
+static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
+{
+       return t10_pi_generate(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION);
+}
+
+static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
+{
+       return t10_pi_generate(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
+}
+
+static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
+{
+       return t10_pi_verify(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION);
+}
+
+static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
+{
+       return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
+}
+
+/*
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+static void t10_pi_type3_prepare(struct request *rq)
+{
+}
+
+/*
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+static void t10_pi_type3_complete(struct request *rq, unsigned int nr_bytes)
+{
+}
+
+const struct blk_integrity_profile t10_pi_type1_crc = {
+       .name                   = "T10-DIF-TYPE1-CRC",
+       .generate_fn            = t10_pi_type1_generate_crc,
+       .verify_fn              = t10_pi_type1_verify_crc,
+       .prepare_fn             = t10_pi_type1_prepare,
+       .complete_fn            = t10_pi_type1_complete,
+};
+EXPORT_SYMBOL(t10_pi_type1_crc);
+
+const struct blk_integrity_profile t10_pi_type1_ip = {
+       .name                   = "T10-DIF-TYPE1-IP",
+       .generate_fn            = t10_pi_type1_generate_ip,
+       .verify_fn              = t10_pi_type1_verify_ip,
+       .prepare_fn             = t10_pi_type1_prepare,
+       .complete_fn            = t10_pi_type1_complete,
+};
+EXPORT_SYMBOL(t10_pi_type1_ip);
+
+const struct blk_integrity_profile t10_pi_type3_crc = {
+       .name                   = "T10-DIF-TYPE3-CRC",
+       .generate_fn            = t10_pi_type3_generate_crc,
+       .verify_fn              = t10_pi_type3_verify_crc,
+       .prepare_fn             = t10_pi_type3_prepare,
+       .complete_fn            = t10_pi_type3_complete,
+};
+EXPORT_SYMBOL(t10_pi_type3_crc);
+
+const struct blk_integrity_profile t10_pi_type3_ip = {
+       .name                   = "T10-DIF-TYPE3-IP",
+       .generate_fn            = t10_pi_type3_generate_ip,
+       .verify_fn              = t10_pi_type3_verify_ip,
+       .prepare_fn             = t10_pi_type3_prepare,
+       .complete_fn            = t10_pi_type3_complete,
+};
+EXPORT_SYMBOL(t10_pi_type3_ip);
index 1eba08a..7982911 100644 (file)
@@ -190,33 +190,27 @@ late_initcall(load_system_certificate_list);
 #ifdef CONFIG_SYSTEM_DATA_VERIFICATION
 
 /**
- * verify_pkcs7_signature - Verify a PKCS#7-based signature on system data.
+ * verify_pkcs7_message_sig - Verify a PKCS#7-based signature on system data.
  * @data: The data to be verified (NULL if expecting internal data).
  * @len: Size of @data.
- * @raw_pkcs7: The PKCS#7 message that is the signature.
- * @pkcs7_len: The size of @raw_pkcs7.
+ * @pkcs7: The PKCS#7 message that is the signature.
  * @trusted_keys: Trusted keys to use (NULL for builtin trusted keys only,
  *                                     (void *)1UL for all trusted keys).
  * @usage: The use to which the key is being put.
  * @view_content: Callback to gain access to content.
  * @ctx: Context for callback.
  */
-int verify_pkcs7_signature(const void *data, size_t len,
-                          const void *raw_pkcs7, size_t pkcs7_len,
-                          struct key *trusted_keys,
-                          enum key_being_used_for usage,
-                          int (*view_content)(void *ctx,
-                                              const void *data, size_t len,
-                                              size_t asn1hdrlen),
-                          void *ctx)
+int verify_pkcs7_message_sig(const void *data, size_t len,
+                            struct pkcs7_message *pkcs7,
+                            struct key *trusted_keys,
+                            enum key_being_used_for usage,
+                            int (*view_content)(void *ctx,
+                                                const void *data, size_t len,
+                                                size_t asn1hdrlen),
+                            void *ctx)
 {
-       struct pkcs7_message *pkcs7;
        int ret;
 
-       pkcs7 = pkcs7_parse_message(raw_pkcs7, pkcs7_len);
-       if (IS_ERR(pkcs7))
-               return PTR_ERR(pkcs7);
-
        /* The data should be detached - so we need to supply it. */
        if (data && pkcs7_supply_detached_data(pkcs7, data, len) < 0) {
                pr_err("PKCS#7 signature with non-detached data\n");
@@ -269,6 +263,41 @@ int verify_pkcs7_signature(const void *data, size_t len,
        }
 
 error:
+       pr_devel("<==%s() = %d\n", __func__, ret);
+       return ret;
+}
+
+/**
+ * verify_pkcs7_signature - Verify a PKCS#7-based signature on system data.
+ * @data: The data to be verified (NULL if expecting internal data).
+ * @len: Size of @data.
+ * @raw_pkcs7: The PKCS#7 message that is the signature.
+ * @pkcs7_len: The size of @raw_pkcs7.
+ * @trusted_keys: Trusted keys to use (NULL for builtin trusted keys only,
+ *                                     (void *)1UL for all trusted keys).
+ * @usage: The use to which the key is being put.
+ * @view_content: Callback to gain access to content.
+ * @ctx: Context for callback.
+ */
+int verify_pkcs7_signature(const void *data, size_t len,
+                          const void *raw_pkcs7, size_t pkcs7_len,
+                          struct key *trusted_keys,
+                          enum key_being_used_for usage,
+                          int (*view_content)(void *ctx,
+                                              const void *data, size_t len,
+                                              size_t asn1hdrlen),
+                          void *ctx)
+{
+       struct pkcs7_message *pkcs7;
+       int ret;
+
+       pkcs7 = pkcs7_parse_message(raw_pkcs7, pkcs7_len);
+       if (IS_ERR(pkcs7))
+               return PTR_ERR(pkcs7);
+
+       ret = verify_pkcs7_message_sig(data, len, pkcs7, trusted_keys, usage,
+                                      view_content, ctx);
+
        pkcs7_free_message(pkcs7);
        pr_devel("<==%s() = %d\n", __func__, ret);
        return ret;
index ad86463..9e52404 100644 (file)
@@ -487,6 +487,34 @@ config CRYPTO_ADIANTUM
 
          If unsure, say N.
 
+config CRYPTO_ESSIV
+       tristate "ESSIV support for block encryption"
+       select CRYPTO_AUTHENC
+       help
+         Encrypted salt-sector initialization vector (ESSIV) is an IV
+         generation method that is used in some cases by fscrypt and/or
+         dm-crypt. It uses the hash of the block encryption key as the
+         symmetric key for a block encryption pass applied to the input
+         IV, making low entropy IV sources more suitable for block
+         encryption.
+
+         This driver implements a crypto API template that can be
+         instantiated either as an skcipher or as an AEAD (depending on the
+         type of the first template argument), and which defers encryption
+         and decryption requests to the encapsulated cipher after applying
+         ESSIV to the input IV. Note that in the AEAD case, it is assumed
+         that the keys are presented in the same format used by the authenc
+         template, and that the IV appears at the end of the authenticated
+         associated data (AAD) region (which is how dm-crypt uses it.)
+
+         Note that the use of ESSIV is not recommended for new deployments,
+         and so this only needs to be enabled when interoperability with
+         existing encrypted volumes or filesystems is required, or when
+         building for a particular system that requires it (e.g., when
+         the SoC in question has accelerated CBC but not XTS, making CBC
+         combined with ESSIV the only feasible mode for h/w accelerated
+         block encryption)
+
 comment "Hash modes"
 
 config CRYPTO_CMAC
index 0d2cdd5..fcb1ee6 100644 (file)
@@ -165,6 +165,7 @@ obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o
 obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o
 obj-$(CONFIG_CRYPTO_OFB) += ofb.o
 obj-$(CONFIG_CRYPTO_ECC) += ecc.o
+obj-$(CONFIG_CRYPTO_ESSIV) += essiv.o
 
 ecdh_generic-y += ecdh.o
 ecdh_generic-y += ecdh_helper.o
index 11bee67..ce49820 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/err.h>
 #include <linux/asn1.h>
 #include <crypto/hash.h>
+#include <crypto/hash_info.h>
 #include <crypto/public_key.h>
 #include "pkcs7_parser.h"
 
@@ -29,6 +30,10 @@ static int pkcs7_digest(struct pkcs7_message *pkcs7,
 
        kenter(",%u,%s", sinfo->index, sinfo->sig->hash_algo);
 
+       /* The digest was calculated already. */
+       if (sig->digest)
+               return 0;
+
        if (!sinfo->sig->hash_algo)
                return -ENOPKG;
 
@@ -117,6 +122,34 @@ error_no_desc:
        return ret;
 }
 
+/**
+ * pkcs7_get_digest - Obtain the digest covered by a single-signer PKCS#7 message
+ * @pkcs7: The parsed PKCS#7 message.
+ * @buf: Set to point at the digest held by the message's signature.
+ * @len: Set to the size of that digest.
+ * @hash_algo: Set to the index in hash_algo_name[] of the digest algorithm.
+ *
+ * The outputs are only written on success.
+ *
+ * Returns 0 on success, -EBADMSG if the message does not carry exactly one
+ * signature, -ENOPKG if the signature's hash algorithm name is absent or not
+ * listed in hash_algo_name[], or the error returned by pkcs7_digest().
+ */
+int pkcs7_get_digest(struct pkcs7_message *pkcs7, const u8 **buf, u32 *len,
+                    enum hash_algo *hash_algo)
+{
+       struct pkcs7_signed_info *sinfo = pkcs7->signed_infos;
+       int i, ret;
+
+       /*
+        * This function doesn't support messages with more than one signature.
+        */
+       if (sinfo == NULL || sinfo->next != NULL)
+               return -EBADMSG;
+
+       ret = pkcs7_digest(pkcs7, sinfo);
+       if (ret)
+               return ret;
+
+       /*
+        * pkcs7_digest() returns early when the digest is already present,
+        * before it looks at the algorithm name, so check it here prior to
+        * handing it to strcmp().
+        */
+       if (!sinfo->sig->hash_algo)
+               return -ENOPKG;
+
+       for (i = 0; i < HASH_ALGO__LAST; i++)
+               if (!strcmp(hash_algo_name[i], sinfo->sig->hash_algo))
+                       break;
+
+       /* Don't report success while leaving *hash_algo unset. */
+       if (i == HASH_ALGO__LAST)
+               return -ENOPKG;
+
+       *buf = sinfo->sig->digest;
+       *len = sinfo->sig->digest_size;
+       *hash_algo = i;
+
+       return 0;
+}
+
 /*
  * Find the key (X.509 certificate) to use to verify a PKCS#7 message.  PKCS#7
  * uses the issuer's name and the issuing certificate serial number for
index 3b303fe..cc9dbce 100644 (file)
@@ -96,7 +96,7 @@ static int pefile_parse_binary(const void *pebuf, unsigned int pelen,
 
        if (!ddir->certs.virtual_address || !ddir->certs.size) {
                pr_debug("Unsigned PE binary\n");
-               return -EKEYREJECTED;
+               return -ENODATA;
        }
 
        chkaddr(ctx->header_size, ddir->certs.virtual_address,
@@ -403,6 +403,8 @@ error_no_desc:
  *  (*) 0 if at least one signature chain intersects with the keys in the trust
  *     keyring, or:
  *
+ *  (*) -ENODATA if there is no signature present.
+ *
  *  (*) -ENOPKG if a suitable crypto module couldn't be found for a check on a
  *     chain.
  *
diff --git a/crypto/essiv.c b/crypto/essiv.c
new file mode 100644 (file)
index 0000000..a8befc8
--- /dev/null
@@ -0,0 +1,663 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ESSIV skcipher and aead template for block encryption
+ *
+ * This template encapsulates the ESSIV IV generation algorithm used by
+ * dm-crypt and fscrypt, which converts the initial vector for the skcipher
+ * used for block encryption, by encrypting it using the hash of the
+ * skcipher key as encryption key. Usually, the input IV is a 64-bit sector
+ * number in LE representation zero-padded to the size of the IV, but this
+ * is not assumed by this driver.
+ *
+ * The typical use of this template is to instantiate the skcipher
+ * 'essiv(cbc(aes),sha256)', which is the only instantiation used by
+ * fscrypt, and the most relevant one for dm-crypt. However, dm-crypt
+ * also permits ESSIV to be used in combination with the authenc template,
+ * e.g., 'essiv(authenc(hmac(sha256),cbc(aes)),sha256)', in which case
+ * we need to instantiate an aead that accepts the same special key format
+ * as the authenc template, and deals with the way the encrypted IV is
+ * embedded into the AAD area of the aead request. This means the AEAD
+ * flavor produced by this template is tightly coupled to the way dm-crypt
+ * happens to use it.
+ *
+ * Copyright (c) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * Heavily based on:
+ * adiantum length-preserving encryption mode
+ *
+ * Copyright 2018 Google LLC
+ */
+
+#include <crypto/authenc.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <linux/module.h>
+
+#include "internal.h"
+
+struct essiv_instance_ctx {
+       union {
+               struct crypto_skcipher_spawn    skcipher_spawn;
+               struct crypto_aead_spawn        aead_spawn;
+       } u;
+       char    essiv_cipher_name[CRYPTO_MAX_ALG_NAME];
+       char    shash_driver_name[CRYPTO_MAX_ALG_NAME];
+};
+
+struct essiv_tfm_ctx {
+       union {
+               struct crypto_skcipher  *skcipher;
+               struct crypto_aead      *aead;
+       } u;
+       struct crypto_cipher            *essiv_cipher;
+       struct crypto_shash             *hash;
+       int                             ivoffset;
+};
+
+struct essiv_aead_request_ctx {
+       struct scatterlist              sg[4];
+       u8                              *assoc;
+       struct aead_request             aead_req;
+};
+
+/*
+ * Set the key of an essiv skcipher instance.
+ *
+ * The key is handed to the wrapped skcipher unchanged; its hash is then used
+ * as the key of the single-block cipher that encrypts the IV.  Request flags
+ * are passed down to, and result flags collected from, both inner transforms.
+ */
+static int essiv_skcipher_setkey(struct crypto_skcipher *tfm,
+                                const u8 *key, unsigned int keylen)
+{
+       struct essiv_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct crypto_skcipher *inner = ctx->u.skcipher;
+       struct crypto_cipher *ivcipher = ctx->essiv_cipher;
+       SHASH_DESC_ON_STACK(desc, ctx->hash);
+       u8 digest[HASH_MAX_DIGESTSIZE];
+       int ret;
+
+       /* Key the wrapped skcipher. */
+       crypto_skcipher_clear_flags(inner, CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_set_flags(inner, crypto_skcipher_get_flags(tfm) &
+                                        CRYPTO_TFM_REQ_MASK);
+       ret = crypto_skcipher_setkey(inner, key, keylen);
+       crypto_skcipher_set_flags(tfm, crypto_skcipher_get_flags(inner) &
+                                      CRYPTO_TFM_RES_MASK);
+       if (ret)
+               return ret;
+
+       /* Derive the IV-encryption key by hashing the skcipher key. */
+       desc->tfm = ctx->hash;
+       ret = crypto_shash_digest(desc, key, keylen, digest);
+       if (ret)
+               return ret;
+
+       /* Key the IV cipher with the full digest. */
+       crypto_cipher_clear_flags(ivcipher, CRYPTO_TFM_REQ_MASK);
+       crypto_cipher_set_flags(ivcipher, crypto_skcipher_get_flags(tfm) &
+                                         CRYPTO_TFM_REQ_MASK);
+       ret = crypto_cipher_setkey(ivcipher, digest,
+                                  crypto_shash_digestsize(ctx->hash));
+       crypto_skcipher_set_flags(tfm, crypto_cipher_get_flags(ivcipher) &
+                                      CRYPTO_TFM_RES_MASK);
+
+       return ret;
+}
+
+/*
+ * Set the key of an essiv AEAD instance.
+ *
+ * The key blob is passed to the wrapped AEAD as-is and must also parse as an
+ * authenc key.  The IV cipher is keyed with the hash of the encryption key
+ * followed by the authentication key.
+ */
+static int essiv_aead_setkey(struct crypto_aead *tfm, const u8 *key,
+                            unsigned int keylen)
+{
+       struct essiv_tfm_ctx *ctx = crypto_aead_ctx(tfm);
+       struct crypto_aead *inner = ctx->u.aead;
+       struct crypto_cipher *ivcipher = ctx->essiv_cipher;
+       SHASH_DESC_ON_STACK(desc, ctx->hash);
+       struct crypto_authenc_keys parts;
+       u8 digest[HASH_MAX_DIGESTSIZE];
+       int ret;
+
+       /* Key the wrapped AEAD. */
+       crypto_aead_clear_flags(inner, CRYPTO_TFM_REQ_MASK);
+       crypto_aead_set_flags(inner, crypto_aead_get_flags(tfm) &
+                                    CRYPTO_TFM_REQ_MASK);
+       ret = crypto_aead_setkey(inner, key, keylen);
+       crypto_aead_set_flags(tfm, crypto_aead_get_flags(inner) &
+                                  CRYPTO_TFM_RES_MASK);
+       if (ret)
+               return ret;
+
+       /* Split the blob into its encryption and authentication keys. */
+       if (crypto_authenc_extractkeys(&parts, key, keylen) != 0) {
+               crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+
+       /* digest = H(enckey || authkey) */
+       desc->tfm = ctx->hash;
+       ret = crypto_shash_init(desc);
+       if (!ret)
+               ret = crypto_shash_update(desc, parts.enckey,
+                                         parts.enckeylen);
+       if (!ret)
+               ret = crypto_shash_finup(desc, parts.authkey,
+                                        parts.authkeylen, digest);
+       if (ret)
+               return ret;
+
+       /* Key the IV cipher with the full digest. */
+       crypto_cipher_clear_flags(ivcipher, CRYPTO_TFM_REQ_MASK);
+       crypto_cipher_set_flags(ivcipher, crypto_aead_get_flags(tfm) &
+                                         CRYPTO_TFM_REQ_MASK);
+       ret = crypto_cipher_setkey(ivcipher, digest,
+                                  crypto_shash_digestsize(ctx->hash));
+       crypto_aead_set_flags(tfm, crypto_cipher_get_flags(ivcipher) &
+                                  CRYPTO_TFM_RES_MASK);
+
+       return ret;
+}
+
+/* Forward the requested authentication tag size to the wrapped AEAD. */
+static int essiv_aead_setauthsize(struct crypto_aead *tfm,
+                                 unsigned int authsize)
+{
+       struct essiv_tfm_ctx *ctx = crypto_aead_ctx(tfm);
+       struct crypto_aead *inner = ctx->u.aead;
+
+       return crypto_aead_setauthsize(inner, authsize);
+}
+
+static void essiv_skcipher_done(struct crypto_async_request *areq, int err)
+{
+       struct skcipher_request *req = areq->data;
+
+       skcipher_request_complete(req, err);
+}
+
+/*
+ * Common skcipher path: encrypt the caller's IV in place with the ESSIV
+ * cipher, then hand the request to the wrapped skcipher using that IV.
+ * @enc selects encryption (true) or decryption (false).
+ */
+static int essiv_skcipher_crypt(struct skcipher_request *req, bool enc)
+{
+       const struct essiv_tfm_ctx *ctx =
+               crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+       struct skcipher_request *inner = skcipher_request_ctx(req);
+
+       crypto_cipher_encrypt_one(ctx->essiv_cipher, req->iv, req->iv);
+
+       skcipher_request_set_tfm(inner, ctx->u.skcipher);
+       skcipher_request_set_crypt(inner, req->src, req->dst, req->cryptlen,
+                                  req->iv);
+       skcipher_request_set_callback(inner, skcipher_request_flags(req),
+                                     essiv_skcipher_done, req);
+
+       if (enc)
+               return crypto_skcipher_encrypt(inner);
+
+       return crypto_skcipher_decrypt(inner);
+}
+
+static int essiv_skcipher_encrypt(struct skcipher_request *req)
+{
+       return essiv_skcipher_crypt(req, true);
+}
+
+static int essiv_skcipher_decrypt(struct skcipher_request *req)
+{
+       return essiv_skcipher_crypt(req, false);
+}
+
+/*
+ * Completion callback for the inner AEAD request.
+ *
+ * A callback with -EINPROGRESS only reports that a previously backlogged
+ * request has been started; the real completion follows later.  The AAD
+ * bounce buffer must therefore survive that notification and is released
+ * only on the final completion.
+ */
+static void essiv_aead_done(struct crypto_async_request *areq, int err)
+{
+       struct aead_request *req = areq->data;
+       struct essiv_aead_request_ctx *rctx = aead_request_ctx(req);
+
+       if (err != -EINPROGRESS)
+               kfree(rctx->assoc);     /* kfree(NULL) is a no-op */
+
+       aead_request_complete(req, err);
+}
+
+/*
+ * Common AEAD path: encrypt the caller's IV in place with the ESSIV cipher,
+ * place the result in the last ivsize bytes of the AAD, and pass the request
+ * on to the wrapped AEAD.  @enc selects encryption (true) or decryption
+ * (false).
+ *
+ * Returns -EINVAL if the AAD is shorter than the IV or the source
+ * scatterlist is too short, -ENOMEM if the bounce buffer cannot be
+ * allocated, otherwise the result of the inner AEAD operation.
+ */
+static int essiv_aead_crypt(struct aead_request *req, bool enc)
+{
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       const struct essiv_tfm_ctx *tctx = crypto_aead_ctx(tfm);
+       struct essiv_aead_request_ctx *rctx = aead_request_ctx(req);
+       struct aead_request *subreq = &rctx->aead_req;
+       int ivsize = crypto_aead_ivsize(tfm);
+       int ssize = req->assoclen - ivsize;
+       struct scatterlist *src = req->src;
+       int err;
+
+       /*
+        * The IV lives at the tail of the AAD, so the AAD can never be
+        * shorter than the IV.  Check this for every path: the offset is
+        * passed on as an unsigned value and would otherwise wrap.
+        */
+       if (ssize < 0)
+               return -EINVAL;
+
+       crypto_cipher_encrypt_one(tctx->essiv_cipher, req->iv, req->iv);
+
+       /*
+        * dm-crypt embeds the sector number and the IV in the AAD region, so
+        * we have to copy the converted IV into the right scatterlist before
+        * we pass it on.
+        */
+       rctx->assoc = NULL;
+       if (req->src == req->dst || !enc) {
+               scatterwalk_map_and_copy(req->iv, req->dst, ssize, ivsize, 1);
+       } else {
+               /*
+                * Out-of-place encryption: leave the source untouched and
+                * build a replacement source list of
+                * [AAD head][converted IV][rest of the original source].
+                */
+               u8 *iv = (u8 *)aead_request_ctx(req) + tctx->ivoffset;
+               struct scatterlist *sg;
+               int nents;
+
+               nents = sg_nents_for_len(req->src, ssize);
+               if (nents < 0)
+                       return -EINVAL;
+
+               memcpy(iv, req->iv, ivsize);
+               sg_init_table(rctx->sg, 4);
+
+               if (unlikely(nents > 1)) {
+                       /*
+                        * This is a case that rarely occurs in practice, but
+                        * for correctness, we have to deal with it nonetheless.
+                        */
+                       rctx->assoc = kmalloc(ssize, GFP_ATOMIC);
+                       if (!rctx->assoc)
+                               return -ENOMEM;
+
+                       scatterwalk_map_and_copy(rctx->assoc, req->src, 0,
+                                                ssize, 0);
+                       sg_set_buf(rctx->sg, rctx->assoc, ssize);
+               } else {
+                       sg_set_page(rctx->sg, sg_page(req->src), ssize,
+                                   req->src->offset);
+               }
+
+               sg_set_buf(rctx->sg + 1, iv, ivsize);
+               sg = scatterwalk_ffwd(rctx->sg + 2, req->src, req->assoclen);
+               if (sg != rctx->sg + 2)
+                       sg_chain(rctx->sg, 3, sg);
+
+               src = rctx->sg;
+       }
+
+       aead_request_set_tfm(subreq, tctx->u.aead);
+       aead_request_set_ad(subreq, req->assoclen);
+       aead_request_set_callback(subreq, aead_request_flags(req),
+                                 essiv_aead_done, req);
+       aead_request_set_crypt(subreq, src, req->dst, req->cryptlen, req->iv);
+
+       err = enc ? crypto_aead_encrypt(subreq) :
+                   crypto_aead_decrypt(subreq);
+
+       /*
+        * -EINPROGRESS and -EBUSY both mean the inner request is still
+        * outstanding and owns the bounce buffer; essiv_aead_done() frees it
+        * on final completion.  Free it here only for synchronous results.
+        */
+       if (err != -EINPROGRESS && err != -EBUSY)
+               kfree(rctx->assoc);
+       return err;
+}
+
+static int essiv_aead_encrypt(struct aead_request *req)
+{
+       return essiv_aead_crypt(req, true);
+}
+
+static int essiv_aead_decrypt(struct aead_request *req)
+{
+       return essiv_aead_crypt(req, false);
+}
+
+/*
+ * Allocate the transforms shared by both flavours: the single-block cipher
+ * used to encrypt the IV and the shash used to derive its key.  On success
+ * both are stored in @tctx; on failure nothing is left allocated.
+ */
+static int essiv_init_tfm(struct essiv_instance_ctx *ictx,
+                         struct essiv_tfm_ctx *tctx)
+{
+       struct crypto_cipher *cipher;
+       struct crypto_shash *shash;
+
+       cipher = crypto_alloc_cipher(ictx->essiv_cipher_name, 0, 0);
+       if (IS_ERR(cipher))
+               return PTR_ERR(cipher);
+
+       shash = crypto_alloc_shash(ictx->shash_driver_name, 0, 0);
+       if (IS_ERR(shash)) {
+               crypto_free_cipher(cipher);
+               return PTR_ERR(shash);
+       }
+
+       tctx->essiv_cipher = cipher;
+       tctx->hash = shash;
+
+       return 0;
+}
+
+static int essiv_skcipher_init_tfm(struct crypto_skcipher *tfm)
+{
+       struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+       struct essiv_instance_ctx *ictx = skcipher_instance_ctx(inst);
+       struct essiv_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+       struct crypto_skcipher *skcipher;
+       int err;
+
+       skcipher = crypto_spawn_skcipher(&ictx->u.skcipher_spawn);
+       if (IS_ERR(skcipher))
+               return PTR_ERR(skcipher);
+
+       crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+                                        crypto_skcipher_reqsize(skcipher));
+
+       err = essiv_init_tfm(ictx, tctx);
+       if (err) {
+               crypto_free_skcipher(skcipher);
+               return err;
+       }
+
+       tctx->u.skcipher = skcipher;
+       return 0;
+}
+
+/*
+ * Instantiate the wrapped AEAD and the shared ESSIV transforms for @tfm.
+ *
+ * The request context is sized as struct essiv_aead_request_ctx, followed by
+ * the wrapped AEAD's own request context, followed by room for one IV;
+ * tctx->ivoffset records where that IV copy starts.
+ */
+static int essiv_aead_init_tfm(struct crypto_aead *tfm)
+{
+       struct aead_instance *inst = aead_alg_instance(tfm);
+       struct essiv_instance_ctx *ictx = aead_instance_ctx(inst);
+       struct essiv_tfm_ctx *tctx = crypto_aead_ctx(tfm);
+       struct crypto_aead *inner;
+       unsigned int inner_req_size;
+       int ret;
+
+       /* aead_req must be the final member, with no padding behind it. */
+       BUILD_BUG_ON(offsetofend(struct essiv_aead_request_ctx, aead_req) !=
+                    sizeof(struct essiv_aead_request_ctx));
+
+       inner = crypto_spawn_aead(&ictx->u.aead_spawn);
+       if (IS_ERR(inner))
+               return PTR_ERR(inner);
+
+       inner_req_size = FIELD_SIZEOF(struct essiv_aead_request_ctx,
+                                     aead_req) +
+                        crypto_aead_reqsize(inner);
+
+       tctx->ivoffset = offsetof(struct essiv_aead_request_ctx, aead_req) +
+                        inner_req_size;
+       crypto_aead_set_reqsize(tfm, tctx->ivoffset +
+                                    crypto_aead_ivsize(inner));
+
+       ret = essiv_init_tfm(ictx, tctx);
+       if (ret) {
+               crypto_free_aead(inner);
+               return ret;
+       }
+
+       tctx->u.aead = inner;
+       return 0;
+}
+
+static void essiv_skcipher_exit_tfm(struct crypto_skcipher *tfm)
+{
+       struct essiv_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+
+       crypto_free_skcipher(tctx->u.skcipher);
+       crypto_free_cipher(tctx->essiv_cipher);
+       crypto_free_shash(tctx->hash);
+}
+
+static void essiv_aead_exit_tfm(struct crypto_aead *tfm)
+{
+       struct essiv_tfm_ctx *tctx = crypto_aead_ctx(tfm);
+
+       crypto_free_aead(tctx->u.aead);
+       crypto_free_cipher(tctx->essiv_cipher);
+       crypto_free_shash(tctx->hash);
+}
+
+static void essiv_skcipher_free_instance(struct skcipher_instance *inst)
+{
+       struct essiv_instance_ctx *ictx = skcipher_instance_ctx(inst);
+
+       crypto_drop_skcipher(&ictx->u.skcipher_spawn);
+       kfree(inst);
+}
+
+static void essiv_aead_free_instance(struct aead_instance *inst)
+{
+       struct essiv_instance_ctx *ictx = aead_instance_ctx(inst);
+
+       crypto_drop_aead(&ictx->u.aead_spawn);
+       kfree(inst);
+}
+
+/*
+ * Extract the innermost algorithm name from @cra_name, i.e. the text between
+ * the last '(' and the first ')' that follows it ("aes" for "cbc(aes)"), and
+ * store it NUL-terminated in @essiv_cipher_name.
+ *
+ * Returns false if @cra_name has no such parenthesised part or the name does
+ * not fit in CRYPTO_MAX_ALG_NAME bytes including the terminator.
+ */
+static bool parse_cipher_name(char *essiv_cipher_name, const char *cra_name)
+{
+       const char *start, *end;
+       int n;
+
+       start = strrchr(cra_name, '(');
+       if (start == NULL)
+               return false;
+       start++;        /* step past the '(' itself */
+
+       end = strchr(start, ')');
+       if (end == NULL)
+               return false;
+
+       n = end - start;
+       if (n >= CRYPTO_MAX_ALG_NAME)
+               return false;
+
+       memcpy(essiv_cipher_name, start, n);
+       essiv_cipher_name[n] = '\0';
+
+       return true;
+}
+
+/*
+ * Check that the IV cipher and hash can be combined: the digest size must be
+ * a key size the cipher accepts, the cipher block size must equal @ivsize,
+ * and the hash must not be a keyed one.
+ */
+static bool essiv_supported_algorithms(const char *essiv_cipher_name,
+                                      struct shash_alg *hash_alg,
+                                      int ivsize)
+{
+       struct crypto_alg *cipher_alg;
+       bool ok;
+
+       cipher_alg = crypto_alg_mod_lookup(essiv_cipher_name,
+                                          CRYPTO_ALG_TYPE_CIPHER,
+                                          CRYPTO_ALG_TYPE_MASK);
+       if (IS_ERR(cipher_alg))
+               return false;
+
+       ok = hash_alg->digestsize >= cipher_alg->cra_cipher.cia_min_keysize &&
+            hash_alg->digestsize <= cipher_alg->cra_cipher.cia_max_keysize &&
+            ivsize == cipher_alg->cra_blocksize &&
+            !crypto_shash_alg_has_setkey(hash_alg);
+
+       /* Drop the reference taken by the lookup. */
+       crypto_mod_put(cipher_alg);
+       return ok;
+}
+
+static int essiv_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+       struct crypto_attr_type *algt;
+       const char *inner_cipher_name;
+       const char *shash_name;
+       struct skcipher_instance *skcipher_inst = NULL;
+       struct aead_instance *aead_inst = NULL;
+       struct crypto_instance *inst;
+       struct crypto_alg *base, *block_base;
+       struct essiv_instance_ctx *ictx;
+       struct skcipher_alg *skcipher_alg = NULL;
+       struct aead_alg *aead_alg = NULL;
+       struct crypto_alg *_hash_alg;
+       struct shash_alg *hash_alg;
+       int ivsize;
+       u32 type;
+       int err;
+
+       algt = crypto_get_attr_type(tb);
+       if (IS_ERR(algt))
+               return PTR_ERR(algt);
+
+       inner_cipher_name = crypto_attr_alg_name(tb[1]);
+       if (IS_ERR(inner_cipher_name))
+               return PTR_ERR(inner_cipher_name);
+
+       shash_name = crypto_attr_alg_name(tb[2]);
+       if (IS_ERR(shash_name))
+               return PTR_ERR(shash_name);
+
+       type = algt->type & algt->mask;
+
+       switch (type) {
+       case CRYPTO_ALG_TYPE_BLKCIPHER:
+               skcipher_inst = kzalloc(sizeof(*skcipher_inst) +
+                                       sizeof(*ictx), GFP_KERNEL);
+               if (!skcipher_inst)
+                       return -ENOMEM;
+               inst = skcipher_crypto_instance(skcipher_inst);
+               base = &skcipher_inst->alg.base;
+               ictx = crypto_instance_ctx(inst);
+
+               /* Symmetric cipher, e.g., "cbc(aes)" */
+               crypto_set_skcipher_spawn(&ictx->u.skcipher_spawn, inst);
+               err = crypto_grab_skcipher(&ictx->u.skcipher_spawn,
+                                          inner_cipher_name, 0,
+                                          crypto_requires_sync(algt->type,
+                                                               algt->mask));
+               if (err)
+                       goto out_free_inst;
+               skcipher_alg = crypto_spawn_skcipher_alg(&ictx->u.skcipher_spawn);
+               block_base = &skcipher_alg->base;
+               ivsize = crypto_skcipher_alg_ivsize(skcipher_alg);
+               break;
+
+       case CRYPTO_ALG_TYPE_AEAD:
+               aead_inst = kzalloc(sizeof(*aead_inst) +
+                                   sizeof(*ictx), GFP_KERNEL);
+               if (!aead_inst)
+                       return -ENOMEM;
+               inst = aead_crypto_instance(aead_inst);
+               base = &aead_inst->alg.base;
+               ictx = crypto_instance_ctx(inst);
+
+               /* AEAD cipher, e.g., "authenc(hmac(sha256),cbc(aes))" */
+               crypto_set_aead_spawn(&ictx->u.aead_spawn, inst);
+               err = crypto_grab_aead(&ictx->u.aead_spawn,
+                                      inner_cipher_name, 0,
+                                      crypto_requires_sync(algt->type,
+                                                           algt->mask));
+               if (err)
+                       goto out_free_inst;
+               aead_alg = crypto_spawn_aead_alg(&ictx->u.aead_spawn);
+               block_base = &aead_alg->base;
+               if (!strstarts(block_base->cra_name, "authenc(")) {
+                       pr_warn("Only authenc() type AEADs are supported by ESSIV\n");
+                       err = -EINVAL;
+                       goto out_drop_skcipher;
+               }
+               ivsize = aead_alg->ivsize;
+               break;
+
+       default:
+               return -EINVAL;
+       }
+
+       if (!parse_cipher_name(ictx->essiv_cipher_name, block_base->cra_name)) {
+               pr_warn("Failed to parse ESSIV cipher name from skcipher cra_name\n");
+               err = -EINVAL;
+               goto out_drop_skcipher;
+       }
+
+       /* Synchronous hash, e.g., "sha256" */
+       _hash_alg = crypto_alg_mod_lookup(shash_name,
+                                         CRYPTO_ALG_TYPE_SHASH,
+                                         CRYPTO_ALG_TYPE_MASK);
+       if (IS_ERR(_hash_alg)) {
+               err = PTR_ERR(_hash_alg);
+               goto out_drop_skcipher;
+       }
+       hash_alg = __crypto_shash_alg(_hash_alg);
+
+       /* Check the set of algorithms */
+       if (!essiv_supported_algorithms(ictx->essiv_cipher_name, hash_alg,
+                                       ivsize)) {
+               pr_warn("Unsupported essiv instantiation: essiv(%s,%s)\n",
+                       block_base->cra_name, hash_alg->base.cra_name);
+               err = -EINVAL;
+               goto out_free_hash;
+       }
+
+       /* record the driver name so we can instantiate this exact algo later */
+       strlcpy(ictx->shash_driver_name, hash_alg->base.cra_driver_name,
+               CRYPTO_MAX_ALG_NAME);
+
+       /* Instance fields */
+
+       err = -ENAMETOOLONG;
+       if (snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME,
+                    "essiv(%s,%s)", block_base->cra_name,
+                    hash_alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME)
+               goto out_free_hash;
+       if (snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME,
+                    "essiv(%s,%s)", block_base->cra_driver_name,
+                    hash_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+               goto out_free_hash;
+
+       base->cra_flags         = block_base->cra_flags & CRYPTO_ALG_ASYNC;
+       base->cra_blocksize     = block_base->cra_blocksize;
+       base->cra_ctxsize       = sizeof(struct essiv_tfm_ctx);
+       base->cra_alignmask     = block_base->cra_alignmask;
+       base->cra_priority      = block_base->cra_priority;
+
+       if (type == CRYPTO_ALG_TYPE_BLKCIPHER) {
+               skcipher_inst->alg.setkey       = essiv_skcipher_setkey;
+               skcipher_inst->alg.encrypt      = essiv_skcipher_encrypt;
+               skcipher_inst->alg.decrypt      = essiv_skcipher_decrypt;
+               skcipher_inst->alg.init         = essiv_skcipher_init_tfm;
+               skcipher_inst->alg.exit         = essiv_skcipher_exit_tfm;
+
+               skcipher_inst->alg.min_keysize  = crypto_skcipher_alg_min_keysize(skcipher_alg);
+               skcipher_inst->alg.max_keysize  = crypto_skcipher_alg_max_keysize(skcipher_alg);
+               skcipher_inst->alg.ivsize       = ivsize;
+               skcipher_inst->alg.chunksize    = crypto_skcipher_alg_chunksize(skcipher_alg);
+               skcipher_inst->alg.walksize     = crypto_skcipher_alg_walksize(skcipher_alg);
+
+               skcipher_inst->free             = essiv_skcipher_free_instance;
+
+               err = skcipher_register_instance(tmpl, skcipher_inst);
+       } else {
+               aead_inst->alg.setkey           = essiv_aead_setkey;
+               aead_inst->alg.setauthsize      = essiv_aead_setauthsize;
+               aead_inst->alg.encrypt          = essiv_aead_encrypt;
+               aead_inst->alg.decrypt          = essiv_aead_decrypt;
+               aead_inst->alg.init             = essiv_aead_init_tfm;
+               aead_inst->alg.exit             = essiv_aead_exit_tfm;
+
+               aead_inst->alg.ivsize           = ivsize;
+               aead_inst->alg.maxauthsize      = crypto_aead_alg_maxauthsize(aead_alg);
+               aead_inst->alg.chunksize        = crypto_aead_alg_chunksize(aead_alg);
+
+               aead_inst->free                 = essiv_aead_free_instance;
+
+               err = aead_register_instance(tmpl, aead_inst);
+       }
+
+       if (err)
+               goto out_free_hash;
+
+       crypto_mod_put(_hash_alg);
+       return 0;
+
+out_free_hash:
+       crypto_mod_put(_hash_alg);
+out_drop_skcipher:
+       if (type == CRYPTO_ALG_TYPE_BLKCIPHER)
+               crypto_drop_skcipher(&ictx->u.skcipher_spawn);
+       else
+               crypto_drop_aead(&ictx->u.aead_spawn);
+out_free_inst:
+       kfree(skcipher_inst);
+       kfree(aead_inst);
+       return err;
+}
+
+/* essiv(cipher_name, shash_name) */
+static struct crypto_template essiv_tmpl = {
+       .name   = "essiv",
+       .create = essiv_create,
+       .module = THIS_MODULE,
+};
+
+static int __init essiv_module_init(void)
+{
+       return crypto_register_template(&essiv_tmpl);
+}
+
+static void __exit essiv_module_exit(void)
+{
+       crypto_unregister_template(&essiv_tmpl);
+}
+
+subsys_initcall(essiv_module_init);
+module_exit(essiv_module_exit);
+
+MODULE_DESCRIPTION("ESSIV skcipher/aead wrapper for block encryption");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("essiv");
index 7cd0c9a..71511ae 100644 (file)
@@ -160,11 +160,17 @@ static const struct apd_device_desc hip08_i2c_desc = {
        .setup = acpi_apd_setup,
        .fixed_clk_rate = 250000000,
 };
+
 static const struct apd_device_desc thunderx2_i2c_desc = {
        .setup = acpi_apd_setup,
        .fixed_clk_rate = 125000000,
 };
 
+static const struct apd_device_desc nxp_i2c_desc = {
+       .setup = acpi_apd_setup,
+       .fixed_clk_rate = 350000000,
+};
+
 static const struct apd_device_desc hip08_spi_desc = {
        .setup = acpi_apd_setup,
        .fixed_clk_rate = 250000000,
@@ -238,6 +244,7 @@ static const struct acpi_device_id acpi_apd_device_ids[] = {
        { "HISI02A1", APD_ADDR(hip07_i2c_desc) },
        { "HISI02A2", APD_ADDR(hip08_i2c_desc) },
        { "HISI0173", APD_ADDR(hip08_spi_desc) },
+       { "NXP0001", APD_ADDR(nxp_i2c_desc) },
 #endif
        { }
 };
index fd66a73..b097ef2 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/uaccess.h>
 #include <linux/debugfs.h>
 #include <linux/acpi.h>
+#include <linux/security.h>
 
 #include "internal.h"
 
@@ -29,6 +30,11 @@ static ssize_t cm_write(struct file *file, const char __user * user_buf,
 
        struct acpi_table_header table;
        acpi_status status;
+       int ret;
+
+       ret = security_locked_down(LOCKDOWN_ACPI_TABLES);
+       if (ret)
+               return ret;
 
        if (!(*ppos)) {
                /* parse the table header to get the table length */
index cddd0fc..1113b67 100644 (file)
@@ -7,10 +7,11 @@
 #include "intel.h"
 #include "nfit.h"
 
-static enum nvdimm_security_state intel_security_state(struct nvdimm *nvdimm,
+static unsigned long intel_security_flags(struct nvdimm *nvdimm,
                enum nvdimm_passphrase_type ptype)
 {
        struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+       unsigned long security_flags = 0;
        struct {
                struct nd_cmd_pkg pkg;
                struct nd_intel_get_security_state cmd;
@@ -27,7 +28,7 @@ static enum nvdimm_security_state intel_security_state(struct nvdimm *nvdimm,
        int rc;
 
        if (!test_bit(NVDIMM_INTEL_GET_SECURITY_STATE, &nfit_mem->dsm_mask))
-               return -ENXIO;
+               return 0;
 
        /*
         * Short circuit the state retrieval while we are doing overwrite.
@@ -35,38 +36,42 @@ static enum nvdimm_security_state intel_security_state(struct nvdimm *nvdimm,
         * until the overwrite DSM completes.
         */
        if (nvdimm_in_overwrite(nvdimm) && ptype == NVDIMM_USER)
-               return NVDIMM_SECURITY_OVERWRITE;
+               return BIT(NVDIMM_SECURITY_OVERWRITE);
 
        rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
-       if (rc < 0)
-               return rc;
-       if (nd_cmd.cmd.status)
-               return -EIO;
+       if (rc < 0 || nd_cmd.cmd.status) {
+               pr_err("%s: security state retrieval failed (%d:%#x)\n",
+                               nvdimm_name(nvdimm), rc, nd_cmd.cmd.status);
+               return 0;
+       }
 
        /* check and see if security is enabled and locked */
        if (ptype == NVDIMM_MASTER) {
                if (nd_cmd.cmd.extended_state & ND_INTEL_SEC_ESTATE_ENABLED)
-                       return NVDIMM_SECURITY_UNLOCKED;
-               else if (nd_cmd.cmd.extended_state &
-                               ND_INTEL_SEC_ESTATE_PLIMIT)
-                       return NVDIMM_SECURITY_FROZEN;
-       } else {
-               if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_UNSUPPORTED)
-                       return -ENXIO;
-               else if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_ENABLED) {
-                       if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_LOCKED)
-                               return NVDIMM_SECURITY_LOCKED;
-                       else if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_FROZEN
-                                       || nd_cmd.cmd.state &
-                                       ND_INTEL_SEC_STATE_PLIMIT)
-                               return NVDIMM_SECURITY_FROZEN;
-                       else
-                               return NVDIMM_SECURITY_UNLOCKED;
-               }
+                       set_bit(NVDIMM_SECURITY_UNLOCKED, &security_flags);
+               else
+                       set_bit(NVDIMM_SECURITY_DISABLED, &security_flags);
+               if (nd_cmd.cmd.extended_state & ND_INTEL_SEC_ESTATE_PLIMIT)
+                       set_bit(NVDIMM_SECURITY_FROZEN, &security_flags);
+               return security_flags;
        }
 
-       /* this should cover master security disabled as well */
-       return NVDIMM_SECURITY_DISABLED;
+       if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_UNSUPPORTED)
+               return 0;
+
+       if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_ENABLED) {
+               if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_FROZEN ||
+                   nd_cmd.cmd.state & ND_INTEL_SEC_STATE_PLIMIT)
+                       set_bit(NVDIMM_SECURITY_FROZEN, &security_flags);
+
+               if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_LOCKED)
+                       set_bit(NVDIMM_SECURITY_LOCKED, &security_flags);
+               else
+                       set_bit(NVDIMM_SECURITY_UNLOCKED, &security_flags);
+       } else
+               set_bit(NVDIMM_SECURITY_DISABLED, &security_flags);
+
+       return security_flags;
 }
 
 static int intel_security_freeze(struct nvdimm *nvdimm)
@@ -371,7 +376,7 @@ static void nvdimm_invalidate_cache(void)
 #endif
 
 static const struct nvdimm_security_ops __intel_security_ops = {
-       .state = intel_security_state,
+       .get_flags = intel_security_flags,
        .freeze = intel_security_freeze,
        .change_key = intel_security_change_key,
        .disable = intel_security_disable,
index 2f9d0d2..a2e844a 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/list.h>
 #include <linux/jiffies.h>
 #include <linux/semaphore.h>
+#include <linux/security.h>
 
 #include <asm/io.h>
 #include <linux/uaccess.h>
@@ -182,8 +183,19 @@ acpi_physical_address __init acpi_os_get_root_pointer(void)
        acpi_physical_address pa;
 
 #ifdef CONFIG_KEXEC
-       if (acpi_rsdp)
+       /*
+        * We may have been provided with an RSDP on the command line,
+        * but if a malicious user has done so they may be pointing us
+        * at modified ACPI tables that could alter kernel behaviour -
+        * so, we check the lockdown status before making use of
+        * it. If we trust it then also stash it in an architecture
+        * specific location (if appropriate) so it can be carried
+        * over further kexec()s.
+        */
+       if (acpi_rsdp && !security_locked_down(LOCKDOWN_ACPI_TABLES)) {
+               acpi_arch_set_root_pointer(acpi_rsdp);
                return acpi_rsdp;
+       }
 #endif
        pa = acpi_arch_get_root_pointer();
        if (pa)
index 314a187..d1e666e 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/pm_runtime.h>
 #include <linux/pci.h>
 #include <linux/pci-acpi.h>
-#include <linux/pci-aspm.h>
 #include <linux/dmar.h>
 #include <linux/acpi.h>
 #include <linux/slab.h>
index b323277..180ac43 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/memblock.h>
 #include <linux/earlycpio.h>
 #include <linux/initrd.h>
+#include <linux/security.h>
 #include "internal.h"
 
 #ifdef CONFIG_ACPI_CUSTOM_DSDT
@@ -578,6 +579,11 @@ void __init acpi_table_upgrade(void)
        if (table_nr == 0)
                return;
 
+       if (security_locked_down(LOCKDOWN_ACPI_TABLES)) {
+               pr_notice("kernel is locked down, ignoring table override\n");
+               return;
+       }
+
        acpi_tables_addr =
                memblock_find_in_range(0, ACPI_TABLE_UPGRADE_MAX_PHYS,
                                       all_tables_size, PAGE_SIZE);
index 100e798..f39f075 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/limits.h>
 #include <linux/clk/clk-conf.h>
 #include <linux/platform_device.h>
+#include <linux/reset.h>
 
 #include <asm/irq.h>
 
@@ -401,6 +402,19 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
        ret = amba_get_enable_pclk(dev);
        if (ret == 0) {
                u32 pid, cid;
+               struct reset_control *rstc;
+
+               /*
+                * Find reset control(s) of the amba bus and de-assert them.
+                */
+               rstc = of_reset_control_array_get_optional_shared(dev->dev.of_node);
+               if (IS_ERR(rstc)) {
+                       if (PTR_ERR(rstc) != -EPROBE_DEFER)
+                               dev_err(&dev->dev, "Can't get amba reset!\n");
+                       return PTR_ERR(rstc);
+               }
+               reset_control_deassert(rstc);
+               reset_control_put(rstc);
 
                /*
                 * Read pid and cid based on size of resource
index 9e9583a..e742780 100644 (file)
@@ -497,6 +497,7 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev,
 
                        if (of_property_read_u32(child, "reg", &port)) {
                                rc = -EINVAL;
+                               of_node_put(child);
                                goto err_out;
                        }
 
@@ -514,14 +515,18 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev,
                        if (port_dev) {
                                rc = ahci_platform_get_regulator(hpriv, port,
                                                                &port_dev->dev);
-                               if (rc == -EPROBE_DEFER)
+                               if (rc == -EPROBE_DEFER) {
+                                       of_node_put(child);
                                        goto err_out;
+                               }
                        }
 #endif
 
                        rc = ahci_platform_get_phy(hpriv, port, dev, child);
-                       if (rc)
+                       if (rc) {
+                               of_node_put(child);
                                goto err_out;
+                       }
 
                        enabled_ports++;
                }
index 20c39d1..6bea4f3 100644 (file)
@@ -100,26 +100,9 @@ unsigned long __weak memory_block_size_bytes(void)
 }
 EXPORT_SYMBOL_GPL(memory_block_size_bytes);
 
-static unsigned long get_memory_block_size(void)
-{
-       unsigned long block_sz;
-
-       block_sz = memory_block_size_bytes();
-
-       /* Validate blk_sz is a power of 2 and not less than section size */
-       if ((block_sz & (block_sz - 1)) || (block_sz < MIN_MEMORY_BLOCK_SIZE)) {
-               WARN_ON(1);
-               block_sz = MIN_MEMORY_BLOCK_SIZE;
-       }
-
-       return block_sz;
-}
-
 /*
- * use this as the physical section index that this memsection
- * uses.
+ * Show the first physical section index (number) of this memory block.
  */
-
 static ssize_t phys_index_show(struct device *dev,
                               struct device_attribute *attr, char *buf)
 {
@@ -131,7 +114,10 @@ static ssize_t phys_index_show(struct device *dev,
 }
 
 /*
- * Show whether the section of memory is likely to be hot-removable
+ * Show whether the memory block is likely to be offlineable (or is already
+ * offline). Once offline, the memory block could be removed. The return
+ * value does, however, not indicate that there is a way to remove the
+ * memory block.
  */
 static ssize_t removable_show(struct device *dev, struct device_attribute *attr,
                              char *buf)
@@ -455,12 +441,12 @@ static DEVICE_ATTR_RO(phys_device);
 static DEVICE_ATTR_RO(removable);
 
 /*
- * Block size attribute stuff
+ * Show the memory block size (shared by all memory blocks).
  */
 static ssize_t block_size_bytes_show(struct device *dev,
                                     struct device_attribute *attr, char *buf)
 {
-       return sprintf(buf, "%lx\n", get_memory_block_size());
+       return sprintf(buf, "%lx\n", memory_block_size_bytes());
 }
 
 static DEVICE_ATTR_RO(block_size_bytes);
@@ -670,10 +656,10 @@ static int init_memory_block(struct memory_block **memory,
                return -ENOMEM;
 
        mem->start_section_nr = block_id * sections_per_block;
-       mem->end_section_nr = mem->start_section_nr + sections_per_block - 1;
        mem->state = state;
        start_pfn = section_nr_to_pfn(mem->start_section_nr);
        mem->phys_device = arch_get_memory_phys_device(start_pfn);
+       mem->nid = NUMA_NO_NODE;
 
        ret = register_memory(mem);
 
@@ -810,19 +796,22 @@ static const struct attribute_group *memory_root_attr_groups[] = {
 /*
  * Initialize the sysfs support for memory devices...
  */
-int __init memory_dev_init(void)
+void __init memory_dev_init(void)
 {
        int ret;
        int err;
        unsigned long block_sz, nr;
 
+       /* Validate the configured memory block size */
+       block_sz = memory_block_size_bytes();
+       if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE)
+               panic("Memory block size not suitable: 0x%lx\n", block_sz);
+       sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+
        ret = subsys_system_register(&memory_subsys, memory_root_attr_groups);
        if (ret)
                goto out;
 
-       block_sz = get_memory_block_size();
-       sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
-
        /*
         * Create entries for memory sections that were found
         * during boot and have been initialized
@@ -838,8 +827,7 @@ int __init memory_dev_init(void)
 
 out:
        if (ret)
-               printk(KERN_ERR "%s() failed: %d\n", __func__, ret);
-       return ret;
+               panic("%s() failed: %d\n", __func__, ret);
 }
 
 /**
index 75b7e6f..296546f 100644 (file)
@@ -427,6 +427,8 @@ static ssize_t node_read_meminfo(struct device *dev,
                       "Node %d AnonHugePages:  %8lu kB\n"
                       "Node %d ShmemHugePages: %8lu kB\n"
                       "Node %d ShmemPmdMapped: %8lu kB\n"
+                      "Node %d FileHugePages: %8lu kB\n"
+                      "Node %d FilePmdMapped: %8lu kB\n"
 #endif
                        ,
                       nid, K(node_page_state(pgdat, NR_FILE_DIRTY)),
@@ -452,6 +454,10 @@ static ssize_t node_read_meminfo(struct device *dev,
                       nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
                                       HPAGE_PMD_NR),
                       nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
+                                      HPAGE_PMD_NR),
+                      nid, K(node_page_state(pgdat, NR_FILE_THPS) *
+                                      HPAGE_PMD_NR),
+                      nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED) *
                                       HPAGE_PMD_NR)
 #endif
                       );
@@ -756,15 +762,13 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
 static int register_mem_sect_under_node(struct memory_block *mem_blk,
                                         void *arg)
 {
+       unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE;
+       unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
+       unsigned long end_pfn = start_pfn + memory_block_pfns - 1;
        int ret, nid = *(int *)arg;
-       unsigned long pfn, sect_start_pfn, sect_end_pfn;
+       unsigned long pfn;
 
-       mem_blk->nid = nid;
-
-       sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
-       sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr);
-       sect_end_pfn += PAGES_PER_SECTION - 1;
-       for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
+       for (pfn = start_pfn; pfn <= end_pfn; pfn++) {
                int page_nid;
 
                /*
@@ -789,6 +793,13 @@ static int register_mem_sect_under_node(struct memory_block *mem_blk,
                        if (page_nid != nid)
                                continue;
                }
+
+               /*
+                * If this memory block spans multiple nodes, we only indicate
+                * the last processed node.
+                */
+               mem_blk->nid = nid;
+
                ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
                                        &mem_blk->dev.kobj,
                                        kobject_name(&mem_blk->dev.kobj));
@@ -804,32 +815,18 @@ static int register_mem_sect_under_node(struct memory_block *mem_blk,
 }
 
 /*
- * Unregister memory block device under all nodes that it spans.
- * Has to be called with mem_sysfs_mutex held (due to unlinked_nodes).
+ * Unregister a memory block device under the node it spans. Memory blocks
+ * with multiple nodes cannot be offlined and can therefore never be removed.
  */
 void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
 {
-       unsigned long pfn, sect_start_pfn, sect_end_pfn;
-       static nodemask_t unlinked_nodes;
-
-       nodes_clear(unlinked_nodes);
-       sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
-       sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr);
-       for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
-               int nid;
+       if (mem_blk->nid == NUMA_NO_NODE)
+               return;
 
-               nid = get_nid_for_pfn(pfn);
-               if (nid < 0)
-                       continue;
-               if (!node_online(nid))
-                       continue;
-               if (node_test_and_set(nid, unlinked_nodes))
-                       continue;
-               sysfs_remove_link(&node_devices[nid]->dev.kobj,
-                        kobject_name(&mem_blk->dev.kobj));
-               sysfs_remove_link(&mem_blk->dev.kobj,
-                        kobject_name(&node_devices[nid]->dev.kobj));
-       }
+       sysfs_remove_link(&node_devices[mem_blk->nid]->dev.kobj,
+                         kobject_name(&mem_blk->dev.kobj));
+       sysfs_remove_link(&mem_blk->dev.kobj,
+                         kobject_name(&node_devices[mem_blk->nid]->dev.kobj));
 }
 
 int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn)
index c589865..651bd02 100644 (file)
@@ -13,33 +13,10 @@ sector_t interval_end(struct rb_node *node)
        return this->end;
 }
 
-/**
- * compute_subtree_last  -  compute end of @node
- *
- * The end of an interval is the highest (start + (size >> 9)) value of this
- * node and of its children.  Called for @node and its parents whenever the end
- * may have changed.
- */
-static inline sector_t
-compute_subtree_last(struct drbd_interval *node)
-{
-       sector_t max = node->sector + (node->size >> 9);
-
-       if (node->rb.rb_left) {
-               sector_t left = interval_end(node->rb.rb_left);
-               if (left > max)
-                       max = left;
-       }
-       if (node->rb.rb_right) {
-               sector_t right = interval_end(node->rb.rb_right);
-               if (right > max)
-                       max = right;
-       }
-       return max;
-}
+#define NODE_END(node) ((node)->sector + ((node)->size >> 9))
 
-RB_DECLARE_CALLBACKS(static, augment_callbacks, struct drbd_interval, rb,
-                    sector_t, end, compute_subtree_last);
+RB_DECLARE_CALLBACKS_MAX(static, augment_callbacks,
+                        struct drbd_interval, rb, sector_t, end, NODE_END);
 
 /**
  * drbd_insert_interval  -  insert a new interval into a tree
index a8e3815..ac07e8c 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/ioctl.h>
 #include <linux/mutex.h>
 #include <linux/compiler.h>
+#include <linux/completion.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -71,14 +72,17 @@ struct link_dead_args {
        int index;
 };
 
-#define NBD_TIMEDOUT                   0
+#define NBD_RT_TIMEDOUT                        0
+#define NBD_RT_DISCONNECT_REQUESTED    1
+#define NBD_RT_DISCONNECTED            2
+#define NBD_RT_HAS_PID_FILE            3
+#define NBD_RT_HAS_CONFIG_REF          4
+#define NBD_RT_BOUND                   5
+#define NBD_RT_DESTROY_ON_DISCONNECT   6
+#define NBD_RT_DISCONNECT_ON_CLOSE     7
+
+#define NBD_DESTROY_ON_DISCONNECT      0
 #define NBD_DISCONNECT_REQUESTED       1
-#define NBD_DISCONNECTED               2
-#define NBD_HAS_PID_FILE               3
-#define NBD_HAS_CONFIG_REF             4
-#define NBD_BOUND                      5
-#define NBD_DESTROY_ON_DISCONNECT      6
-#define NBD_DISCONNECT_ON_CLOSE        7
 
 struct nbd_config {
        u32 flags;
@@ -113,6 +117,9 @@ struct nbd_device {
        struct list_head list;
        struct task_struct *task_recv;
        struct task_struct *task_setup;
+
+       struct completion *destroy_complete;
+       unsigned long flags;
 };
 
 #define NBD_CMD_REQUEUED       1
@@ -223,6 +230,16 @@ static void nbd_dev_remove(struct nbd_device *nbd)
                disk->private_data = NULL;
                put_disk(disk);
        }
+
+       /*
+        * Do this last, just before the nbd is freed, to make sure
+        * that the disk and the related kobject have also been
+        * completely removed; this avoids duplicate creation of the
+        * same one.
+        */
+       if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && nbd->destroy_complete)
+               complete(nbd->destroy_complete);
+
        kfree(nbd);
 }
 
@@ -238,8 +255,8 @@ static void nbd_put(struct nbd_device *nbd)
 
 static int nbd_disconnected(struct nbd_config *config)
 {
-       return test_bit(NBD_DISCONNECTED, &config->runtime_flags) ||
-               test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
+       return test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags) ||
+               test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
 }
 
 static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
@@ -257,9 +274,9 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
        if (!nsock->dead) {
                kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
                if (atomic_dec_return(&nbd->config->live_connections) == 0) {
-                       if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED,
+                       if (test_and_clear_bit(NBD_RT_DISCONNECT_REQUESTED,
                                               &nbd->config->runtime_flags)) {
-                               set_bit(NBD_DISCONNECTED,
+                               set_bit(NBD_RT_DISCONNECTED,
                                        &nbd->config->runtime_flags);
                                dev_info(nbd_to_dev(nbd),
                                        "Disconnected due to user request.\n");
@@ -333,7 +350,7 @@ static void sock_shutdown(struct nbd_device *nbd)
 
        if (config->num_connections == 0)
                return;
-       if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags))
+       if (test_and_set_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
                return;
 
        for (i = 0; i < config->num_connections; i++) {
@@ -427,7 +444,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
        }
 
        dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
-       set_bit(NBD_TIMEDOUT, &config->runtime_flags);
+       set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
        cmd->status = BLK_STS_IOERR;
        mutex_unlock(&cmd->lock);
        sock_shutdown(nbd);
@@ -795,7 +812,7 @@ static int find_fallback(struct nbd_device *nbd, int index)
        struct nbd_sock *nsock = config->socks[index];
        int fallback = nsock->fallback_index;
 
-       if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+       if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
                return new_index;
 
        if (config->num_connections <= 1) {
@@ -836,7 +853,7 @@ static int wait_for_reconnect(struct nbd_device *nbd)
        struct nbd_config *config = nbd->config;
        if (!config->dead_conn_timeout)
                return 0;
-       if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+       if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
                return 0;
        return wait_event_timeout(config->conn_wait,
                                  atomic_read(&config->live_connections) > 0,
@@ -969,12 +986,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
                return err;
 
        if (!netlink && !nbd->task_setup &&
-           !test_bit(NBD_BOUND, &config->runtime_flags))
+           !test_bit(NBD_RT_BOUND, &config->runtime_flags))
                nbd->task_setup = current;
 
        if (!netlink &&
            (nbd->task_setup != current ||
-            test_bit(NBD_BOUND, &config->runtime_flags))) {
+            test_bit(NBD_RT_BOUND, &config->runtime_flags))) {
                dev_err(disk_to_dev(nbd->disk),
                        "Device being setup by another task");
                sockfd_put(sock);
@@ -1053,7 +1070,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
                mutex_unlock(&nsock->tx_lock);
                sockfd_put(old);
 
-               clear_bit(NBD_DISCONNECTED, &config->runtime_flags);
+               clear_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
 
                /* We take the tx_mutex in an error path in the recv_work, so we
                 * need to queue_work outside of the tx_mutex.
@@ -1124,7 +1141,8 @@ static int nbd_disconnect(struct nbd_device *nbd)
        struct nbd_config *config = nbd->config;
 
        dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
-       set_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
+       set_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
+       set_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags);
        send_disconnects(nbd);
        return 0;
 }
@@ -1143,7 +1161,7 @@ static void nbd_config_put(struct nbd_device *nbd)
                struct nbd_config *config = nbd->config;
                nbd_dev_dbg_close(nbd);
                nbd_size_clear(nbd);
-               if (test_and_clear_bit(NBD_HAS_PID_FILE,
+               if (test_and_clear_bit(NBD_RT_HAS_PID_FILE,
                                       &config->runtime_flags))
                        device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
                nbd->task_recv = NULL;
@@ -1209,7 +1227,7 @@ static int nbd_start_device(struct nbd_device *nbd)
                dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
                return error;
        }
-       set_bit(NBD_HAS_PID_FILE, &config->runtime_flags);
+       set_bit(NBD_RT_HAS_PID_FILE, &config->runtime_flags);
 
        nbd_dev_dbg_init(nbd);
        for (i = 0; i < num_connections; i++) {
@@ -1256,9 +1274,9 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
        mutex_lock(&nbd->config_lock);
        nbd_bdev_reset(bdev);
        /* user requested, ignore socket errors */
-       if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags))
+       if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
                ret = 0;
-       if (test_bit(NBD_TIMEDOUT, &config->runtime_flags))
+       if (test_bit(NBD_RT_TIMEDOUT, &config->runtime_flags))
                ret = -ETIMEDOUT;
        return ret;
 }
@@ -1269,7 +1287,7 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
        sock_shutdown(nbd);
        __invalidate_device(bdev, true);
        nbd_bdev_reset(bdev);
-       if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+       if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
                               &nbd->config->runtime_flags))
                nbd_config_put(nbd);
 }
@@ -1364,7 +1382,7 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
        /* Don't allow ioctl operations on a nbd device that was created with
         * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine.
         */
-       if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+       if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
            (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK))
                error = __nbd_ioctl(bdev, nbd, cmd, arg);
        else
@@ -1435,7 +1453,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode)
        struct nbd_device *nbd = disk->private_data;
        struct block_device *bdev = bdget_disk(disk, 0);
 
-       if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
+       if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
                        bdev->bd_openers == 0)
                nbd_disconnect_and_put(nbd);
 
@@ -1636,6 +1654,7 @@ static int nbd_dev_add(int index)
        nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
                BLK_MQ_F_BLOCKING;
        nbd->tag_set.driver_data = nbd;
+       nbd->destroy_complete = NULL;
 
        err = blk_mq_alloc_tag_set(&nbd->tag_set);
        if (err)
@@ -1750,6 +1769,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
 
 static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 {
+       DECLARE_COMPLETION_ONSTACK(destroy_complete);
        struct nbd_device *nbd = NULL;
        struct nbd_config *config;
        int index = -1;
@@ -1801,6 +1821,17 @@ again:
                mutex_unlock(&nbd_index_mutex);
                return -EINVAL;
        }
+
+       if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
+           test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {
+               nbd->destroy_complete = &destroy_complete;
+               mutex_unlock(&nbd_index_mutex);
+
+               /* Wait until the nbd stuff is totally destroyed */
+               wait_for_completion(&destroy_complete);
+               goto again;
+       }
+
        if (!refcount_inc_not_zero(&nbd->refs)) {
                mutex_unlock(&nbd_index_mutex);
                if (index == -1)
@@ -1833,7 +1864,7 @@ again:
                return -ENOMEM;
        }
        refcount_set(&nbd->config_refs, 1);
-       set_bit(NBD_BOUND, &config->runtime_flags);
+       set_bit(NBD_RT_BOUND, &config->runtime_flags);
 
        ret = nbd_genl_size_set(info, nbd);
        if (ret)
@@ -1853,12 +1884,15 @@ again:
        if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
                u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
                if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
-                       set_bit(NBD_DESTROY_ON_DISCONNECT,
+                       set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
                                &config->runtime_flags);
+                       set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
                        put_dev = true;
+               } else {
+                       clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
                }
                if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
-                       set_bit(NBD_DISCONNECT_ON_CLOSE,
+                       set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
                                &config->runtime_flags);
                }
        }
@@ -1897,7 +1931,7 @@ again:
 out:
        mutex_unlock(&nbd->config_lock);
        if (!ret) {
-               set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags);
+               set_bit(NBD_RT_HAS_CONFIG_REF, &config->runtime_flags);
                refcount_inc(&nbd->config_refs);
                nbd_connect_reply(info, nbd->index);
        }
@@ -1919,7 +1953,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
         * queue.
         */
        flush_workqueue(nbd->recv_workq);
-       if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+       if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
                               &nbd->config->runtime_flags))
                nbd_config_put(nbd);
 }
@@ -2003,7 +2037,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
 
        mutex_lock(&nbd->config_lock);
        config = nbd->config;
-       if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+       if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
            !nbd->task_recv) {
                dev_err(nbd_to_dev(nbd),
                        "not configured, cannot reconfigure\n");
@@ -2026,20 +2060,22 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
        if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
                u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
                if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
-                       if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
+                       if (!test_and_set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
                                              &config->runtime_flags))
                                put_dev = true;
+                       set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
                } else {
-                       if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
+                       if (test_and_clear_bit(NBD_RT_DESTROY_ON_DISCONNECT,
                                               &config->runtime_flags))
                                refcount_inc(&nbd->refs);
+                       clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
                }
 
                if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
-                       set_bit(NBD_DISCONNECT_ON_CLOSE,
+                       set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
                                        &config->runtime_flags);
                } else {
-                       clear_bit(NBD_DISCONNECT_ON_CLOSE,
+                       clear_bit(NBD_RT_DISCONNECT_ON_CLOSE,
                                        &config->runtime_flags);
                }
        }
index 0240601..7645700 100644 (file)
@@ -2594,7 +2594,6 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
        if (ret)
                return ret;
        if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) {
-               WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
                blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
                return -EINVAL;
        }
index c8fb886..7c4350c 100644 (file)
@@ -1754,8 +1754,6 @@ static struct rbd_img_request *rbd_img_request_create(
        mutex_init(&img_request->state_mutex);
        kref_init(&img_request->kref);
 
-       dout("%s: rbd_dev %p %s -> img %p\n", __func__, rbd_dev,
-            obj_op_name(op_type), img_request);
        return img_request;
 }
 
@@ -2944,6 +2942,9 @@ static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req)
        __set_bit(IMG_REQ_CHILD, &child_img_req->flags);
        child_img_req->obj_request = obj_req;
 
+       dout("%s child_img_req %p for obj_req %p\n", __func__, child_img_req,
+            obj_req);
+
        if (!rbd_img_is_write(img_req)) {
                switch (img_req->data_type) {
                case OBJ_REQUEST_BIO:
@@ -4877,6 +4878,9 @@ static void rbd_queue_workfn(struct work_struct *work)
        img_request->rq = rq;
        snapc = NULL; /* img_request consumes a ref */
 
+       dout("%s rbd_dev %p img_req %p %s %llu~%llu\n", __func__, rbd_dev,
+            img_request, obj_op_name(op_type), offset, length);
+
        if (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_ZEROOUT)
                result = rbd_img_fill_nodata(img_request, offset, length);
        else
@@ -5669,17 +5673,20 @@ static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
 
 static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
 {
+       size_t size;
        void *reply_buf;
        int ret;
        void *p;
 
-       reply_buf = kzalloc(RBD_OBJ_PREFIX_LEN_MAX, GFP_KERNEL);
+       /* Response will be an encoded string, which includes a length */
+       size = sizeof(__le32) + RBD_OBJ_PREFIX_LEN_MAX;
+       reply_buf = kzalloc(size, GFP_KERNEL);
        if (!reply_buf)
                return -ENOMEM;
 
        ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
                                  &rbd_dev->header_oloc, "get_object_prefix",
-                                 NULL, 0, reply_buf, RBD_OBJ_PREFIX_LEN_MAX);
+                                 NULL, 0, reply_buf, size);
        dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret < 0)
                goto out;
@@ -6696,7 +6703,6 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
        dout("rbd id object name is %s\n", oid.name);
 
        /* Response will be an encoded string, which includes a length */
-
        size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX;
        response = kzalloc(size, GFP_NOIO);
        if (!response) {
@@ -6708,7 +6714,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
 
        ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc,
                                  "get_id", NULL, 0,
-                                 response, RBD_IMAGE_ID_LEN_MAX);
+                                 response, size);
        dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret == -ENOENT) {
                image_id = kstrdup("", GFP_KERNEL);
index 2db474a..9207ac2 100644 (file)
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * ti-sysc.c - Texas Instruments sysc interconnect target driver
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/io.h>
@@ -62,18 +54,26 @@ static const char * const clock_names[SYSC_MAX_CLOCKS] = {
  * @module_size: size of the interconnect target module
  * @module_va: virtual address of the interconnect target module
  * @offsets: register offsets from module base
+ * @mdata: ti-sysc to hwmod translation data for a module
  * @clocks: clocks used by the interconnect target module
  * @clock_roles: clock role names for the found clocks
  * @nr_clocks: number of clocks used by the interconnect target module
+ * @rsts: resets used by the interconnect target module
  * @legacy_mode: configured for legacy mode if set
  * @cap: interconnect target module capabilities
  * @cfg: interconnect target module configuration
+ * @cookie: data used by legacy platform callbacks
  * @name: name if available
  * @revision: interconnect target module revision
+ * @enabled: sysc runtime enabled status
  * @needs_resume: runtime resume needed on resume from suspend
+ * @child_needs_resume: runtime resume needed for child on resume from suspend
+ * @disable_on_idle: status flag used for disabling modules with resets
+ * @idle_work: work structure used to perform delayed idle on a module
  * @clk_enable_quirk: module specific clock enable quirk
  * @clk_disable_quirk: module specific clock disable quirk
  * @reset_done_quirk: module specific reset done quirk
+ * @module_enable_quirk: module specific enable quirk
  */
 struct sysc {
        struct device *dev;
@@ -95,11 +95,11 @@ struct sysc {
        unsigned int enabled:1;
        unsigned int needs_resume:1;
        unsigned int child_needs_resume:1;
-       unsigned int disable_on_idle:1;
        struct delayed_work idle_work;
        void (*clk_enable_quirk)(struct sysc *sysc);
        void (*clk_disable_quirk)(struct sysc *sysc);
        void (*reset_done_quirk)(struct sysc *sysc);
+       void (*module_enable_quirk)(struct sysc *sysc);
 };
 
 static void sysc_parse_dts_quirks(struct sysc *ddata, struct device_node *np,
@@ -503,7 +503,7 @@ static void sysc_clkdm_allow_idle(struct sysc *ddata)
 static int sysc_init_resets(struct sysc *ddata)
 {
        ddata->rsts =
-               devm_reset_control_get_optional(ddata->dev, "rstctrl");
+               devm_reset_control_get_optional_shared(ddata->dev, "rstctrl");
        if (IS_ERR(ddata->rsts))
                return PTR_ERR(ddata->rsts);
 
@@ -615,8 +615,8 @@ static void sysc_check_quirk_stdout(struct sysc *ddata,
  * node but children have "ti,hwmods". These belong to the interconnect
  * target node and are managed by this driver.
  */
-static int sysc_check_one_child(struct sysc *ddata,
-                               struct device_node *np)
+static void sysc_check_one_child(struct sysc *ddata,
+                                struct device_node *np)
 {
        const char *name;
 
@@ -626,22 +626,14 @@ static int sysc_check_one_child(struct sysc *ddata,
 
        sysc_check_quirk_stdout(ddata, np);
        sysc_parse_dts_quirks(ddata, np, true);
-
-       return 0;
 }
 
-static int sysc_check_children(struct sysc *ddata)
+static void sysc_check_children(struct sysc *ddata)
 {
        struct device_node *child;
-       int error;
-
-       for_each_child_of_node(ddata->dev->of_node, child) {
-               error = sysc_check_one_child(ddata, child);
-               if (error)
-                       return error;
-       }
 
-       return 0;
+       for_each_child_of_node(ddata->dev->of_node, child)
+               sysc_check_one_child(ddata, child);
 }
 
 /*
@@ -794,9 +786,7 @@ static int sysc_map_and_check_registers(struct sysc *ddata)
        if (error)
                return error;
 
-       error = sysc_check_children(ddata);
-       if (error)
-               return error;
+       sysc_check_children(ddata);
 
        error = sysc_parse_registers(ddata);
        if (error)
@@ -940,6 +930,9 @@ set_autoidle:
                sysc_write(ddata, ddata->offsets[SYSC_SYSCONFIG], reg);
        }
 
+       if (ddata->module_enable_quirk)
+               ddata->module_enable_quirk(ddata);
+
        return 0;
 }
 
@@ -1031,8 +1024,7 @@ static int __maybe_unused sysc_runtime_suspend_legacy(struct device *dev,
                dev_err(dev, "%s: could not idle: %i\n",
                        __func__, error);
 
-       if (ddata->disable_on_idle)
-               reset_control_assert(ddata->rsts);
+       reset_control_assert(ddata->rsts);
 
        return 0;
 }
@@ -1043,8 +1035,7 @@ static int __maybe_unused sysc_runtime_resume_legacy(struct device *dev,
        struct ti_sysc_platform_data *pdata;
        int error;
 
-       if (ddata->disable_on_idle)
-               reset_control_deassert(ddata->rsts);
+       reset_control_deassert(ddata->rsts);
 
        pdata = dev_get_platdata(ddata->dev);
        if (!pdata)
@@ -1091,10 +1082,9 @@ static int __maybe_unused sysc_runtime_suspend(struct device *dev)
        ddata->enabled = false;
 
 err_allow_idle:
-       sysc_clkdm_allow_idle(ddata);
+       reset_control_assert(ddata->rsts);
 
-       if (ddata->disable_on_idle)
-               reset_control_assert(ddata->rsts);
+       sysc_clkdm_allow_idle(ddata);
 
        return error;
 }
@@ -1109,11 +1099,11 @@ static int __maybe_unused sysc_runtime_resume(struct device *dev)
        if (ddata->enabled)
                return 0;
 
-       if (ddata->disable_on_idle)
-               reset_control_deassert(ddata->rsts);
 
        sysc_clkdm_deny_idle(ddata);
 
+       reset_control_deassert(ddata->rsts);
+
        if (sysc_opt_clks_needed(ddata)) {
                error = sysc_enable_opt_clocks(ddata);
                if (error)
@@ -1256,6 +1246,9 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
                   SYSC_MODULE_QUIRK_I2C),
        SYSC_QUIRK("i2c", 0, 0, 0x10, 0x90, 0x5040000a, 0xfffff0f0,
                   SYSC_MODULE_QUIRK_I2C),
+       SYSC_QUIRK("gpu", 0x50000000, 0x14, -1, -1, 0x00010201, 0xffffffff, 0),
+       SYSC_QUIRK("gpu", 0x50000000, 0xfe00, 0xfe10, -1, 0x40000000 , 0xffffffff,
+                  SYSC_MODULE_QUIRK_SGX),
        SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0500, 0xfffff0f0,
                   SYSC_MODULE_QUIRK_WDT),
 
@@ -1271,8 +1264,11 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
        SYSC_QUIRK("dcan", 0, 0x20, -1, -1, 0x4edb1902, 0xffffffff, 0),
        SYSC_QUIRK("dmic", 0, 0, 0x10, -1, 0x50010000, 0xffffffff, 0),
        SYSC_QUIRK("dwc3", 0, 0, 0x10, -1, 0x500a0200, 0xffffffff, 0),
+       SYSC_QUIRK("d2d", 0x4a0b6000, 0, 0x10, 0x14, 0x00000010, 0xffffffff, 0),
+       SYSC_QUIRK("d2d", 0x4a0cd000, 0, 0x10, 0x14, 0x00000010, 0xffffffff, 0),
        SYSC_QUIRK("epwmss", 0, 0, 0x4, -1, 0x47400001, 0xffffffff, 0),
        SYSC_QUIRK("gpu", 0, 0x1fc00, 0x1fc10, -1, 0, 0, 0),
+       SYSC_QUIRK("gpu", 0, 0xfe00, 0xfe10, -1, 0x40000000 , 0xffffffff, 0),
        SYSC_QUIRK("hsi", 0, 0, 0x10, 0x14, 0x50043101, 0xffffffff, 0),
        SYSC_QUIRK("iss", 0, 0, 0x10, -1, 0x40000101, 0xffffffff, 0),
        SYSC_QUIRK("lcdc", 0, 0, 0x54, -1, 0x4f201000, 0xffffffff, 0),
@@ -1424,6 +1420,15 @@ static void sysc_clk_disable_quirk_i2c(struct sysc *ddata)
        sysc_clk_quirk_i2c(ddata, false);
 }
 
+/* 36xx SGX needs a quirk to bypass OCP IPG interrupt logic */
+static void sysc_module_enable_quirk_sgx(struct sysc *ddata)
+{
+       int offset = 0xff08;    /* OCP_DEBUG_CONFIG */
+       u32 val = BIT(31);      /* THALIA_INT_BYPASS */
+
+       sysc_write(ddata, offset, val);
+}
+
 /* Watchdog timer needs a disable sequence after reset */
 static void sysc_reset_done_quirk_wdt(struct sysc *ddata)
 {
@@ -1466,6 +1471,9 @@ static void sysc_init_module_quirks(struct sysc *ddata)
                return;
        }
 
+       if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_SGX)
+               ddata->module_enable_quirk = sysc_module_enable_quirk_sgx;
+
        if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_WDT)
                ddata->reset_done_quirk = sysc_reset_done_quirk_wdt;
 }
@@ -1532,7 +1540,7 @@ static int sysc_legacy_init(struct sysc *ddata)
  */
 static int sysc_rstctrl_reset_deassert(struct sysc *ddata, bool reset)
 {
-       int error, val;
+       int error;
 
        if (!ddata->rsts)
                return 0;
@@ -1543,14 +1551,9 @@ static int sysc_rstctrl_reset_deassert(struct sysc *ddata, bool reset)
                        return error;
        }
 
-       error = reset_control_deassert(ddata->rsts);
-       if (error == -EEXIST)
-               return 0;
+       reset_control_deassert(ddata->rsts);
 
-       error = readx_poll_timeout(reset_control_status, ddata->rsts, val,
-                                  val == 0, 100, MAX_MODULE_SOFTRESET_WAIT);
-
-       return error;
+       return 0;
 }
 
 /*
@@ -1559,12 +1562,11 @@ static int sysc_rstctrl_reset_deassert(struct sysc *ddata, bool reset)
  */
 static int sysc_reset(struct sysc *ddata)
 {
-       int sysc_offset, syss_offset, sysc_val, rstval, quirks, error = 0;
+       int sysc_offset, syss_offset, sysc_val, rstval, error = 0;
        u32 sysc_mask, syss_done;
 
        sysc_offset = ddata->offsets[SYSC_SYSCONFIG];
        syss_offset = ddata->offsets[SYSC_SYSSTATUS];
-       quirks = ddata->cfg.quirks;
 
        if (ddata->legacy_mode || sysc_offset < 0 ||
            ddata->cap->regbits->srst_shift < 0 ||
@@ -2427,6 +2429,10 @@ static int sysc_probe(struct platform_device *pdev)
                goto unprepare;
        }
 
+       /* Balance reset counts */
+       if (ddata->rsts)
+               reset_control_assert(ddata->rsts);
+
        sysc_show_registers(ddata);
 
        ddata->dev->type = &sysc_device_type;
@@ -2446,9 +2452,6 @@ static int sysc_probe(struct platform_device *pdev)
                pm_runtime_put(&pdev->dev);
        }
 
-       if (!of_get_available_child_count(ddata->dev->of_node))
-               ddata->disable_on_idle = true;
-
        return 0;
 
 err:
index bdab5d9..80b850e 100644 (file)
@@ -68,7 +68,7 @@ static void add_early_randomness(struct hwrng *rng)
        size_t size = min_t(size_t, 16, rng_buffer_size());
 
        mutex_lock(&reading_mutex);
-       bytes_read = rng_get_data(rng, rng_buffer, size, 1);
+       bytes_read = rng_get_data(rng, rng_buffer, size, 0);
        mutex_unlock(&reading_mutex);
        if (bytes_read > 0)
                add_device_randomness(rng_buffer, bytes_read);
index 9eb564c..43dd089 100644 (file)
@@ -29,8 +29,8 @@
 #include <linux/export.h>
 #include <linux/io.h>
 #include <linux/uio.h>
-
 #include <linux/uaccess.h>
+#include <linux/security.h>
 
 #ifdef CONFIG_IA64
 # include <linux/efi.h>
@@ -807,7 +807,10 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
 
 static int open_port(struct inode *inode, struct file *filp)
 {
-       return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+       if (!capable(CAP_SYS_RAWIO))
+               return -EPERM;
+
+       return security_locked_down(LOCKDOWN_DEV_MEM);
 }
 
 #define zero_lseek     null_lseek
index 1b4f95c..d7a3888 100644 (file)
@@ -320,18 +320,22 @@ int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx,
        if (!chip)
                return -ENODEV;
 
-       for (i = 0; i < chip->nr_allocated_banks; i++)
-               if (digests[i].alg_id != chip->allocated_banks[i].alg_id)
-                       return -EINVAL;
+       for (i = 0; i < chip->nr_allocated_banks; i++) {
+               if (digests[i].alg_id != chip->allocated_banks[i].alg_id) {
+                       rc = EINVAL;
+                       goto out;
+               }
+       }
 
        if (chip->flags & TPM_CHIP_FLAG_TPM2) {
                rc = tpm2_pcr_extend(chip, pcr_idx, digests);
-               tpm_put_ops(chip);
-               return rc;
+               goto out;
        }
 
        rc = tpm1_pcr_extend(chip, pcr_idx, digests[0].digest,
                             "attempting extend a PCR value");
+
+out:
        tpm_put_ops(chip);
        return rc;
 }
@@ -354,14 +358,9 @@ int tpm_send(struct tpm_chip *chip, void *cmd, size_t buflen)
        if (!chip)
                return -ENODEV;
 
-       rc = tpm_buf_init(&buf, 0, 0);
-       if (rc)
-               goto out;
-
-       memcpy(buf.data, cmd, buflen);
+       buf.data = cmd;
        rc = tpm_transmit_cmd(chip, &buf, 0, "attempting to a send a command");
-       tpm_buf_destroy(&buf);
-out:
+
        tpm_put_ops(chip);
        return rc;
 }
index 02c1595..18b0c39 100644 (file)
@@ -9,7 +9,6 @@
 
 #include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/slab.h>
 #include "xillybus.h"
 
index 801fa1c..c44247d 100644 (file)
@@ -116,7 +116,6 @@ config COMMON_CLK_SI514
        depends on OF
        select REGMAP_I2C
        help
-       ---help---
          This driver supports the Silicon Labs 514 programmable clock
          generator.
 
@@ -125,7 +124,6 @@ config COMMON_CLK_SI544
        depends on I2C
        select REGMAP_I2C
        help
-       ---help---
          This driver supports the Silicon Labs 544 programmable clock
          generator.
 
@@ -135,7 +133,6 @@ config COMMON_CLK_SI570
        depends on OF
        select REGMAP_I2C
        help
-       ---help---
          This driver supports Silicon Labs 570/571/598/599 programmable
          clock generators.
 
@@ -153,7 +150,6 @@ config COMMON_CLK_CDCE925
        depends on OF
        select REGMAP_I2C
        help
-       ---help---
          This driver supports the TI CDCE913/925/937/949 programmable clock
          synthesizer. Each chip has different number of PLLs and outputs.
          For example, the CDCE925 contains two PLLs with spread-spectrum
@@ -212,7 +208,6 @@ config COMMON_CLK_AXI_CLKGEN
        tristate "AXI clkgen driver"
        depends on ARCH_ZYNQ || MICROBLAZE || COMPILE_TEST
        help
-       ---help---
          Support for the Analog Devices axi-clkgen pcore clock generator for Xilinx
          FPGAs. It is commonly used in Analog Devices' reference designs.
 
@@ -279,26 +274,22 @@ config COMMON_CLK_VC5
        depends on OF
        select REGMAP_I2C
        help
-       ---help---
          This driver supports the IDT VersaClock 5 and VersaClock 6
          programmable clock generators.
 
 config COMMON_CLK_STM32MP157
        def_bool COMMON_CLK && MACH_STM32MP157
        help
-       ---help---
          Support for stm32mp157 SoC family clocks
 
 config COMMON_CLK_STM32F
        def_bool COMMON_CLK && (MACH_STM32F429 || MACH_STM32F469 || MACH_STM32F746)
        help
-       ---help---
          Support for stm32f4 and stm32f7 SoC families clocks
 
 config COMMON_CLK_STM32H7
        def_bool COMMON_CLK && MACH_STM32H743
        help
-       ---help---
          Support for stm32h7 SoC family clocks
 
 config COMMON_CLK_BD718XX
index 0cad760..0138fb1 100644 (file)
@@ -30,6 +30,7 @@ obj-$(CONFIG_ARCH_EFM32)              += clk-efm32gg.o
 obj-$(CONFIG_COMMON_CLK_FIXED_MMIO)    += clk-fixed-mmio.o
 obj-$(CONFIG_COMMON_CLK_GEMINI)                += clk-gemini.o
 obj-$(CONFIG_COMMON_CLK_ASPEED)                += clk-aspeed.o
+obj-$(CONFIG_MACH_ASPEED_G6)           += clk-ast2600.o
 obj-$(CONFIG_ARCH_HIGHBANK)            += clk-highbank.o
 obj-$(CONFIG_CLK_HSDK)                 += clk-hsdk-pll.o
 obj-$(CONFIG_COMMON_CLK_LOCHNAGAR)     += clk-lochnagar.o
index 32dd29e..4de97cc 100644 (file)
@@ -68,16 +68,17 @@ int owl_clk_probe(struct device *dev, struct clk_hw_onecell_data *hw_clks)
        struct clk_hw *hw;
 
        for (i = 0; i < hw_clks->num; i++) {
+               const char *name;
 
                hw = hw_clks->hws[i];
-
                if (IS_ERR_OR_NULL(hw))
                        continue;
 
+               name = hw->init->name;
                ret = devm_clk_hw_register(dev, hw);
                if (ret) {
                        dev_err(dev, "Couldn't register clock %d - %s\n",
-                               i, hw->init->name);
+                               i, name);
                        return ret;
                }
        }
index 317d4a9..f15e262 100644 (file)
@@ -64,11 +64,10 @@ static unsigned int _get_table_val(const struct clk_factor_table *table,
        return val;
 }
 
-static int clk_val_best(struct clk_hw *hw, unsigned long rate,
+static int owl_clk_val_best(const struct owl_factor_hw *factor_hw,
+                       struct clk_hw *hw, unsigned long rate,
                        unsigned long *best_parent_rate)
 {
-       struct owl_factor *factor = hw_to_owl_factor(hw);
-       struct owl_factor_hw *factor_hw = &factor->factor_hw;
        const struct clk_factor_table *clkt = factor_hw->table;
        unsigned long parent_rate, try_parent_rate, best = 0, cur_rate;
        unsigned long parent_rate_saved = *best_parent_rate;
@@ -126,7 +125,7 @@ long owl_factor_helper_round_rate(struct owl_clk_common *common,
        const struct clk_factor_table *clkt = factor_hw->table;
        unsigned int val, mul = 0, div = 1;
 
-       val = clk_val_best(&common->hw, rate, parent_rate);
+       val = owl_clk_val_best(factor_hw, &common->hw, rate, parent_rate);
        _get_table_div_mul(clkt, val, &mul, &div);
 
        return *parent_rate * mul / div;
index f607ee7..87083b3 100644 (file)
 
 #define MOR_KEY_MASK           (0xff << 16)
 
+#define clk_main_parent_select(s)      (((s) & \
+                                       (AT91_PMC_MOSCEN | \
+                                       AT91_PMC_OSCBYPASS)) ? 1 : 0)
+
 struct clk_main_osc {
        struct clk_hw hw;
        struct regmap *regmap;
@@ -113,7 +117,7 @@ static int clk_main_osc_is_prepared(struct clk_hw *hw)
 
        regmap_read(regmap, AT91_PMC_SR, &status);
 
-       return (status & AT91_PMC_MOSCS) && (tmp & AT91_PMC_MOSCEN);
+       return (status & AT91_PMC_MOSCS) && clk_main_parent_select(tmp);
 }
 
 static const struct clk_ops main_osc_ops = {
@@ -152,7 +156,7 @@ at91_clk_register_main_osc(struct regmap *regmap,
        if (bypass)
                regmap_update_bits(regmap,
                                   AT91_CKGR_MOR, MOR_KEY_MASK |
-                                  AT91_PMC_MOSCEN,
+                                  AT91_PMC_OSCBYPASS,
                                   AT91_PMC_OSCBYPASS | AT91_PMC_KEY);
 
        hw = &osc->hw;
@@ -450,7 +454,7 @@ static u8 clk_sam9x5_main_get_parent(struct clk_hw *hw)
 
        regmap_read(clkmain->regmap, AT91_CKGR_MOR, &status);
 
-       return status & AT91_PMC_MOSCEN ? 1 : 0;
+       return clk_main_parent_select(status);
 }
 
 static const struct clk_ops sam9x5_main_ops = {
@@ -492,7 +496,7 @@ at91_clk_register_sam9x5_main(struct regmap *regmap,
        clkmain->hw.init = &init;
        clkmain->regmap = regmap;
        regmap_read(clkmain->regmap, AT91_CKGR_MOR, &status);
-       clkmain->parent = status & AT91_PMC_MOSCEN ? 1 : 0;
+       clkmain->parent = clk_main_parent_select(status);
 
        hw = &clkmain->hw;
        ret = clk_hw_register(NULL, &clkmain->hw);
index 6509d09..0de1108 100644 (file)
@@ -21,7 +21,7 @@ static const struct clk_range plla_outputs[] = {
 };
 
 static const struct clk_pll_characteristics plla_characteristics = {
-       .input = { .min = 12000000, .max = 12000000 },
+       .input = { .min = 12000000, .max = 24000000 },
        .num_output = ARRAY_SIZE(plla_outputs),
        .output = plla_outputs,
        .icpll = plla_icpll,
index 867ae3c..802e488 100644 (file)
@@ -31,7 +31,7 @@
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <dt-bindings/clock/bcm2835.h>
 #define CM_AVEODIV             0x1bc
 #define CM_EMMCCTL             0x1c0
 #define CM_EMMCDIV             0x1c4
+#define CM_EMMC2CTL            0x1d0
+#define CM_EMMC2DIV            0x1d4
 
 /* General bits for the CM_*CTL regs */
 # define CM_ENABLE                     BIT(4)
 #define LOCK_TIMEOUT_NS                100000000
 #define BCM2835_MAX_FB_RATE    1750000000u
 
+#define SOC_BCM2835            BIT(0)
+#define SOC_BCM2711            BIT(1)
+#define SOC_ALL                        (SOC_BCM2835 | SOC_BCM2711)
+
 /*
  * Names of clocks used within the driver that need to be replaced
  * with an external parent's name.  This array is in the order that
@@ -320,6 +326,10 @@ struct bcm2835_cprman {
        struct clk_hw_onecell_data onecell;
 };
 
+struct cprman_plat_data {
+       unsigned int soc;
+};
+
 static inline void cprman_write(struct bcm2835_cprman *cprman, u32 reg, u32 val)
 {
        writel(CM_PASSWORD | val, cprman->regs + reg);
@@ -1451,22 +1461,28 @@ typedef struct clk_hw *(*bcm2835_clk_register)(struct bcm2835_cprman *cprman,
                                               const void *data);
 struct bcm2835_clk_desc {
        bcm2835_clk_register clk_register;
+       unsigned int supported;
        const void *data;
 };
 
 /* assignment helper macros for different clock types */
-#define _REGISTER(f, ...) { .clk_register = (bcm2835_clk_register)f, \
-                           .data = __VA_ARGS__ }
-#define REGISTER_PLL(...)      _REGISTER(&bcm2835_register_pll,        \
+#define _REGISTER(f, s, ...) { .clk_register = (bcm2835_clk_register)f, \
+                              .supported = s,                          \
+                              .data = __VA_ARGS__ }
+#define REGISTER_PLL(s, ...)   _REGISTER(&bcm2835_register_pll,        \
+                                         s,                            \
                                          &(struct bcm2835_pll_data)    \
                                          {__VA_ARGS__})
-#define REGISTER_PLL_DIV(...)  _REGISTER(&bcm2835_register_pll_divider, \
-                                         &(struct bcm2835_pll_divider_data) \
-                                         {__VA_ARGS__})
-#define REGISTER_CLK(...)      _REGISTER(&bcm2835_register_clock,      \
+#define REGISTER_PLL_DIV(s, ...) _REGISTER(&bcm2835_register_pll_divider, \
+                                          s,                             \
+                                          &(struct bcm2835_pll_divider_data) \
+                                          {__VA_ARGS__})
+#define REGISTER_CLK(s, ...)   _REGISTER(&bcm2835_register_clock,      \
+                                         s,                            \
                                          &(struct bcm2835_clock_data)  \
                                          {__VA_ARGS__})
-#define REGISTER_GATE(...)     _REGISTER(&bcm2835_register_gate,       \
+#define REGISTER_GATE(s, ...)  _REGISTER(&bcm2835_register_gate,       \
+                                         s,                            \
                                          &(struct bcm2835_gate_data)   \
                                          {__VA_ARGS__})
 
@@ -1480,7 +1496,8 @@ static const char *const bcm2835_clock_osc_parents[] = {
        "testdebug1"
 };
 
-#define REGISTER_OSC_CLK(...)  REGISTER_CLK(                           \
+#define REGISTER_OSC_CLK(s, ...)       REGISTER_CLK(                   \
+       s,                                                              \
        .num_mux_parents = ARRAY_SIZE(bcm2835_clock_osc_parents),       \
        .parents = bcm2835_clock_osc_parents,                           \
        __VA_ARGS__)
@@ -1497,7 +1514,8 @@ static const char *const bcm2835_clock_per_parents[] = {
        "pllh_aux",
 };
 
-#define REGISTER_PER_CLK(...)  REGISTER_CLK(                           \
+#define REGISTER_PER_CLK(s, ...)       REGISTER_CLK(                   \
+       s,                                                              \
        .num_mux_parents = ARRAY_SIZE(bcm2835_clock_per_parents),       \
        .parents = bcm2835_clock_per_parents,                           \
        __VA_ARGS__)
@@ -1522,7 +1540,8 @@ static const char *const bcm2835_pcm_per_parents[] = {
        "-",
 };
 
-#define REGISTER_PCM_CLK(...)  REGISTER_CLK(                           \
+#define REGISTER_PCM_CLK(s, ...)       REGISTER_CLK(                   \
+       s,                                                              \
        .num_mux_parents = ARRAY_SIZE(bcm2835_pcm_per_parents),         \
        .parents = bcm2835_pcm_per_parents,                             \
        __VA_ARGS__)
@@ -1541,7 +1560,8 @@ static const char *const bcm2835_clock_vpu_parents[] = {
        "pllc_core2",
 };
 
-#define REGISTER_VPU_CLK(...)  REGISTER_CLK(                           \
+#define REGISTER_VPU_CLK(s, ...)       REGISTER_CLK(                   \
+       s,                                                              \
        .num_mux_parents = ARRAY_SIZE(bcm2835_clock_vpu_parents),       \
        .parents = bcm2835_clock_vpu_parents,                           \
        __VA_ARGS__)
@@ -1577,12 +1597,14 @@ static const char *const bcm2835_clock_dsi1_parents[] = {
        "dsi1_byte_inv",
 };
 
-#define REGISTER_DSI0_CLK(...) REGISTER_CLK(                           \
+#define REGISTER_DSI0_CLK(s, ...)      REGISTER_CLK(                   \
+       s,                                                              \
        .num_mux_parents = ARRAY_SIZE(bcm2835_clock_dsi0_parents),      \
        .parents = bcm2835_clock_dsi0_parents,                          \
        __VA_ARGS__)
 
-#define REGISTER_DSI1_CLK(...) REGISTER_CLK(                           \
+#define REGISTER_DSI1_CLK(s, ...)      REGISTER_CLK(                   \
+       s,                                                              \
        .num_mux_parents = ARRAY_SIZE(bcm2835_clock_dsi1_parents),      \
        .parents = bcm2835_clock_dsi1_parents,                          \
        __VA_ARGS__)
@@ -1602,6 +1624,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
         * AUDIO domain is on.
         */
        [BCM2835_PLLA]          = REGISTER_PLL(
+               SOC_ALL,
                .name = "plla",
                .cm_ctrl_reg = CM_PLLA,
                .a2w_ctrl_reg = A2W_PLLA_CTRL,
@@ -1616,6 +1639,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .max_rate = 2400000000u,
                .max_fb_rate = BCM2835_MAX_FB_RATE),
        [BCM2835_PLLA_CORE]     = REGISTER_PLL_DIV(
+               SOC_ALL,
                .name = "plla_core",
                .source_pll = "plla",
                .cm_reg = CM_PLLA,
@@ -1625,6 +1649,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .fixed_divider = 1,
                .flags = CLK_SET_RATE_PARENT),
        [BCM2835_PLLA_PER]      = REGISTER_PLL_DIV(
+               SOC_ALL,
                .name = "plla_per",
                .source_pll = "plla",
                .cm_reg = CM_PLLA,
@@ -1634,6 +1659,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .fixed_divider = 1,
                .flags = CLK_SET_RATE_PARENT),
        [BCM2835_PLLA_DSI0]     = REGISTER_PLL_DIV(
+               SOC_ALL,
                .name = "plla_dsi0",
                .source_pll = "plla",
                .cm_reg = CM_PLLA,
@@ -1642,6 +1668,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .hold_mask = CM_PLLA_HOLDDSI0,
                .fixed_divider = 1),
        [BCM2835_PLLA_CCP2]     = REGISTER_PLL_DIV(
+               SOC_ALL,
                .name = "plla_ccp2",
                .source_pll = "plla",
                .cm_reg = CM_PLLA,
@@ -1663,6 +1690,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
         * AUDIO domain is on.
         */
        [BCM2835_PLLC]          = REGISTER_PLL(
+               SOC_ALL,
                .name = "pllc",
                .cm_ctrl_reg = CM_PLLC,
                .a2w_ctrl_reg = A2W_PLLC_CTRL,
@@ -1677,6 +1705,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .max_rate = 3000000000u,
                .max_fb_rate = BCM2835_MAX_FB_RATE),
        [BCM2835_PLLC_CORE0]    = REGISTER_PLL_DIV(
+               SOC_ALL,
                .name = "pllc_core0",
                .source_pll = "pllc",
                .cm_reg = CM_PLLC,
@@ -1686,6 +1715,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .fixed_divider = 1,
                .flags = CLK_SET_RATE_PARENT),
        [BCM2835_PLLC_CORE1]    = REGISTER_PLL_DIV(
+               SOC_ALL,
                .name = "pllc_core1",
                .source_pll = "pllc",
                .cm_reg = CM_PLLC,
@@ -1695,6 +1725,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .fixed_divider = 1,
                .flags = CLK_SET_RATE_PARENT),
        [BCM2835_PLLC_CORE2]    = REGISTER_PLL_DIV(
+               SOC_ALL,
                .name = "pllc_core2",
                .source_pll = "pllc",
                .cm_reg = CM_PLLC,
@@ -1704,6 +1735,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .fixed_divider = 1,
                .flags = CLK_SET_RATE_PARENT),
        [BCM2835_PLLC_PER]      = REGISTER_PLL_DIV(
+               SOC_ALL,
                .name = "pllc_per",
                .source_pll = "pllc",
                .cm_reg = CM_PLLC,
@@ -1720,6 +1752,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
         * AUDIO domain is on.
         */
        [BCM2835_PLLD]          = REGISTER_PLL(
+               SOC_ALL,
                .name = "plld",
                .cm_ctrl_reg = CM_PLLD,
                .a2w_ctrl_reg = A2W_PLLD_CTRL,
@@ -1734,6 +1767,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .max_rate = 2400000000u,
                .max_fb_rate = BCM2835_MAX_FB_RATE),
        [BCM2835_PLLD_CORE]     = REGISTER_PLL_DIV(
+               SOC_ALL,
                .name = "plld_core",
                .source_pll = "plld",
                .cm_reg = CM_PLLD,
@@ -1742,7 +1776,13 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .hold_mask = CM_PLLD_HOLDCORE,
                .fixed_divider = 1,
                .flags = CLK_SET_RATE_PARENT),
+       /*
+        * VPU firmware assumes that PLLD_PER isn't disabled by the ARM core.
+        * Otherwise this could cause firmware lockups. That's why we mark
+        * it as critical.
+        */
        [BCM2835_PLLD_PER]      = REGISTER_PLL_DIV(
+               SOC_ALL,
                .name = "plld_per",
                .source_pll = "plld",
                .cm_reg = CM_PLLD,
@@ -1750,8 +1790,9 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .load_mask = CM_PLLD_LOADPER,
                .hold_mask = CM_PLLD_HOLDPER,
                .fixed_divider = 1,
-               .flags = CLK_SET_RATE_PARENT),
+               .flags = CLK_IS_CRITICAL | CLK_SET_RATE_PARENT),
        [BCM2835_PLLD_DSI0]     = REGISTER_PLL_DIV(
+               SOC_ALL,
                .name = "plld_dsi0",
                .source_pll = "plld",
                .cm_reg = CM_PLLD,
@@ -1760,6 +1801,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .hold_mask = CM_PLLD_HOLDDSI0,
                .fixed_divider = 1),
        [BCM2835_PLLD_DSI1]     = REGISTER_PLL_DIV(
+               SOC_ALL,
                .name = "plld_dsi1",
                .source_pll = "plld",
                .cm_reg = CM_PLLD,
@@ -1775,6 +1817,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
         * It is in the HDMI power domain.
         */
        [BCM2835_PLLH]          = REGISTER_PLL(
+               SOC_BCM2835,
                "pllh",
                .cm_ctrl_reg = CM_PLLH,
                .a2w_ctrl_reg = A2W_PLLH_CTRL,
@@ -1789,6 +1832,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .max_rate = 3000000000u,
                .max_fb_rate = BCM2835_MAX_FB_RATE),
        [BCM2835_PLLH_RCAL]     = REGISTER_PLL_DIV(
+               SOC_BCM2835,
                .name = "pllh_rcal",
                .source_pll = "pllh",
                .cm_reg = CM_PLLH,
@@ -1798,6 +1842,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .fixed_divider = 10,
                .flags = CLK_SET_RATE_PARENT),
        [BCM2835_PLLH_AUX]      = REGISTER_PLL_DIV(
+               SOC_BCM2835,
                .name = "pllh_aux",
                .source_pll = "pllh",
                .cm_reg = CM_PLLH,
@@ -1807,6 +1852,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .fixed_divider = 1,
                .flags = CLK_SET_RATE_PARENT),
        [BCM2835_PLLH_PIX]      = REGISTER_PLL_DIV(
+               SOC_BCM2835,
                .name = "pllh_pix",
                .source_pll = "pllh",
                .cm_reg = CM_PLLH,
@@ -1822,6 +1868,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
 
        /* One Time Programmable Memory clock.  Maximum 10Mhz. */
        [BCM2835_CLOCK_OTP]     = REGISTER_OSC_CLK(
+               SOC_ALL,
                .name = "otp",
                .ctl_reg = CM_OTPCTL,
                .div_reg = CM_OTPDIV,
@@ -1833,6 +1880,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
         * bythe watchdog timer and the camera pulse generator.
         */
        [BCM2835_CLOCK_TIMER]   = REGISTER_OSC_CLK(
+               SOC_ALL,
                .name = "timer",
                .ctl_reg = CM_TIMERCTL,
                .div_reg = CM_TIMERDIV,
@@ -1843,12 +1891,14 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
         * Generally run at 2Mhz, max 5Mhz.
         */
        [BCM2835_CLOCK_TSENS]   = REGISTER_OSC_CLK(
+               SOC_ALL,
                .name = "tsens",
                .ctl_reg = CM_TSENSCTL,
                .div_reg = CM_TSENSDIV,
                .int_bits = 5,
                .frac_bits = 0),
        [BCM2835_CLOCK_TEC]     = REGISTER_OSC_CLK(
+               SOC_ALL,
                .name = "tec",
                .ctl_reg = CM_TECCTL,
                .div_reg = CM_TECDIV,
@@ -1857,6 +1907,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
 
        /* clocks with vpu parent mux */
        [BCM2835_CLOCK_H264]    = REGISTER_VPU_CLK(
+               SOC_ALL,
                .name = "h264",
                .ctl_reg = CM_H264CTL,
                .div_reg = CM_H264DIV,
@@ -1864,6 +1915,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .frac_bits = 8,
                .tcnt_mux = 1),
        [BCM2835_CLOCK_ISP]     = REGISTER_VPU_CLK(
+               SOC_ALL,
                .name = "isp",
                .ctl_reg = CM_ISPCTL,
                .div_reg = CM_ISPDIV,
@@ -1876,6 +1928,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
         * in the SDRAM controller can't be used.
         */
        [BCM2835_CLOCK_SDRAM]   = REGISTER_VPU_CLK(
+               SOC_ALL,
                .name = "sdram",
                .ctl_reg = CM_SDCCTL,
                .div_reg = CM_SDCDIV,
@@ -1883,6 +1936,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .frac_bits = 0,
                .tcnt_mux = 3),
        [BCM2835_CLOCK_V3D]     = REGISTER_VPU_CLK(
+               SOC_ALL,
                .name = "v3d",
                .ctl_reg = CM_V3DCTL,
                .div_reg = CM_V3DDIV,
@@ -1896,6 +1950,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
         * in various hardware documentation.
         */
        [BCM2835_CLOCK_VPU]     = REGISTER_VPU_CLK(
+               SOC_ALL,
                .name = "vpu",
                .ctl_reg = CM_VPUCTL,
                .div_reg = CM_VPUDIV,
@@ -1907,6 +1962,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
 
        /* clocks with per parent mux */
        [BCM2835_CLOCK_AVEO]    = REGISTER_PER_CLK(
+               SOC_ALL,
                .name = "aveo",
                .ctl_reg = CM_AVEOCTL,
                .div_reg = CM_AVEODIV,
@@ -1914,6 +1970,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .frac_bits = 0,
                .tcnt_mux = 38),
        [BCM2835_CLOCK_CAM0]    = REGISTER_PER_CLK(
+               SOC_ALL,
                .name = "cam0",
                .ctl_reg = CM_CAM0CTL,
                .div_reg = CM_CAM0DIV,
@@ -1921,6 +1978,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .frac_bits = 8,
                .tcnt_mux = 14),
        [BCM2835_CLOCK_CAM1]    = REGISTER_PER_CLK(
+               SOC_ALL,
                .name = "cam1",
                .ctl_reg = CM_CAM1CTL,
                .div_reg = CM_CAM1DIV,
@@ -1928,12 +1986,14 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .frac_bits = 8,
                .tcnt_mux = 15),
        [BCM2835_CLOCK_DFT]     = REGISTER_PER_CLK(
+               SOC_ALL,
                .name = "dft",
                .ctl_reg = CM_DFTCTL,
                .div_reg = CM_DFTDIV,
                .int_bits = 5,
                .frac_bits = 0),
        [BCM2835_CLOCK_DPI]     = REGISTER_PER_CLK(
+               SOC_ALL,
                .name = "dpi",
                .ctl_reg = CM_DPICTL,
                .div_reg = CM_DPIDIV,
@@ -1943,6 +2003,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
 
        /* Arasan EMMC clock */
        [BCM2835_CLOCK_EMMC]    = REGISTER_PER_CLK(
+               SOC_ALL,
                .name = "emmc",
                .ctl_reg = CM_EMMCCTL,
                .div_reg = CM_EMMCDIV,
@@ -1950,8 +2011,19 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .frac_bits = 8,
                .tcnt_mux = 39),
 
+       /* EMMC2 clock (only available for BCM2711) */
+       [BCM2711_CLOCK_EMMC2]   = REGISTER_PER_CLK(
+               SOC_BCM2711,
+               .name = "emmc2",
+               .ctl_reg = CM_EMMC2CTL,
+               .div_reg = CM_EMMC2DIV,
+               .int_bits = 4,
+               .frac_bits = 8,
+               .tcnt_mux = 42),
+
        /* General purpose (GPIO) clocks */
        [BCM2835_CLOCK_GP0]     = REGISTER_PER_CLK(
+               SOC_ALL,
                .name = "gp0",
                .ctl_reg = CM_GP0CTL,
                .div_reg = CM_GP0DIV,
@@ -1960,6 +2032,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .is_mash_clock = true,
                .tcnt_mux = 20),
        [BCM2835_CLOCK_GP1]     = REGISTER_PER_CLK(
+               SOC_ALL,
                .name = "gp1",
                .ctl_reg = CM_GP1CTL,
                .div_reg = CM_GP1DIV,
@@ -1969,6 +2042,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .is_mash_clock = true,
                .tcnt_mux = 21),
        [BCM2835_CLOCK_GP2]     = REGISTER_PER_CLK(
+               SOC_ALL,
                .name = "gp2",
                .ctl_reg = CM_GP2CTL,
                .div_reg = CM_GP2DIV,
@@ -1978,6 +2052,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
 
        /* HDMI state machine */
        [BCM2835_CLOCK_HSM]     = REGISTER_PER_CLK(
+               SOC_ALL,
                .name = "hsm",
                .ctl_reg = CM_HSMCTL,
                .div_reg = CM_HSMDIV,
@@ -1985,6 +2060,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .frac_bits = 8,
                .tcnt_mux = 22),
        [BCM2835_CLOCK_PCM]     = REGISTER_PCM_CLK(
+               SOC_ALL,
                .name = "pcm",
                .ctl_reg = CM_PCMCTL,
                .div_reg = CM_PCMDIV,
@@ -1994,6 +2070,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .low_jitter = true,
                .tcnt_mux = 23),
        [BCM2835_CLOCK_PWM]     = REGISTER_PER_CLK(
+               SOC_ALL,
                .name = "pwm",
                .ctl_reg = CM_PWMCTL,
                .div_reg = CM_PWMDIV,
@@ -2002,6 +2079,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .is_mash_clock = true,
                .tcnt_mux = 24),
        [BCM2835_CLOCK_SLIM]    = REGISTER_PER_CLK(
+               SOC_ALL,
                .name = "slim",
                .ctl_reg = CM_SLIMCTL,
                .div_reg = CM_SLIMDIV,
@@ -2010,6 +2088,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .is_mash_clock = true,
                .tcnt_mux = 25),
        [BCM2835_CLOCK_SMI]     = REGISTER_PER_CLK(
+               SOC_ALL,
                .name = "smi",
                .ctl_reg = CM_SMICTL,
                .div_reg = CM_SMIDIV,
@@ -2017,6 +2096,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .frac_bits = 8,
                .tcnt_mux = 27),
        [BCM2835_CLOCK_UART]    = REGISTER_PER_CLK(
+               SOC_ALL,
                .name = "uart",
                .ctl_reg = CM_UARTCTL,
                .div_reg = CM_UARTDIV,
@@ -2026,6 +2106,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
 
        /* TV encoder clock.  Only operating frequency is 108Mhz.  */
        [BCM2835_CLOCK_VEC]     = REGISTER_PER_CLK(
+               SOC_ALL,
                .name = "vec",
                .ctl_reg = CM_VECCTL,
                .div_reg = CM_VECDIV,
@@ -2040,6 +2121,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
 
        /* dsi clocks */
        [BCM2835_CLOCK_DSI0E]   = REGISTER_PER_CLK(
+               SOC_ALL,
                .name = "dsi0e",
                .ctl_reg = CM_DSI0ECTL,
                .div_reg = CM_DSI0EDIV,
@@ -2047,6 +2129,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .frac_bits = 8,
                .tcnt_mux = 18),
        [BCM2835_CLOCK_DSI1E]   = REGISTER_PER_CLK(
+               SOC_ALL,
                .name = "dsi1e",
                .ctl_reg = CM_DSI1ECTL,
                .div_reg = CM_DSI1EDIV,
@@ -2054,6 +2137,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .frac_bits = 8,
                .tcnt_mux = 19),
        [BCM2835_CLOCK_DSI0P]   = REGISTER_DSI0_CLK(
+               SOC_ALL,
                .name = "dsi0p",
                .ctl_reg = CM_DSI0PCTL,
                .div_reg = CM_DSI0PDIV,
@@ -2061,6 +2145,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .frac_bits = 0,
                .tcnt_mux = 12),
        [BCM2835_CLOCK_DSI1P]   = REGISTER_DSI1_CLK(
+               SOC_ALL,
                .name = "dsi1p",
                .ctl_reg = CM_DSI1PCTL,
                .div_reg = CM_DSI1PDIV,
@@ -2077,6 +2162,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
         * non-stop vpu clock.
         */
        [BCM2835_CLOCK_PERI_IMAGE] = REGISTER_GATE(
+               SOC_ALL,
                .name = "peri_image",
                .parent = "vpu",
                .ctl_reg = CM_PERIICTL),
@@ -2109,9 +2195,14 @@ static int bcm2835_clk_probe(struct platform_device *pdev)
        struct resource *res;
        const struct bcm2835_clk_desc *desc;
        const size_t asize = ARRAY_SIZE(clk_desc_array);
+       const struct cprman_plat_data *pdata;
        size_t i;
        int ret;
 
+       pdata = of_device_get_match_data(&pdev->dev);
+       if (!pdata)
+               return -ENODEV;
+
        cprman = devm_kzalloc(dev,
                              struct_size(cprman, onecell.hws, asize),
                              GFP_KERNEL);
@@ -2147,8 +2238,10 @@ static int bcm2835_clk_probe(struct platform_device *pdev)
 
        for (i = 0; i < asize; i++) {
                desc = &clk_desc_array[i];
-               if (desc->clk_register && desc->data)
+               if (desc->clk_register && desc->data &&
+                   (desc->supported & pdata->soc)) {
                        hws[i] = desc->clk_register(cprman, desc->data);
+               }
        }
 
        ret = bcm2835_mark_sdc_parent_critical(hws[BCM2835_CLOCK_SDRAM]->clk);
@@ -2159,8 +2252,17 @@ static int bcm2835_clk_probe(struct platform_device *pdev)
                                      &cprman->onecell);
 }
 
+static const struct cprman_plat_data cprman_bcm2835_plat_data = {
+       .soc = SOC_BCM2835,
+};
+
+static const struct cprman_plat_data cprman_bcm2711_plat_data = {
+       .soc = SOC_BCM2711,
+};
+
 static const struct of_device_id bcm2835_clk_of_match[] = {
-       { .compatible = "brcm,bcm2835-cprman", },
+       { .compatible = "brcm,bcm2835-cprman", .data = &cprman_bcm2835_plat_data },
+       { .compatible = "brcm,bcm2711-cprman", .data = &cprman_bcm2711_plat_data },
        {}
 };
 MODULE_DEVICE_TABLE(of, bcm2835_clk_of_match);
index 9e1dcd4..98e8849 100644 (file)
@@ -146,7 +146,6 @@ static int clk_bcm63xx_probe(struct platform_device *pdev)
 {
        const struct clk_bcm63xx_table_entry *entry, *table;
        struct clk_bcm63xx_hw *hw;
-       struct resource *r;
        u8 maxbit = 0;
        int i, ret;
 
@@ -170,8 +169,7 @@ static int clk_bcm63xx_probe(struct platform_device *pdev)
        for (i = 0; i < maxbit; i++)
                hw->data.hws[i] = ERR_PTR(-ENODEV);
 
-       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       hw->regs = devm_ioremap_resource(&pdev->dev, r);
+       hw->regs = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(hw->regs))
                return PTR_ERR(hw->regs);
 
index 42b4df6..abf06fb 100644 (file)
@@ -1,19 +1,19 @@
 // SPDX-License-Identifier: GPL-2.0+
+// Copyright IBM Corp
 
 #define pr_fmt(fmt) "clk-aspeed: " fmt
 
-#include <linux/clk-provider.h>
 #include <linux/mfd/syscon.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
-#include <linux/reset-controller.h>
 #include <linux/slab.h>
-#include <linux/spinlock.h>
 
 #include <dt-bindings/clock/aspeed-clock.h>
 
+#include "clk-aspeed.h"
+
 #define ASPEED_NUM_CLKS                36
 
 #define ASPEED_RESET2_OFFSET   32
@@ -42,48 +42,6 @@ static struct clk_hw_onecell_data *aspeed_clk_data;
 
 static void __iomem *scu_base;
 
-/**
- * struct aspeed_gate_data - Aspeed gated clocks
- * @clock_idx: bit used to gate this clock in the clock register
- * @reset_idx: bit used to reset this IP in the reset register. -1 if no
- *             reset is required when enabling the clock
- * @name: the clock name
- * @parent_name: the name of the parent clock
- * @flags: standard clock framework flags
- */
-struct aspeed_gate_data {
-       u8              clock_idx;
-       s8              reset_idx;
-       const char      *name;
-       const char      *parent_name;
-       unsigned long   flags;
-};
-
-/**
- * struct aspeed_clk_gate - Aspeed specific clk_gate structure
- * @hw:                handle between common and hardware-specific interfaces
- * @reg:       register controlling gate
- * @clock_idx: bit used to gate this clock in the clock register
- * @reset_idx: bit used to reset this IP in the reset register. -1 if no
- *             reset is required when enabling the clock
- * @flags:     hardware-specific flags
- * @lock:      register lock
- *
- * Some of the clocks in the Aspeed SoC must be put in reset before enabling.
- * This modified version of clk_gate allows an optional reset bit to be
- * specified.
- */
-struct aspeed_clk_gate {
-       struct clk_hw   hw;
-       struct regmap   *map;
-       u8              clock_idx;
-       s8              reset_idx;
-       u8              flags;
-       spinlock_t      *lock;
-};
-
-#define to_aspeed_clk_gate(_hw) container_of(_hw, struct aspeed_clk_gate, hw)
-
 /* TODO: ask Aspeed about the actual parent data */
 static const struct aspeed_gate_data aspeed_gates[] = {
        /*                               clk rst   name                 parent  flags */
@@ -208,13 +166,6 @@ static struct clk_hw *aspeed_ast2500_calc_pll(const char *name, u32 val)
                        mult, div);
 }
 
-struct aspeed_clk_soc_data {
-       const struct clk_div_table *div_table;
-       const struct clk_div_table *eclk_div_table;
-       const struct clk_div_table *mac_div_table;
-       struct clk_hw *(*calc_pll)(const char *name, u32 val);
-};
-
 static const struct aspeed_clk_soc_data ast2500_data = {
        .div_table = ast2500_div_table,
        .eclk_div_table = ast2500_eclk_div_table,
@@ -315,18 +266,6 @@ static const struct clk_ops aspeed_clk_gate_ops = {
        .is_enabled = aspeed_clk_is_enabled,
 };
 
-/**
- * struct aspeed_reset - Aspeed reset controller
- * @map: regmap to access the containing system controller
- * @rcdev: reset controller device
- */
-struct aspeed_reset {
-       struct regmap                   *map;
-       struct reset_controller_dev     rcdev;
-};
-
-#define to_aspeed_reset(p) container_of((p), struct aspeed_reset, rcdev)
-
 static const u8 aspeed_resets[] = {
        /* SCU04 resets */
        [ASPEED_RESET_XDMA]     = 25,
@@ -500,9 +439,14 @@ static int aspeed_clk_probe(struct platform_device *pdev)
                return PTR_ERR(hw);
        aspeed_clk_data->hws[ASPEED_CLK_MPLL] = hw;
 
-       /* SD/SDIO clock divider (TODO: There's a gate too) */
-       hw = clk_hw_register_divider_table(dev, "sdio", "hpll", 0,
-                       scu_base + ASPEED_CLK_SELECTION, 12, 3, 0,
+       /* SD/SDIO clock divider and gate */
+       hw = clk_hw_register_gate(dev, "sd_extclk_gate", "hpll", 0,
+                                 scu_base + ASPEED_CLK_SELECTION, 15, 0,
+                                 &aspeed_clk_lock);
+       if (IS_ERR(hw))
+               return PTR_ERR(hw);
+       hw = clk_hw_register_divider_table(dev, "sd_extclk", "sd_extclk_gate",
+                       0, scu_base + ASPEED_CLK_SELECTION, 12, 3, 0,
                        soc_data->div_table,
                        &aspeed_clk_lock);
        if (IS_ERR(hw))
diff --git a/drivers/clk/clk-aspeed.h b/drivers/clk/clk-aspeed.h
new file mode 100644 (file)
index 0000000..5296b15
--- /dev/null
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Structures used by ASPEED clock drivers
+ *
+ * Copyright 2019 IBM Corp.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/kernel.h>
+#include <linux/reset-controller.h>
+#include <linux/spinlock.h>
+
+struct clk_div_table;
+struct regmap;
+
+/**
+ * struct aspeed_gate_data - Aspeed gated clocks
+ * @clock_idx: bit used to gate this clock in the clock register
+ * @reset_idx: bit used to reset this IP in the reset register. -1 if no
+ *             reset is required when enabling the clock
+ * @name: the clock name
+ * @parent_name: the name of the parent clock
+ * @flags: standard clock framework flags
+ */
+struct aspeed_gate_data {
+       u8              clock_idx;
+       s8              reset_idx;
+       const char      *name;
+       const char      *parent_name;
+       unsigned long   flags;
+};
+
+/**
+ * struct aspeed_clk_gate - Aspeed specific clk_gate structure
+ * @hw:                handle between common and hardware-specific interfaces
+ * @reg:       register controlling gate
+ * @clock_idx: bit used to gate this clock in the clock register
+ * @reset_idx: bit used to reset this IP in the reset register. -1 if no
+ *             reset is required when enabling the clock
+ * @flags:     hardware-specific flags
+ * @lock:      register lock
+ *
+ * Some of the clocks in the Aspeed SoC must be put in reset before enabling.
+ * This modified version of clk_gate allows an optional reset bit to be
+ * specified.
+ */
+struct aspeed_clk_gate {
+       struct clk_hw   hw;
+       struct regmap   *map;
+       u8              clock_idx;
+       s8              reset_idx;
+       u8              flags;
+       spinlock_t      *lock;
+};
+
+#define to_aspeed_clk_gate(_hw) container_of(_hw, struct aspeed_clk_gate, hw)
+
+/**
+ * struct aspeed_reset - Aspeed reset controller
+ * @map: regmap to access the containing system controller
+ * @rcdev: reset controller device
+ */
+struct aspeed_reset {
+       struct regmap                   *map;
+       struct reset_controller_dev     rcdev;
+};
+
+#define to_aspeed_reset(p) container_of((p), struct aspeed_reset, rcdev)
+
+/**
+ * struct aspeed_clk_soc_data - Aspeed SoC specific divisor information
+ * @div_table: Common divider lookup table
+ * @eclk_div_table: Divider lookup table for ECLK
+ * @mac_div_table: Divider lookup table for MAC (Ethernet) clocks
+ * @calc_pll: Callback to calculate common PLL settings
+ */
+struct aspeed_clk_soc_data {
+       const struct clk_div_table *div_table;
+       const struct clk_div_table *eclk_div_table;
+       const struct clk_div_table *mac_div_table;
+       struct clk_hw *(*calc_pll)(const char *name, u32 val);
+};
diff --git a/drivers/clk/clk-ast2600.c b/drivers/clk/clk-ast2600.c
new file mode 100644 (file)
index 0000000..1c1bb39
--- /dev/null
@@ -0,0 +1,704 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright IBM Corp
+// Copyright ASPEED Technology
+
+#define pr_fmt(fmt) "clk-ast2600: " fmt
+
+#include <linux/mfd/syscon.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#include <dt-bindings/clock/ast2600-clock.h>
+
+#include "clk-aspeed.h"
+
+#define ASPEED_G6_NUM_CLKS             67
+
+#define ASPEED_G6_SILICON_REV          0x004
+
+#define ASPEED_G6_RESET_CTRL           0x040
+#define ASPEED_G6_RESET_CTRL2          0x050
+
+#define ASPEED_G6_CLK_STOP_CTRL                0x080
+#define ASPEED_G6_CLK_STOP_CTRL2       0x090
+
+#define ASPEED_G6_MISC_CTRL            0x0C0
+#define  UART_DIV13_EN                 BIT(12)
+
+#define ASPEED_G6_CLK_SELECTION1       0x300
+#define ASPEED_G6_CLK_SELECTION2       0x304
+#define ASPEED_G6_CLK_SELECTION4       0x310
+
+#define ASPEED_HPLL_PARAM              0x200
+#define ASPEED_APLL_PARAM              0x210
+#define ASPEED_MPLL_PARAM              0x220
+#define ASPEED_EPLL_PARAM              0x240
+#define ASPEED_DPLL_PARAM              0x260
+
+#define ASPEED_G6_STRAP1               0x500
+
+/* Globally visible clocks */
+static DEFINE_SPINLOCK(aspeed_g6_clk_lock);
+
+/* Keeps track of all clocks */
+static struct clk_hw_onecell_data *aspeed_g6_clk_data;
+
+static void __iomem *scu_g6_base;
+
+/*
+ * Clocks marked with CLK_IS_CRITICAL:
+ *
+ *  ref0 and ref1 are essential for the SoC to operate
+ *  mpll is required if SDRAM is used
+ */
+static const struct aspeed_gate_data aspeed_g6_gates[] = {
+       /*                                  clk rst  name               parent   flags */
+       [ASPEED_CLK_GATE_MCLK]          = {  0, -1, "mclk-gate",        "mpll",  CLK_IS_CRITICAL }, /* SDRAM */
+       [ASPEED_CLK_GATE_ECLK]          = {  1, -1, "eclk-gate",        "eclk",  0 },   /* Video Engine */
+       [ASPEED_CLK_GATE_GCLK]          = {  2,  7, "gclk-gate",        NULL,    0 },   /* 2D engine */
+       /* vclk parent - dclk/d1clk/hclk/mclk */
+       [ASPEED_CLK_GATE_VCLK]          = {  3,  6, "vclk-gate",        NULL,    0 },   /* Video Capture */
+       [ASPEED_CLK_GATE_BCLK]          = {  4,  8, "bclk-gate",        "bclk",  0 }, /* PCIe/PCI */
+       /* From dpll */
+       [ASPEED_CLK_GATE_DCLK]          = {  5, -1, "dclk-gate",        NULL,    CLK_IS_CRITICAL }, /* DAC */
+       [ASPEED_CLK_GATE_REF0CLK]       = {  6, -1, "ref0clk-gate",     "clkin", CLK_IS_CRITICAL },
+       [ASPEED_CLK_GATE_USBPORT2CLK]   = {  7,  3, "usb-port2-gate",   NULL,    0 },   /* USB2.0 Host port 2 */
+       /* Reserved 8 */
+       [ASPEED_CLK_GATE_USBUHCICLK]    = {  9, 15, "usb-uhci-gate",    NULL,    0 },   /* USB1.1 (requires port 2 enabled) */
+       /* From dpll/epll/40mhz usb p1 phy/gpioc6/dp phy pll */
+       [ASPEED_CLK_GATE_D1CLK]         = { 10, 13, "d1clk-gate",       "d1clk", 0 },   /* GFX CRT */
+       /* Reserved 11/12 */
+       [ASPEED_CLK_GATE_YCLK]          = { 13,  4, "yclk-gate",        NULL,    0 },   /* HAC */
+       [ASPEED_CLK_GATE_USBPORT1CLK]   = { 14, 14, "usb-port1-gate",   NULL,    0 },   /* USB2 hub/USB2 host port 1/USB1.1 dev */
+       [ASPEED_CLK_GATE_UART5CLK]      = { 15, -1, "uart5clk-gate",    "uart",  0 },   /* UART5 */
+       /* Reserved 16-19 */
+       [ASPEED_CLK_GATE_MAC1CLK]       = { 20, 11, "mac1clk-gate",     "mac12", 0 },   /* MAC1 */
+       [ASPEED_CLK_GATE_MAC2CLK]       = { 21, 12, "mac2clk-gate",     "mac12", 0 },   /* MAC2 */
+       /* Reserved 22/23 */
+       [ASPEED_CLK_GATE_RSACLK]        = { 24,  4, "rsaclk-gate",      NULL,    0 },   /* HAC */
+       [ASPEED_CLK_GATE_RVASCLK]       = { 25,  9, "rvasclk-gate",     NULL,    0 },   /* RVAS */
+       /* Reserved 26 */
+       [ASPEED_CLK_GATE_EMMCCLK]       = { 27, 16, "emmcclk-gate",     NULL,    0 },   /* For card clk */
+       /* Reserved 28/29/30 */
+       [ASPEED_CLK_GATE_LCLK]          = { 32, 32, "lclk-gate",        NULL,    0 }, /* LPC */
+       [ASPEED_CLK_GATE_ESPICLK]       = { 33, -1, "espiclk-gate",     NULL,    0 }, /* eSPI */
+       [ASPEED_CLK_GATE_REF1CLK]       = { 34, -1, "ref1clk-gate",     "clkin", CLK_IS_CRITICAL },
+       /* Reserved 35 */
+       [ASPEED_CLK_GATE_SDCLK]         = { 36, 56, "sdclk-gate",       NULL,    0 },   /* SDIO/SD */
+       [ASPEED_CLK_GATE_LHCCLK]        = { 37, -1, "lhclk-gate",       "lhclk", 0 },   /* LPC master/LPC+ */
+       /* Reserved 38 RSA: no longer used */
+       /* Reserved 39 */
+       [ASPEED_CLK_GATE_I3C0CLK]       = { 40,  40, "i3c0clk-gate",    NULL,    0 },   /* I3C0 */
+       [ASPEED_CLK_GATE_I3C1CLK]       = { 41,  41, "i3c1clk-gate",    NULL,    0 },   /* I3C1 */
+       [ASPEED_CLK_GATE_I3C2CLK]       = { 42,  42, "i3c2clk-gate",    NULL,    0 },   /* I3C2 */
+       [ASPEED_CLK_GATE_I3C3CLK]       = { 43,  43, "i3c3clk-gate",    NULL,    0 },   /* I3C3 */
+       [ASPEED_CLK_GATE_I3C4CLK]       = { 44,  44, "i3c4clk-gate",    NULL,    0 },   /* I3C4 */
+       [ASPEED_CLK_GATE_I3C5CLK]       = { 45,  45, "i3c5clk-gate",    NULL,    0 },   /* I3C5 */
+       [ASPEED_CLK_GATE_I3C6CLK]       = { 46,  46, "i3c6clk-gate",    NULL,    0 },   /* I3C6 */
+       [ASPEED_CLK_GATE_I3C7CLK]       = { 47,  47, "i3c7clk-gate",    NULL,    0 },   /* I3C7 */
+       [ASPEED_CLK_GATE_UART1CLK]      = { 48,  -1, "uart1clk-gate",   "uart",  0 },   /* UART1 */
+       [ASPEED_CLK_GATE_UART2CLK]      = { 49,  -1, "uart2clk-gate",   "uart",  0 },   /* UART2 */
+       [ASPEED_CLK_GATE_UART3CLK]      = { 50,  -1, "uart3clk-gate",   "uart",  0 },   /* UART3 */
+       [ASPEED_CLK_GATE_UART4CLK]      = { 51,  -1, "uart4clk-gate",   "uart",  0 },   /* UART4 */
+       [ASPEED_CLK_GATE_MAC3CLK]       = { 52,  52, "mac3clk-gate",    "mac34", 0 },   /* MAC3 */
+       [ASPEED_CLK_GATE_MAC4CLK]       = { 53,  53, "mac4clk-gate",    "mac34", 0 },   /* MAC4 */
+       [ASPEED_CLK_GATE_UART6CLK]      = { 54,  -1, "uart6clk-gate",   "uartx", 0 },   /* UART6 */
+       [ASPEED_CLK_GATE_UART7CLK]      = { 55,  -1, "uart7clk-gate",   "uartx", 0 },   /* UART7 */
+       [ASPEED_CLK_GATE_UART8CLK]      = { 56,  -1, "uart8clk-gate",   "uartx", 0 },   /* UART8 */
+       [ASPEED_CLK_GATE_UART9CLK]      = { 57,  -1, "uart9clk-gate",   "uartx", 0 },   /* UART9 */
+       [ASPEED_CLK_GATE_UART10CLK]     = { 58,  -1, "uart10clk-gate",  "uartx", 0 },   /* UART10 */
+       [ASPEED_CLK_GATE_UART11CLK]     = { 59,  -1, "uart11clk-gate",  "uartx", 0 },   /* UART11 */
+       [ASPEED_CLK_GATE_UART12CLK]     = { 60,  -1, "uart12clk-gate",  "uartx", 0 },   /* UART12 */
+       [ASPEED_CLK_GATE_UART13CLK]     = { 61,  -1, "uart13clk-gate",  "uartx", 0 },   /* UART13 */
+       [ASPEED_CLK_GATE_FSICLK]        = { 62,  59, "fsiclk-gate",     NULL,    0 },   /* FSI */
+};
+
+static const char * const eclk_parent_names[] = { "mpll", "hpll", "dpll" };
+
+/*
+ * ECLK divider lookup: { register field value, divisor }.
+ * Field values 0 and 1 both select divide-by-2; the { 0 } entry terminates
+ * the table.
+ */
+static const struct clk_div_table ast2600_eclk_div_table[] = {
+       { 0x0, 2 },
+       { 0x1, 2 },
+       { 0x2, 3 },
+       { 0x3, 4 },
+       { 0x4, 5 },
+       { 0x5, 6 },
+       { 0x6, 7 },
+       { 0x7, 8 },
+       { 0 }
+};
+
+/*
+ * MAC clock divider lookup: { register field value, divisor }.
+ * Field values 0 and 1 both select divide-by-4; the { 0 } entry terminates
+ * the table.
+ */
+static const struct clk_div_table ast2600_mac_div_table[] = {
+       { 0x0, 4 },
+       { 0x1, 4 },
+       { 0x2, 6 },
+       { 0x3, 8 },
+       { 0x4, 10 },
+       { 0x5, 12 },
+       { 0x6, 14 },
+       { 0x7, 16 },
+       { 0 }
+};
+
+/*
+ * Common divider lookup: { register field value, divisor }, i.e. the
+ * divisor is 4 * (field + 1). The { 0 } entry terminates the table.
+ */
+static const struct clk_div_table ast2600_div_table[] = {
+       { 0x0, 4 },
+       { 0x1, 8 },
+       { 0x2, 12 },
+       { 0x3, 16 },
+       { 0x4, 20 },
+       { 0x5, 24 },
+       { 0x6, 28 },
+       { 0x7, 32 },
+       { 0 }
+};
+
+/**
+ * ast2600_calc_pll() - register a fixed-factor clock for an AST2600 PLL
+ * @name: name of the clock to register
+ * @val: raw value read from the PLL parameter register
+ *
+ * For hpll/dpll/epll/mpll. The PLL is modelled as a fixed-factor child of
+ * "clkin". When bit 24 of @val is set the PLL is in pass-through mode and
+ * the factor is 1/1.
+ *
+ * Return: whatever clk_hw_register_fixed_factor() returns.
+ */
+static struct clk_hw *ast2600_calc_pll(const char *name, u32 val)
+{
+       unsigned int mult, div;
+
+       if (val & BIT(24)) {
+               /* Pass through mode */
+               mult = div = 1;
+       } else {
+               /*
+                * F = clkin * [(m + 1) / (n + 1)] / (p + 1)
+                *
+                * Keep (m + 1) as the multiplier and fold (n + 1) into the
+                * divider so that a non-integer (m + 1) / (n + 1) ratio is
+                * not truncated before the rate is derived.
+                *
+                * NOTE(review): this comment used to say (M + 2) while the
+                * code used (m + 1); the code's value is kept here. Confirm
+                * against the datasheet.
+                */
+               u32 m = val & 0x1fff;
+               u32 n = (val >> 13) & 0x3f;
+               u32 p = (val >> 19) & 0xf;
+
+               mult = m + 1;
+               div = (n + 1) * (p + 1);
+       }
+       return clk_hw_register_fixed_factor(NULL, name, "clkin", 0,
+                       mult, div);
+}
+
+/*
+ * Register "apll"-style PLLs as a fixed-factor child of "clkin".
+ * Bit 20 of @val selects pass-through (factor 1/1); otherwise
+ * F = clkin * (2 - od) * [(m + 2) / (n + 1)].
+ */
+static struct clk_hw *ast2600_calc_apll(const char *name, u32 val)
+{
+       /* Pass-through unless the PLL fields say otherwise */
+       unsigned int mult = 1;
+       unsigned int div = 1;
+
+       if (!(val & BIT(20))) {
+               u32 n = val & 0xf;
+               u32 od = (val >> 4) & 0x1;
+               u32 m = (val >> 5) & 0x3f;
+
+               div = n + 1;
+               mult = (2 - od) * (m + 2);
+       }
+
+       return clk_hw_register_fixed_factor(NULL, name, "clkin", 0,
+                       mult, div);
+}
+
+/* Mask for bit @idx within its 32-bit register (the index is taken modulo 32) */
+static u32 get_bit(u8 idx)
+{
+       u8 pos = idx % 32;
+
+       return BIT(pos);
+}
+
+/* Reset control register holding @gate's reset bit: indices >= 32 live in CTRL2 */
+static u32 get_reset_reg(struct aspeed_clk_gate *gate)
+{
+       return (gate->reset_idx >= 32) ? ASPEED_G6_RESET_CTRL2 :
+                                        ASPEED_G6_RESET_CTRL;
+}
+
+/* Clock stop control register holding @gate's clock bit: indices >= 32 live in CTRL2 */
+static u32 get_clock_reg(struct aspeed_clk_gate *gate)
+{
+       return (gate->clock_idx >= 32) ? ASPEED_G6_CLK_STOP_CTRL2 :
+                                        ASPEED_G6_CLK_STOP_CTRL;
+}
+
+/*
+ * Report whether a gate is enabled: returns 0 when the IP is held in reset
+ * or its clock is gated, 1 otherwise.
+ */
+static int aspeed_g6_clk_is_enabled(struct clk_hw *hw)
+{
+       struct aspeed_clk_gate *gate = to_aspeed_clk_gate(hw);
+       u32 clk_mask = get_bit(gate->clock_idx);
+       u32 rst_mask = get_bit(gate->reset_idx);
+       u32 regval;
+
+       /*
+        * An IP that is still in reset counts as "not enabled". Some
+        * clocks (e.g. USB) come out of cold reset that way, and without
+        * this check aspeed_g6_clk_enable() would return early and never
+        * lift the reset.
+        */
+       if (gate->reset_idx >= 0) {
+               regmap_read(gate->map, get_reset_reg(gate), &regval);
+               if (regval & rst_mask)
+                       return 0;
+       }
+
+       regmap_read(gate->map, get_clock_reg(gate), &regval);
+
+       /* Set-to-disable gates are running when their bit is clear */
+       if (gate->flags & CLK_GATE_SET_TO_DISABLE)
+               return !(regval & clk_mask);
+
+       return !!(regval & clk_mask);
+}
+
+/*
+ * Enable a gate clock. If the gate has an associated reset line, the IP is
+ * put in reset before the clock is started and released from reset
+ * afterwards. Does nothing if the clock is already reported as enabled.
+ *
+ * Each control register has a companion "write 1 to clear" register at
+ * offset +0x4; writing a bit to the base register sets it.
+ *
+ * Always returns 0.
+ */
+static int aspeed_g6_clk_enable(struct clk_hw *hw)
+{
+       struct aspeed_clk_gate *gate = to_aspeed_clk_gate(hw);
+       unsigned long flags;
+       u32 clk = get_bit(gate->clock_idx);
+       u32 rst = get_bit(gate->reset_idx);
+
+       spin_lock_irqsave(gate->lock, flags);
+
+       if (aspeed_g6_clk_is_enabled(hw)) {
+               spin_unlock_irqrestore(gate->lock, flags);
+               return 0;
+       }
+
+       if (gate->reset_idx >= 0) {
+               /* Put IP in reset */
+               regmap_write(gate->map, get_reset_reg(gate), rst);
+               /* Delay 100us */
+               udelay(100);
+       }
+
+       /*
+        * Enable clock. This must be the inverse of what
+        * aspeed_g6_clk_disable() does for the same kind of gate.
+        */
+       if (gate->flags & CLK_GATE_SET_TO_DISABLE) {
+               /* Bit set means gated: clear it via the clear register */
+               regmap_write(gate->map, get_clock_reg(gate) + 0x04, clk);
+       } else {
+               /* Bit set means running: set it via the base register */
+               regmap_write(gate->map, get_clock_reg(gate), clk);
+       }
+
+       if (gate->reset_idx >= 0) {
+               /* A delay of 10ms is specified by the ASPEED docs */
+               mdelay(10);
+               /* Take IP out of reset */
+               regmap_write(gate->map, get_reset_reg(gate) + 0x4, rst);
+       }
+
+       spin_unlock_irqrestore(gate->lock, flags);
+
+       return 0;
+}
+
+/*
+ * Gate a clock. Set-to-disable gates have their bit set through the base
+ * register; the others have their bit cleared through the companion
+ * clear register at offset +0x4.
+ */
+static void aspeed_g6_clk_disable(struct clk_hw *hw)
+{
+       struct aspeed_clk_gate *gate = to_aspeed_clk_gate(hw);
+       u32 mask = get_bit(gate->clock_idx);
+       u32 reg = get_clock_reg(gate);
+       unsigned long irqflags;
+
+       /* Use set to clear register */
+       if (!(gate->flags & CLK_GATE_SET_TO_DISABLE))
+               reg += 0x4;
+
+       spin_lock_irqsave(gate->lock, irqflags);
+       regmap_write(gate->map, reg, mask);
+       spin_unlock_irqrestore(gate->lock, irqflags);
+}
+
+static const struct clk_ops aspeed_g6_clk_gate_ops = {
+       .enable = aspeed_g6_clk_enable,
+       .disable = aspeed_g6_clk_disable,
+       .is_enabled = aspeed_g6_clk_is_enabled,
+};
+
+/* Release reset line @id by writing its bit to the clear register (+0x04) */
+static int aspeed_g6_reset_deassert(struct reset_controller_dev *rcdev,
+                                   unsigned long id)
+{
+       struct aspeed_reset *ar = to_aspeed_reset(rcdev);
+       u32 mask = get_bit(id);
+       u32 reg = ASPEED_G6_RESET_CTRL;
+
+       if (id >= 32)
+               reg = ASPEED_G6_RESET_CTRL2;
+
+       /* Use set to clear register */
+       return regmap_write(ar->map, reg + 0x04, mask);
+}
+
+/* Assert reset line @id by writing its bit to the reset control register */
+static int aspeed_g6_reset_assert(struct reset_controller_dev *rcdev,
+                                 unsigned long id)
+{
+       struct aspeed_reset *ar = to_aspeed_reset(rcdev);
+       u32 mask = get_bit(id);
+       u32 reg = ASPEED_G6_RESET_CTRL;
+
+       if (id >= 32)
+               reg = ASPEED_G6_RESET_CTRL2;
+
+       return regmap_write(ar->map, reg, mask);
+}
+
+/*
+ * Read back reset line @id: 1 if its bit is set in the reset control
+ * register, 0 if clear, or the regmap_read() error code.
+ */
+static int aspeed_g6_reset_status(struct reset_controller_dev *rcdev,
+                                 unsigned long id)
+{
+       struct aspeed_reset *ar = to_aspeed_reset(rcdev);
+       u32 mask = get_bit(id);
+       u32 reg = ASPEED_G6_RESET_CTRL;
+       u32 regval;
+       int err;
+
+       if (id >= 32)
+               reg = ASPEED_G6_RESET_CTRL2;
+
+       err = regmap_read(ar->map, reg, &regval);
+       if (err)
+               return err;
+
+       return (regval & mask) ? 1 : 0;
+}
+
+static const struct reset_control_ops aspeed_g6_reset_ops = {
+       .assert = aspeed_g6_reset_assert,
+       .deassert = aspeed_g6_reset_deassert,
+       .status = aspeed_g6_reset_status,
+};
+
+/*
+ * Allocate and register one Aspeed gate clock.
+ *
+ * @dev:            device registering the clock
+ * @name:           clock name
+ * @parent_name:    name of the single parent, or NULL for no parent
+ * @flags:          framework clock flags, stored in clk_init_data
+ * @map:            regmap used for the gate and reset registers
+ * @clock_idx:      bit index of the clock gate
+ * @reset_idx:      bit index of the reset line; stored into the signed
+ *                  gate->reset_idx, where a negative value means "no reset"
+ * @clk_gate_flags: gate flags such as CLK_GATE_SET_TO_DISABLE
+ * @lock:           spinlock taken around register updates
+ *
+ * Returns the registered clk_hw, or an ERR_PTR() on allocation or
+ * registration failure (the gate is freed in the latter case).
+ */
+static struct clk_hw *aspeed_g6_clk_hw_register_gate(struct device *dev,
+               const char *name, const char *parent_name, unsigned long flags,
+               struct regmap *map, u8 clock_idx, u8 reset_idx,
+               u8 clk_gate_flags, spinlock_t *lock)
+{
+       struct aspeed_clk_gate *gate;
+       /* Zero-initialise so members not set below are never stack garbage */
+       struct clk_init_data init = {};
+       struct clk_hw *hw;
+       int ret;
+
+       gate = kzalloc(sizeof(*gate), GFP_KERNEL);
+       if (!gate)
+               return ERR_PTR(-ENOMEM);
+
+       init.name = name;
+       init.ops = &aspeed_g6_clk_gate_ops;
+       init.flags = flags;
+       init.parent_names = parent_name ? &parent_name : NULL;
+       init.num_parents = parent_name ? 1 : 0;
+
+       gate->map = map;
+       gate->clock_idx = clock_idx;
+       gate->reset_idx = reset_idx;
+       gate->flags = clk_gate_flags;
+       gate->lock = lock;
+       gate->hw.init = &init;
+
+       hw = &gate->hw;
+       ret = clk_hw_register(dev, hw);
+       if (ret) {
+               kfree(gate);
+               hw = ERR_PTR(ret);
+       }
+
+       return hw;
+}
+
+static const char * const vclk_parent_names[] = {
+       "dpll",
+       "d1pll",
+       "hclk",
+       "mclk",
+};
+
+static const char * const d1clk_parent_names[] = {
+       "dpll",
+       "epll",
+       "usb-phy-40m",
+       "gpioc6_clkin",
+       "dp_phy_pll",
+};
+
+/*
+ * Platform driver probe: registers the reset controller and every clock
+ * that is not needed for early boot (UART base clocks, dividers, muxes and
+ * the gate clocks), storing each in aspeed_g6_clk_data->hws[].
+ *
+ * Returns 0 on success or a negative errno from the first failing step.
+ *
+ * NOTE(review): clocks registered before a failing step are not
+ * unregistered on the error paths.
+ */
+static int aspeed_g6_clk_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct aspeed_reset *ar;
+       struct regmap *map;
+       struct clk_hw *hw;
+       u32 val, rate;
+       int i, ret;
+
+       map = syscon_node_to_regmap(dev->of_node);
+       if (IS_ERR(map)) {
+               dev_err(dev, "no syscon regmap\n");
+               return PTR_ERR(map);
+       }
+
+       ar = devm_kzalloc(dev, sizeof(*ar), GFP_KERNEL);
+       if (!ar)
+               return -ENOMEM;
+
+       ar->map = map;
+
+       ar->rcdev.owner = THIS_MODULE;
+       ar->rcdev.nr_resets = 64;
+       ar->rcdev.ops = &aspeed_g6_reset_ops;
+       ar->rcdev.of_node = dev->of_node;
+
+       ret = devm_reset_controller_register(dev, &ar->rcdev);
+       if (ret) {
+               dev_err(dev, "could not register reset controller\n");
+               return ret;
+       }
+
+       /* UART clock div13 setting */
+       regmap_read(map, ASPEED_G6_MISC_CTRL, &val);
+       if (val & UART_DIV13_EN)
+               rate = 24000000 / 13;
+       else
+               rate = 24000000;
+       hw = clk_hw_register_fixed_rate(dev, "uart", NULL, 0, rate);
+       if (IS_ERR(hw))
+               return PTR_ERR(hw);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_UART] = hw;
+
+       /* UART6~13 clock div13 setting */
+       regmap_read(map, ASPEED_G6_CLK_STOP_CTRL, &val);
+       if (val & BIT(31))
+               rate = 24000000 / 13;
+       else
+               rate = 24000000;
+       hw = clk_hw_register_fixed_rate(dev, "uartx", NULL, 0, rate);
+       if (IS_ERR(hw))
+               return PTR_ERR(hw);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_UARTX] = hw;
+
+       /* EMMC ext clock divider */
+       hw = clk_hw_register_gate(dev, "emmc_extclk_gate", "hpll", 0,
+                       scu_g6_base + ASPEED_G6_CLK_SELECTION1, 15, 0,
+                       &aspeed_g6_clk_lock);
+       if (IS_ERR(hw))
+               return PTR_ERR(hw);
+       hw = clk_hw_register_divider_table(dev, "emmc_extclk", "emmc_extclk_gate", 0,
+                       scu_g6_base + ASPEED_G6_CLK_SELECTION1, 12, 3, 0,
+                       ast2600_div_table,
+                       &aspeed_g6_clk_lock);
+       if (IS_ERR(hw))
+               return PTR_ERR(hw);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_EMMC] = hw;
+
+       /* SD/SDIO clock divider and gate */
+       hw = clk_hw_register_gate(dev, "sd_extclk_gate", "hpll", 0,
+                       scu_g6_base + ASPEED_G6_CLK_SELECTION4, 31, 0,
+                       &aspeed_g6_clk_lock);
+       if (IS_ERR(hw))
+               return PTR_ERR(hw);
+       hw = clk_hw_register_divider_table(dev, "sd_extclk", "sd_extclk_gate",
+                       0, scu_g6_base + ASPEED_G6_CLK_SELECTION4, 28, 3, 0,
+                       ast2600_div_table,
+                       &aspeed_g6_clk_lock);
+       if (IS_ERR(hw))
+               return PTR_ERR(hw);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_SDIO] = hw;
+
+       /* MAC1/2 AHB bus clock divider */
+       hw = clk_hw_register_divider_table(dev, "mac12", "hpll", 0,
+                       scu_g6_base + ASPEED_G6_CLK_SELECTION1, 16, 3, 0,
+                       ast2600_mac_div_table,
+                       &aspeed_g6_clk_lock);
+       if (IS_ERR(hw))
+               return PTR_ERR(hw);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_MAC12] = hw;
+
+       /* MAC3/4 AHB bus clock divider */
+       hw = clk_hw_register_divider_table(dev, "mac34", "hpll", 0,
+                       scu_g6_base + ASPEED_G6_CLK_SELECTION4, 24, 3, 0,
+                       ast2600_mac_div_table,
+                       &aspeed_g6_clk_lock);
+       if (IS_ERR(hw))
+               return PTR_ERR(hw);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_MAC34] = hw;
+
+       /* LPC Host (LHCLK) clock divider */
+       hw = clk_hw_register_divider_table(dev, "lhclk", "hpll", 0,
+                       scu_g6_base + ASPEED_G6_CLK_SELECTION1, 20, 3, 0,
+                       ast2600_div_table,
+                       &aspeed_g6_clk_lock);
+       if (IS_ERR(hw))
+               return PTR_ERR(hw);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_LHCLK] = hw;
+
+       /* gfx d1clk : use dp clk */
+       regmap_update_bits(map, ASPEED_G6_CLK_SELECTION1, GENMASK(10, 8), BIT(10));
+       /* SoC Display clock selection */
+       hw = clk_hw_register_mux(dev, "d1clk", d1clk_parent_names,
+                       ARRAY_SIZE(d1clk_parent_names), 0,
+                       scu_g6_base + ASPEED_G6_CLK_SELECTION1, 8, 3, 0,
+                       &aspeed_g6_clk_lock);
+       if (IS_ERR(hw))
+               return PTR_ERR(hw);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_D1CLK] = hw;
+
+       /* d1 clk div 0x308[17:15] x [14:12] - 8,7,6,5,4,3,2,1 */
+       regmap_write(map, 0x308, 0x12000); /* 3x3 = 9 */
+
+       /*
+        * P-Bus (BCLK) clock divider
+        *
+        * NOTE(review): this uses the same register field (SELECTION1,
+        * shift 20, width 3) as the LHCLK divider above - confirm against
+        * the datasheet.
+        */
+       hw = clk_hw_register_divider_table(dev, "bclk", "hpll", 0,
+                       scu_g6_base + ASPEED_G6_CLK_SELECTION1, 20, 3, 0,
+                       ast2600_div_table,
+                       &aspeed_g6_clk_lock);
+       if (IS_ERR(hw))
+               return PTR_ERR(hw);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_BCLK] = hw;
+
+       /* Video Capture clock selection */
+       hw = clk_hw_register_mux(dev, "vclk", vclk_parent_names,
+                       ARRAY_SIZE(vclk_parent_names), 0,
+                       scu_g6_base + ASPEED_G6_CLK_SELECTION2, 12, 3, 0,
+                       &aspeed_g6_clk_lock);
+       if (IS_ERR(hw))
+               return PTR_ERR(hw);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_VCLK] = hw;
+
+       /* Video Engine clock divider */
+       hw = clk_hw_register_divider_table(dev, "eclk", NULL, 0,
+                       scu_g6_base + ASPEED_G6_CLK_SELECTION1, 28, 3, 0,
+                       ast2600_eclk_div_table,
+                       &aspeed_g6_clk_lock);
+       if (IS_ERR(hw))
+               return PTR_ERR(hw);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_ECLK] = hw;
+
+       for (i = 0; i < ARRAY_SIZE(aspeed_g6_gates); i++) {
+               const struct aspeed_gate_data *gd = &aspeed_g6_gates[i];
+               u32 gate_flags;
+
+               /*
+                * Special case: the USB port 1 clock (bit 14) is always
+                * working the opposite way from the other ones.
+                */
+               gate_flags = (gd->clock_idx == 14) ? 0 : CLK_GATE_SET_TO_DISABLE;
+               hw = aspeed_g6_clk_hw_register_gate(dev,
+                               gd->name,
+                               gd->parent_name,
+                               gd->flags,
+                               map,
+                               gd->clock_idx,
+                               gd->reset_idx,
+                               gate_flags,
+                               &aspeed_g6_clk_lock);
+               if (IS_ERR(hw))
+                       return PTR_ERR(hw);
+               aspeed_g6_clk_data->hws[i] = hw;
+       }
+
+       return 0;
+}
+
+static const struct of_device_id aspeed_g6_clk_dt_ids[] = {
+       { .compatible = "aspeed,ast2600-scu" },
+       { }
+};
+
+static struct platform_driver aspeed_g6_clk_driver = {
+       .probe  = aspeed_g6_clk_probe,
+       .driver = {
+               .name = "ast2600-clk",
+               .of_match_table = aspeed_g6_clk_dt_ids,
+               .suppress_bind_attrs = true,
+       },
+};
+builtin_platform_driver(aspeed_g6_clk_driver);
+
+static const u32 ast2600_a0_axi_ahb_div_table[] = {
+       2, 2, 3, 5,
+};
+
+static const u32 ast2600_a1_axi_ahb_div_table[] = {
+       4, 6, 2, 4,
+};
+
+/*
+ * Register the clocks that are set up early: the "clkin" reference, the
+ * PLLs, the AHB/APB bus clocks and the USB PHY 40MHz clock. The results
+ * are stored in aspeed_g6_clk_data->hws[]. Return values of the
+ * registration calls are not checked here.
+ */
+static void __init aspeed_g6_cc(struct regmap *map)
+{
+       struct clk_hw *hw;
+       u32 val, div, chip_id, axi_div, ahb_div;
+
+       clk_hw_register_fixed_rate(NULL, "clkin", NULL, 0, 25000000);
+
+       /*
+        * High-speed PLL clock derived from the crystal. This is the CPU
+        * clock, and we assume that it is enabled
+        */
+       regmap_read(map, ASPEED_HPLL_PARAM, &val);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_HPLL] = ast2600_calc_pll("hpll", val);
+
+       regmap_read(map, ASPEED_MPLL_PARAM, &val);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_MPLL] = ast2600_calc_pll("mpll", val);
+
+       regmap_read(map, ASPEED_DPLL_PARAM, &val);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_DPLL] = ast2600_calc_pll("dpll", val);
+
+       regmap_read(map, ASPEED_EPLL_PARAM, &val);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_EPLL] = ast2600_calc_pll("epll", val);
+
+       regmap_read(map, ASPEED_APLL_PARAM, &val);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_APLL] = ast2600_calc_apll("apll", val);
+
+       /*
+        * AXI/AHB clock frequency ratio (aka HCLK): strap bit 16 selects
+        * the AXI divider (1 or 2) and strap bits 12:11 index the AHB
+        * divider table, which is picked by bit 16 of the silicon revision
+        * register.
+        */
+       regmap_read(map, ASPEED_G6_STRAP1, &val);
+       if (val & BIT(16))
+               axi_div = 1;
+       else
+               axi_div = 2;
+
+       regmap_read(map, ASPEED_G6_SILICON_REV, &chip_id);
+       if (chip_id & BIT(16))
+               ahb_div = ast2600_a1_axi_ahb_div_table[(val >> 11) & 0x3];
+       else
+               ahb_div = ast2600_a0_axi_ahb_div_table[(val >> 11) & 0x3];
+
+       hw = clk_hw_register_fixed_factor(NULL, "ahb", "hpll", 0, 1, axi_div * ahb_div);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_AHB] = hw;
+
+       regmap_read(map, ASPEED_G6_CLK_SELECTION1, &val);
+       val = (val >> 23) & 0x7;
+       div = 4 * (val + 1);
+       hw = clk_hw_register_fixed_factor(NULL, "apb1", "hpll", 0, 1, div);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_APB1] = hw;
+
+       regmap_read(map, ASPEED_G6_CLK_SELECTION4, &val);
+       val = (val >> 9) & 0x7;
+       div = 2 * (val + 1);
+       hw = clk_hw_register_fixed_factor(NULL, "apb2", "ahb", 0, 1, div);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_APB2] = hw;
+
+       /* USB 2.0 port1 phy 40MHz clock */
+       hw = clk_hw_register_fixed_rate(NULL, "usb-phy-40m", NULL, 0, 40000000);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_USBPHY_40M] = hw;
+};
+
+/*
+ * Early clock controller init for "aspeed,ast2600-scu": maps the SCU,
+ * allocates the clock table, registers the early clocks through
+ * aspeed_g6_cc() and adds the DT clock provider. Failures are silent
+ * except for pr_err() messages; nothing is returned to the caller.
+ */
+static void __init aspeed_g6_cc_init(struct device_node *np)
+{
+       struct regmap *map;
+       int ret;
+       int i;
+
+       scu_g6_base = of_iomap(np, 0);
+       if (!scu_g6_base)
+               return;
+
+       /* NOTE(review): the scu_g6_base mapping is not released if this fails */
+       aspeed_g6_clk_data = kzalloc(struct_size(aspeed_g6_clk_data, hws,
+                                     ASPEED_G6_NUM_CLKS), GFP_KERNEL);
+       if (!aspeed_g6_clk_data)
+               return;
+
+       /*
+        * This way all clocks fetched before the platform device probes,
+        * except those we assign here for early use, will be deferred.
+        */
+       for (i = 0; i < ASPEED_G6_NUM_CLKS; i++)
+               aspeed_g6_clk_data->hws[i] = ERR_PTR(-EPROBE_DEFER);
+
+       /*
+        * We check that the regmap works on this very first access,
+        * but as this is an MMIO-backed regmap, subsequent regmap
+        * access is not going to fail and we skip error checks from
+        * this point.
+        *
+        * NOTE(review): on failure the clock table and the SCU mapping
+        * are left allocated.
+        */
+       map = syscon_node_to_regmap(np);
+       if (IS_ERR(map)) {
+               pr_err("no syscon regmap\n");
+               return;
+       }
+
+       aspeed_g6_cc(map);
+       aspeed_g6_clk_data->num = ASPEED_G6_NUM_CLKS;
+       ret = of_clk_add_hw_provider(np, of_clk_hw_onecell_get, aspeed_g6_clk_data);
+       if (ret)
+               pr_err("failed to add DT provider: %d\n", ret);
+};
+CLK_OF_DECLARE_DRIVER(aspeed_cc_g6, "aspeed,ast2600-scu", aspeed_g6_cc_init);
index 524bf9a..e9e1642 100644 (file)
@@ -18,10 +18,13 @@ static int __must_check of_clk_bulk_get(struct device_node *np, int num_clks,
        int ret;
        int i;
 
-       for (i = 0; i < num_clks; i++)
+       for (i = 0; i < num_clks; i++) {
+               clks[i].id = NULL;
                clks[i].clk = NULL;
+       }
 
        for (i = 0; i < num_clks; i++) {
+               of_property_read_string_index(np, "clock-names", i, &clks[i].id);
                clks[i].clk = of_clk_get(np, i);
                if (IS_ERR(clks[i].clk)) {
                        ret = PTR_ERR(clks[i].clk);
index 23c9326..308b353 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/i2c.h>
 #include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
 #include <linux/slab.h>
 #include <linux/gcd.h>
 
@@ -602,6 +603,30 @@ of_clk_cdce925_get(struct of_phandle_args *clkspec, void *_data)
        return &data->clk[idx].hw;
 }
 
+/* devm action callback: disables the regulator passed as the action data */
+static void cdce925_regulator_disable(void *regulator)
+{
+       regulator_disable(regulator);
+}
+
+/*
+ * Get and enable the supply called @name for @dev, and register a devm
+ * action that disables it again. Returns 0 or a negative errno.
+ */
+static int cdce925_regulator_enable(struct device *dev, const char *name)
+{
+       struct regulator *supply = devm_regulator_get(dev, name);
+       int ret;
+
+       if (IS_ERR(supply))
+               return PTR_ERR(supply);
+
+       ret = regulator_enable(supply);
+       if (!ret)
+               return devm_add_action_or_reset(dev,
+                                               cdce925_regulator_disable,
+                                               supply);
+
+       dev_err(dev, "Failed to enable %s: %d\n", name, ret);
+       return ret;
+}
+
 /* The CDCE925 uses a funky way to read/write registers. Bulk mode is
  * just weird, so just use the single byte mode exclusively. */
 static struct regmap_bus regmap_cdce925_bus = {
@@ -630,6 +655,15 @@ static int cdce925_probe(struct i2c_client *client,
        };
 
        dev_dbg(&client->dev, "%s\n", __func__);
+
+       err = cdce925_regulator_enable(&client->dev, "vdd");
+       if (err)
+               return err;
+
+       err = cdce925_regulator_enable(&client->dev, "vddout");
+       if (err)
+               return err;
+
        data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
        if (!data)
                return -ENOMEM;
index b06038b..4f13a68 100644 (file)
@@ -3,7 +3,6 @@
  * Copyright (c) 2013 NVIDIA CORPORATION.  All rights reserved.
  */
 
-#include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/err.h>
 #include <linux/slab.h>
index fa8c917..565bcd0 100644 (file)
@@ -198,7 +198,7 @@ static u8 lochnagar_clk_get_parent(struct clk_hw *hw)
        if (ret < 0) {
                dev_dbg(priv->dev, "Failed to read parent of %s: %d\n",
                        lclk->name, ret);
-               return hw->init->num_parents;
+               return clk_hw_get_num_parents(hw);
        }
 
        val &= lclk->src_mask;
index 5fc78fa..80b9d78 100644 (file)
@@ -437,7 +437,7 @@ static int m10v_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
                if (readl_poll_timeout(divider->write_valid_reg, val,
                        !val, M10V_UPOLL_RATE, M10V_UTIMEOUT))
                        pr_err("%s:%s couldn't stabilize\n",
-                               __func__, divider->hw.init->name);
+                               __func__, clk_hw_get_name(hw));
        }
 
        if (divider->lock)
index 07f3b25..bed140f 100644 (file)
@@ -686,7 +686,7 @@ static const struct clockgen_chipinfo chipinfo[] = {
                .guts_compat = "fsl,qoriq-device-config-1.0",
                .init_periph = p5020_init_periph,
                .cmux_groups = {
-                       &p2041_cmux_grp1, &p2041_cmux_grp2
+                       &p5020_cmux_grp1, &p5020_cmux_grp2
                },
                .cmux_to_group = {
                        0, 1, -1
index 72424eb..6e780c2 100644 (file)
@@ -547,7 +547,6 @@ static int si5341_synth_clk_set_rate(struct clk_hw *hw, unsigned long rate,
        bool is_integer;
 
        n_num = synth->data->freq_vco;
-       n_den = rate;
 
        /* see if there's an integer solution */
        r = do_div(n_num, rate);
index ca99e9d..1c677d7 100644 (file)
@@ -37,6 +37,12 @@ static HLIST_HEAD(clk_root_list);
 static HLIST_HEAD(clk_orphan_list);
 static LIST_HEAD(clk_notifier_list);
 
+static struct hlist_head *all_lists[] = {
+       &clk_root_list,
+       &clk_orphan_list,
+       NULL,
+};
+
 /***    private data structures    ***/
 
 struct clk_parent_map {
@@ -615,6 +621,8 @@ static void clk_core_get_boundaries(struct clk_core *core,
 {
        struct clk *clk_user;
 
+       lockdep_assert_held(&prepare_lock);
+
        *min_rate = core->min_rate;
        *max_rate = core->max_rate;
 
@@ -2460,7 +2468,7 @@ static int clk_core_set_parent_nolock(struct clk_core *core,
        if (core->parent == parent)
                return 0;
 
-       /* verify ops for for multi-parent clks */
+       /* verify ops for multi-parent clks */
        if (core->num_parents > 1 && !core->ops->set_parent)
                return -EPERM;
 
@@ -2862,12 +2870,6 @@ static int inited = 0;
 static DEFINE_MUTEX(clk_debug_lock);
 static HLIST_HEAD(clk_debug_list);
 
-static struct hlist_head *all_lists[] = {
-       &clk_root_list,
-       &clk_orphan_list,
-       NULL,
-};
-
 static struct hlist_head *orphan_list[] = {
        &clk_orphan_list,
        NULL,
@@ -2876,9 +2878,6 @@ static struct hlist_head *orphan_list[] = {
 static void clk_summary_show_one(struct seq_file *s, struct clk_core *c,
                                 int level)
 {
-       if (!c)
-               return;
-
        seq_printf(s, "%*s%-*s %7d %8d %8d %11lu %10lu %5d %6d\n",
                   level * 3 + 1, "",
                   30 - level * 3, c->name,
@@ -2893,9 +2892,6 @@ static void clk_summary_show_subtree(struct seq_file *s, struct clk_core *c,
 {
        struct clk_core *child;
 
-       if (!c)
-               return;
-
        clk_summary_show_one(s, c, level);
 
        hlist_for_each_entry(child, &c->children, child_node)
@@ -2925,8 +2921,9 @@ DEFINE_SHOW_ATTRIBUTE(clk_summary);
 
 static void clk_dump_one(struct seq_file *s, struct clk_core *c, int level)
 {
-       if (!c)
-               return;
+       unsigned long min_rate, max_rate;
+
+       clk_core_get_boundaries(c, &min_rate, &max_rate);
 
        /* This should be JSON format, i.e. elements separated with a comma */
        seq_printf(s, "\"%s\": { ", c->name);
@@ -2934,6 +2931,8 @@ static void clk_dump_one(struct seq_file *s, struct clk_core *c, int level)
        seq_printf(s, "\"prepare_count\": %d,", c->prepare_count);
        seq_printf(s, "\"protect_count\": %d,", c->protect_count);
        seq_printf(s, "\"rate\": %lu,", clk_core_get_rate(c));
+       seq_printf(s, "\"min_rate\": %lu,", min_rate);
+       seq_printf(s, "\"max_rate\": %lu,", max_rate);
        seq_printf(s, "\"accuracy\": %lu,", clk_core_get_accuracy(c));
        seq_printf(s, "\"phase\": %d,", clk_core_get_phase(c));
        seq_printf(s, "\"duty_cycle\": %u",
@@ -2944,9 +2943,6 @@ static void clk_dump_subtree(struct seq_file *s, struct clk_core *c, int level)
 {
        struct clk_core *child;
 
-       if (!c)
-               return;
-
        clk_dump_one(s, c, level);
 
        hlist_for_each_entry(child, &c->children, child_node) {
@@ -3042,15 +3038,15 @@ static void possible_parent_show(struct seq_file *s, struct clk_core *core,
         */
        parent = clk_core_get_parent_by_index(core, i);
        if (parent)
-               seq_printf(s, "%s", parent->name);
+               seq_puts(s, parent->name);
        else if (core->parents[i].name)
-               seq_printf(s, "%s", core->parents[i].name);
+               seq_puts(s, core->parents[i].name);
        else if (core->parents[i].fw_name)
                seq_printf(s, "<%s>(fw)", core->parents[i].fw_name);
        else if (core->parents[i].index >= 0)
-               seq_printf(s, "%s",
-                          of_clk_get_parent_name(core->of_node,
-                                                 core->parents[i].index));
+               seq_puts(s,
+                        of_clk_get_parent_name(core->of_node,
+                                               core->parents[i].index));
        else
                seq_puts(s, "(missing)");
 
@@ -3093,6 +3089,34 @@ static int clk_duty_cycle_show(struct seq_file *s, void *data)
 }
 DEFINE_SHOW_ATTRIBUTE(clk_duty_cycle);
 
+static int clk_min_rate_show(struct seq_file *s, void *data)
+{
+       struct clk_core *core = s->private;
+       unsigned long min_rate, max_rate;
+
+       clk_prepare_lock();
+       clk_core_get_boundaries(core, &min_rate, &max_rate);
+       clk_prepare_unlock();
+       seq_printf(s, "%lu\n", min_rate);
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(clk_min_rate);
+
+static int clk_max_rate_show(struct seq_file *s, void *data)
+{
+       struct clk_core *core = s->private;
+       unsigned long min_rate, max_rate;
+
+       clk_prepare_lock();
+       clk_core_get_boundaries(core, &min_rate, &max_rate);
+       clk_prepare_unlock();
+       seq_printf(s, "%lu\n", max_rate);
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(clk_max_rate);
+
 static void clk_debug_create_one(struct clk_core *core, struct dentry *pdentry)
 {
        struct dentry *root;
@@ -3104,6 +3128,8 @@ static void clk_debug_create_one(struct clk_core *core, struct dentry *pdentry)
        core->dentry = root;
 
        debugfs_create_ulong("clk_rate", 0444, root, &core->rate);
+       debugfs_create_file("clk_min_rate", 0444, root, core, &clk_min_rate_fops);
+       debugfs_create_file("clk_max_rate", 0444, root, core, &clk_max_rate_fops);
        debugfs_create_ulong("clk_accuracy", 0444, root, &core->accuracy);
        debugfs_create_u32("clk_phase", 0444, root, &core->phase);
        debugfs_create_file("clk_flags", 0444, root, core, &clk_flags_fops);
@@ -3513,9 +3539,9 @@ static int clk_cpy_name(const char **dst_p, const char *src, bool must_exist)
        return 0;
 }
 
-static int clk_core_populate_parent_map(struct clk_core *core)
+static int clk_core_populate_parent_map(struct clk_core *core,
+                                       const struct clk_init_data *init)
 {
-       const struct clk_init_data *init = core->hw->init;
        u8 num_parents = init->num_parents;
        const char * const *parent_names = init->parent_names;
        const struct clk_hw **parent_hws = init->parent_hws;
@@ -3595,6 +3621,14 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw)
 {
        int ret;
        struct clk_core *core;
+       const struct clk_init_data *init = hw->init;
+
+       /*
+        * The init data is not supposed to be used outside of registration path.
+        * Set it to NULL so that provider drivers can't use it either and so that
+        * we catch use of hw->init early on in the core.
+        */
+       hw->init = NULL;
 
        core = kzalloc(sizeof(*core), GFP_KERNEL);
        if (!core) {
@@ -3602,17 +3636,17 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw)
                goto fail_out;
        }
 
-       core->name = kstrdup_const(hw->init->name, GFP_KERNEL);
+       core->name = kstrdup_const(init->name, GFP_KERNEL);
        if (!core->name) {
                ret = -ENOMEM;
                goto fail_name;
        }
 
-       if (WARN_ON(!hw->init->ops)) {
+       if (WARN_ON(!init->ops)) {
                ret = -EINVAL;
                goto fail_ops;
        }
-       core->ops = hw->init->ops;
+       core->ops = init->ops;
 
        if (dev && pm_runtime_enabled(dev))
                core->rpm_enabled = true;
@@ -3621,13 +3655,13 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw)
        if (dev && dev->driver)
                core->owner = dev->driver->owner;
        core->hw = hw;
-       core->flags = hw->init->flags;
-       core->num_parents = hw->init->num_parents;
+       core->flags = init->flags;
+       core->num_parents = init->num_parents;
        core->min_rate = 0;
        core->max_rate = ULONG_MAX;
        hw->core = core;
 
-       ret = clk_core_populate_parent_map(core);
+       ret = clk_core_populate_parent_map(core, init);
        if (ret)
                goto fail_parents;
 
@@ -3766,6 +3800,34 @@ static const struct clk_ops clk_nodrv_ops = {
        .set_parent     = clk_nodrv_set_parent,
 };
 
+static void clk_core_evict_parent_cache_subtree(struct clk_core *root,
+                                               struct clk_core *target)
+{
+       int i;
+       struct clk_core *child;
+
+       for (i = 0; i < root->num_parents; i++)
+               if (root->parents[i].core == target)
+                       root->parents[i].core = NULL;
+
+       hlist_for_each_entry(child, &root->children, child_node)
+               clk_core_evict_parent_cache_subtree(child, target);
+}
+
+/* Remove this clk from all parent caches */
+static void clk_core_evict_parent_cache(struct clk_core *core)
+{
+       struct hlist_head **lists;
+       struct clk_core *root;
+
+       lockdep_assert_held(&prepare_lock);
+
+       for (lists = all_lists; *lists; lists++)
+               hlist_for_each_entry(root, *lists, child_node)
+                       clk_core_evict_parent_cache_subtree(root, core);
+
+}
+
 /**
  * clk_unregister - unregister a currently registered clock
  * @clk: clock to unregister
@@ -3804,6 +3866,8 @@ void clk_unregister(struct clk *clk)
                        clk_core_set_parent_nolock(child, NULL);
        }
 
+       clk_core_evict_parent_cache(clk->core);
+
        hlist_del_init(&clk->core->child_node);
 
        if (clk->core->prepare_count)
@@ -4345,12 +4409,43 @@ void devm_of_clk_del_provider(struct device *dev)
 }
 EXPORT_SYMBOL(devm_of_clk_del_provider);
 
-/*
- * Beware the return values when np is valid, but no clock provider is found.
- * If name == NULL, the function returns -ENOENT.
- * If name != NULL, the function returns -EINVAL. This is because
- * of_parse_phandle_with_args() is called even if of_property_match_string()
- * returns an error.
+/**
+ * of_parse_clkspec() - Parse a DT clock specifier for a given device node
+ * @np: device node to parse clock specifier from
+ * @index: index of phandle to parse clock out of. If index < 0, @name is used
+ * @name: clock name to find and parse. If name is NULL, the index is used
+ * @out_args: Result of parsing the clock specifier
+ *
+ * Parses a device node's "clocks" and "clock-names" properties to find the
+ * phandle and cells for the index or name that is desired. The resulting clock
+ * specifier is placed into @out_args, or an errno is returned when there's a
+ * parsing error. The @index argument is ignored if @name is non-NULL.
+ *
+ * Example:
+ *
+ * phandle1: clock-controller@1 {
+ *     #clock-cells = <2>;
+ * }
+ *
+ * phandle2: clock-controller@2 {
+ *     #clock-cells = <1>;
+ * }
+ *
+ * clock-consumer@3 {
+ *     clocks = <&phandle1 1 2 &phandle2 3>;
+ *     clock-names = "name1", "name2";
+ * }
+ *
+ * To get a device_node for `clock-controller@2' node you may call this
+ * function a few different ways:
+ *
+ *   of_parse_clkspec(clock-consumer@3, -1, "name2", &args);
+ *   of_parse_clkspec(clock-consumer@3, 1, NULL, &args);
+ *   of_parse_clkspec(clock-consumer@3, 1, "name2", &args);
+ *
+ * Return: 0 upon successfully parsing the clock specifier. Otherwise, -ENOENT
+ * if @name is NULL or -EINVAL if @name is non-NULL and it can't be found in
+ * the "clock-names" property of @np.
  */
 static int of_parse_clkspec(const struct device_node *np, int index,
                            const char *name, struct of_phandle_args *out_args)
index 1c99e99..1ac11b6 100644 (file)
@@ -778,12 +778,15 @@ int of_davinci_pll_init(struct device *dev, struct device_node *node,
                int i;
 
                clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL);
-               if (!clk_data)
+               if (!clk_data) {
+                       of_node_put(child);
                        return -ENOMEM;
+               }
 
                clks = kmalloc_array(n_clks, sizeof(*clks), GFP_KERNEL);
                if (!clks) {
                        kfree(clk_data);
+                       of_node_put(child);
                        return -ENOMEM;
                }
 
index 42e4667..2022d9b 100644 (file)
@@ -42,6 +42,19 @@ static const struct clk_div_table ulp_div_table[] = {
        { .val = 7, .div = 64, },
 };
 
+static const int pcc2_uart_clk_ids[] __initconst = {
+       IMX7ULP_CLK_LPUART4,
+       IMX7ULP_CLK_LPUART5,
+};
+
+static const int pcc3_uart_clk_ids[] __initconst = {
+       IMX7ULP_CLK_LPUART6,
+       IMX7ULP_CLK_LPUART7,
+};
+
+static struct clk **pcc2_uart_clks[ARRAY_SIZE(pcc2_uart_clk_ids) + 1] __initdata;
+static struct clk **pcc3_uart_clks[ARRAY_SIZE(pcc3_uart_clk_ids) + 1] __initdata;
+
 static void __init imx7ulp_clk_scg1_init(struct device_node *np)
 {
        struct clk_hw_onecell_data *clk_data;
@@ -135,6 +148,7 @@ static void __init imx7ulp_clk_pcc2_init(struct device_node *np)
        struct clk_hw_onecell_data *clk_data;
        struct clk_hw **clks;
        void __iomem *base;
+       int i;
 
        clk_data = kzalloc(struct_size(clk_data, hws, IMX7ULP_CLK_PCC2_END),
                           GFP_KERNEL);
@@ -173,6 +187,14 @@ static void __init imx7ulp_clk_pcc2_init(struct device_node *np)
        imx_check_clk_hws(clks, clk_data->num);
 
        of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_data);
+
+       for (i = 0; i < ARRAY_SIZE(pcc2_uart_clk_ids); i++) {
+               int index = pcc2_uart_clk_ids[i];
+
+               pcc2_uart_clks[i] = &clks[index]->clk;
+       }
+
+       imx_register_uart_clocks(pcc2_uart_clks);
 }
 CLK_OF_DECLARE(imx7ulp_clk_pcc2, "fsl,imx7ulp-pcc2", imx7ulp_clk_pcc2_init);
 
@@ -181,6 +203,7 @@ static void __init imx7ulp_clk_pcc3_init(struct device_node *np)
        struct clk_hw_onecell_data *clk_data;
        struct clk_hw **clks;
        void __iomem *base;
+       int i;
 
        clk_data = kzalloc(struct_size(clk_data, hws, IMX7ULP_CLK_PCC3_END),
                           GFP_KERNEL);
@@ -218,6 +241,14 @@ static void __init imx7ulp_clk_pcc3_init(struct device_node *np)
        imx_check_clk_hws(clks, clk_data->num);
 
        of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_data);
+
+       for (i = 0; i < ARRAY_SIZE(pcc3_uart_clk_ids); i++) {
+               int index = pcc3_uart_clk_ids[i];
+
+               pcc3_uart_clks[i] = &clks[index]->clk;
+       }
+
+       imx_register_uart_clocks(pcc3_uart_clks);
 }
 CLK_OF_DECLARE(imx7ulp_clk_pcc3, "fsl,imx7ulp-pcc3", imx7ulp_clk_pcc3_init);
 
index 43fa9c3..067ab87 100644 (file)
@@ -22,7 +22,7 @@ static u32 share_count_sai3;
 static u32 share_count_sai4;
 static u32 share_count_sai5;
 static u32 share_count_sai6;
-static u32 share_count_dcss;
+static u32 share_count_disp;
 static u32 share_count_pdm;
 static u32 share_count_nand;
 
@@ -38,8 +38,8 @@ static const struct imx_pll14xx_rate_table imx8mm_pll1416x_tbl[] = {
 };
 
 static const struct imx_pll14xx_rate_table imx8mm_audiopll_tbl[] = {
-       PLL_1443X_RATE(786432000U, 655, 5, 2, 23593),
-       PLL_1443X_RATE(722534400U, 301, 5, 1, 3670),
+       PLL_1443X_RATE(393216000U, 262, 2, 3, 9437),
+       PLL_1443X_RATE(361267200U, 361, 3, 3, 17511),
 };
 
 static const struct imx_pll14xx_rate_table imx8mm_videopll_tbl[] = {
@@ -51,43 +51,43 @@ static const struct imx_pll14xx_rate_table imx8mm_drampll_tbl[] = {
        PLL_1443X_RATE(650000000U, 325, 3, 2, 0),
 };
 
-static struct imx_pll14xx_clk imx8mm_audio_pll __initdata = {
+static struct imx_pll14xx_clk imx8mm_audio_pll = {
                .type = PLL_1443X,
                .rate_table = imx8mm_audiopll_tbl,
                .rate_count = ARRAY_SIZE(imx8mm_audiopll_tbl),
 };
 
-static struct imx_pll14xx_clk imx8mm_video_pll __initdata = {
+static struct imx_pll14xx_clk imx8mm_video_pll = {
                .type = PLL_1443X,
                .rate_table = imx8mm_videopll_tbl,
                .rate_count = ARRAY_SIZE(imx8mm_videopll_tbl),
 };
 
-static struct imx_pll14xx_clk imx8mm_dram_pll __initdata = {
+static struct imx_pll14xx_clk imx8mm_dram_pll = {
                .type = PLL_1443X,
                .rate_table = imx8mm_drampll_tbl,
                .rate_count = ARRAY_SIZE(imx8mm_drampll_tbl),
 };
 
-static struct imx_pll14xx_clk imx8mm_arm_pll __initdata = {
+static struct imx_pll14xx_clk imx8mm_arm_pll = {
                .type = PLL_1416X,
                .rate_table = imx8mm_pll1416x_tbl,
                .rate_count = ARRAY_SIZE(imx8mm_pll1416x_tbl),
 };
 
-static struct imx_pll14xx_clk imx8mm_gpu_pll __initdata = {
+static struct imx_pll14xx_clk imx8mm_gpu_pll = {
                .type = PLL_1416X,
                .rate_table = imx8mm_pll1416x_tbl,
                .rate_count = ARRAY_SIZE(imx8mm_pll1416x_tbl),
 };
 
-static struct imx_pll14xx_clk imx8mm_vpu_pll __initdata = {
+static struct imx_pll14xx_clk imx8mm_vpu_pll = {
                .type = PLL_1416X,
                .rate_table = imx8mm_pll1416x_tbl,
                .rate_count = ARRAY_SIZE(imx8mm_pll1416x_tbl),
 };
 
-static struct imx_pll14xx_clk imx8mm_sys_pll __initdata = {
+static struct imx_pll14xx_clk imx8mm_sys_pll = {
                .type = PLL_1416X,
                .rate_table = imx8mm_pll1416x_tbl,
                .rate_count = ARRAY_SIZE(imx8mm_pll1416x_tbl),
@@ -175,10 +175,10 @@ static const char *imx8mm_vpu_g1_sels[] = {"osc_24m", "vpu_pll_out", "sys_pll1_8
 static const char *imx8mm_vpu_g2_sels[] = {"osc_24m", "vpu_pll_out", "sys_pll1_800m", "sys_pll2_1000m",
                                           "sys_pll1_100m", "sys_pll2_125m", "sys_pll3_out", "audio_pll1_out", };
 
-static const char *imx8mm_disp_dtrc_sels[] = {"osc_24m", "video_pll2_out", "sys_pll1_800m", "sys_pll2_1000m",
+static const char *imx8mm_disp_dtrc_sels[] = {"osc_24m", "dummy", "sys_pll1_800m", "sys_pll2_1000m",
                                              "sys_pll1_160m", "video_pll1_out", "sys_pll3_out", "audio_pll2_out", };
 
-static const char *imx8mm_disp_dc8000_sels[] = {"osc_24m", "video_pll2_out", "sys_pll1_800m", "sys_pll2_1000m",
+static const char *imx8mm_disp_dc8000_sels[] = {"osc_24m", "dummy", "sys_pll1_800m", "sys_pll2_1000m",
                                                "sys_pll1_160m", "video_pll1_out", "sys_pll3_out", "audio_pll2_out", };
 
 static const char *imx8mm_pcie1_ctrl_sels[] = {"osc_24m", "sys_pll2_250m", "sys_pll2_200m", "sys_pll1_266m",
@@ -232,7 +232,7 @@ static const char *imx8mm_enet_phy_sels[] = {"osc_24m", "sys_pll2_50m", "sys_pll
 static const char *imx8mm_nand_sels[] = {"osc_24m", "sys_pll2_500m", "audio_pll1_out", "sys_pll1_400m",
                                         "audio_pll2_out", "sys_pll3_out", "sys_pll2_250m", "video_pll1_out", };
 
-static const char *imx8mm_qspi_sels[] = {"osc_24m", "sys_pll1_400m", "sys_pll1_800m", "sys_pll2_500m",
+static const char *imx8mm_qspi_sels[] = {"osc_24m", "sys_pll1_400m", "sys_pll2_333m", "sys_pll2_500m",
                                         "audio_pll2_out", "sys_pll1_266m", "sys_pll3_out", "sys_pll1_100m", };
 
 static const char *imx8mm_usdhc1_sels[] = {"osc_24m", "sys_pll1_400m", "sys_pll1_800m", "sys_pll2_500m",
@@ -287,13 +287,13 @@ static const char *imx8mm_pwm2_sels[] = {"osc_24m", "sys_pll2_100m", "sys_pll1_1
                                         "sys_pll3_out", "clk_ext1", "sys_pll1_80m", "video_pll1_out", };
 
 static const char *imx8mm_pwm3_sels[] = {"osc_24m", "sys_pll2_100m", "sys_pll1_160m", "sys_pll1_40m",
-                                        "sys3_pll2_out", "clk_ext2", "sys_pll1_80m", "video_pll1_out", };
+                                        "sys_pll3_out", "clk_ext2", "sys_pll1_80m", "video_pll1_out", };
 
 static const char *imx8mm_pwm4_sels[] = {"osc_24m", "sys_pll2_100m", "sys_pll1_160m", "sys_pll1_40m",
                                         "sys_pll3_out", "clk_ext2", "sys_pll1_80m", "video_pll1_out", };
 
 static const char *imx8mm_gpt1_sels[] = {"osc_24m", "sys_pll2_100m", "sys_pll1_400m", "sys_pll1_40m",
-                                        "video_pll1_out", "sys_pll1_800m", "audio_pll1_out", "clk_ext1" };
+                                        "video_pll1_out", "sys_pll1_80m", "audio_pll1_out", "clk_ext1" };
 
 static const char *imx8mm_wdog_sels[] = {"osc_24m", "sys_pll1_133m", "sys_pll1_160m", "vpu_pll_out",
                                         "sys_pll2_125m", "sys_pll3_out", "sys_pll1_80m", "sys_pll2_166m", };
@@ -347,7 +347,7 @@ static const char *imx8mm_pdm_sels[] = {"osc_24m", "sys_pll2_100m", "audio_pll1_
                                        "sys_pll2_1000m", "sys_pll3_out", "clk_ext3", "audio_pll2_out", };
 
 static const char *imx8mm_vpu_h1_sels[] = {"osc_24m", "vpu_pll_out", "sys_pll1_800m", "sys_pll2_1000m",
-                                          "audio_pll2_out", "sys_pll2_125m", "sys_pll3_clk", "audio_pll1_out", };
+                                          "audio_pll2_out", "sys_pll2_125m", "sys_pll3_out", "audio_pll1_out", };
 
 static const char *imx8mm_dram_core_sels[] = {"dram_pll_out", "dram_alt_root", };
 
@@ -357,7 +357,7 @@ static const char *imx8mm_clko1_sels[] = {"osc_24m", "sys_pll1_800m", "osc_27m",
 static struct clk *clks[IMX8MM_CLK_END];
 static struct clk_onecell_data clk_data;
 
-static struct clk ** const uart_clks[] __initconst = {
+static struct clk ** const uart_clks[] = {
        &clks[IMX8MM_CLK_UART1_ROOT],
        &clks[IMX8MM_CLK_UART2_ROOT],
        &clks[IMX8MM_CLK_UART3_ROOT],
@@ -365,19 +365,20 @@ static struct clk ** const uart_clks[] __initconst = {
        NULL
 };
 
-static int __init imx8mm_clocks_init(struct device_node *ccm_node)
+static int imx8mm_clocks_probe(struct platform_device *pdev)
 {
-       struct device_node *np;
+       struct device *dev = &pdev->dev;
+       struct device_node *np = dev->of_node;
        void __iomem *base;
        int ret;
 
        clks[IMX8MM_CLK_DUMMY] = imx_clk_fixed("dummy", 0);
-       clks[IMX8MM_CLK_24M] = of_clk_get_by_name(ccm_node, "osc_24m");
-       clks[IMX8MM_CLK_32K] = of_clk_get_by_name(ccm_node, "osc_32k");
-       clks[IMX8MM_CLK_EXT1] = of_clk_get_by_name(ccm_node, "clk_ext1");
-       clks[IMX8MM_CLK_EXT2] = of_clk_get_by_name(ccm_node, "clk_ext2");
-       clks[IMX8MM_CLK_EXT3] = of_clk_get_by_name(ccm_node, "clk_ext3");
-       clks[IMX8MM_CLK_EXT4] = of_clk_get_by_name(ccm_node, "clk_ext4");
+       clks[IMX8MM_CLK_24M] = of_clk_get_by_name(np, "osc_24m");
+       clks[IMX8MM_CLK_32K] = of_clk_get_by_name(np, "osc_32k");
+       clks[IMX8MM_CLK_EXT1] = of_clk_get_by_name(np, "clk_ext1");
+       clks[IMX8MM_CLK_EXT2] = of_clk_get_by_name(np, "clk_ext2");
+       clks[IMX8MM_CLK_EXT3] = of_clk_get_by_name(np, "clk_ext3");
+       clks[IMX8MM_CLK_EXT4] = of_clk_get_by_name(np, "clk_ext4");
 
        np = of_find_compatible_node(NULL, NULL, "fsl,imx8mm-anatop");
        base = of_iomap(np, 0);
@@ -407,28 +408,16 @@ static int __init imx8mm_clocks_init(struct device_node *ccm_node)
        clks[IMX8MM_SYS_PLL3] = imx_clk_pll14xx("sys_pll3", "sys_pll3_ref_sel", base + 0x114, &imx8mm_sys_pll);
 
        /* PLL bypass out */
-       clks[IMX8MM_AUDIO_PLL1_BYPASS] = imx_clk_mux_flags("audio_pll1_bypass", base, 4, 1, audio_pll1_bypass_sels, ARRAY_SIZE(audio_pll1_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MM_AUDIO_PLL2_BYPASS] = imx_clk_mux_flags("audio_pll2_bypass", base + 0x14, 4, 1, audio_pll2_bypass_sels, ARRAY_SIZE(audio_pll2_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MM_VIDEO_PLL1_BYPASS] = imx_clk_mux_flags("video_pll1_bypass", base + 0x28, 4, 1, video_pll1_bypass_sels, ARRAY_SIZE(video_pll1_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MM_DRAM_PLL_BYPASS] = imx_clk_mux_flags("dram_pll_bypass", base + 0x50, 4, 1, dram_pll_bypass_sels, ARRAY_SIZE(dram_pll_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MM_GPU_PLL_BYPASS] = imx_clk_mux_flags("gpu_pll_bypass", base + 0x64, 4, 1, gpu_pll_bypass_sels, ARRAY_SIZE(gpu_pll_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MM_VPU_PLL_BYPASS] = imx_clk_mux_flags("vpu_pll_bypass", base + 0x74, 4, 1, vpu_pll_bypass_sels, ARRAY_SIZE(vpu_pll_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MM_ARM_PLL_BYPASS] = imx_clk_mux_flags("arm_pll_bypass", base + 0x84, 4, 1, arm_pll_bypass_sels, ARRAY_SIZE(arm_pll_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MM_SYS_PLL1_BYPASS] = imx_clk_mux_flags("sys_pll1_bypass", base + 0x94, 4, 1, sys_pll1_bypass_sels, ARRAY_SIZE(sys_pll1_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MM_SYS_PLL2_BYPASS] = imx_clk_mux_flags("sys_pll2_bypass", base + 0x104, 4, 1, sys_pll2_bypass_sels, ARRAY_SIZE(sys_pll2_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MM_SYS_PLL3_BYPASS] = imx_clk_mux_flags("sys_pll3_bypass", base + 0x114, 4, 1, sys_pll3_bypass_sels, ARRAY_SIZE(sys_pll3_bypass_sels), CLK_SET_RATE_PARENT);
-
-       /* unbypass all the plls */
-       clk_set_parent(clks[IMX8MM_AUDIO_PLL1_BYPASS], clks[IMX8MM_AUDIO_PLL1]);
-       clk_set_parent(clks[IMX8MM_AUDIO_PLL2_BYPASS], clks[IMX8MM_AUDIO_PLL2]);
-       clk_set_parent(clks[IMX8MM_VIDEO_PLL1_BYPASS], clks[IMX8MM_VIDEO_PLL1]);
-       clk_set_parent(clks[IMX8MM_DRAM_PLL_BYPASS], clks[IMX8MM_DRAM_PLL]);
-       clk_set_parent(clks[IMX8MM_GPU_PLL_BYPASS], clks[IMX8MM_GPU_PLL]);
-       clk_set_parent(clks[IMX8MM_VPU_PLL_BYPASS], clks[IMX8MM_VPU_PLL]);
-       clk_set_parent(clks[IMX8MM_ARM_PLL_BYPASS], clks[IMX8MM_ARM_PLL]);
-       clk_set_parent(clks[IMX8MM_SYS_PLL1_BYPASS], clks[IMX8MM_SYS_PLL1]);
-       clk_set_parent(clks[IMX8MM_SYS_PLL2_BYPASS], clks[IMX8MM_SYS_PLL2]);
-       clk_set_parent(clks[IMX8MM_SYS_PLL3_BYPASS], clks[IMX8MM_SYS_PLL3]);
+       clks[IMX8MM_AUDIO_PLL1_BYPASS] = imx_clk_mux_flags("audio_pll1_bypass", base, 16, 1, audio_pll1_bypass_sels, ARRAY_SIZE(audio_pll1_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MM_AUDIO_PLL2_BYPASS] = imx_clk_mux_flags("audio_pll2_bypass", base + 0x14, 16, 1, audio_pll2_bypass_sels, ARRAY_SIZE(audio_pll2_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MM_VIDEO_PLL1_BYPASS] = imx_clk_mux_flags("video_pll1_bypass", base + 0x28, 16, 1, video_pll1_bypass_sels, ARRAY_SIZE(video_pll1_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MM_DRAM_PLL_BYPASS] = imx_clk_mux_flags("dram_pll_bypass", base + 0x50, 16, 1, dram_pll_bypass_sels, ARRAY_SIZE(dram_pll_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MM_GPU_PLL_BYPASS] = imx_clk_mux_flags("gpu_pll_bypass", base + 0x64, 28, 1, gpu_pll_bypass_sels, ARRAY_SIZE(gpu_pll_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MM_VPU_PLL_BYPASS] = imx_clk_mux_flags("vpu_pll_bypass", base + 0x74, 28, 1, vpu_pll_bypass_sels, ARRAY_SIZE(vpu_pll_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MM_ARM_PLL_BYPASS] = imx_clk_mux_flags("arm_pll_bypass", base + 0x84, 28, 1, arm_pll_bypass_sels, ARRAY_SIZE(arm_pll_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MM_SYS_PLL1_BYPASS] = imx_clk_mux_flags("sys_pll1_bypass", base + 0x94, 28, 1, sys_pll1_bypass_sels, ARRAY_SIZE(sys_pll1_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MM_SYS_PLL2_BYPASS] = imx_clk_mux_flags("sys_pll2_bypass", base + 0x104, 28, 1, sys_pll2_bypass_sels, ARRAY_SIZE(sys_pll2_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MM_SYS_PLL3_BYPASS] = imx_clk_mux_flags("sys_pll3_bypass", base + 0x114, 28, 1, sys_pll3_bypass_sels, ARRAY_SIZE(sys_pll3_bypass_sels), CLK_SET_RATE_PARENT);
 
        /* PLL out gate */
        clks[IMX8MM_AUDIO_PLL1_OUT] = imx_clk_gate("audio_pll1_out", "audio_pll1_bypass", base, 13);
@@ -463,10 +452,10 @@ static int __init imx8mm_clocks_init(struct device_node *ccm_node)
        clks[IMX8MM_SYS_PLL2_500M] = imx_clk_fixed_factor("sys_pll2_500m", "sys_pll2_out", 1, 2);
        clks[IMX8MM_SYS_PLL2_1000M] = imx_clk_fixed_factor("sys_pll2_1000m", "sys_pll2_out", 1, 1);
 
-       np = ccm_node;
-       base = of_iomap(np, 0);
-       if (WARN_ON(!base))
-               return -ENOMEM;
+       np = dev->of_node;
+       base = devm_platform_ioremap_resource(pdev, 0);
+       if (WARN_ON(IS_ERR(base)))
+               return PTR_ERR(base);
 
        /* Core Slice */
        clks[IMX8MM_CLK_A53_SRC] = imx_clk_mux2("arm_a53_src", base + 0x8000, 24, 3, imx8mm_a53_sels, ARRAY_SIZE(imx8mm_a53_sels));
@@ -614,7 +603,7 @@ static int __init imx8mm_clocks_init(struct device_node *ccm_node)
        clks[IMX8MM_CLK_UART2_ROOT] = imx_clk_gate4("uart2_root_clk", "uart2", base + 0x44a0, 0);
        clks[IMX8MM_CLK_UART3_ROOT] = imx_clk_gate4("uart3_root_clk", "uart3", base + 0x44b0, 0);
        clks[IMX8MM_CLK_UART4_ROOT] = imx_clk_gate4("uart4_root_clk", "uart4", base + 0x44c0, 0);
-       clks[IMX8MM_CLK_USB1_CTRL_ROOT] = imx_clk_gate4("usb1_ctrl_root_clk", "usb_core_ref", base + 0x44d0, 0);
+       clks[IMX8MM_CLK_USB1_CTRL_ROOT] = imx_clk_gate4("usb1_ctrl_root_clk", "usb_bus", base + 0x44d0, 0);
        clks[IMX8MM_CLK_GPU3D_ROOT] = imx_clk_gate4("gpu3d_root_clk", "gpu3d_div", base + 0x44f0, 0);
        clks[IMX8MM_CLK_USDHC1_ROOT] = imx_clk_gate4("usdhc1_root_clk", "usdhc1", base + 0x4510, 0);
        clks[IMX8MM_CLK_USDHC2_ROOT] = imx_clk_gate4("usdhc2_root_clk", "usdhc2", base + 0x4520, 0);
@@ -627,10 +616,10 @@ static int __init imx8mm_clocks_init(struct device_node *ccm_node)
        clks[IMX8MM_CLK_VPU_G2_ROOT] = imx_clk_gate4("vpu_g2_root_clk", "vpu_g2", base + 0x45a0, 0);
        clks[IMX8MM_CLK_PDM_ROOT] = imx_clk_gate2_shared2("pdm_root_clk", "pdm", base + 0x45b0, 0, &share_count_pdm);
        clks[IMX8MM_CLK_PDM_IPG]  = imx_clk_gate2_shared2("pdm_ipg_clk", "ipg_audio_root", base + 0x45b0, 0, &share_count_pdm);
-       clks[IMX8MM_CLK_DISP_ROOT] = imx_clk_gate2_shared2("disp_root_clk", "disp_dc8000", base + 0x45d0, 0, &share_count_dcss);
-       clks[IMX8MM_CLK_DISP_AXI_ROOT]  = imx_clk_gate2_shared2("disp_axi_root_clk", "disp_axi", base + 0x45d0, 0, &share_count_dcss);
-       clks[IMX8MM_CLK_DISP_APB_ROOT]  = imx_clk_gate2_shared2("disp_apb_root_clk", "disp_apb", base + 0x45d0, 0, &share_count_dcss);
-       clks[IMX8MM_CLK_DISP_RTRM_ROOT] = imx_clk_gate2_shared2("disp_rtrm_root_clk", "disp_rtrm", base + 0x45d0, 0, &share_count_dcss);
+       clks[IMX8MM_CLK_DISP_ROOT] = imx_clk_gate2_shared2("disp_root_clk", "disp_dc8000", base + 0x45d0, 0, &share_count_disp);
+       clks[IMX8MM_CLK_DISP_AXI_ROOT]  = imx_clk_gate2_shared2("disp_axi_root_clk", "disp_axi", base + 0x45d0, 0, &share_count_disp);
+       clks[IMX8MM_CLK_DISP_APB_ROOT]  = imx_clk_gate2_shared2("disp_apb_root_clk", "disp_apb", base + 0x45d0, 0, &share_count_disp);
+       clks[IMX8MM_CLK_DISP_RTRM_ROOT] = imx_clk_gate2_shared2("disp_rtrm_root_clk", "disp_rtrm", base + 0x45d0, 0, &share_count_disp);
        clks[IMX8MM_CLK_USDHC3_ROOT] = imx_clk_gate4("usdhc3_root_clk", "usdhc3", base + 0x45e0, 0);
        clks[IMX8MM_CLK_TMU_ROOT] = imx_clk_gate4("tmu_root_clk", "ipg_root", base + 0x4620, 0);
        clks[IMX8MM_CLK_VPU_DEC_ROOT] = imx_clk_gate4("vpu_dec_root_clk", "vpu_bus", base + 0x4630, 0);
@@ -658,11 +647,30 @@ static int __init imx8mm_clocks_init(struct device_node *ccm_node)
        ret = of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
        if (ret < 0) {
                pr_err("failed to register clks for i.MX8MM\n");
-               return -EINVAL;
+               goto unregister_clks;
        }
 
        imx_register_uart_clocks(uart_clks);
 
        return 0;
+
+unregister_clks:
+       imx_unregister_clocks(clks, ARRAY_SIZE(clks));
+
+       return ret;
 }
-CLK_OF_DECLARE_DRIVER(imx8mm, "fsl,imx8mm-ccm", imx8mm_clocks_init);
+
+static const struct of_device_id imx8mm_clk_of_match[] = {
+       { .compatible = "fsl,imx8mm-ccm" },
+       { /* Sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, imx8mm_clk_of_match);
+
+static struct platform_driver imx8mm_clk_driver = {
+       .probe = imx8mm_clocks_probe,
+       .driver = {
+               .name = "imx8mm-ccm",
+               .of_match_table = of_match_ptr(imx8mm_clk_of_match),
+       },
+};
+module_platform_driver(imx8mm_clk_driver);
index 07481a5..47a4b44 100644 (file)
@@ -42,6 +42,8 @@ enum {
 static const struct imx_pll14xx_rate_table imx8mn_pll1416x_tbl[] = {
        PLL_1416X_RATE(1800000000U, 225, 3, 0),
        PLL_1416X_RATE(1600000000U, 200, 3, 0),
+       PLL_1416X_RATE(1500000000U, 375, 3, 1),
+       PLL_1416X_RATE(1400000000U, 350, 3, 1),
        PLL_1416X_RATE(1200000000U, 300, 3, 1),
        PLL_1416X_RATE(1000000000U, 250, 3, 1),
        PLL_1416X_RATE(800000000U,  200, 3, 1),
@@ -51,8 +53,8 @@ static const struct imx_pll14xx_rate_table imx8mn_pll1416x_tbl[] = {
 };
 
 static const struct imx_pll14xx_rate_table imx8mn_audiopll_tbl[] = {
-       PLL_1443X_RATE(786432000U, 655, 5, 2, 23593),
-       PLL_1443X_RATE(722534400U, 301, 5, 1, 3670),
+       PLL_1443X_RATE(393216000U, 262, 2, 3, 9437),
+       PLL_1443X_RATE(361267200U, 361, 3, 3, 17511),
 };
 
 static const struct imx_pll14xx_rate_table imx8mn_videopll_tbl[] = {
@@ -67,36 +69,43 @@ static const struct imx_pll14xx_rate_table imx8mn_drampll_tbl[] = {
 static struct imx_pll14xx_clk imx8mn_audio_pll = {
                .type = PLL_1443X,
                .rate_table = imx8mn_audiopll_tbl,
+               .rate_count = ARRAY_SIZE(imx8mn_audiopll_tbl),
 };
 
 static struct imx_pll14xx_clk imx8mn_video_pll = {
                .type = PLL_1443X,
                .rate_table = imx8mn_videopll_tbl,
+               .rate_count = ARRAY_SIZE(imx8mn_videopll_tbl),
 };
 
 static struct imx_pll14xx_clk imx8mn_dram_pll = {
                .type = PLL_1443X,
                .rate_table = imx8mn_drampll_tbl,
+               .rate_count = ARRAY_SIZE(imx8mn_drampll_tbl),
 };
 
 static struct imx_pll14xx_clk imx8mn_arm_pll = {
                .type = PLL_1416X,
                .rate_table = imx8mn_pll1416x_tbl,
+               .rate_count = ARRAY_SIZE(imx8mn_pll1416x_tbl),
 };
 
 static struct imx_pll14xx_clk imx8mn_gpu_pll = {
                .type = PLL_1416X,
                .rate_table = imx8mn_pll1416x_tbl,
+               .rate_count = ARRAY_SIZE(imx8mn_pll1416x_tbl),
 };
 
 static struct imx_pll14xx_clk imx8mn_vpu_pll = {
                .type = PLL_1416X,
                .rate_table = imx8mn_pll1416x_tbl,
+               .rate_count = ARRAY_SIZE(imx8mn_pll1416x_tbl),
 };
 
 static struct imx_pll14xx_clk imx8mn_sys_pll = {
                .type = PLL_1416X,
                .rate_table = imx8mn_pll1416x_tbl,
+               .rate_count = ARRAY_SIZE(imx8mn_pll1416x_tbl),
 };
 
 static const char * const pll_ref_sels[] = { "osc_24m", "dummy", "dummy", "dummy", };
@@ -140,7 +149,7 @@ static const char * const imx8mn_disp_axi_sels[] = {"osc_24m", "sys_pll2_1000m",
                                                    "clk_ext1", "clk_ext4", };
 
 static const char * const imx8mn_disp_apb_sels[] = {"osc_24m", "sys_pll2_125m", "sys_pll1_800m",
-                                                   "sys_pll3_out", "sys1_pll_40m", "audio_pll2_out",
+                                                   "sys_pll3_out", "sys_pll1_40m", "audio_pll2_out",
                                                    "clk_ext1", "clk_ext3", };
 
 static const char * const imx8mn_usb_bus_sels[] = {"osc_24m", "sys_pll2_500m", "sys_pll1_800m",
@@ -219,9 +228,9 @@ static const char * const imx8mn_nand_sels[] = {"osc_24m", "sys_pll2_500m", "aud
                                                "sys_pll1_400m", "audio_pll2_out", "sys_pll3_out",
                                                "sys_pll2_250m", "video_pll1_out", };
 
-static const char * const imx8mn_qspi_sels[] = {"osc_24m", "sys1_pll_400m", "sys_pll1_800m",
-                                               "sys2_pll_500m", "audio_pll2_out", "sys1_pll_266m",
-                                               "sys3_pll2_out", "sys1_pll_100m", };
+static const char * const imx8mn_qspi_sels[] = {"osc_24m", "sys_pll1_400m", "sys_pll2_333m",
+                                               "sys_pll2_500m", "audio_pll2_out", "sys_pll1_266m",
+                                               "sys_pll3_out", "sys_pll1_100m", };
 
 static const char * const imx8mn_usdhc1_sels[] = {"osc_24m", "sys_pll1_400m", "sys_pll1_800m",
                                                  "sys_pll2_500m", "sys_pll3_out", "sys_pll1_266m",
@@ -271,6 +280,10 @@ static const char * const imx8mn_usb_phy_sels[] = {"osc_24m", "sys_pll1_100m", "
                                                   "sys_pll2_100m", "sys_pll2_200m", "clk_ext2",
                                                   "clk_ext3", "audio_pll2_out", };
 
+static const char * const imx8mn_gic_sels[] = {"osc_24m", "sys_pll2_200m", "sys_pll1_40m",
+                                       "sys_pll2_100m", "sys_pll1_800m", "clk_ext2",
+                                       "clk_ext4", "audio_pll2_out" };
+
 static const char * const imx8mn_ecspi1_sels[] = {"osc_24m", "sys_pll2_200m", "sys_pll1_40m",
                                                  "sys_pll1_160m", "sys_pll1_800m", "sys_pll3_out",
                                                  "sys_pll2_250m", "audio_pll2_out", };
@@ -288,7 +301,7 @@ static const char * const imx8mn_pwm2_sels[] = {"osc_24m", "sys_pll2_100m", "sys
                                                "sys_pll1_80m", "video_pll1_out", };
 
 static const char * const imx8mn_pwm3_sels[] = {"osc_24m", "sys_pll2_100m", "sys_pll1_160m",
-                                               "sys_pll1_40m", "sys3_pll2_out", "clk_ext2",
+                                               "sys_pll1_40m", "sys_pll3_out", "clk_ext2",
                                                "sys_pll1_80m", "video_pll1_out", };
 
 static const char * const imx8mn_pwm4_sels[] = {"osc_24m", "sys_pll2_100m", "sys_pll1_160m",
@@ -317,7 +330,7 @@ static const char * const imx8mn_dsi_dbi_sels[] = {"osc_24m", "sys_pll1_266m", "
 
 static const char * const imx8mn_usdhc3_sels[] = {"osc_24m", "sys_pll1_400m", "sys_pll1_800m",
                                                  "sys_pll2_500m", "sys_pll3_out", "sys_pll1_266m",
-                                                 "audio_pll2_clk", "sys_pll1_100m", };
+                                                 "audio_pll2_out", "sys_pll1_100m", };
 
 static const char * const imx8mn_camera_pixel_sels[] = {"osc_24m", "sys_pll1_266m", "sys_pll2_250m",
                                                        "sys_pll1_800m", "sys_pll2_1000m", "sys_pll3_out",
@@ -346,7 +359,7 @@ static const char * const imx8mn_pdm_sels[] = {"osc_24m", "sys_pll2_100m", "audi
 static const char * const imx8mn_dram_core_sels[] = {"dram_pll_out", "dram_alt_root", };
 
 static const char * const imx8mn_clko1_sels[] = {"osc_24m", "sys_pll1_800m", "osc_27m",
-                                                "sys_pll1_200m", "audio_pll2_clk", "vpu_pll",
+                                                "sys_pll1_200m", "audio_pll2_out", "vpu_pll",
                                                 "sys_pll1_80m", };
 static const char * const imx8mn_clko2_sels[] = {"osc_24m", "sys_pll2_200m", "sys_pll1_400m",
                                                 "sys_pll2_166m", "sys_pll3_out", "audio_pll1_out",
@@ -355,6 +368,14 @@ static const char * const imx8mn_clko2_sels[] = {"osc_24m", "sys_pll2_200m", "sy
 static struct clk *clks[IMX8MN_CLK_END];
 static struct clk_onecell_data clk_data;
 
+static struct clk ** const uart_clks[] = {
+       &clks[IMX8MN_CLK_UART1_ROOT],
+       &clks[IMX8MN_CLK_UART2_ROOT],
+       &clks[IMX8MN_CLK_UART3_ROOT],
+       &clks[IMX8MN_CLK_UART4_ROOT],
+       NULL
+};
+
 static int imx8mn_clocks_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
@@ -400,40 +421,28 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
        clks[IMX8MN_SYS_PLL3] = imx_clk_pll14xx("sys_pll3", "sys_pll3_ref_sel", base + 0x114, &imx8mn_sys_pll);
 
        /* PLL bypass out */
-       clks[IMX8MN_AUDIO_PLL1_BYPASS] = imx_clk_mux_flags("audio_pll1_bypass", base, 4, 1, audio_pll1_bypass_sels, ARRAY_SIZE(audio_pll1_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MN_AUDIO_PLL2_BYPASS] = imx_clk_mux_flags("audio_pll2_bypass", base + 0x14, 4, 1, audio_pll2_bypass_sels, ARRAY_SIZE(audio_pll2_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MN_VIDEO_PLL1_BYPASS] = imx_clk_mux_flags("video_pll1_bypass", base + 0x28, 4, 1, video_pll1_bypass_sels, ARRAY_SIZE(video_pll1_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MN_DRAM_PLL_BYPASS] = imx_clk_mux_flags("dram_pll_bypass", base + 0x50, 4, 1, dram_pll_bypass_sels, ARRAY_SIZE(dram_pll_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MN_GPU_PLL_BYPASS] = imx_clk_mux_flags("gpu_pll_bypass", base + 0x64, 4, 1, gpu_pll_bypass_sels, ARRAY_SIZE(gpu_pll_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MN_VPU_PLL_BYPASS] = imx_clk_mux_flags("vpu_pll_bypass", base + 0x74, 4, 1, vpu_pll_bypass_sels, ARRAY_SIZE(vpu_pll_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MN_ARM_PLL_BYPASS] = imx_clk_mux_flags("arm_pll_bypass", base + 0x84, 4, 1, arm_pll_bypass_sels, ARRAY_SIZE(arm_pll_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MN_SYS_PLL1_BYPASS] = imx_clk_mux_flags("sys_pll1_bypass", base + 0x94, 4, 1, sys_pll1_bypass_sels, ARRAY_SIZE(sys_pll1_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MN_SYS_PLL2_BYPASS] = imx_clk_mux_flags("sys_pll2_bypass", base + 0x104, 4, 1, sys_pll2_bypass_sels, ARRAY_SIZE(sys_pll2_bypass_sels), CLK_SET_RATE_PARENT);
-       clks[IMX8MN_SYS_PLL3_BYPASS] = imx_clk_mux_flags("sys_pll3_bypass", base + 0x114, 4, 1, sys_pll3_bypass_sels, ARRAY_SIZE(sys_pll3_bypass_sels), CLK_SET_RATE_PARENT);
-
-       /* unbypass all the plls */
-       clk_set_parent(clks[IMX8MN_AUDIO_PLL1_BYPASS], clks[IMX8MN_AUDIO_PLL1]);
-       clk_set_parent(clks[IMX8MN_AUDIO_PLL2_BYPASS], clks[IMX8MN_AUDIO_PLL2]);
-       clk_set_parent(clks[IMX8MN_VIDEO_PLL1_BYPASS], clks[IMX8MN_VIDEO_PLL1]);
-       clk_set_parent(clks[IMX8MN_DRAM_PLL_BYPASS], clks[IMX8MN_DRAM_PLL]);
-       clk_set_parent(clks[IMX8MN_GPU_PLL_BYPASS], clks[IMX8MN_GPU_PLL]);
-       clk_set_parent(clks[IMX8MN_VPU_PLL_BYPASS], clks[IMX8MN_VPU_PLL]);
-       clk_set_parent(clks[IMX8MN_ARM_PLL_BYPASS], clks[IMX8MN_ARM_PLL]);
-       clk_set_parent(clks[IMX8MN_SYS_PLL1_BYPASS], clks[IMX8MN_SYS_PLL1]);
-       clk_set_parent(clks[IMX8MN_SYS_PLL2_BYPASS], clks[IMX8MN_SYS_PLL2]);
-       clk_set_parent(clks[IMX8MN_SYS_PLL3_BYPASS], clks[IMX8MN_SYS_PLL3]);
+       clks[IMX8MN_AUDIO_PLL1_BYPASS] = imx_clk_mux_flags("audio_pll1_bypass", base, 16, 1, audio_pll1_bypass_sels, ARRAY_SIZE(audio_pll1_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MN_AUDIO_PLL2_BYPASS] = imx_clk_mux_flags("audio_pll2_bypass", base + 0x14, 16, 1, audio_pll2_bypass_sels, ARRAY_SIZE(audio_pll2_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MN_VIDEO_PLL1_BYPASS] = imx_clk_mux_flags("video_pll1_bypass", base + 0x28, 16, 1, video_pll1_bypass_sels, ARRAY_SIZE(video_pll1_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MN_DRAM_PLL_BYPASS] = imx_clk_mux_flags("dram_pll_bypass", base + 0x50, 16, 1, dram_pll_bypass_sels, ARRAY_SIZE(dram_pll_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MN_GPU_PLL_BYPASS] = imx_clk_mux_flags("gpu_pll_bypass", base + 0x64, 28, 1, gpu_pll_bypass_sels, ARRAY_SIZE(gpu_pll_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MN_VPU_PLL_BYPASS] = imx_clk_mux_flags("vpu_pll_bypass", base + 0x74, 28, 1, vpu_pll_bypass_sels, ARRAY_SIZE(vpu_pll_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MN_ARM_PLL_BYPASS] = imx_clk_mux_flags("arm_pll_bypass", base + 0x84, 28, 1, arm_pll_bypass_sels, ARRAY_SIZE(arm_pll_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MN_SYS_PLL1_BYPASS] = imx_clk_mux_flags("sys_pll1_bypass", base + 0x94, 28, 1, sys_pll1_bypass_sels, ARRAY_SIZE(sys_pll1_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MN_SYS_PLL2_BYPASS] = imx_clk_mux_flags("sys_pll2_bypass", base + 0x104, 28, 1, sys_pll2_bypass_sels, ARRAY_SIZE(sys_pll2_bypass_sels), CLK_SET_RATE_PARENT);
+       clks[IMX8MN_SYS_PLL3_BYPASS] = imx_clk_mux_flags("sys_pll3_bypass", base + 0x114, 28, 1, sys_pll3_bypass_sels, ARRAY_SIZE(sys_pll3_bypass_sels), CLK_SET_RATE_PARENT);
 
        /* PLL out gate */
        clks[IMX8MN_AUDIO_PLL1_OUT] = imx_clk_gate("audio_pll1_out", "audio_pll1_bypass", base, 13);
        clks[IMX8MN_AUDIO_PLL2_OUT] = imx_clk_gate("audio_pll2_out", "audio_pll2_bypass", base + 0x14, 13);
        clks[IMX8MN_VIDEO_PLL1_OUT] = imx_clk_gate("video_pll1_out", "video_pll1_bypass", base + 0x28, 13);
        clks[IMX8MN_DRAM_PLL_OUT] = imx_clk_gate("dram_pll_out", "dram_pll_bypass", base + 0x50, 13);
-       clks[IMX8MN_GPU_PLL_OUT] = imx_clk_gate("gpu_pll_out", "gpu_pll_bypass", base + 0x64, 13);
-       clks[IMX8MN_VPU_PLL_OUT] = imx_clk_gate("vpu_pll_out", "vpu_pll_bypass", base + 0x74, 13);
-       clks[IMX8MN_ARM_PLL_OUT] = imx_clk_gate("arm_pll_out", "arm_pll_bypass", base + 0x84, 13);
-       clks[IMX8MN_SYS_PLL1_OUT] = imx_clk_gate("sys_pll1_out", "sys_pll1_bypass", base + 0x94, 13);
-       clks[IMX8MN_SYS_PLL2_OUT] = imx_clk_gate("sys_pll2_out", "sys_pll2_bypass", base + 0x104, 13);
-       clks[IMX8MN_SYS_PLL3_OUT] = imx_clk_gate("sys_pll3_out", "sys_pll3_bypass", base + 0x114, 13);
+       clks[IMX8MN_GPU_PLL_OUT] = imx_clk_gate("gpu_pll_out", "gpu_pll_bypass", base + 0x64, 11);
+       clks[IMX8MN_VPU_PLL_OUT] = imx_clk_gate("vpu_pll_out", "vpu_pll_bypass", base + 0x74, 11);
+       clks[IMX8MN_ARM_PLL_OUT] = imx_clk_gate("arm_pll_out", "arm_pll_bypass", base + 0x84, 11);
+       clks[IMX8MN_SYS_PLL1_OUT] = imx_clk_gate("sys_pll1_out", "sys_pll1_bypass", base + 0x94, 11);
+       clks[IMX8MN_SYS_PLL2_OUT] = imx_clk_gate("sys_pll2_out", "sys_pll2_bypass", base + 0x104, 11);
+       clks[IMX8MN_SYS_PLL3_OUT] = imx_clk_gate("sys_pll3_out", "sys_pll3_bypass", base + 0x114, 11);
 
        /* SYS PLL fixed output */
        clks[IMX8MN_SYS_PLL1_40M] = imx_clk_fixed_factor("sys_pll1_40m", "sys_pll1_out", 1, 20);
@@ -516,6 +525,7 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
        clks[IMX8MN_CLK_UART4] = imx8m_clk_composite("uart4", imx8mn_uart4_sels, base + 0xb080);
        clks[IMX8MN_CLK_USB_CORE_REF] = imx8m_clk_composite("usb_core_ref", imx8mn_usb_core_sels, base + 0xb100);
        clks[IMX8MN_CLK_USB_PHY_REF] = imx8m_clk_composite("usb_phy_ref", imx8mn_usb_phy_sels, base + 0xb180);
+       clks[IMX8MN_CLK_GIC] = imx8m_clk_composite_critical("gic", imx8mn_gic_sels, base + 0xb200);
        clks[IMX8MN_CLK_ECSPI1] = imx8m_clk_composite("ecspi1", imx8mn_ecspi1_sels, base + 0xb280);
        clks[IMX8MN_CLK_ECSPI2] = imx8m_clk_composite("ecspi2", imx8mn_ecspi2_sels, base + 0xb300);
        clks[IMX8MN_CLK_PWM1] = imx8m_clk_composite("pwm1", imx8mn_pwm1_sels, base + 0xb380);
@@ -612,6 +622,8 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
                goto unregister_clks;
        }
 
+       imx_register_uart_clocks(uart_clks);
+
        return 0;
 
 unregister_clks:
index d407a07..41fc9c6 100644 (file)
@@ -41,34 +41,34 @@ static const char * const dram_pll_out_sels[] = {"dram_pll1_ref_sel", };
 
 /* CCM ROOT */
 static const char * const imx8mq_a53_sels[] = {"osc_25m", "arm_pll_out", "sys2_pll_500m", "sys2_pll_1000m",
-                                       "sys1_pll_800m", "sys1_pll_400m", "audio_pll1_out", "sys3_pll2_out", };
+                                       "sys1_pll_800m", "sys1_pll_400m", "audio_pll1_out", "sys3_pll_out", };
 
 static const char * const imx8mq_arm_m4_sels[] = {"osc_25m", "sys2_pll_200m", "sys2_pll_250m", "sys1_pll_266m",
-                                       "sys1_pll_800m", "audio_pll1_out", "video_pll1_out", "sys3_pll2_out", };
+                                       "sys1_pll_800m", "audio_pll1_out", "video_pll1_out", "sys3_pll_out", };
 
 static const char * const imx8mq_vpu_sels[] = {"osc_25m", "arm_pll_out", "sys2_pll_500m", "sys2_pll_1000m",
                                        "sys1_pll_800m", "sys1_pll_400m", "audio_pll1_out", "vpu_pll_out", };
 
-static const char * const imx8mq_gpu_core_sels[] = {"osc_25m", "gpu_pll_out", "sys1_pll_800m", "sys3_pll2_out",
+static const char * const imx8mq_gpu_core_sels[] = {"osc_25m", "gpu_pll_out", "sys1_pll_800m", "sys3_pll_out",
                                             "sys2_pll_1000m", "audio_pll1_out", "video_pll1_out", "audio_pll2_out", };
 
-static const char * const imx8mq_gpu_shader_sels[] = {"osc_25m", "gpu_pll_out", "sys1_pll_800m", "sys3_pll2_out",
+static const char * const imx8mq_gpu_shader_sels[] = {"osc_25m", "gpu_pll_out", "sys1_pll_800m", "sys3_pll_out",
                                               "sys2_pll_1000m", "audio_pll1_out", "video_pll1_out", "audio_pll2_out", };
 
 static const char * const imx8mq_main_axi_sels[] = {"osc_25m", "sys2_pll_333m", "sys1_pll_800m", "sys2_pll_250m",
                                             "sys2_pll_1000m", "audio_pll1_out", "video_pll1_out", "sys1_pll_100m",};
 
 static const char * const imx8mq_enet_axi_sels[] = {"osc_25m", "sys1_pll_266m", "sys1_pll_800m", "sys2_pll_250m",
-                                            "sys2_pll_200m", "audio_pll1_out", "video_pll1_out", "sys3_pll2_out", };
+                                            "sys2_pll_200m", "audio_pll1_out", "video_pll1_out", "sys3_pll_out", };
 
 static const char * const imx8mq_nand_usdhc_sels[] = {"osc_25m", "sys1_pll_266m", "sys1_pll_800m", "sys2_pll_200m",
-                                              "sys1_pll_133m", "sys3_pll2_out", "sys2_pll_250m", "audio_pll1_out", };
+                                              "sys1_pll_133m", "sys3_pll_out", "sys2_pll_250m", "audio_pll1_out", };
 
-static const char * const imx8mq_vpu_bus_sels[] = {"osc_25m", "sys1_pll_800m", "vpu_pll_out", "audio_pll2_out", "sys3_pll2_out", "sys2_pll_1000m", "sys2_pll_200m", "sys1_pll_100m", };
+static const char * const imx8mq_vpu_bus_sels[] = {"osc_25m", "sys1_pll_800m", "vpu_pll_out", "audio_pll2_out", "sys3_pll_out", "sys2_pll_1000m", "sys2_pll_200m", "sys1_pll_100m", };
 
-static const char * const imx8mq_disp_axi_sels[] = {"osc_25m", "sys2_pll_125m", "sys1_pll_800m", "sys3_pll2_out", "sys1_pll_400m", "audio_pll2_out", "clk_ext1", "clk_ext4", };
+static const char * const imx8mq_disp_axi_sels[] = {"osc_25m", "sys2_pll_125m", "sys1_pll_800m", "sys3_pll_out", "sys1_pll_400m", "audio_pll2_out", "clk_ext1", "clk_ext4", };
 
-static const char * const imx8mq_disp_apb_sels[] = {"osc_25m", "sys2_pll_125m", "sys1_pll_800m", "sys3_pll2_out",
+static const char * const imx8mq_disp_apb_sels[] = {"osc_25m", "sys2_pll_125m", "sys1_pll_800m", "sys3_pll_out",
                                             "sys1_pll_40m", "audio_pll2_out", "clk_ext1", "clk_ext3", };
 
 static const char * const imx8mq_disp_rtrm_sels[] = {"osc_25m", "sys1_pll_800m", "sys2_pll_200m", "sys1_pll_400m",
@@ -77,53 +77,53 @@ static const char * const imx8mq_disp_rtrm_sels[] = {"osc_25m", "sys1_pll_800m",
 static const char * const imx8mq_usb_bus_sels[] = {"osc_25m", "sys2_pll_500m", "sys1_pll_800m", "sys2_pll_100m",
                                            "sys2_pll_200m", "clk_ext2", "clk_ext4", "audio_pll2_out", };
 
-static const char * const imx8mq_gpu_axi_sels[] = {"osc_25m", "sys1_pll_800m", "gpu_pll_out", "sys3_pll2_out", "sys2_pll_1000m",
+static const char * const imx8mq_gpu_axi_sels[] = {"osc_25m", "sys1_pll_800m", "gpu_pll_out", "sys3_pll_out", "sys2_pll_1000m",
                                            "audio_pll1_out", "video_pll1_out", "audio_pll2_out", };
 
-static const char * const imx8mq_gpu_ahb_sels[] = {"osc_25m", "sys1_pll_800m", "gpu_pll_out", "sys3_pll2_out", "sys2_pll_1000m",
+static const char * const imx8mq_gpu_ahb_sels[] = {"osc_25m", "sys1_pll_800m", "gpu_pll_out", "sys3_pll_out", "sys2_pll_1000m",
                                            "audio_pll1_out", "video_pll1_out", "audio_pll2_out", };
 
-static const char * const imx8mq_noc_sels[] = {"osc_25m", "sys1_pll_800m", "sys3_pll2_out", "sys2_pll_1000m", "sys2_pll_500m",
+static const char * const imx8mq_noc_sels[] = {"osc_25m", "sys1_pll_800m", "sys3_pll_out", "sys2_pll_1000m", "sys2_pll_500m",
                                        "audio_pll1_out", "video_pll1_out", "audio_pll2_out", };
 
-static const char * const imx8mq_noc_apb_sels[] = {"osc_25m", "sys1_pll_400m", "sys3_pll2_out", "sys2_pll_333m", "sys2_pll_200m",
+static const char * const imx8mq_noc_apb_sels[] = {"osc_25m", "sys1_pll_400m", "sys3_pll_out", "sys2_pll_333m", "sys2_pll_200m",
                                            "sys1_pll_800m", "audio_pll1_out", "video_pll1_out", };
 
 static const char * const imx8mq_ahb_sels[] = {"osc_25m", "sys1_pll_133m", "sys1_pll_800m", "sys1_pll_400m",
-                                       "sys2_pll_125m", "sys3_pll2_out", "audio_pll1_out", "video_pll1_out", };
+                                       "sys2_pll_125m", "sys3_pll_out", "audio_pll1_out", "video_pll1_out", };
 
 static const char * const imx8mq_audio_ahb_sels[] = {"osc_25m", "sys2_pll_500m", "sys1_pll_800m", "sys2_pll_1000m",
-                                                 "sys2_pll_166m", "sys3_pll2_out", "audio_pll1_out", "video_pll1_out", };
+                                                 "sys2_pll_166m", "sys3_pll_out", "audio_pll1_out", "video_pll1_out", };
 
 static const char * const imx8mq_dsi_ahb_sels[] = {"osc_25m", "sys2_pll_100m", "sys1_pll_80m", "sys1_pll_800m",
-                                               "sys2_pll_1000m", "sys3_pll2_out", "clk_ext3", "audio_pll2_out"};
+                                               "sys2_pll_1000m", "sys3_pll_out", "clk_ext3", "audio_pll2_out"};
 
 static const char * const imx8mq_dram_alt_sels[] = {"osc_25m", "sys1_pll_800m", "sys1_pll_100m", "sys2_pll_500m",
                                                "sys2_pll_250m", "sys1_pll_400m", "audio_pll1_out", "sys1_pll_266m", };
 
 static const char * const imx8mq_dram_apb_sels[] = {"osc_25m", "sys2_pll_200m", "sys1_pll_40m", "sys1_pll_160m",
-                                               "sys1_pll_800m", "sys3_pll2_out", "sys2_pll_250m", "audio_pll2_out", };
+                                               "sys1_pll_800m", "sys3_pll_out", "sys2_pll_250m", "audio_pll2_out", };
 
-static const char * const imx8mq_vpu_g1_sels[] = {"osc_25m", "vpu_pll_out", "sys1_pll_800m", "sys2_pll_1000m", "sys1_pll_100m", "sys2_pll_125m", "sys3_pll2_out", "audio_pll1_out", };
+static const char * const imx8mq_vpu_g1_sels[] = {"osc_25m", "vpu_pll_out", "sys1_pll_800m", "sys2_pll_1000m", "sys1_pll_100m", "sys2_pll_125m", "sys3_pll_out", "audio_pll1_out", };
 
-static const char * const imx8mq_vpu_g2_sels[] = {"osc_25m", "vpu_pll_out", "sys1_pll_800m", "sys2_pll_1000m", "sys1_pll_100m", "sys2_pll_125m", "sys3_pll2_out", "audio_pll1_out", };
+static const char * const imx8mq_vpu_g2_sels[] = {"osc_25m", "vpu_pll_out", "sys1_pll_800m", "sys2_pll_1000m", "sys1_pll_100m", "sys2_pll_125m", "sys3_pll_out", "audio_pll1_out", };
 
-static const char * const imx8mq_disp_dtrc_sels[] = {"osc_25m", "vpu_pll_out", "sys1_pll_800m", "sys2_pll_1000m", "sys1_pll_160m", "sys2_pll_100m", "sys3_pll2_out", "audio_pll2_out", };
+static const char * const imx8mq_disp_dtrc_sels[] = {"osc_25m", "vpu_pll_out", "sys1_pll_800m", "sys2_pll_1000m", "sys1_pll_160m", "sys2_pll_100m", "sys3_pll_out", "audio_pll2_out", };
 
-static const char * const imx8mq_disp_dc8000_sels[] = {"osc_25m", "vpu_pll_out", "sys1_pll_800m", "sys2_pll_1000m", "sys1_pll_160m", "sys2_pll_100m", "sys3_pll2_out", "audio_pll2_out", };
+static const char * const imx8mq_disp_dc8000_sels[] = {"osc_25m", "vpu_pll_out", "sys1_pll_800m", "sys2_pll_1000m", "sys1_pll_160m", "sys2_pll_100m", "sys3_pll_out", "audio_pll2_out", };
 
 static const char * const imx8mq_pcie1_ctrl_sels[] = {"osc_25m", "sys2_pll_250m", "sys2_pll_200m", "sys1_pll_266m",
-                                              "sys1_pll_800m", "sys2_pll_500m", "sys2_pll_250m", "sys3_pll2_out", };
+                                              "sys1_pll_800m", "sys2_pll_500m", "sys2_pll_250m", "sys3_pll_out", };
 
 static const char * const imx8mq_pcie1_phy_sels[] = {"osc_25m", "sys2_pll_100m", "sys2_pll_500m", "clk_ext1", "clk_ext2",
                                              "clk_ext3", "clk_ext4", };
 
-static const char * const imx8mq_pcie1_aux_sels[] = {"osc_25m", "sys2_pll_200m", "sys2_pll_500m", "sys3_pll2_out",
+static const char * const imx8mq_pcie1_aux_sels[] = {"osc_25m", "sys2_pll_200m", "sys2_pll_500m", "sys3_pll_out",
                                              "sys2_pll_100m", "sys1_pll_80m", "sys1_pll_160m", "sys1_pll_200m", };
 
-static const char * const imx8mq_dc_pixel_sels[] = {"osc_25m", "video_pll1_out", "audio_pll2_out", "audio_pll1_out", "sys1_pll_800m", "sys2_pll_1000m", "sys3_pll2_out", "clk_ext4", };
+static const char * const imx8mq_dc_pixel_sels[] = {"osc_25m", "video_pll1_out", "audio_pll2_out", "audio_pll1_out", "sys1_pll_800m", "sys2_pll_1000m", "sys3_pll_out", "clk_ext4", };
 
-static const char * const imx8mq_lcdif_pixel_sels[] = {"osc_25m", "video_pll1_out", "audio_pll2_out", "audio_pll1_out", "sys1_pll_800m", "sys2_pll_1000m", "sys3_pll2_out", "clk_ext4", };
+static const char * const imx8mq_lcdif_pixel_sels[] = {"osc_25m", "video_pll1_out", "audio_pll2_out", "audio_pll1_out", "sys1_pll_800m", "sys2_pll_1000m", "sys3_pll_out", "clk_ext4", };
 
 static const char * const imx8mq_sai1_sels[] = {"osc_25m", "audio_pll1_out", "audio_pll2_out", "video_pll1_out", "sys1_pll_133m", "osc_27m", "clk_ext1", "clk_ext2", };
 
@@ -151,40 +151,40 @@ static const char * const imx8mq_enet_phy_sels[] = {"osc_25m", "sys2_pll_50m", "
                                             "audio_pll1_out", "video_pll1_out", "audio_pll2_out", };
 
 static const char * const imx8mq_nand_sels[] = {"osc_25m", "sys2_pll_500m", "audio_pll1_out", "sys1_pll_400m",
-                                        "audio_pll2_out", "sys3_pll2_out", "sys2_pll_250m", "video_pll1_out", };
+                                        "audio_pll2_out", "sys3_pll_out", "sys2_pll_250m", "video_pll1_out", };
 
 static const char * const imx8mq_qspi_sels[] = {"osc_25m", "sys1_pll_400m", "sys1_pll_800m", "sys2_pll_500m",
-                                        "audio_pll2_out", "sys1_pll_266m", "sys3_pll2_out", "sys1_pll_100m", };
+                                        "audio_pll2_out", "sys1_pll_266m", "sys3_pll_out", "sys1_pll_100m", };
 
 static const char * const imx8mq_usdhc1_sels[] = {"osc_25m", "sys1_pll_400m", "sys1_pll_800m", "sys2_pll_500m",
-                                        "audio_pll2_out", "sys1_pll_266m", "sys3_pll2_out", "sys1_pll_100m", };
+                                        "audio_pll2_out", "sys1_pll_266m", "sys3_pll_out", "sys1_pll_100m", };
 
 static const char * const imx8mq_usdhc2_sels[] = {"osc_25m", "sys1_pll_400m", "sys1_pll_800m", "sys2_pll_500m",
-                                        "audio_pll2_out", "sys1_pll_266m", "sys3_pll2_out", "sys1_pll_100m", };
+                                        "audio_pll2_out", "sys1_pll_266m", "sys3_pll_out", "sys1_pll_100m", };
 
-static const char * const imx8mq_i2c1_sels[] = {"osc_25m", "sys1_pll_160m", "sys2_pll_50m", "sys3_pll2_out", "audio_pll1_out",
+static const char * const imx8mq_i2c1_sels[] = {"osc_25m", "sys1_pll_160m", "sys2_pll_50m", "sys3_pll_out", "audio_pll1_out",
                                         "video_pll1_out", "audio_pll2_out", "sys1_pll_133m", };
 
-static const char * const imx8mq_i2c2_sels[] = {"osc_25m", "sys1_pll_160m", "sys2_pll_50m", "sys3_pll2_out", "audio_pll1_out",
+static const char * const imx8mq_i2c2_sels[] = {"osc_25m", "sys1_pll_160m", "sys2_pll_50m", "sys3_pll_out", "audio_pll1_out",
                                         "video_pll1_out", "audio_pll2_out", "sys1_pll_133m", };
 
-static const char * const imx8mq_i2c3_sels[] = {"osc_25m", "sys1_pll_160m", "sys2_pll_50m", "sys3_pll2_out", "audio_pll1_out",
+static const char * const imx8mq_i2c3_sels[] = {"osc_25m", "sys1_pll_160m", "sys2_pll_50m", "sys3_pll_out", "audio_pll1_out",
                                         "video_pll1_out", "audio_pll2_out", "sys1_pll_133m", };
 
-static const char * const imx8mq_i2c4_sels[] = {"osc_25m", "sys1_pll_160m", "sys2_pll_50m", "sys3_pll2_out", "audio_pll1_out",
+static const char * const imx8mq_i2c4_sels[] = {"osc_25m", "sys1_pll_160m", "sys2_pll_50m", "sys3_pll_out", "audio_pll1_out",
                                         "video_pll1_out", "audio_pll2_out", "sys1_pll_133m", };
 
 static const char * const imx8mq_uart1_sels[] = {"osc_25m", "sys1_pll_80m", "sys2_pll_200m", "sys2_pll_100m",
-                                         "sys3_pll2_out", "clk_ext2", "clk_ext4", "audio_pll2_out", };
+                                         "sys3_pll_out", "clk_ext2", "clk_ext4", "audio_pll2_out", };
 
 static const char * const imx8mq_uart2_sels[] = {"osc_25m", "sys1_pll_80m", "sys2_pll_200m", "sys2_pll_100m",
-                                         "sys3_pll2_out", "clk_ext2", "clk_ext3", "audio_pll2_out", };
+                                         "sys3_pll_out", "clk_ext2", "clk_ext3", "audio_pll2_out", };
 
 static const char * const imx8mq_uart3_sels[] = {"osc_25m", "sys1_pll_80m", "sys2_pll_200m", "sys2_pll_100m",
-                                         "sys3_pll2_out", "clk_ext2", "clk_ext4", "audio_pll2_out", };
+                                         "sys3_pll_out", "clk_ext2", "clk_ext4", "audio_pll2_out", };
 
 static const char * const imx8mq_uart4_sels[] = {"osc_25m", "sys1_pll_80m", "sys2_pll_200m", "sys2_pll_100m",
-                                         "sys3_pll2_out", "clk_ext2", "clk_ext3", "audio_pll2_out", };
+                                         "sys3_pll_out", "clk_ext2", "clk_ext3", "audio_pll2_out", };
 
 static const char * const imx8mq_usb_core_sels[] = {"osc_25m", "sys1_pll_100m", "sys1_pll_40m", "sys2_pll_100m",
                                             "sys2_pll_200m", "clk_ext2", "clk_ext3", "audio_pll2_out", };
@@ -196,79 +196,79 @@ static const char * const imx8mq_gic_sels[] = {"osc_25m", "sys2_pll_200m", "sys1
                                               "sys2_pll_200m", "clk_ext2", "clk_ext3", "audio_pll2_out" };
 
 static const char * const imx8mq_ecspi1_sels[] = {"osc_25m", "sys2_pll_200m", "sys1_pll_40m", "sys1_pll_160m",
-                                          "sys1_pll_800m", "sys3_pll2_out", "sys2_pll_250m", "audio_pll2_out", };
+                                          "sys1_pll_800m", "sys3_pll_out", "sys2_pll_250m", "audio_pll2_out", };
 
 static const char * const imx8mq_ecspi2_sels[] = {"osc_25m", "sys2_pll_200m", "sys1_pll_40m", "sys1_pll_160m",
-                                          "sys1_pll_800m", "sys3_pll2_out", "sys2_pll_250m", "audio_pll2_out", };
+                                          "sys1_pll_800m", "sys3_pll_out", "sys2_pll_250m", "audio_pll2_out", };
 
 static const char * const imx8mq_pwm1_sels[] = {"osc_25m", "sys2_pll_100m", "sys1_pll_160m", "sys1_pll_40m",
-                                        "sys3_pll2_out", "clk_ext1", "sys1_pll_80m", "video_pll1_out", };
+                                        "sys3_pll_out", "clk_ext1", "sys1_pll_80m", "video_pll1_out", };
 
 static const char * const imx8mq_pwm2_sels[] = {"osc_25m", "sys2_pll_100m", "sys1_pll_160m", "sys1_pll_40m",
-                                        "sys3_pll2_out", "clk_ext1", "sys1_pll_80m", "video_pll1_out", };
+                                        "sys3_pll_out", "clk_ext1", "sys1_pll_80m", "video_pll1_out", };
 
 static const char * const imx8mq_pwm3_sels[] = {"osc_25m", "sys2_pll_100m", "sys1_pll_160m", "sys1_pll_40m",
-                                        "sys3_pll2_out", "clk_ext2", "sys1_pll_80m", "video_pll1_out", };
+                                        "sys3_pll_out", "clk_ext2", "sys1_pll_80m", "video_pll1_out", };
 
 static const char * const imx8mq_pwm4_sels[] = {"osc_25m", "sys2_pll_100m", "sys1_pll_160m", "sys1_pll_40m",
-                                        "sys3_pll2_out", "clk_ext2", "sys1_pll_80m", "video_pll1_out", };
+                                        "sys3_pll_out", "clk_ext2", "sys1_pll_80m", "video_pll1_out", };
 
 static const char * const imx8mq_gpt1_sels[] = {"osc_25m", "sys2_pll_100m", "sys1_pll_400m", "sys1_pll_40m",
                                         "sys1_pll_80m", "audio_pll1_out", "clk_ext1", };
 
 static const char * const imx8mq_wdog_sels[] = {"osc_25m", "sys1_pll_133m", "sys1_pll_160m", "vpu_pll_out",
-                                        "sys2_pll_125m", "sys3_pll2_out", "sys1_pll_80m", "sys2_pll_166m", };
+                                        "sys2_pll_125m", "sys3_pll_out", "sys1_pll_80m", "sys2_pll_166m", };
 
-static const char * const imx8mq_wrclk_sels[] = {"osc_25m", "sys1_pll_40m", "vpu_pll_out", "sys3_pll2_out", "sys2_pll_200m",
+static const char * const imx8mq_wrclk_sels[] = {"osc_25m", "sys1_pll_40m", "vpu_pll_out", "sys3_pll_out", "sys2_pll_200m",
                                          "sys1_pll_266m", "sys2_pll_500m", "sys1_pll_100m", };
 
 static const char * const imx8mq_dsi_core_sels[] = {"osc_25m", "sys1_pll_266m", "sys2_pll_250m", "sys1_pll_800m",
-                                            "sys2_pll_1000m", "sys3_pll2_out", "audio_pll2_out", "video_pll1_out", };
+                                            "sys2_pll_1000m", "sys3_pll_out", "audio_pll2_out", "video_pll1_out", };
 
 static const char * const imx8mq_dsi_phy_sels[] = {"osc_25m", "sys2_pll_125m", "sys2_pll_100m", "sys1_pll_800m",
                                            "sys2_pll_1000m", "clk_ext2", "audio_pll2_out", "video_pll1_out", };
 
 static const char * const imx8mq_dsi_dbi_sels[] = {"osc_25m", "sys1_pll_266m", "sys2_pll_100m", "sys1_pll_800m",
-                                           "sys2_pll_1000m", "sys3_pll2_out", "audio_pll2_out", "video_pll1_out", };
+                                           "sys2_pll_1000m", "sys3_pll_out", "audio_pll2_out", "video_pll1_out", };
 
 static const char * const imx8mq_dsi_esc_sels[] = {"osc_25m", "sys2_pll_100m", "sys1_pll_80m", "sys1_pll_800m",
-                                           "sys2_pll_1000m", "sys3_pll2_out", "clk_ext3", "audio_pll2_out", };
+                                           "sys2_pll_1000m", "sys3_pll_out", "clk_ext3", "audio_pll2_out", };
 
 static const char * const imx8mq_csi1_core_sels[] = {"osc_25m", "sys1_pll_266m", "sys2_pll_250m", "sys1_pll_800m",
-                                             "sys2_pll_1000m", "sys3_pll2_out", "audio_pll2_out", "video_pll1_out", };
+                                             "sys2_pll_1000m", "sys3_pll_out", "audio_pll2_out", "video_pll1_out", };
 
 static const char * const imx8mq_csi1_phy_sels[] = {"osc_25m", "sys2_pll_125m", "sys2_pll_100m", "sys1_pll_800m",
                                             "sys2_pll_1000m", "clk_ext2", "audio_pll2_out", "video_pll1_out", };
 
 static const char * const imx8mq_csi1_esc_sels[] = {"osc_25m", "sys2_pll_100m", "sys1_pll_80m", "sys1_pll_800m",
-                                            "sys2_pll_1000m", "sys3_pll2_out", "clk_ext3", "audio_pll2_out", };
+                                            "sys2_pll_1000m", "sys3_pll_out", "clk_ext3", "audio_pll2_out", };
 
 static const char * const imx8mq_csi2_core_sels[] = {"osc_25m", "sys1_pll_266m", "sys2_pll_250m", "sys1_pll_800m",
-                                             "sys2_pll_1000m", "sys3_pll2_out", "audio_pll2_out", "video_pll1_out", };
+                                             "sys2_pll_1000m", "sys3_pll_out", "audio_pll2_out", "video_pll1_out", };
 
 static const char * const imx8mq_csi2_phy_sels[] = {"osc_25m", "sys2_pll_125m", "sys2_pll_100m", "sys1_pll_800m",
                                             "sys2_pll_1000m", "clk_ext2", "audio_pll2_out", "video_pll1_out", };
 
 static const char * const imx8mq_csi2_esc_sels[] = {"osc_25m", "sys2_pll_100m", "sys1_pll_80m", "sys1_pll_800m",
-                                            "sys2_pll_1000m", "sys3_pll2_out", "clk_ext3", "audio_pll2_out", };
+                                            "sys2_pll_1000m", "sys3_pll_out", "clk_ext3", "audio_pll2_out", };
 
 static const char * const imx8mq_pcie2_ctrl_sels[] = {"osc_25m", "sys2_pll_250m", "sys2_pll_200m", "sys1_pll_266m",
-                                              "sys1_pll_800m", "sys2_pll_500m", "sys2_pll_333m", "sys3_pll2_out", };
+                                              "sys1_pll_800m", "sys2_pll_500m", "sys2_pll_333m", "sys3_pll_out", };
 
 static const char * const imx8mq_pcie2_phy_sels[] = {"osc_25m", "sys2_pll_100m", "sys2_pll_500m", "clk_ext1",
                                              "clk_ext2", "clk_ext3", "clk_ext4", "sys1_pll_400m", };
 
-static const char * const imx8mq_pcie2_aux_sels[] = {"osc_25m", "sys2_pll_200m", "sys2_pll_50m", "sys3_pll2_out",
+static const char * const imx8mq_pcie2_aux_sels[] = {"osc_25m", "sys2_pll_200m", "sys2_pll_50m", "sys3_pll_out",
                                              "sys2_pll_100m", "sys1_pll_80m", "sys1_pll_160m", "sys1_pll_200m", };
 
 static const char * const imx8mq_ecspi3_sels[] = {"osc_25m", "sys2_pll_200m", "sys1_pll_40m", "sys1_pll_160m",
-                                          "sys1_pll_800m", "sys3_pll2_out", "sys2_pll_250m", "audio_pll2_out", };
+                                          "sys1_pll_800m", "sys3_pll_out", "sys2_pll_250m", "audio_pll2_out", };
 static const char * const imx8mq_dram_core_sels[] = {"dram_pll_out", "dram_alt_root", };
 
 static const char * const imx8mq_clko1_sels[] = {"osc_25m", "sys1_pll_800m", "osc_27m", "sys1_pll_200m",
                                          "audio_pll2_out", "sys2_pll_500m", "vpu_pll_out", "sys1_pll_80m", };
 static const char * const imx8mq_clko2_sels[] = {"osc_25m", "sys2_pll_200m", "sys1_pll_400m", "sys2_pll_166m",
-                                         "sys3_pll2_out", "audio_pll1_out", "video_pll1_out", "ckil", };
+                                         "sys3_pll_out", "audio_pll1_out", "video_pll1_out", "ckil", };
 
 static struct clk_onecell_data clk_data;
 
@@ -406,7 +406,8 @@ static int imx8mq_clocks_probe(struct platform_device *pdev)
        clks[IMX8MQ_CLK_NOC_APB] = imx8m_clk_composite_critical("noc_apb", imx8mq_noc_apb_sels, base + 0x8d80);
 
        /* AHB */
-       clks[IMX8MQ_CLK_AHB] = imx8m_clk_composite("ahb", imx8mq_ahb_sels, base + 0x9000);
+       /* AHB clock is used by the AHB bus therefore marked as critical */
+       clks[IMX8MQ_CLK_AHB] = imx8m_clk_composite_critical("ahb", imx8mq_ahb_sels, base + 0x9000);
        clks[IMX8MQ_CLK_AUDIO_AHB] = imx8m_clk_composite("audio_ahb", imx8mq_audio_ahb_sels, base + 0x9100);
 
        /* IPG */
@@ -523,8 +524,8 @@ static int imx8mq_clocks_probe(struct platform_device *pdev)
        clks[IMX8MQ_CLK_UART2_ROOT] = imx_clk_gate4("uart2_root_clk", "uart2", base + 0x44a0, 0);
        clks[IMX8MQ_CLK_UART3_ROOT] = imx_clk_gate4("uart3_root_clk", "uart3", base + 0x44b0, 0);
        clks[IMX8MQ_CLK_UART4_ROOT] = imx_clk_gate4("uart4_root_clk", "uart4", base + 0x44c0, 0);
-       clks[IMX8MQ_CLK_USB1_CTRL_ROOT] = imx_clk_gate4("usb1_ctrl_root_clk", "usb_core_ref", base + 0x44d0, 0);
-       clks[IMX8MQ_CLK_USB2_CTRL_ROOT] = imx_clk_gate4("usb2_ctrl_root_clk", "usb_core_ref", base + 0x44e0, 0);
+       clks[IMX8MQ_CLK_USB1_CTRL_ROOT] = imx_clk_gate4("usb1_ctrl_root_clk", "usb_bus", base + 0x44d0, 0);
+       clks[IMX8MQ_CLK_USB2_CTRL_ROOT] = imx_clk_gate4("usb2_ctrl_root_clk", "usb_bus", base + 0x44e0, 0);
        clks[IMX8MQ_CLK_USB1_PHY_ROOT] = imx_clk_gate4("usb1_phy_root_clk", "usb_phy_ref", base + 0x44f0, 0);
        clks[IMX8MQ_CLK_USB2_PHY_ROOT] = imx_clk_gate4("usb2_phy_root_clk", "usb_phy_ref", base + 0x4500, 0);
        clks[IMX8MQ_CLK_USDHC1_ROOT] = imx_clk_gate4("usdhc1_root_clk", "usdhc1", base + 0x4510, 0);
@@ -539,7 +540,7 @@ static int imx8mq_clocks_probe(struct platform_device *pdev)
        clks[IMX8MQ_CLK_DISP_AXI_ROOT]  = imx_clk_gate2_shared2("disp_axi_root_clk", "disp_axi", base + 0x45d0, 0, &share_count_dcss);
        clks[IMX8MQ_CLK_DISP_APB_ROOT]  = imx_clk_gate2_shared2("disp_apb_root_clk", "disp_apb", base + 0x45d0, 0, &share_count_dcss);
        clks[IMX8MQ_CLK_DISP_RTRM_ROOT] = imx_clk_gate2_shared2("disp_rtrm_root_clk", "disp_rtrm", base + 0x45d0, 0, &share_count_dcss);
-       clks[IMX8MQ_CLK_TMU_ROOT] = imx_clk_gate4_flags("tmu_root_clk", "ipg_root", base + 0x4620, 0, CLK_IS_CRITICAL);
+       clks[IMX8MQ_CLK_TMU_ROOT] = imx_clk_gate4("tmu_root_clk", "ipg_root", base + 0x4620, 0);
        clks[IMX8MQ_CLK_VPU_DEC_ROOT] = imx_clk_gate2_flags("vpu_dec_root_clk", "vpu_bus", base + 0x4630, 0, CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE);
        clks[IMX8MQ_CLK_CSI1_ROOT] = imx_clk_gate4("csi1_root_clk", "csi1_core", base + 0x4650, 0);
        clks[IMX8MQ_CLK_CSI2_ROOT] = imx_clk_gate4("csi2_root_clk", "csi2_core", base + 0x4660, 0);
@@ -561,10 +562,18 @@ static int imx8mq_clocks_probe(struct platform_device *pdev)
        clk_data.clk_num = ARRAY_SIZE(clks);
 
        err = of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
-       WARN_ON(err);
+       if (err < 0) {
+               dev_err(dev, "failed to register clks for i.MX8MQ\n");
+               goto unregister_clks;
+       }
 
        imx_register_uart_clocks(uart_clks);
 
+       return 0;
+
+unregister_clks:
+       imx_unregister_clocks(clks, ARRAY_SIZE(clks));
+
        return err;
 }
 
index b721302..7a815ec 100644 (file)
@@ -191,6 +191,10 @@ static int clk_pll1416x_set_rate(struct clk_hw *hw, unsigned long drate,
        tmp &= ~RST_MASK;
        writel_relaxed(tmp, pll->base);
 
+       /* Enable BYPASS */
+       tmp |= BYPASS_MASK;
+       writel(tmp, pll->base);
+
        div_val = (rate->mdiv << MDIV_SHIFT) | (rate->pdiv << PDIV_SHIFT) |
                (rate->sdiv << SDIV_SHIFT);
        writel_relaxed(div_val, pll->base + 0x4);
@@ -250,6 +254,10 @@ static int clk_pll1443x_set_rate(struct clk_hw *hw, unsigned long drate,
        tmp &= ~RST_MASK;
        writel_relaxed(tmp, pll->base);
 
+       /* Enable BYPASS */
+       tmp |= BYPASS_MASK;
+       writel_relaxed(tmp, pll->base);
+
        div_val = (rate->mdiv << MDIV_SHIFT) | (rate->pdiv << PDIV_SHIFT) |
                (rate->sdiv << SDIV_SHIFT);
        writel_relaxed(div_val, pll->base + 0x4);
@@ -283,16 +291,28 @@ static int clk_pll14xx_prepare(struct clk_hw *hw)
 {
        struct clk_pll14xx *pll = to_clk_pll14xx(hw);
        u32 val;
+       int ret;
 
        /*
         * RESETB = 1 from 0, PLL starts its normal
         * operation after lock time
         */
        val = readl_relaxed(pll->base + GNRL_CTL);
+       if (val & RST_MASK)
+               return 0;
+       val |= BYPASS_MASK;
+       writel_relaxed(val, pll->base + GNRL_CTL);
        val |= RST_MASK;
        writel_relaxed(val, pll->base + GNRL_CTL);
 
-       return clk_pll14xx_wait_lock(pll);
+       ret = clk_pll14xx_wait_lock(pll);
+       if (ret)
+               return ret;
+
+       val &= ~BYPASS_MASK;
+       writel_relaxed(val, pll->base + GNRL_CTL);
+
+       return 0;
 }
 
 static int clk_pll14xx_is_prepared(struct clk_hw *hw)
@@ -348,6 +368,7 @@ struct clk *imx_clk_pll14xx(const char *name, const char *parent_name,
        struct clk_pll14xx *pll;
        struct clk *clk;
        struct clk_init_data init;
+       u32 val;
 
        pll = kzalloc(sizeof(*pll), GFP_KERNEL);
        if (!pll)
@@ -379,6 +400,10 @@ struct clk *imx_clk_pll14xx(const char *name, const char *parent_name,
        pll->rate_table = pll_clk->rate_table;
        pll->rate_count = pll_clk->rate_count;
 
+       val = readl_relaxed(pll->base + GNRL_CTL);
+       val &= ~BYPASS_MASK;
+       writel_relaxed(val, pll->base + GNRL_CTL);
+
        clk = clk_register(NULL, &pll->hw);
        if (IS_ERR(clk)) {
                pr_err("%s: failed to register pll %s %lu\n",
index bb4ec1b..f7a389a 100644 (file)
@@ -10,7 +10,6 @@ extern spinlock_t imx_ccm_lock;
 void imx_check_clocks(struct clk *clks[], unsigned int count);
 void imx_check_clk_hws(struct clk_hw *clks[], unsigned int count);
 void imx_register_uart_clocks(struct clk ** const clks[]);
-void imx_register_uart_clocks_hws(struct clk_hw ** const hws[]);
 void imx_mmdc_mask_handshake(void __iomem *ccm_base, unsigned int chn);
 void imx_unregister_clocks(struct clk *clks[], unsigned int count);
 
@@ -51,12 +50,6 @@ struct imx_pll14xx_clk {
        int flags;
 };
 
-#define imx_clk_busy_divider(name, parent_name, reg, shift, width, busy_reg, busy_shift) \
-       imx_clk_hw_busy_divider(name, parent_name, reg, shift, width, busy_reg, busy_shift)->clk
-
-#define imx_clk_busy_mux(name, reg, shift, width, busy_reg, busy_shift, parent_names, num_parents) \
-       imx_clk_hw_busy_mux(name, reg, shift, width, busy_reg, busy_shift, parent_names, num_parents)->clk
-
 #define imx_clk_cpu(name, parent_name, div, mux, pll, step) \
        imx_clk_hw_cpu(name, parent_name, div, mux, pll, step)->clk
 
@@ -74,15 +67,6 @@ struct imx_pll14xx_clk {
 #define imx_clk_gate_exclusive(name, parent, reg, shift, exclusive_mask) \
        imx_clk_hw_gate_exclusive(name, parent, reg, shift, exclusive_mask)->clk
 
-#define imx_clk_fixup_divider(name, parent, reg, shift, width, fixup) \
-       imx_clk_hw_fixup_divider(name, parent, reg, shift, width, fixup)->clk
-
-#define imx_clk_fixup_mux(name, reg, shift, width, parents, num_parents, fixup) \
-       imx_clk_hw_fixup_mux(name, reg, shift, width, parents, num_parents, fixup)->clk
-
-#define imx_clk_mux_ldb(name, reg, shift, width, parents, num_parents) \
-       imx_clk_hw_mux_ldb(name, reg, shift, width, parents, num_parents)->clk
-
 #define imx_clk_fixed_factor(name, parent, mult, div) \
        imx_clk_hw_fixed_factor(name, parent, mult, div)->clk
 
@@ -92,21 +76,12 @@ struct imx_pll14xx_clk {
 #define imx_clk_gate_dis(name, parent, reg, shift) \
        imx_clk_hw_gate_dis(name, parent, reg, shift)->clk
 
-#define imx_clk_gate_dis_flags(name, parent, reg, shift, flags) \
-       imx_clk_hw_gate_dis_flags(name, parent, reg, shift, flags)->clk
-
-#define imx_clk_gate_flags(name, parent, reg, shift, flags) \
-       imx_clk_hw_gate_flags(name, parent, reg, shift, flags)->clk
-
 #define imx_clk_gate2(name, parent, reg, shift) \
        imx_clk_hw_gate2(name, parent, reg, shift)->clk
 
 #define imx_clk_gate2_flags(name, parent, reg, shift, flags) \
        imx_clk_hw_gate2_flags(name, parent, reg, shift, flags)->clk
 
-#define imx_clk_gate2_shared(name, parent, reg, shift, share_count) \
-       imx_clk_hw_gate2_shared(name, parent, reg, shift, share_count)->clk
-
 #define imx_clk_gate2_shared2(name, parent, reg, shift, share_count) \
        imx_clk_hw_gate2_shared2(name, parent, reg, shift, share_count)->clk
 
index fe8db93..1cb4899 100644 (file)
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-menu "Ingenic JZ47xx CGU drivers"
+menu "Ingenic SoCs drivers"
        depends on MIPS
 
 config INGENIC_CGU_COMMON
@@ -45,4 +45,12 @@ config INGENIC_CGU_JZ4780
 
          If building for a JZ4780 SoC, you want to say Y here.
 
+config INGENIC_TCU_CLK
+       bool "Ingenic JZ47xx TCU clocks driver"
+       default MACH_INGENIC
+       select MFD_SYSCON
+       help
+         Support the clocks of the Timer/Counter Unit (TCU) of the Ingenic
+         JZ47xx SoCs.
+
 endmenu
index 250570a..097220b 100644 (file)
@@ -4,3 +4,4 @@ obj-$(CONFIG_INGENIC_CGU_JZ4740)        += jz4740-cgu.o
 obj-$(CONFIG_INGENIC_CGU_JZ4725B)      += jz4725b-cgu.o
 obj-$(CONFIG_INGENIC_CGU_JZ4770)       += jz4770-cgu.o
 obj-$(CONFIG_INGENIC_CGU_JZ4780)       += jz4780-cgu.o
+obj-$(CONFIG_INGENIC_TCU_CLK)          += tcu.o
index 2642d36..a3b4635 100644 (file)
@@ -257,4 +257,4 @@ static void __init jz4725b_cgu_init(struct device_node *np)
 
        ingenic_cgu_register_syscore_ops(cgu);
 }
-CLK_OF_DECLARE(jz4725b_cgu, "ingenic,jz4725b-cgu", jz4725b_cgu_init);
+CLK_OF_DECLARE_DRIVER(jz4725b_cgu, "ingenic,jz4725b-cgu", jz4725b_cgu_init);
index 4c0a209..4f0e92c 100644 (file)
@@ -53,6 +53,10 @@ static const u8 jz4740_cgu_cpccr_div_table[] = {
        1, 2, 3, 4, 6, 8, 12, 16, 24, 32,
 };
 
+static const u8 jz4740_cgu_pll_half_div_table[] = { /* divider table referenced by the "pll half" clock's .div entry */
+       2, 1, /* NOTE(review): presumably the divider for register field value 0 and 1 respectively -- confirm against the CGU div-table lookup */
+};
+
 static const struct ingenic_cgu_clk_info jz4740_cgu_clocks[] = {
 
        /* External clocks */
@@ -86,7 +90,10 @@ static const struct ingenic_cgu_clk_info jz4740_cgu_clocks[] = {
        [JZ4740_CLK_PLL_HALF] = {
                "pll half", CGU_CLK_DIV,
                .parents = { JZ4740_CLK_PLL, -1, -1, -1 },
-               .div = { CGU_REG_CPCCR, 21, 1, 1, -1, -1, -1 },
+               .div = {
+                       CGU_REG_CPCCR, 21, 1, 1, -1, -1, -1,
+                       jz4740_cgu_pll_half_div_table,
+               },
        },
 
        [JZ4740_CLK_CCLK] = {
@@ -222,6 +229,12 @@ static const struct ingenic_cgu_clk_info jz4740_cgu_clocks[] = {
                .parents = { JZ4740_CLK_EXT, -1, -1, -1 },
                .gate = { CGU_REG_CLKGR, 5 },
        },
+
+       [JZ4740_CLK_TCU] = {
+               "tcu", CGU_CLK_GATE,
+               .parents = { JZ4740_CLK_EXT, -1, -1, -1 },
+               .gate = { CGU_REG_CLKGR, 1 },
+       },
 };
 
 static void __init jz4740_cgu_init(struct device_node *np)
@@ -241,4 +254,4 @@ static void __init jz4740_cgu_init(struct device_node *np)
 
        ingenic_cgu_register_syscore_ops(cgu);
 }
-CLK_OF_DECLARE(jz4740_cgu, "ingenic,jz4740-cgu", jz4740_cgu_init);
+CLK_OF_DECLARE_DRIVER(jz4740_cgu, "ingenic,jz4740-cgu", jz4740_cgu_init);
index eebc1be..956dd65 100644 (file)
@@ -443,4 +443,4 @@ static void __init jz4770_cgu_init(struct device_node *np)
 }
 
 /* We only probe via devicetree, no need for a platform driver */
-CLK_OF_DECLARE(jz4770_cgu, "ingenic,jz4770-cgu", jz4770_cgu_init);
+CLK_OF_DECLARE_DRIVER(jz4770_cgu, "ingenic,jz4770-cgu", jz4770_cgu_init);
index 8c67f89..ea905ff 100644 (file)
@@ -725,4 +725,4 @@ static void __init jz4780_cgu_init(struct device_node *np)
 
        ingenic_cgu_register_syscore_ops(cgu);
 }
-CLK_OF_DECLARE(jz4780_cgu, "ingenic,jz4780-cgu", jz4780_cgu_init);
+CLK_OF_DECLARE_DRIVER(jz4780_cgu, "ingenic,jz4780-cgu", jz4780_cgu_init);
diff --git a/drivers/clk/ingenic/tcu.c b/drivers/clk/ingenic/tcu.c
new file mode 100644 (file)
index 0000000..a1a5f9c
--- /dev/null
@@ -0,0 +1,474 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * JZ47xx SoCs TCU clocks driver
+ * Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net>
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/clockchips.h>
+#include <linux/mfd/ingenic-tcu.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/syscore_ops.h>
+
+#include <dt-bindings/clock/ingenic,tcu.h>
+
+/* 8 channels max + watchdog + OST */
+#define TCU_CLK_COUNT  10
+
+#undef pr_fmt
+#define pr_fmt(fmt) "ingenic-tcu-clk: " fmt
+
+enum tcu_clk_parent { /* index into ingenic_tcu_timer_parents[]; BIT(value) is what gets written to TCSR to select the parent */
+       TCU_PARENT_PCLK,
+       TCU_PARENT_RTC,
+       TCU_PARENT_EXT,
+};
+
+struct ingenic_soc_info { /* per-SoC description, selected through the of_device_id .data pointer */
+       unsigned int num_channels; /* number of timer channel clocks registered by ingenic_tcu_probe() */
+       bool has_ost; /* when true, the "ost" clock is registered as well */
+       bool has_tcu_clk; /* when true, probe looks up and enables the "tcu" clock */
+};
+
+struct ingenic_tcu_clk_info { /* static description of one TCU clock */
+       struct clk_init_data init_data; /* handed to the clk framework through clk_hw.init */
+       u8 gate_bit; /* bit written to TCU_REG_TSCR / TCU_REG_TSSR and tested in TCU_REG_TSR */
+       u8 tcsr_reg; /* regmap offset of the register holding the parent-select and prescale fields */
+};
+
+struct ingenic_tcu_clk { /* runtime state of one registered TCU clock */
+       struct clk_hw hw; /* embedded handle; to_tcu_clk() maps it back to this structure */
+       unsigned int idx; /* slot in the onecell hws[] array; also printed in the TCSR warnings */
+       struct ingenic_tcu *tcu; /* owning TCU instance (regmap and optional "tcu" clock) */
+       const struct ingenic_tcu_clk_info *info; /* static description: gate bit and TCSR register */
+};
+
+struct ingenic_tcu { /* driver-wide state, allocated once by ingenic_tcu_probe() */
+       const struct ingenic_soc_info *soc_info; /* taken from the matching of_device_id entry */
+       struct regmap *map; /* obtained with device_node_to_regmap() */
+       struct clk *clk; /* "tcu" clock; stays NULL on SoCs without has_tcu_clk */
+
+       struct clk_hw_onecell_data *clocks; /* TCU_CLK_COUNT slots, exposed through the OF clock provider */
+};
+
+static struct ingenic_tcu *ingenic_tcu;
+
+/* Map a clk_hw embedded in a struct ingenic_tcu_clk back to its container. */
+static inline struct ingenic_tcu_clk *to_tcu_clk(struct clk_hw *hw)
+{
+       struct ingenic_tcu_clk *owner;
+
+       owner = container_of(hw, struct ingenic_tcu_clk, hw);
+
+       return owner;
+}
+
+/*
+ * clk_ops.enable: write this clock's gate bit to TCU_REG_TSCR.
+ * The regmap_write() result is not checked; the function always returns 0.
+ */
+static int ingenic_tcu_enable(struct clk_hw *hw)
+{
+       struct ingenic_tcu_clk *clk = to_tcu_clk(hw);
+
+       regmap_write(clk->tcu->map, TCU_REG_TSCR, BIT(clk->info->gate_bit));
+
+       return 0;
+}
+
+/* clk_ops.disable: write this clock's gate bit to TCU_REG_TSSR. */
+static void ingenic_tcu_disable(struct clk_hw *hw)
+{
+       struct ingenic_tcu_clk *clk = to_tcu_clk(hw);
+
+       regmap_write(clk->tcu->map, TCU_REG_TSSR, BIT(clk->info->gate_bit));
+}
+
+/*
+ * clk_ops.is_enabled: read TCU_REG_TSR and report the clock as enabled (1)
+ * when its gate bit is clear, disabled (0) when it is set.
+ */
+static int ingenic_tcu_is_enabled(struct clk_hw *hw)
+{
+       struct ingenic_tcu_clk *clk = to_tcu_clk(hw);
+       unsigned int tsr;
+
+       regmap_read(clk->tcu->map, TCU_REG_TSR, &tsr);
+
+       if (tsr & BIT(clk->info->gate_bit))
+               return 0;
+
+       return 1;
+}
+
+/*
+ * Make the channel's registers accessible before touching its TCSR.
+ *
+ * Returns whether the channel clock was already enabled on entry; always
+ * false when a global TCU clock exists, in which case nothing is written.
+ */
+static bool ingenic_tcu_enable_regs(struct clk_hw *hw)
+{
+       struct ingenic_tcu_clk *clk = to_tcu_clk(hw);
+       bool was_on;
+
+       /*
+        * With a global TCU clock there is nothing to do: it has been
+        * attached to the regmap and is enabled automatically.
+        */
+       if (clk->tcu->clk)
+               return false;
+
+       /*
+        * No global TCU clock on this SoC: the channel's own clock must be
+        * ungated to be able to access its registers.
+        */
+       was_on = !!ingenic_tcu_is_enabled(hw);
+       regmap_write(clk->tcu->map, TCU_REG_TSCR, BIT(clk->info->gate_bit));
+
+       return was_on;
+}
+
+/*
+ * Counterpart of ingenic_tcu_enable_regs(): write the channel's gate bit to
+ * TCU_REG_TSSR, but only on SoCs without a global TCU clock.
+ */
+static void ingenic_tcu_disable_regs(struct clk_hw *hw)
+{
+       struct ingenic_tcu_clk *clk = to_tcu_clk(hw);
+
+       if (clk->tcu->clk)
+               return;
+
+       regmap_write(clk->tcu->map, TCU_REG_TSSR, BIT(clk->info->gate_bit));
+}
+
+/*
+ * clk_ops.get_parent: read the clock's TCSR and return the position of the
+ * lowest set bit inside TCU_TCSR_PARENT_CLOCK_MASK as the parent index.
+ * A failed read only triggers a one-time warning.
+ */
+static u8 ingenic_tcu_get_parent(struct clk_hw *hw)
+{
+       struct ingenic_tcu_clk *clk = to_tcu_clk(hw);
+       unsigned int tcsr = 0;
+       int err;
+
+       err = regmap_read(clk->tcu->map, clk->info->tcsr_reg, &tcsr);
+       WARN_ONCE(err < 0, "Unable to read TCSR %d", clk->idx);
+
+       tcsr &= TCU_TCSR_PARENT_CLOCK_MASK;
+
+       return ffs(tcsr) - 1;
+}
+
+/*
+ * clk_ops.set_parent: replace the parent-select field of the clock's TCSR
+ * with BIT(idx). The channel registers are made accessible for the update
+ * and the gate state found on entry is restored afterwards. A failed
+ * register update only triggers a one-time warning; 0 is always returned.
+ */
+static int ingenic_tcu_set_parent(struct clk_hw *hw, u8 idx)
+{
+       struct ingenic_tcu_clk *clk = to_tcu_clk(hw);
+       bool already_on = ingenic_tcu_enable_regs(hw);
+       int err;
+
+       err = regmap_update_bits(clk->tcu->map, clk->info->tcsr_reg,
+                                TCU_TCSR_PARENT_CLOCK_MASK, BIT(idx));
+       WARN_ONCE(err < 0, "Unable to update TCSR %d", clk->idx);
+
+       if (already_on)
+               return 0;
+
+       ingenic_tcu_disable_regs(hw);
+
+       return 0;
+}
+
+/*
+ * clk_ops.recalc_rate: read the prescale field from the clock's TCSR and
+ * return parent_rate shifted right by twice that value.
+ * A failed read only triggers a one-time warning.
+ */
+static unsigned long ingenic_tcu_recalc_rate(struct clk_hw *hw,
+               unsigned long parent_rate)
+{
+       struct ingenic_tcu_clk *clk = to_tcu_clk(hw);
+       unsigned int tcsr, shift;
+       int err;
+
+       err = regmap_read(clk->tcu->map, clk->info->tcsr_reg, &tcsr);
+       WARN_ONCE(err < 0, "Unable to read TCSR %d", clk->idx);
+
+       shift = ((tcsr & TCU_TCSR_PRESCALE_MASK) >> TCU_TCSR_PRESCALE_LSB) * 2;
+
+       return parent_rate >> shift;
+}
+
+/*
+ * Return the smallest prescale value in 0..5 for which
+ * rate >> (2 * prescale) does not exceed req_rate. When none of 0..4
+ * qualifies, 5 (the /1024 divider) is returned unconditionally.
+ */
+static u8 ingenic_tcu_get_prescale(unsigned long rate, unsigned long req_rate)
+{
+       u8 step = 0;
+
+       while (step < 5 && (rate >> (step * 2)) > req_rate)
+               step++;
+
+       return step;
+}
+
+/*
+ * clk_ops.round_rate: report the rate this clock would actually run at for
+ * a request of @req_rate, given the parent rate in *@parent_rate.
+ *
+ * The clock can only divide its parent, so a request above the parent rate
+ * is rounded down to the parent rate itself rather than rejected; returning
+ * an error here would make clk_set_rate() fail for any too-high request.
+ * Otherwise the result is the parent rate shifted right by twice the
+ * prescale value chosen by ingenic_tcu_get_prescale().
+ */
+static long ingenic_tcu_round_rate(struct clk_hw *hw, unsigned long req_rate,
+               unsigned long *parent_rate)
+{
+       unsigned long rate = *parent_rate;
+       u8 prescale;
+
+       if (req_rate > rate)
+               return rate;
+
+       prescale = ingenic_tcu_get_prescale(rate, req_rate);
+
+       return rate >> (prescale * 2);
+}
+
+/*
+ * clk_ops.set_rate: compute the prescale value for @req_rate from
+ * @parent_rate and store it in the prescale field of the clock's TCSR.
+ * The channel registers are made accessible for the update and the gate
+ * state found on entry is restored afterwards. A failed register update
+ * only triggers a one-time warning; 0 is always returned.
+ */
+static int ingenic_tcu_set_rate(struct clk_hw *hw, unsigned long req_rate,
+               unsigned long parent_rate)
+{
+       struct ingenic_tcu_clk *clk = to_tcu_clk(hw);
+       u8 step = ingenic_tcu_get_prescale(parent_rate, req_rate);
+       bool already_on = ingenic_tcu_enable_regs(hw);
+       int err;
+
+       err = regmap_update_bits(clk->tcu->map, clk->info->tcsr_reg,
+                                TCU_TCSR_PRESCALE_MASK,
+                                step << TCU_TCSR_PRESCALE_LSB);
+       WARN_ONCE(err < 0, "Unable to update TCSR %d", clk->idx);
+
+       if (already_on)
+               return 0;
+
+       ingenic_tcu_disable_regs(hw);
+
+       return 0;
+}
+
+/* Operations shared by every TCU clock (timer channels, watchdog, OST). */
+static const struct clk_ops ingenic_tcu_clk_ops = {
+       /* gating */
+       .enable         = ingenic_tcu_enable,
+       .disable        = ingenic_tcu_disable,
+       .is_enabled     = ingenic_tcu_is_enabled,
+
+       /* parent selection */
+       .get_parent     = ingenic_tcu_get_parent,
+       .set_parent     = ingenic_tcu_set_parent,
+
+       /* prescaler */
+       .recalc_rate    = ingenic_tcu_recalc_rate,
+       .round_rate     = ingenic_tcu_round_rate,
+       .set_rate       = ingenic_tcu_set_rate,
+};
+
+static const char * const ingenic_tcu_timer_parents[] = { /* parent names, indexed by enum tcu_clk_parent */
+       [TCU_PARENT_PCLK] = "pclk",
+       [TCU_PARENT_RTC]  = "rtc",
+       [TCU_PARENT_EXT]  = "ext",
+};
+
+#define DEF_TIMER(_name, _gate_bit, _tcsr) /* one ingenic_tcu_clk_info initializer */ \
+       {                                                               \
+               .init_data = {                                          \
+                       .name = _name,                                  \
+                       .parent_names = ingenic_tcu_timer_parents,      \
+                       .num_parents = ARRAY_SIZE(ingenic_tcu_timer_parents),\
+                       .ops = &ingenic_tcu_clk_ops,                    \
+                       .flags = CLK_SET_RATE_UNGATE,                   \
+               },                                                      \
+               .gate_bit = _gate_bit,                                  \
+               .tcsr_reg = _tcsr,                                      \
+       }
+static const struct ingenic_tcu_clk_info ingenic_tcu_clk_info[] = { /* timer channels: channel n uses gate bit n and TCU_REG_TCSRc(n) */
+       [TCU_CLK_TIMER0] = DEF_TIMER("timer0", 0, TCU_REG_TCSRc(0)),
+       [TCU_CLK_TIMER1] = DEF_TIMER("timer1", 1, TCU_REG_TCSRc(1)),
+       [TCU_CLK_TIMER2] = DEF_TIMER("timer2", 2, TCU_REG_TCSRc(2)),
+       [TCU_CLK_TIMER3] = DEF_TIMER("timer3", 3, TCU_REG_TCSRc(3)),
+       [TCU_CLK_TIMER4] = DEF_TIMER("timer4", 4, TCU_REG_TCSRc(4)),
+       [TCU_CLK_TIMER5] = DEF_TIMER("timer5", 5, TCU_REG_TCSRc(5)),
+       [TCU_CLK_TIMER6] = DEF_TIMER("timer6", 6, TCU_REG_TCSRc(6)),
+       [TCU_CLK_TIMER7] = DEF_TIMER("timer7", 7, TCU_REG_TCSRc(7)),
+};
+
+static const struct ingenic_tcu_clk_info ingenic_tcu_watchdog_clk_info =
+                                        DEF_TIMER("wdt", 16, TCU_REG_WDT_TCSR); /* watchdog clock: gate bit 16 */
+static const struct ingenic_tcu_clk_info ingenic_tcu_ost_clk_info =
+                                        DEF_TIMER("ost", 15, TCU_REG_OST_TCSR); /* OST clock: gate bit 15; only registered when soc_info->has_ost */
+#undef DEF_TIMER
+
+/*
+ * Allocate, initialise and register one TCU clock.
+ *
+ * @tcu:    owning TCU instance
+ * @idx:    slot of @clocks->hws[] that receives the new clk_hw
+ * @parent: default parent; BIT(@parent) is written to the clock's TCSR
+ * @info:   static description (init data, gate bit, TCSR register)
+ * @clocks: onecell array the clock is published in
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the error
+ * returned by clk_hw_register() (the allocation is freed in that case).
+ */
+static int __init ingenic_tcu_register_clock(struct ingenic_tcu *tcu,
+                       unsigned int idx, enum tcu_clk_parent parent,
+                       const struct ingenic_tcu_clk_info *info,
+                       struct clk_hw_onecell_data *clocks)
+{
+       struct ingenic_tcu_clk *clk = kzalloc(sizeof(*clk), GFP_KERNEL);
+       struct clk_hw *hw;
+       int ret;
+
+       if (!clk)
+               return -ENOMEM;
+
+       hw = &clk->hw;
+       hw->init = &info->init_data;
+       clk->tcu = tcu;
+       clk->info = info;
+       clk->idx = idx;
+
+       /*
+        * Reset channel and clock divider, set default parent: the low
+        * 16 bits of TCSR end up with only BIT(parent) set.
+        */
+       ingenic_tcu_enable_regs(hw);
+       regmap_update_bits(tcu->map, info->tcsr_reg, 0xffff, BIT(parent));
+       ingenic_tcu_disable_regs(hw);
+
+       ret = clk_hw_register(NULL, hw);
+       if (!ret) {
+               clocks->hws[idx] = hw;
+               return 0;
+       }
+
+       kfree(clk);
+
+       return ret;
+}
+
+static const struct ingenic_soc_info jz4740_soc_info = { /* 8 timer channels, no OST clock, has a "tcu" clock */
+       .num_channels = 8,
+       .has_ost = false,
+       .has_tcu_clk = true,
+};
+
+static const struct ingenic_soc_info jz4725b_soc_info = { /* 6 timer channels, OST clock, has a "tcu" clock */
+       .num_channels = 6,
+       .has_ost = true,
+       .has_tcu_clk = true,
+};
+
+static const struct ingenic_soc_info jz4770_soc_info = { /* 8 timer channels, OST clock, no "tcu" clock */
+       .num_channels = 8,
+       .has_ost = true,
+       .has_tcu_clk = false,
+};
+
+static const struct of_device_id ingenic_tcu_of_match[] __initconst = { /* .data is the ingenic_soc_info picked up by ingenic_tcu_probe() */
+       { .compatible = "ingenic,jz4740-tcu", .data = &jz4740_soc_info, },
+       { .compatible = "ingenic,jz4725b-tcu", .data = &jz4725b_soc_info, },
+       { .compatible = "ingenic,jz4770-tcu", .data = &jz4770_soc_info, },
+       { /* sentinel */ }
+};
+
+/*
+ * Set up the TCU clocks described by device-tree node @np.
+ *
+ * Looks up the node's regmap, grabs and enables the "tcu" clock on SoCs that
+ * have one, registers one clock per timer channel plus the watchdog clock
+ * and (when the SoC has one) the OST clock, then exposes them through a
+ * onecell OF clock provider and publishes the instance in ingenic_tcu.
+ *
+ * Returns 0 on success or a negative errno. On failure every clock that was
+ * registered is unregistered exactly once, and the "tcu" clock, the onecell
+ * array and the tcu structure are released.
+ * NOTE(review): the per-clock structures allocated by
+ * ingenic_tcu_register_clock() are not freed on this error path.
+ */
+static int __init ingenic_tcu_probe(struct device_node *np)
+{
+       const struct of_device_id *id = of_match_node(ingenic_tcu_of_match, np);
+       struct ingenic_tcu *tcu;
+       struct regmap *map;
+       unsigned int i;
+       int ret;
+
+       map = device_node_to_regmap(np);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
+
+       tcu = kzalloc(sizeof(*tcu), GFP_KERNEL);
+       if (!tcu)
+               return -ENOMEM;
+
+       tcu->map = map;
+       tcu->soc_info = id->data;
+
+       if (tcu->soc_info->has_tcu_clk) {
+               tcu->clk = of_clk_get_by_name(np, "tcu");
+               if (IS_ERR(tcu->clk)) {
+                       ret = PTR_ERR(tcu->clk);
+                       pr_crit("Cannot get TCU clock\n");
+                       goto err_free_tcu;
+               }
+
+               ret = clk_prepare_enable(tcu->clk);
+               if (ret) {
+                       pr_crit("Unable to enable TCU clock\n");
+                       goto err_put_clk;
+               }
+       }
+
+       /* Zero-allocated: unregistered slots of hws[] stay NULL. */
+       tcu->clocks = kzalloc(sizeof(*tcu->clocks) +
+                             sizeof(*tcu->clocks->hws) * TCU_CLK_COUNT,
+                             GFP_KERNEL);
+       if (!tcu->clocks) {
+               ret = -ENOMEM;
+               goto err_clk_disable;
+       }
+
+       tcu->clocks->num = TCU_CLK_COUNT;
+
+       for (i = 0; i < tcu->soc_info->num_channels; i++) {
+               ret = ingenic_tcu_register_clock(tcu, i, TCU_PARENT_EXT,
+                                                &ingenic_tcu_clk_info[i],
+                                                tcu->clocks);
+               if (ret) {
+                       pr_crit("cannot register clock %d\n", i);
+                       goto err_unregister_clocks;
+               }
+       }
+
+       /*
+        * We set EXT as the default parent clock for all the TCU clocks
+        * except for the watchdog one, where we set the RTC clock as the
+        * parent. Since the EXT and PCLK are much faster than the RTC clock,
+        * the watchdog would kick after a maximum time of 5s, and we might
+        * want a slower kicking time.
+        */
+       ret = ingenic_tcu_register_clock(tcu, TCU_CLK_WDT, TCU_PARENT_RTC,
+                                        &ingenic_tcu_watchdog_clk_info,
+                                        tcu->clocks);
+       if (ret) {
+               pr_crit("cannot register watchdog clock\n");
+               goto err_unregister_clocks;
+       }
+
+       if (tcu->soc_info->has_ost) {
+               ret = ingenic_tcu_register_clock(tcu, TCU_CLK_OST,
+                                                TCU_PARENT_EXT,
+                                                &ingenic_tcu_ost_clk_info,
+                                                tcu->clocks);
+               if (ret) {
+                       pr_crit("cannot register ost clock\n");
+                       goto err_unregister_clocks;
+               }
+       }
+
+       ret = of_clk_add_hw_provider(np, of_clk_hw_onecell_get, tcu->clocks);
+       if (ret) {
+               pr_crit("cannot add OF clock provider\n");
+               goto err_unregister_clocks;
+       }
+
+       ingenic_tcu = tcu;
+
+       return 0;
+
+err_unregister_clocks:
+       /*
+        * Only the slots that were successfully registered are non-NULL, so
+        * one pass over the whole array unregisters each clock exactly once,
+        * whichever step failed. Indexing the watchdog/OST slots through the
+        * channel loop counter would hit NULL entries on SoCs with fewer
+        * than 8 channels and unregister the same clock twice otherwise.
+        */
+       for (i = 0; i < tcu->clocks->num; i++)
+               if (tcu->clocks->hws[i])
+                       clk_hw_unregister(tcu->clocks->hws[i]);
+       kfree(tcu->clocks);
+err_clk_disable:
+       if (tcu->soc_info->has_tcu_clk)
+               clk_disable_unprepare(tcu->clk);
+err_put_clk:
+       if (tcu->soc_info->has_tcu_clk)
+               clk_put(tcu->clk);
+err_free_tcu:
+       kfree(tcu);
+       return ret;
+}
+
+/* Syscore suspend hook: disable the "tcu" clock if the SoC has one. */
+static int __maybe_unused tcu_pm_suspend(void)
+{
+       struct clk *tcu_clk = ingenic_tcu->clk;
+
+       if (!tcu_clk)
+               return 0;
+
+       clk_disable(tcu_clk);
+
+       return 0;
+}
+
+/*
+ * Syscore resume hook: re-enable the "tcu" clock if the SoC has one.
+ * The clk_enable() result is not checked.
+ */
+static void __maybe_unused tcu_pm_resume(void)
+{
+       struct clk *tcu_clk = ingenic_tcu->clk;
+
+       if (!tcu_clk)
+               return;
+
+       clk_enable(tcu_clk);
+}
+
+static struct syscore_ops __maybe_unused tcu_pm_ops = { /* registered from ingenic_tcu_init() when CONFIG_PM_SLEEP is enabled */
+       .suspend = tcu_pm_suspend,
+       .resume = tcu_pm_resume,
+};
+
+/*
+ * CLK_OF_DECLARE_DRIVER entry point: probe the TCU clocks for @np.
+ *
+ * The syscore suspend/resume hooks dereference the global ingenic_tcu
+ * pointer, which ingenic_tcu_probe() only sets on success. They are
+ * therefore registered only after a successful probe; registering them
+ * after a failed one would dereference NULL on the next suspend.
+ */
+static void __init ingenic_tcu_init(struct device_node *np)
+{
+       int ret = ingenic_tcu_probe(np);
+
+       if (ret) {
+               pr_crit("Failed to initialize TCU clocks: %d\n", ret);
+               return;
+       }
+
+       if (IS_ENABLED(CONFIG_PM_SLEEP))
+               register_syscore_ops(&tcu_pm_ops);
+}
+
+CLK_OF_DECLARE_DRIVER(jz4740_cgu, "ingenic,jz4740-tcu", ingenic_tcu_init); /* NOTE(review): the identifiers say "cgu" although these entries hook up the TCU init -- consider *_tcu names */
+CLK_OF_DECLARE_DRIVER(jz4725b_cgu, "ingenic,jz4725b-tcu", ingenic_tcu_init);
+CLK_OF_DECLARE_DRIVER(jz4770_cgu, "ingenic,jz4770-tcu", ingenic_tcu_init);
index ce3d9b3..7efc361 100644 (file)
@@ -117,6 +117,62 @@ config COMMON_CLK_MT2712_VENCSYS
        ---help---
          This driver supports MediaTek MT2712 vencsys clocks.
 
+config COMMON_CLK_MT6779
+       bool "Clock driver for MediaTek MT6779"
+       depends on (ARCH_MEDIATEK && ARM64) || COMPILE_TEST
+       select COMMON_CLK_MEDIATEK
+       default ARCH_MEDIATEK && ARM64
+       help
+         This driver supports MediaTek MT6779 basic clocks.
+
+config COMMON_CLK_MT6779_MMSYS
+       bool "Clock driver for MediaTek MT6779 mmsys"
+       depends on COMMON_CLK_MT6779
+       help
+         This driver supports MediaTek MT6779 mmsys clocks.
+
+config COMMON_CLK_MT6779_IMGSYS
+       bool "Clock driver for MediaTek MT6779 imgsys"
+       depends on COMMON_CLK_MT6779
+       help
+         This driver supports MediaTek MT6779 imgsys clocks.
+
+config COMMON_CLK_MT6779_IPESYS
+       bool "Clock driver for MediaTek MT6779 ipesys"
+       depends on COMMON_CLK_MT6779
+       help
+         This driver supports MediaTek MT6779 ipesys clocks.
+
+config COMMON_CLK_MT6779_CAMSYS
+       bool "Clock driver for MediaTek MT6779 camsys"
+       depends on COMMON_CLK_MT6779
+       help
+         This driver supports MediaTek MT6779 camsys clocks.
+
+config COMMON_CLK_MT6779_VDECSYS
+       bool "Clock driver for MediaTek MT6779 vdecsys"
+       depends on COMMON_CLK_MT6779
+       help
+         This driver supports MediaTek MT6779 vdecsys clocks.
+
+config COMMON_CLK_MT6779_VENCSYS
+       bool "Clock driver for MediaTek MT6779 vencsys"
+       depends on COMMON_CLK_MT6779
+       help
+         This driver supports MediaTek MT6779 vencsys clocks.
+
+config COMMON_CLK_MT6779_MFGCFG
+       bool "Clock driver for MediaTek MT6779 mfgcfg"
+       depends on COMMON_CLK_MT6779
+       help
+         This driver supports MediaTek MT6779 mfgcfg clocks.
+
+config COMMON_CLK_MT6779_AUDSYS
+       bool "Clock driver for MediaTek MT6779 audsys"
+       depends on COMMON_CLK_MT6779
+       help
+         This driver supports MediaTek MT6779 audsys clocks.
+
 config COMMON_CLK_MT6797
        bool "Clock driver for MediaTek MT6797"
        depends on (ARCH_MEDIATEK && ARM64) || COMPILE_TEST
index 672de00..8cdb76a 100644 (file)
@@ -1,6 +1,15 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_COMMON_CLK_MEDIATEK) += clk-mtk.o clk-pll.o clk-gate.o clk-apmixed.o clk-cpumux.o reset.o clk-mux.o
 
+obj-$(CONFIG_COMMON_CLK_MT6779) += clk-mt6779.o
+obj-$(CONFIG_COMMON_CLK_MT6779_MMSYS) += clk-mt6779-mm.o
+obj-$(CONFIG_COMMON_CLK_MT6779_IMGSYS) += clk-mt6779-img.o
+obj-$(CONFIG_COMMON_CLK_MT6779_IPESYS) += clk-mt6779-ipe.o
+obj-$(CONFIG_COMMON_CLK_MT6779_CAMSYS) += clk-mt6779-cam.o
+obj-$(CONFIG_COMMON_CLK_MT6779_VDECSYS) += clk-mt6779-vdec.o
+obj-$(CONFIG_COMMON_CLK_MT6779_VENCSYS) += clk-mt6779-venc.o
+obj-$(CONFIG_COMMON_CLK_MT6779_MFGCFG) += clk-mt6779-mfg.o
+obj-$(CONFIG_COMMON_CLK_MT6779_AUDSYS) += clk-mt6779-aud.o
 obj-$(CONFIG_COMMON_CLK_MT6797) += clk-mt6797.o
 obj-$(CONFIG_COMMON_CLK_MT6797_IMGSYS) += clk-mt6797-img.o
 obj-$(CONFIG_COMMON_CLK_MT6797_MMSYS) += clk-mt6797-mm.o
index 803bf0a..a35cf0b 100644 (file)
@@ -150,7 +150,8 @@ struct clk *mtk_clk_register_gate(
                int sta_ofs,
                u8 bit,
                const struct clk_ops *ops,
-               unsigned long flags)
+               unsigned long flags,
+               struct device *dev)
 {
        struct mtk_clk_gate *cg;
        struct clk *clk;
@@ -174,7 +175,7 @@ struct clk *mtk_clk_register_gate(
 
        cg->hw.init = &init;
 
-       clk = clk_register(NULL, &cg->hw);
+       clk = clk_register(dev, &cg->hw);
        if (IS_ERR(clk))
                kfree(cg);
 
index e05c736..3c3329e 100644 (file)
@@ -40,7 +40,8 @@ struct clk *mtk_clk_register_gate(
                int sta_ofs,
                u8 bit,
                const struct clk_ops *ops,
-               unsigned long flags);
+               unsigned long flags,
+               struct device *dev);
 
 #define GATE_MTK_FLAGS(_id, _name, _parent, _regs, _shift,     \
                        _ops, _flags) {                         \
diff --git a/drivers/clk/mediatek/clk-mt6779-aud.c b/drivers/clk/mediatek/clk-mt6779-aud.c
new file mode 100644 (file)
index 0000000..11b209f
--- /dev/null
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ * Author: Wendell Lin <wendell.lin@mediatek.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt6779-clk.h>
+
+/*
+ * Register offsets for audio gate bank 0. Set, clear and status all use
+ * offset 0x0; the gates in this bank use mtk_clk_gate_ops_no_setclr.
+ */
+static const struct mtk_gate_regs audio0_cg_regs = {
+       .set_ofs = 0x0,
+       .clr_ofs = 0x0,
+       .sta_ofs = 0x0,
+};
+
+/*
+ * Register offsets for audio gate bank 1. Set, clear and status all use
+ * offset 0x4; the gates in this bank use mtk_clk_gate_ops_no_setclr.
+ */
+static const struct mtk_gate_regs audio1_cg_regs = {
+       .set_ofs = 0x4,
+       .clr_ofs = 0x4,
+       .sta_ofs = 0x4,
+};
+
+/*
+ * GATE_AUDIO0()/GATE_AUDIO1(): describe one gate in audio bank 0 or 1.
+ * Arguments are the clock ID, clock name, parent name and bit shift; the
+ * register bank and mtk_clk_gate_ops_no_setclr are filled in here.
+ */
+#define GATE_AUDIO0(_id, _name, _parent, _shift)               \
+       GATE_MTK(_id, _name, _parent, &audio0_cg_regs, _shift,  \
+               &mtk_clk_gate_ops_no_setclr)
+#define GATE_AUDIO1(_id, _name, _parent, _shift)               \
+       GATE_MTK(_id, _name, _parent, &audio1_cg_regs, _shift,  \
+               &mtk_clk_gate_ops_no_setclr)
+
+/*
+ * Gate clocks of the MT6779 audio block, grouped by register bank.
+ * Each entry lists: clock ID, clock name, parent clock name, bit shift.
+ */
+static const struct mtk_gate audio_clks[] = {
+       /* AUDIO0 */
+       GATE_AUDIO0(CLK_AUD_AFE, "aud_afe", "audio_sel", 2),
+       GATE_AUDIO0(CLK_AUD_22M, "aud_22m", "aud_eng1_sel", 8),
+       GATE_AUDIO0(CLK_AUD_24M, "aud_24m", "aud_eng2_sel", 9),
+       GATE_AUDIO0(CLK_AUD_APLL2_TUNER, "aud_apll2_tuner",
+                   "aud_eng2_sel", 18),
+       GATE_AUDIO0(CLK_AUD_APLL_TUNER, "aud_apll_tuner",
+                   "aud_eng1_sel", 19),
+       GATE_AUDIO0(CLK_AUD_TDM, "aud_tdm", "aud_eng1_sel", 20),
+       GATE_AUDIO0(CLK_AUD_ADC, "aud_adc", "audio_sel", 24),
+       GATE_AUDIO0(CLK_AUD_DAC, "aud_dac", "audio_sel", 25),
+       GATE_AUDIO0(CLK_AUD_DAC_PREDIS, "aud_dac_predis",
+                   "audio_sel", 26),
+       GATE_AUDIO0(CLK_AUD_TML, "aud_tml", "audio_sel", 27),
+       GATE_AUDIO0(CLK_AUD_NLE, "aud_nle", "audio_sel", 28),
+       /* AUDIO1 */
+       GATE_AUDIO1(CLK_AUD_I2S1_BCLK_SW, "aud_i2s1_bclk",
+                   "audio_sel", 4),
+       GATE_AUDIO1(CLK_AUD_I2S2_BCLK_SW, "aud_i2s2_bclk",
+                   "audio_sel", 5),
+       GATE_AUDIO1(CLK_AUD_I2S3_BCLK_SW, "aud_i2s3_bclk",
+                   "audio_sel", 6),
+       GATE_AUDIO1(CLK_AUD_I2S4_BCLK_SW, "aud_i2s4_bclk",
+                   "audio_sel", 7),
+       GATE_AUDIO1(CLK_AUD_I2S5_BCLK_SW, "aud_i2s5_bclk",
+                   "audio_sel", 8),
+       GATE_AUDIO1(CLK_AUD_CONN_I2S_ASRC, "aud_conn_i2s",
+                   "audio_sel", 12),
+       GATE_AUDIO1(CLK_AUD_GENERAL1_ASRC, "aud_general1",
+                   "audio_sel", 13),
+       GATE_AUDIO1(CLK_AUD_GENERAL2_ASRC, "aud_general2",
+                   "audio_sel", 14),
+       GATE_AUDIO1(CLK_AUD_DAC_HIRES, "aud_dac_hires",
+                   "audio_h_sel", 15),
+       GATE_AUDIO1(CLK_AUD_ADC_HIRES, "aud_adc_hires",
+                   "audio_h_sel", 16),
+       GATE_AUDIO1(CLK_AUD_ADC_HIRES_TML, "aud_adc_hires_tml",
+                   "audio_h_sel", 17),
+       GATE_AUDIO1(CLK_AUD_PDN_ADDA6_ADC, "aud_pdn_adda6_adc",
+                   "audio_sel", 20),
+       GATE_AUDIO1(CLK_AUD_ADDA6_ADC_HIRES, "aud_adda6_adc_hires",
+                   "audio_h_sel",
+                   21),
+       GATE_AUDIO1(CLK_AUD_3RD_DAC, "aud_3rd_dac", "audio_sel",
+                   28),
+       GATE_AUDIO1(CLK_AUD_3RD_DAC_PREDIS, "aud_3rd_dac_predis",
+                   "audio_sel", 29),
+       GATE_AUDIO1(CLK_AUD_3RD_DAC_TML, "aud_3rd_dac_tml",
+                   "audio_sel", 30),
+       GATE_AUDIO1(CLK_AUD_3RD_DAC_HIRES, "aud_3rd_dac_hires",
+                   "audio_h_sel", 31),
+};
+
+/* Device-tree match table: binds to "mediatek,mt6779-audio" nodes. */
+static const struct of_device_id of_match_clk_mt6779_aud[] = {
+       { .compatible = "mediatek,mt6779-audio", },
+       {}
+};
+
+/*
+ * Probe the MT6779 audio clock controller: allocate the onecell clock table,
+ * register every gate in audio_clks[] and expose the table as an OF clock
+ * provider on the device's DT node.
+ *
+ * Returns 0 on success, -ENOMEM if the clock table cannot be allocated, or
+ * the error reported by of_clk_add_provider().
+ */
+static int clk_mt6779_aud_probe(struct platform_device *pdev)
+{
+       struct clk_onecell_data *clk_data;
+       struct device_node *node = pdev->dev.of_node;
+
+       clk_data = mtk_alloc_clk_data(CLK_AUD_NR_CLK);
+       /* Do not hand a NULL table to the registration helpers below. */
+       if (!clk_data)
+               return -ENOMEM;
+
+       mtk_clk_register_gates(node, audio_clks, ARRAY_SIZE(audio_clks),
+                              clk_data);
+
+       return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+/*
+ * Platform driver for the MT6779 audio clocks. It has no remove callback and
+ * is registered through builtin_platform_driver().
+ */
+static struct platform_driver clk_mt6779_aud_drv = {
+       .probe = clk_mt6779_aud_probe,
+       .driver = {
+               .name = "clk-mt6779-aud",
+               .of_match_table = of_match_clk_mt6779_aud,
+       },
+};
+
+builtin_platform_driver(clk_mt6779_aud_drv);
diff --git a/drivers/clk/mediatek/clk-mt6779-cam.c b/drivers/clk/mediatek/clk-mt6779-cam.c
new file mode 100644 (file)
index 0000000..244d420
--- /dev/null
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ * Author: Wendell Lin <wendell.lin@mediatek.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+#include <dt-bindings/clock/mt6779-clk.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+/* Register offsets (set / clear / status) for the camsys gate bank. */
+static const struct mtk_gate_regs cam_cg_regs = {
+       .set_ofs = 0x0004,
+       .clr_ofs = 0x0008,
+       .sta_ofs = 0x0000,
+};
+
+/*
+ * GATE_CAM(): describe one camsys gate (clock ID, name, parent name, bit
+ * shift) using cam_cg_regs and mtk_clk_gate_ops_setclr.
+ */
+#define GATE_CAM(_id, _name, _parent, _shift)                  \
+       GATE_MTK(_id, _name, _parent, &cam_cg_regs, _shift,     \
+               &mtk_clk_gate_ops_setclr)
+
+/*
+ * Gate clocks of the MT6779 camsys block; every gate is parented to
+ * "cam_sel". Each entry lists: clock ID, clock name, parent name, bit shift.
+ */
+static const struct mtk_gate cam_clks[] = {
+       GATE_CAM(CLK_CAM_LARB10, "camsys_larb10", "cam_sel", 0),
+       GATE_CAM(CLK_CAM_DFP_VAD, "camsys_dfp_vad", "cam_sel", 1),
+       GATE_CAM(CLK_CAM_LARB11, "camsys_larb11", "cam_sel", 2),
+       GATE_CAM(CLK_CAM_LARB9, "camsys_larb9", "cam_sel", 3),
+       GATE_CAM(CLK_CAM_CAM, "camsys_cam", "cam_sel", 6),
+       GATE_CAM(CLK_CAM_CAMTG, "camsys_camtg", "cam_sel", 7),
+       GATE_CAM(CLK_CAM_SENINF, "camsys_seninf", "cam_sel", 8),
+       GATE_CAM(CLK_CAM_CAMSV0, "camsys_camsv0", "cam_sel", 9),
+       GATE_CAM(CLK_CAM_CAMSV1, "camsys_camsv1", "cam_sel", 10),
+       GATE_CAM(CLK_CAM_CAMSV2, "camsys_camsv2", "cam_sel", 11),
+       GATE_CAM(CLK_CAM_CAMSV3, "camsys_camsv3", "cam_sel", 12),
+       GATE_CAM(CLK_CAM_CCU, "camsys_ccu", "cam_sel", 13),
+       GATE_CAM(CLK_CAM_FAKE_ENG, "camsys_fake_eng", "cam_sel", 14),
+};
+
+/* Device-tree match table: binds to "mediatek,mt6779-camsys" nodes. */
+static const struct of_device_id of_match_clk_mt6779_cam[] = {
+       { .compatible = "mediatek,mt6779-camsys", },
+       {}
+};
+
+/*
+ * Probe the MT6779 camsys clock controller: allocate the onecell clock table,
+ * register every gate in cam_clks[] and expose the table as an OF clock
+ * provider on the device's DT node.
+ *
+ * Returns 0 on success, -ENOMEM if the clock table cannot be allocated, or
+ * the error reported by of_clk_add_provider().
+ */
+static int clk_mt6779_cam_probe(struct platform_device *pdev)
+{
+       struct clk_onecell_data *clk_data;
+       struct device_node *node = pdev->dev.of_node;
+
+       clk_data = mtk_alloc_clk_data(CLK_CAM_NR_CLK);
+       /* Do not hand a NULL table to the registration helpers below. */
+       if (!clk_data)
+               return -ENOMEM;
+
+       mtk_clk_register_gates(node, cam_clks, ARRAY_SIZE(cam_clks),
+                              clk_data);
+
+       return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+/*
+ * Platform driver for the MT6779 camsys clocks. It has no remove callback and
+ * is registered through builtin_platform_driver().
+ */
+static struct platform_driver clk_mt6779_cam_drv = {
+       .probe = clk_mt6779_cam_probe,
+       .driver = {
+               .name = "clk-mt6779-cam",
+               .of_match_table = of_match_clk_mt6779_cam,
+       },
+};
+
+builtin_platform_driver(clk_mt6779_cam_drv);
diff --git a/drivers/clk/mediatek/clk-mt6779-img.c b/drivers/clk/mediatek/clk-mt6779-img.c
new file mode 100644 (file)
index 0000000..26292a4
--- /dev/null
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ * Author: Wendell Lin <wendell.lin@mediatek.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+#include <dt-bindings/clock/mt6779-clk.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+/* Register offsets (set / clear / status) for the imgsys gate bank. */
+static const struct mtk_gate_regs img_cg_regs = {
+       .set_ofs = 0x0004,
+       .clr_ofs = 0x0008,
+       .sta_ofs = 0x0000,
+};
+
+/*
+ * GATE_IMG(): describe one imgsys gate (clock ID, name, parent name, bit
+ * shift) using img_cg_regs and mtk_clk_gate_ops_setclr.
+ */
+#define GATE_IMG(_id, _name, _parent, _shift)                  \
+       GATE_MTK(_id, _name, _parent, &img_cg_regs, _shift,     \
+               &mtk_clk_gate_ops_setclr)
+
+/*
+ * Gate clocks of the MT6779 imgsys block; every gate is parented to
+ * "img_sel". Each entry lists: clock ID, clock name, parent name, bit shift.
+ */
+static const struct mtk_gate img_clks[] = {
+       GATE_IMG(CLK_IMG_LARB5, "imgsys_larb5", "img_sel", 0),
+       GATE_IMG(CLK_IMG_LARB6, "imgsys_larb6", "img_sel", 1),
+       GATE_IMG(CLK_IMG_DIP, "imgsys_dip", "img_sel", 2),
+       GATE_IMG(CLK_IMG_MFB, "imgsys_mfb", "img_sel", 6),
+       GATE_IMG(CLK_IMG_WPE_A, "imgsys_wpe_a", "img_sel", 7),
+};
+
+/* Device-tree match table: binds to "mediatek,mt6779-imgsys" nodes. */
+static const struct of_device_id of_match_clk_mt6779_img[] = {
+       { .compatible = "mediatek,mt6779-imgsys", },
+       {}
+};
+
+/*
+ * Probe the MT6779 imgsys clock controller: allocate the onecell clock table,
+ * register every gate in img_clks[] and expose the table as an OF clock
+ * provider on the device's DT node.
+ *
+ * Returns 0 on success, -ENOMEM if the clock table cannot be allocated, or
+ * the error reported by of_clk_add_provider().
+ */
+static int clk_mt6779_img_probe(struct platform_device *pdev)
+{
+       struct clk_onecell_data *clk_data;
+       struct device_node *node = pdev->dev.of_node;
+
+       clk_data = mtk_alloc_clk_data(CLK_IMG_NR_CLK);
+       /* Do not hand a NULL table to the registration helpers below. */
+       if (!clk_data)
+               return -ENOMEM;
+
+       mtk_clk_register_gates(node, img_clks, ARRAY_SIZE(img_clks),
+                              clk_data);
+
+       return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+/*
+ * Platform driver for the MT6779 imgsys clocks. It has no remove callback and
+ * is registered through builtin_platform_driver().
+ */
+static struct platform_driver clk_mt6779_img_drv = {
+       .probe = clk_mt6779_img_probe,
+       .driver = {
+               .name = "clk-mt6779-img",
+               .of_match_table = of_match_clk_mt6779_img,
+       },
+};
+
+builtin_platform_driver(clk_mt6779_img_drv);
diff --git a/drivers/clk/mediatek/clk-mt6779-ipe.c b/drivers/clk/mediatek/clk-mt6779-ipe.c
new file mode 100644 (file)
index 0000000..bb51907
--- /dev/null
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ * Author: Wendell Lin <wendell.lin@mediatek.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+#include <dt-bindings/clock/mt6779-clk.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+/* Register offsets (set / clear / status) for the ipesys gate bank. */
+static const struct mtk_gate_regs ipe_cg_regs = {
+       .set_ofs = 0x0004,
+       .clr_ofs = 0x0008,
+       .sta_ofs = 0x0000,
+};
+
+/*
+ * GATE_IPE(): describe one ipesys gate (clock ID, name, parent name, bit
+ * shift) using ipe_cg_regs and mtk_clk_gate_ops_setclr.
+ */
+#define GATE_IPE(_id, _name, _parent, _shift)                  \
+       GATE_MTK(_id, _name, _parent, &ipe_cg_regs, _shift,     \
+               &mtk_clk_gate_ops_setclr)
+
+/*
+ * Gate clocks of the MT6779 ipesys block; every gate is parented to
+ * "ipe_sel". Each entry lists: clock ID, clock name, parent name, bit shift.
+ */
+static const struct mtk_gate ipe_clks[] = {
+       GATE_IPE(CLK_IPE_LARB7, "ipe_larb7", "ipe_sel", 0),
+       GATE_IPE(CLK_IPE_LARB8, "ipe_larb8", "ipe_sel", 1),
+       GATE_IPE(CLK_IPE_SMI_SUBCOM, "ipe_smi_subcom", "ipe_sel", 2),
+       GATE_IPE(CLK_IPE_FD, "ipe_fd", "ipe_sel", 3),
+       GATE_IPE(CLK_IPE_FE, "ipe_fe", "ipe_sel", 4),
+       GATE_IPE(CLK_IPE_RSC, "ipe_rsc", "ipe_sel", 5),
+       GATE_IPE(CLK_IPE_DPE, "ipe_dpe", "ipe_sel", 6),
+};
+
+/* Device-tree match table: binds to "mediatek,mt6779-ipesys" nodes. */
+static const struct of_device_id of_match_clk_mt6779_ipe[] = {
+       { .compatible = "mediatek,mt6779-ipesys", },
+       {}
+};
+
+/*
+ * Probe the MT6779 ipesys clock controller: allocate the onecell clock table,
+ * register every gate in ipe_clks[] and expose the table as an OF clock
+ * provider on the device's DT node.
+ *
+ * Returns 0 on success, -ENOMEM if the clock table cannot be allocated, or
+ * the error reported by of_clk_add_provider().
+ */
+static int clk_mt6779_ipe_probe(struct platform_device *pdev)
+{
+       struct clk_onecell_data *clk_data;
+       struct device_node *node = pdev->dev.of_node;
+
+       clk_data = mtk_alloc_clk_data(CLK_IPE_NR_CLK);
+       /* Do not hand a NULL table to the registration helpers below. */
+       if (!clk_data)
+               return -ENOMEM;
+
+       mtk_clk_register_gates(node, ipe_clks, ARRAY_SIZE(ipe_clks),
+                              clk_data);
+
+       return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+/*
+ * Platform driver for the MT6779 ipesys clocks. It has no remove callback and
+ * is registered through builtin_platform_driver().
+ */
+static struct platform_driver clk_mt6779_ipe_drv = {
+       .probe = clk_mt6779_ipe_probe,
+       .driver = {
+               .name = "clk-mt6779-ipe",
+               .of_match_table = of_match_clk_mt6779_ipe,
+       },
+};
+
+builtin_platform_driver(clk_mt6779_ipe_drv);
diff --git a/drivers/clk/mediatek/clk-mt6779-mfg.c b/drivers/clk/mediatek/clk-mt6779-mfg.c
new file mode 100644 (file)
index 0000000..c6ee2a8
--- /dev/null
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ * Author: Wendell Lin <wendell.lin@mediatek.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt6779-clk.h>
+
+/* Register offsets (set / clear / status) for the mfgcfg gate bank. */
+static const struct mtk_gate_regs mfg_cg_regs = {
+       .set_ofs = 0x4,
+       .clr_ofs = 0x8,
+       .sta_ofs = 0x0,
+};
+
+/*
+ * GATE_MFG(): describe one mfgcfg gate (clock ID, name, parent name, bit
+ * shift) using mfg_cg_regs and mtk_clk_gate_ops_setclr.
+ */
+#define GATE_MFG(_id, _name, _parent, _shift)                  \
+       GATE_MTK(_id, _name, _parent, &mfg_cg_regs, _shift,     \
+               &mtk_clk_gate_ops_setclr)
+
+/* The single mfgcfg gate: "mfg_bg3d", parented to "mfg_sel", bit shift 0. */
+static const struct mtk_gate mfg_clks[] = {
+       GATE_MFG(CLK_MFGCFG_BG3D, "mfg_bg3d", "mfg_sel", 0),
+};
+
+/*
+ * Probe the MT6779 mfgcfg clock controller: allocate the onecell clock table,
+ * register every gate in mfg_clks[] and expose the table as an OF clock
+ * provider on the device's DT node.
+ *
+ * Returns 0 on success, -ENOMEM if the clock table cannot be allocated, or
+ * the error reported by of_clk_add_provider().
+ */
+static int clk_mt6779_mfg_probe(struct platform_device *pdev)
+{
+       struct clk_onecell_data *clk_data;
+       struct device_node *node = pdev->dev.of_node;
+
+       clk_data = mtk_alloc_clk_data(CLK_MFGCFG_NR_CLK);
+       /* Do not hand a NULL table to the registration helpers below. */
+       if (!clk_data)
+               return -ENOMEM;
+
+       mtk_clk_register_gates(node, mfg_clks, ARRAY_SIZE(mfg_clks),
+                              clk_data);
+
+       return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+/* Device-tree match table: binds to "mediatek,mt6779-mfgcfg" nodes. */
+static const struct of_device_id of_match_clk_mt6779_mfg[] = {
+       { .compatible = "mediatek,mt6779-mfgcfg", },
+       {}
+};
+
+/*
+ * Platform driver for the MT6779 mfgcfg clocks. It has no remove callback and
+ * is registered through builtin_platform_driver().
+ */
+static struct platform_driver clk_mt6779_mfg_drv = {
+       .probe = clk_mt6779_mfg_probe,
+       .driver = {
+               .name = "clk-mt6779-mfg",
+               .of_match_table = of_match_clk_mt6779_mfg,
+       },
+};
+
+builtin_platform_driver(clk_mt6779_mfg_drv);
diff --git a/drivers/clk/mediatek/clk-mt6779-mm.c b/drivers/clk/mediatek/clk-mt6779-mm.c
new file mode 100644 (file)
index 0000000..fb5fbb8
--- /dev/null
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ * Author: Wendell Lin <wendell.lin@mediatek.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+#include <dt-bindings/clock/mt6779-clk.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+/* Register offsets (set / clear / status) for mmsys gate bank 0. */
+static const struct mtk_gate_regs mm0_cg_regs = {
+       .set_ofs = 0x0104,
+       .clr_ofs = 0x0108,
+       .sta_ofs = 0x0100,
+};
+
+/* Register offsets (set / clear / status) for mmsys gate bank 1. */
+static const struct mtk_gate_regs mm1_cg_regs = {
+       .set_ofs = 0x0114,
+       .clr_ofs = 0x0118,
+       .sta_ofs = 0x0110,
+};
+
+/*
+ * GATE_MM0()/GATE_MM1(): describe one gate in mmsys bank 0 or 1.
+ * Arguments are the clock ID, clock name, parent name and bit shift; the
+ * register bank and mtk_clk_gate_ops_setclr are filled in here.
+ */
+#define GATE_MM0(_id, _name, _parent, _shift)                  \
+       GATE_MTK(_id, _name, _parent, &mm0_cg_regs, _shift,     \
+               &mtk_clk_gate_ops_setclr)
+#define GATE_MM1(_id, _name, _parent, _shift)                  \
+       GATE_MTK(_id, _name, _parent, &mm1_cg_regs, _shift,     \
+               &mtk_clk_gate_ops_setclr)
+
+/*
+ * Gate clocks of the MT6779 mmsys block, grouped by register bank.
+ * Each entry lists: clock ID, clock name, parent clock name, bit shift.
+ * Most gates are parented to "mm_sel"; the exceptions in bank 1 use
+ * "dpi0_sel" or "f_f26m_ck".
+ */
+static const struct mtk_gate mm_clks[] = {
+       /* MM0 */
+       GATE_MM0(CLK_MM_SMI_COMMON, "mm_smi_common", "mm_sel", 0),
+       GATE_MM0(CLK_MM_SMI_LARB0, "mm_smi_larb0", "mm_sel", 1),
+       GATE_MM0(CLK_MM_SMI_LARB1, "mm_smi_larb1", "mm_sel", 2),
+       GATE_MM0(CLK_MM_GALS_COMM0, "mm_gals_comm0", "mm_sel", 3),
+       GATE_MM0(CLK_MM_GALS_COMM1, "mm_gals_comm1", "mm_sel", 4),
+       GATE_MM0(CLK_MM_GALS_CCU2MM, "mm_gals_ccu2mm", "mm_sel", 5),
+       GATE_MM0(CLK_MM_GALS_IPU12MM, "mm_gals_ipu12mm", "mm_sel", 6),
+       GATE_MM0(CLK_MM_GALS_IMG2MM, "mm_gals_img2mm", "mm_sel", 7),
+       GATE_MM0(CLK_MM_GALS_CAM2MM, "mm_gals_cam2mm", "mm_sel", 8),
+       GATE_MM0(CLK_MM_GALS_IPU2MM, "mm_gals_ipu2mm", "mm_sel", 9),
+       GATE_MM0(CLK_MM_MDP_DL_TXCK, "mm_mdp_dl_txck", "mm_sel", 10),
+       GATE_MM0(CLK_MM_IPU_DL_TXCK, "mm_ipu_dl_txck", "mm_sel", 11),
+       GATE_MM0(CLK_MM_MDP_RDMA0, "mm_mdp_rdma0", "mm_sel", 12),
+       GATE_MM0(CLK_MM_MDP_RDMA1, "mm_mdp_rdma1", "mm_sel", 13),
+       GATE_MM0(CLK_MM_MDP_RSZ0, "mm_mdp_rsz0", "mm_sel", 14),
+       GATE_MM0(CLK_MM_MDP_RSZ1, "mm_mdp_rsz1", "mm_sel", 15),
+       GATE_MM0(CLK_MM_MDP_TDSHP, "mm_mdp_tdshp", "mm_sel", 16),
+       GATE_MM0(CLK_MM_MDP_WROT0, "mm_mdp_wrot0", "mm_sel", 17),
+       GATE_MM0(CLK_MM_MDP_WROT1, "mm_mdp_wrot1", "mm_sel", 18),
+       GATE_MM0(CLK_MM_FAKE_ENG, "mm_fake_eng", "mm_sel", 19),
+       GATE_MM0(CLK_MM_DISP_OVL0, "mm_disp_ovl0", "mm_sel", 20),
+       GATE_MM0(CLK_MM_DISP_OVL0_2L, "mm_disp_ovl0_2l", "mm_sel", 21),
+       GATE_MM0(CLK_MM_DISP_OVL1_2L, "mm_disp_ovl1_2l", "mm_sel", 22),
+       GATE_MM0(CLK_MM_DISP_RDMA0, "mm_disp_rdma0", "mm_sel", 23),
+       GATE_MM0(CLK_MM_DISP_RDMA1, "mm_disp_rdma1", "mm_sel", 24),
+       GATE_MM0(CLK_MM_DISP_WDMA0, "mm_disp_wdma0", "mm_sel", 25),
+       GATE_MM0(CLK_MM_DISP_COLOR0, "mm_disp_color0", "mm_sel", 26),
+       GATE_MM0(CLK_MM_DISP_CCORR0, "mm_disp_ccorr0", "mm_sel", 27),
+       GATE_MM0(CLK_MM_DISP_AAL0, "mm_disp_aal0", "mm_sel", 28),
+       GATE_MM0(CLK_MM_DISP_GAMMA0, "mm_disp_gamma0", "mm_sel", 29),
+       GATE_MM0(CLK_MM_DISP_DITHER0, "mm_disp_dither0", "mm_sel", 30),
+       GATE_MM0(CLK_MM_DISP_SPLIT, "mm_disp_split", "mm_sel", 31),
+       /* MM1 */
+       GATE_MM1(CLK_MM_DSI0_MM_CK, "mm_dsi0_mmck", "mm_sel", 0),
+       GATE_MM1(CLK_MM_DSI0_IF_CK, "mm_dsi0_ifck", "mm_sel", 1),
+       GATE_MM1(CLK_MM_DPI_MM_CK, "mm_dpi_mmck", "mm_sel", 2),
+       GATE_MM1(CLK_MM_DPI_IF_CK, "mm_dpi_ifck", "dpi0_sel", 3),
+       GATE_MM1(CLK_MM_FAKE_ENG2, "mm_fake_eng2", "mm_sel", 4),
+       GATE_MM1(CLK_MM_MDP_DL_RX_CK, "mm_mdp_dl_rxck", "mm_sel", 5),
+       GATE_MM1(CLK_MM_IPU_DL_RX_CK, "mm_ipu_dl_rxck", "mm_sel", 6),
+       GATE_MM1(CLK_MM_26M, "mm_26m", "f_f26m_ck", 7),
+       GATE_MM1(CLK_MM_MM_R2Y, "mm_mmsys_r2y", "mm_sel", 8),
+       GATE_MM1(CLK_MM_DISP_RSZ, "mm_disp_rsz", "mm_sel", 9),
+       GATE_MM1(CLK_MM_MDP_AAL, "mm_mdp_aal", "mm_sel", 10),
+       GATE_MM1(CLK_MM_MDP_HDR, "mm_mdp_hdr", "mm_sel", 11),
+       GATE_MM1(CLK_MM_DBI_MM_CK, "mm_dbi_mmck", "mm_sel", 12),
+       GATE_MM1(CLK_MM_DBI_IF_CK, "mm_dbi_ifck", "dpi0_sel", 13),
+       GATE_MM1(CLK_MM_DISP_POSTMASK0, "mm_disp_pm0", "mm_sel", 14),
+       GATE_MM1(CLK_MM_DISP_HRT_BW, "mm_disp_hrt_bw", "mm_sel", 15),
+       GATE_MM1(CLK_MM_DISP_OVL_FBDC, "mm_disp_ovl_fbdc", "mm_sel", 16),
+};
+
+/* Device-tree match table: binds to "mediatek,mt6779-mmsys" nodes. */
+static const struct of_device_id of_match_clk_mt6779_mm[] = {
+       { .compatible = "mediatek,mt6779-mmsys", },
+       {}
+};
+
+/*
+ * Probe the MT6779 mmsys clock controller: allocate the onecell clock table,
+ * register every gate in mm_clks[] and expose the table as an OF clock
+ * provider on the device's DT node.
+ *
+ * Returns 0 on success, -ENOMEM if the clock table cannot be allocated, or
+ * the error reported by of_clk_add_provider().
+ */
+static int clk_mt6779_mm_probe(struct platform_device *pdev)
+{
+       struct clk_onecell_data *clk_data;
+       struct device_node *node = pdev->dev.of_node;
+
+       clk_data = mtk_alloc_clk_data(CLK_MM_NR_CLK);
+       /* Do not hand a NULL table to the registration helpers below. */
+       if (!clk_data)
+               return -ENOMEM;
+
+       mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks),
+                              clk_data);
+
+       return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+/*
+ * Platform driver for the MT6779 mmsys clocks. It has no remove callback and
+ * is registered through builtin_platform_driver().
+ */
+static struct platform_driver clk_mt6779_mm_drv = {
+       .probe = clk_mt6779_mm_probe,
+       .driver = {
+               .name = "clk-mt6779-mm",
+               .of_match_table = of_match_clk_mt6779_mm,
+       },
+};
+
+builtin_platform_driver(clk_mt6779_mm_drv);
diff --git a/drivers/clk/mediatek/clk-mt6779-vdec.c b/drivers/clk/mediatek/clk-mt6779-vdec.c
new file mode 100644 (file)
index 0000000..1900da2
--- /dev/null
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ * Author: Wendell Lin <wendell.lin@mediatek.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt6779-clk.h>
+
+/*
+ * Register offsets for vdecsys gate bank 0. Note that the set and status
+ * offsets are both 0x0000, with clear at 0x0004.
+ */
+static const struct mtk_gate_regs vdec0_cg_regs = {
+       .set_ofs = 0x0000,
+       .clr_ofs = 0x0004,
+       .sta_ofs = 0x0000,
+};
+
+/*
+ * Register offsets for vdecsys gate bank 1. Note that the set and status
+ * offsets are both 0x0008, with clear at 0x000c.
+ */
+static const struct mtk_gate_regs vdec1_cg_regs = {
+       .set_ofs = 0x0008,
+       .clr_ofs = 0x000c,
+       .sta_ofs = 0x0008,
+};
+
+/*
+ * GATE_VDEC0_I()/GATE_VDEC1_I(): describe one gate in vdecsys bank 0 or 1.
+ * Arguments are the clock ID, clock name, parent name and bit shift; the
+ * register bank and mtk_clk_gate_ops_setclr_inv are filled in here.
+ */
+#define GATE_VDEC0_I(_id, _name, _parent, _shift)              \
+       GATE_MTK(_id, _name, _parent, &vdec0_cg_regs, _shift,   \
+               &mtk_clk_gate_ops_setclr_inv)
+#define GATE_VDEC1_I(_id, _name, _parent, _shift)              \
+       GATE_MTK(_id, _name, _parent, &vdec1_cg_regs, _shift,   \
+               &mtk_clk_gate_ops_setclr_inv)
+
+/*
+ * Gate clocks of the MT6779 vdecsys block, one per register bank, both
+ * parented to "vdec_sel" and both at bit shift 0 of their bank.
+ */
+static const struct mtk_gate vdec_clks[] = {
+       /* VDEC0 */
+       GATE_VDEC0_I(CLK_VDEC_VDEC, "vdec_cken", "vdec_sel", 0),
+       /* VDEC1 */
+       GATE_VDEC1_I(CLK_VDEC_LARB1, "vdec_larb1_cken", "vdec_sel", 0),
+};
+
+/* Device-tree match table: binds to "mediatek,mt6779-vdecsys" nodes. */
+static const struct of_device_id of_match_clk_mt6779_vdec[] = {
+       { .compatible = "mediatek,mt6779-vdecsys", },
+       {}
+};
+
+/*
+ * Probe the MT6779 vdecsys clock controller: allocate the onecell clock
+ * table, register every gate in vdec_clks[] and expose the table as an OF
+ * clock provider on the device's DT node.
+ *
+ * Returns 0 on success, -ENOMEM if the clock table cannot be allocated, or
+ * the error reported by of_clk_add_provider().
+ */
+static int clk_mt6779_vdec_probe(struct platform_device *pdev)
+{
+       struct clk_onecell_data *clk_data;
+       struct device_node *node = pdev->dev.of_node;
+
+       clk_data = mtk_alloc_clk_data(CLK_VDEC_GCON_NR_CLK);
+       /* Do not hand a NULL table to the registration helpers below. */
+       if (!clk_data)
+               return -ENOMEM;
+
+       mtk_clk_register_gates(node, vdec_clks, ARRAY_SIZE(vdec_clks),
+                              clk_data);
+
+       return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+/*
+ * Platform driver for the MT6779 vdecsys clocks. It has no remove callback
+ * and is registered through builtin_platform_driver().
+ */
+static struct platform_driver clk_mt6779_vdec_drv = {
+       .probe = clk_mt6779_vdec_probe,
+       .driver = {
+               .name = "clk-mt6779-vdec",
+               .of_match_table = of_match_clk_mt6779_vdec,
+       },
+};
+
+builtin_platform_driver(clk_mt6779_vdec_drv);
diff --git a/drivers/clk/mediatek/clk-mt6779-venc.c b/drivers/clk/mediatek/clk-mt6779-venc.c
new file mode 100644 (file)
index 0000000..b41d1f8
--- /dev/null
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ * Author: Wendell Lin <wendell.lin@mediatek.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt6779-clk.h>
+
+/* Register offsets (set / clear / status) for the vencsys gate bank. */
+static const struct mtk_gate_regs venc_cg_regs = {
+       .set_ofs = 0x0004,
+       .clr_ofs = 0x0008,
+       .sta_ofs = 0x0000,
+};
+
+/*
+ * GATE_VENC_I(): describe one vencsys gate (clock ID, name, parent name, bit
+ * shift) using venc_cg_regs and mtk_clk_gate_ops_setclr_inv.
+ */
+#define GATE_VENC_I(_id, _name, _parent, _shift)               \
+       GATE_MTK(_id, _name, _parent, &venc_cg_regs, _shift,    \
+               &mtk_clk_gate_ops_setclr_inv)
+
+/*
+ * Gate clocks of the MT6779 vencsys block; every gate is parented to
+ * "venc_sel". Each entry lists: clock ID, clock name, parent name, bit shift.
+ */
+static const struct mtk_gate venc_clks[] = {
+       GATE_VENC_I(CLK_VENC_GCON_LARB, "venc_larb", "venc_sel", 0),
+       GATE_VENC_I(CLK_VENC_GCON_VENC, "venc_venc", "venc_sel", 4),
+       GATE_VENC_I(CLK_VENC_GCON_JPGENC, "venc_jpgenc", "venc_sel", 8),
+       GATE_VENC_I(CLK_VENC_GCON_GALS, "venc_gals", "venc_sel", 28),
+};
+
+/* Device-tree match table: binds to "mediatek,mt6779-vencsys" nodes. */
+static const struct of_device_id of_match_clk_mt6779_venc[] = {
+       { .compatible = "mediatek,mt6779-vencsys", },
+       {}
+};
+
+/*
+ * Probe the MT6779 vencsys clock controller: allocate the onecell clock
+ * table, register every gate in venc_clks[] and expose the table as an OF
+ * clock provider on the device's DT node.
+ *
+ * Returns 0 on success, -ENOMEM if the clock table cannot be allocated, or
+ * the error reported by of_clk_add_provider().
+ */
+static int clk_mt6779_venc_probe(struct platform_device *pdev)
+{
+       struct clk_onecell_data *clk_data;
+       struct device_node *node = pdev->dev.of_node;
+
+       clk_data = mtk_alloc_clk_data(CLK_VENC_GCON_NR_CLK);
+       /* Do not hand a NULL table to the registration helpers below. */
+       if (!clk_data)
+               return -ENOMEM;
+
+       mtk_clk_register_gates(node, venc_clks, ARRAY_SIZE(venc_clks),
+                              clk_data);
+
+       return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+/*
+ * Platform driver for the MT6779 vencsys clocks. It has no remove callback
+ * and is registered through builtin_platform_driver().
+ */
+static struct platform_driver clk_mt6779_venc_drv = {
+       .probe = clk_mt6779_venc_probe,
+       .driver = {
+               .name = "clk-mt6779-venc",
+               .of_match_table = of_match_clk_mt6779_venc,
+       },
+};
+
+builtin_platform_driver(clk_mt6779_venc_drv);
diff --git a/drivers/clk/mediatek/clk-mt6779.c b/drivers/clk/mediatek/clk-mt6779.c
new file mode 100644 (file)
index 0000000..608a9a6
--- /dev/null
@@ -0,0 +1,1315 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ * Author: Wendell Lin <wendell.lin@mediatek.com>
+ */
+
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-mux.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt6779-clk.h>
+
+static DEFINE_SPINLOCK(mt6779_clk_lock);
+
+/*
+ * Fixed clocks of the top block: "f_f26m_ck", child of "clk26m", with the
+ * value 26000000 (presumably the rate in Hz - confirm against FIXED_CLK).
+ */
+static const struct mtk_fixed_clk top_fixed_clks[] = {
+       FIXED_CLK(CLK_TOP_CLK26M, "f_f26m_ck", "clk26m", 26000000),
+};
+
+/*
+ * Fixed-factor clocks derived from the PLLs, the 26M reference and the
+ * oscillators. Each FACTOR() entry gives id, name, parent and two
+ * integers, presumably multiplier and divider (e.g. "mainpll_d2" is
+ * "mainpll_ck" with 1, 2) - confirm against the FACTOR macro in clk-mtk.h.
+ *
+ * Note that CLK_TOP_UNIVPLL_CK is registered under the name "univpll" and
+ * is itself derived from "univ2pll" with 1, 2.
+ *
+ * NOTE(review): "tvdpll_mainpll_d2_ck" is declared as 1, 1 of "tvdpll"
+ * only, although its name also mentions mainpll_d2 - confirm this is the
+ * intended parent.
+ */
+static const struct mtk_fixed_factor top_divs[] = {
+       FACTOR(CLK_TOP_CLK13M, "clk13m", "clk26m", 1, 2),
+       FACTOR(CLK_TOP_F26M_CK_D2, "csw_f26m_ck_d2", "clk26m", 1, 2),
+       FACTOR(CLK_TOP_MAINPLL_CK, "mainpll_ck", "mainpll", 1, 1),
+       FACTOR(CLK_TOP_MAINPLL_D2, "mainpll_d2", "mainpll_ck", 1, 2),
+       FACTOR(CLK_TOP_MAINPLL_D2_D2, "mainpll_d2_d2", "mainpll_d2", 1, 2),
+       FACTOR(CLK_TOP_MAINPLL_D2_D4, "mainpll_d2_d4", "mainpll_d2", 1, 4),
+       FACTOR(CLK_TOP_MAINPLL_D2_D8, "mainpll_d2_d8", "mainpll_d2", 1, 8),
+       FACTOR(CLK_TOP_MAINPLL_D2_D16, "mainpll_d2_d16", "mainpll_d2", 1, 16),
+       FACTOR(CLK_TOP_MAINPLL_D3, "mainpll_d3", "mainpll", 1, 3),
+       FACTOR(CLK_TOP_MAINPLL_D3_D2, "mainpll_d3_d2", "mainpll_d3", 1, 2),
+       FACTOR(CLK_TOP_MAINPLL_D3_D4, "mainpll_d3_d4", "mainpll_d3", 1, 4),
+       FACTOR(CLK_TOP_MAINPLL_D3_D8, "mainpll_d3_d8", "mainpll_d3", 1, 8),
+       FACTOR(CLK_TOP_MAINPLL_D5, "mainpll_d5", "mainpll", 1, 5),
+       FACTOR(CLK_TOP_MAINPLL_D5_D2, "mainpll_d5_d2", "mainpll_d5", 1, 2),
+       FACTOR(CLK_TOP_MAINPLL_D5_D4, "mainpll_d5_d4", "mainpll_d5", 1, 4),
+       FACTOR(CLK_TOP_MAINPLL_D7, "mainpll_d7", "mainpll", 1, 7),
+       FACTOR(CLK_TOP_MAINPLL_D7_D2, "mainpll_d7_d2", "mainpll_d7", 1, 2),
+       FACTOR(CLK_TOP_MAINPLL_D7_D4, "mainpll_d7_d4", "mainpll_d7", 1, 4),
+       FACTOR(CLK_TOP_UNIVPLL_CK, "univpll", "univ2pll", 1, 2),
+       FACTOR(CLK_TOP_UNIVPLL_D2, "univpll_d2", "univpll", 1, 2),
+       FACTOR(CLK_TOP_UNIVPLL_D2_D2, "univpll_d2_d2", "univpll_d2", 1, 2),
+       FACTOR(CLK_TOP_UNIVPLL_D2_D4, "univpll_d2_d4", "univpll_d2", 1, 4),
+       FACTOR(CLK_TOP_UNIVPLL_D2_D8, "univpll_d2_d8", "univpll_d2", 1, 8),
+       FACTOR(CLK_TOP_UNIVPLL_D3, "univpll_d3", "univpll", 1, 3),
+       FACTOR(CLK_TOP_UNIVPLL_D3_D2, "univpll_d3_d2", "univpll_d3", 1, 2),
+       FACTOR(CLK_TOP_UNIVPLL_D3_D4, "univpll_d3_d4", "univpll_d3", 1, 4),
+       FACTOR(CLK_TOP_UNIVPLL_D3_D8, "univpll_d3_d8", "univpll_d3", 1, 8),
+       FACTOR(CLK_TOP_UNIVPLL_D3_D16, "univpll_d3_d16", "univpll_d3", 1, 16),
+       FACTOR(CLK_TOP_UNIVPLL_D5, "univpll_d5", "univpll", 1, 5),
+       FACTOR(CLK_TOP_UNIVPLL_D5_D2, "univpll_d5_d2", "univpll_d5", 1, 2),
+       FACTOR(CLK_TOP_UNIVPLL_D5_D4, "univpll_d5_d4", "univpll_d5", 1, 4),
+       FACTOR(CLK_TOP_UNIVPLL_D5_D8, "univpll_d5_d8", "univpll_d5", 1, 8),
+       FACTOR(CLK_TOP_UNIVPLL_D7, "univpll_d7", "univpll", 1, 7),
+       FACTOR(CLK_TOP_UNIVP_192M_CK, "univpll_192m_ck", "univ2pll", 1, 13),
+       FACTOR(CLK_TOP_UNIVP_192M_D2, "univpll_192m_d2", "univpll_192m_ck",
+              1, 2),
+       FACTOR(CLK_TOP_UNIVP_192M_D4, "univpll_192m_d4", "univpll_192m_ck",
+              1, 4),
+       FACTOR(CLK_TOP_UNIVP_192M_D8, "univpll_192m_d8", "univpll_192m_ck",
+              1, 8),
+       FACTOR(CLK_TOP_UNIVP_192M_D16, "univpll_192m_d16", "univpll_192m_ck",
+              1, 16),
+       FACTOR(CLK_TOP_UNIVP_192M_D32, "univpll_192m_d32", "univpll_192m_ck",
+              1, 32),
+       FACTOR(CLK_TOP_APLL1_CK, "apll1_ck", "apll1", 1, 1),
+       FACTOR(CLK_TOP_APLL1_D2, "apll1_d2", "apll1", 1, 2),
+       FACTOR(CLK_TOP_APLL1_D4, "apll1_d4", "apll1", 1, 4),
+       FACTOR(CLK_TOP_APLL1_D8, "apll1_d8", "apll1", 1, 8),
+       FACTOR(CLK_TOP_APLL2_CK, "apll2_ck", "apll2", 1, 1),
+       FACTOR(CLK_TOP_APLL2_D2, "apll2_d2", "apll2", 1, 2),
+       FACTOR(CLK_TOP_APLL2_D4, "apll2_d4", "apll2", 1, 4),
+       FACTOR(CLK_TOP_APLL2_D8, "apll2_d8", "apll2", 1, 8),
+       FACTOR(CLK_TOP_TVDPLL_CK, "tvdpll_ck", "tvdpll", 1, 1),
+       FACTOR(CLK_TOP_TVDPLL_D2, "tvdpll_d2", "tvdpll_ck", 1, 2),
+       FACTOR(CLK_TOP_TVDPLL_D4, "tvdpll_d4", "tvdpll", 1, 4),
+       FACTOR(CLK_TOP_TVDPLL_D8, "tvdpll_d8", "tvdpll", 1, 8),
+       FACTOR(CLK_TOP_TVDPLL_D16, "tvdpll_d16", "tvdpll", 1, 16),
+       FACTOR(CLK_TOP_MMPLL_CK, "mmpll_ck", "mmpll", 1, 1),
+       FACTOR(CLK_TOP_MMPLL_D4, "mmpll_d4", "mmpll", 1, 4),
+       FACTOR(CLK_TOP_MMPLL_D4_D2, "mmpll_d4_d2", "mmpll_d4", 1, 2),
+       FACTOR(CLK_TOP_MMPLL_D4_D4, "mmpll_d4_d4", "mmpll_d4", 1, 4),
+       FACTOR(CLK_TOP_MMPLL_D5, "mmpll_d5", "mmpll", 1, 5),
+       FACTOR(CLK_TOP_MMPLL_D5_D2, "mmpll_d5_d2", "mmpll_d5", 1, 2),
+       FACTOR(CLK_TOP_MMPLL_D5_D4, "mmpll_d5_d4", "mmpll_d5", 1, 4),
+       FACTOR(CLK_TOP_MMPLL_D6, "mmpll_d6", "mmpll", 1, 6),
+       FACTOR(CLK_TOP_MMPLL_D7, "mmpll_d7", "mmpll", 1, 7),
+       FACTOR(CLK_TOP_MFGPLL_CK, "mfgpll_ck", "mfgpll", 1, 1),
+       FACTOR(CLK_TOP_ADSPPLL_CK, "adsppll_ck", "adsppll", 1, 1),
+       FACTOR(CLK_TOP_ADSPPLL_D4, "adsppll_d4", "adsppll", 1, 4),
+       FACTOR(CLK_TOP_ADSPPLL_D5, "adsppll_d5", "adsppll", 1, 5),
+       FACTOR(CLK_TOP_ADSPPLL_D6, "adsppll_d6", "adsppll", 1, 6),
+       FACTOR(CLK_TOP_MSDCPLL_CK, "msdcpll_ck", "msdcpll", 1, 1),
+       FACTOR(CLK_TOP_MSDCPLL_D2, "msdcpll_d2", "msdcpll", 1, 2),
+       FACTOR(CLK_TOP_MSDCPLL_D4, "msdcpll_d4", "msdcpll", 1, 4),
+       FACTOR(CLK_TOP_MSDCPLL_D8, "msdcpll_d8", "msdcpll", 1, 8),
+       FACTOR(CLK_TOP_MSDCPLL_D16, "msdcpll_d16", "msdcpll", 1, 16),
+       FACTOR(CLK_TOP_AD_OSC_CK, "ad_osc_ck", "osc", 1, 1),
+       FACTOR(CLK_TOP_OSC_D2, "osc_d2", "osc", 1, 2),
+       FACTOR(CLK_TOP_OSC_D4, "osc_d4", "osc", 1, 4),
+       FACTOR(CLK_TOP_OSC_D8, "osc_d8", "osc", 1, 8),
+       FACTOR(CLK_TOP_OSC_D10, "osc_d10", "osc", 1, 10),
+       FACTOR(CLK_TOP_OSC_D16, "osc_d16", "osc", 1, 16),
+       FACTOR(CLK_TOP_AD_OSC2_CK, "ad_osc2_ck", "osc2", 1, 1),
+       FACTOR(CLK_TOP_OSC2_D2, "osc2_d2", "osc2", 1, 2),
+       FACTOR(CLK_TOP_OSC2_D3, "osc2_d3", "osc2", 1, 3),
+       FACTOR(CLK_TOP_TVDPLL_MAINPLL_D2_CK, "tvdpll_mainpll_d2_ck",
+              "tvdpll", 1, 1),
+       FACTOR(CLK_TOP_FMEM_466M_CK, "fmem_466m_ck", "fmem", 1, 1),
+};
+
+/* Parent clock names for the "axi_sel" mux (see top_muxes). */
+static const char * const axi_parents[] = {
+       "clk26m",
+       "mainpll_d2_d4",
+       "mainpll_d7",
+       "osc_d4"
+};
+
+/* Parent clock names for the "mm_sel" mux (see top_muxes). */
+static const char * const mm_parents[] = {
+       "clk26m",
+       "tvdpll_mainpll_d2_ck",
+       "mmpll_d7",
+       "mmpll_d5_d2",
+       "mainpll_d2_d2",
+       "mainpll_d3_d2"
+};
+
+/* Parent clock names for the "scp_sel" mux (see top_muxes). */
+static const char * const scp_parents[] = {
+       "clk26m",
+       "univpll_d2_d8",
+       "mainpll_d2_d4",
+       "mainpll_d3",
+       "univpll_d3",
+       "ad_osc2_ck",
+       "osc2_d2",
+       "osc2_d3"
+};
+
+/*
+ * Parent clock names for the "img_sel" mux (see top_muxes).
+ *
+ * NOTE(review): "mainpll_d2" appears twice (entries 1 and 2), whereas the
+ * otherwise similar ipe_parents/dpe_parents tables have "mmpll_d7" as
+ * entry 2 - confirm against the datasheet that the duplicate is intended.
+ */
+static const char * const img_parents[] = {
+       "clk26m",
+       "mainpll_d2",
+       "mainpll_d2",
+       "univpll_d3",
+       "mainpll_d3",
+       "mmpll_d5_d2",
+       "tvdpll_mainpll_d2_ck",
+       "mainpll_d5"
+};
+
+/* Parent clock names for the "ipe_sel" mux (see top_muxes). */
+static const char * const ipe_parents[] = {
+       "clk26m",
+       "mainpll_d2",
+       "mmpll_d7",
+       "univpll_d3",
+       "mainpll_d3",
+       "mmpll_d5_d2",
+       "mainpll_d2_d2",
+       "mainpll_d5"
+};
+
+/* Parent clock names for the "dpe_sel" mux; same list as ipe_parents. */
+static const char * const dpe_parents[] = {
+       "clk26m",
+       "mainpll_d2",
+       "mmpll_d7",
+       "univpll_d3",
+       "mainpll_d3",
+       "mmpll_d5_d2",
+       "mainpll_d2_d2",
+       "mainpll_d5"
+};
+
+/* Parent clock names for the "cam_sel" mux (see top_muxes). */
+static const char * const cam_parents[] = {
+       "clk26m",
+       "mainpll_d2",
+       "mmpll_d6",
+       "mainpll_d3",
+       "mmpll_d7",
+       "univpll_d3",
+       "mmpll_d5_d2",
+       "adsppll_d5",
+       "tvdpll_mainpll_d2_ck",
+       "univpll_d3_d2"
+};
+
+/* Parent clock names for the "ccu_sel" mux (see top_muxes). */
+static const char * const ccu_parents[] = {
+       "clk26m",
+       "mainpll_d2",
+       "mmpll_d6",
+       "mainpll_d3",
+       "mmpll_d7",
+       "univpll_d3",
+       "mmpll_d5_d2",
+       "mainpll_d2_d2",
+       "adsppll_d5",
+       "univpll_d3_d2"
+};
+
+/* Parent clock names for the "dsp_sel" mux (see top_muxes). */
+static const char * const dsp_parents[] = {
+       "clk26m",
+       "univpll_d3_d8",
+       "univpll_d3_d4",
+       "mainpll_d2_d4",
+       "univpll_d3_d2",
+       "mainpll_d2_d2",
+       "univpll_d2_d2",
+       "mainpll_d3",
+       "univpll_d3",
+       "mmpll_d7",
+       "mmpll_d6",
+       "adsppll_d5",
+       "tvdpll_ck",
+       "tvdpll_mainpll_d2_ck",
+       "univpll_d2",
+       "adsppll_d4"
+};
+
+/* Parent clock names for the "dsp1_sel" mux; same list as dsp_parents. */
+static const char * const dsp1_parents[] = {
+       "clk26m",
+       "univpll_d3_d8",
+       "univpll_d3_d4",
+       "mainpll_d2_d4",
+       "univpll_d3_d2",
+       "mainpll_d2_d2",
+       "univpll_d2_d2",
+       "mainpll_d3",
+       "univpll_d3",
+       "mmpll_d7",
+       "mmpll_d6",
+       "adsppll_d5",
+       "tvdpll_ck",
+       "tvdpll_mainpll_d2_ck",
+       "univpll_d2",
+       "adsppll_d4"
+};
+
+/* Parent clock names for the "dsp2_sel" mux; same list as dsp_parents. */
+static const char * const dsp2_parents[] = {
+       "clk26m",
+       "univpll_d3_d8",
+       "univpll_d3_d4",
+       "mainpll_d2_d4",
+       "univpll_d3_d2",
+       "mainpll_d2_d2",
+       "univpll_d2_d2",
+       "mainpll_d3",
+       "univpll_d3",
+       "mmpll_d7",
+       "mmpll_d6",
+       "adsppll_d5",
+       "tvdpll_ck",
+       "tvdpll_mainpll_d2_ck",
+       "univpll_d2",
+       "adsppll_d4"
+};
+
+/*
+ * Parent clock names for the "dsp3_sel" mux (see top_muxes). Unlike the
+ * other dsp*_parents tables this one has no "univpll_d3_d4" or
+ * "adsppll_d5" entry and includes "mainpll_d2" and "mmpll_d4".
+ */
+static const char * const dsp3_parents[] = {
+       "clk26m",
+       "univpll_d3_d8",
+       "mainpll_d2_d4",
+       "univpll_d3_d2",
+       "mainpll_d2_d2",
+       "univpll_d2_d2",
+       "mainpll_d3",
+       "univpll_d3",
+       "mmpll_d7",
+       "mmpll_d6",
+       "mainpll_d2",
+       "tvdpll_ck",
+       "tvdpll_mainpll_d2_ck",
+       "univpll_d2",
+       "adsppll_d4",
+       "mmpll_d4"
+};
+
+/* Parent clock names for the "ipu_if_sel" mux; same list as dsp_parents. */
+static const char * const ipu_if_parents[] = {
+       "clk26m",
+       "univpll_d3_d8",
+       "univpll_d3_d4",
+       "mainpll_d2_d4",
+       "univpll_d3_d2",
+       "mainpll_d2_d2",
+       "univpll_d2_d2",
+       "mainpll_d3",
+       "univpll_d3",
+       "mmpll_d7",
+       "mmpll_d6",
+       "adsppll_d5",
+       "tvdpll_ck",
+       "tvdpll_mainpll_d2_ck",
+       "univpll_d2",
+       "adsppll_d4"
+};
+
+/* Parent clock names for the "mfg_sel" mux (see top_muxes). */
+static const char * const mfg_parents[] = {
+       "clk26m",
+       "mfgpll_ck",
+       "univpll_d3",
+       "mainpll_d5"
+};
+
+/* Parent clock names for the "f52m_mfg_sel" mux (see top_muxes). */
+static const char * const f52m_mfg_parents[] = {
+       "clk26m",
+       "univpll_d3_d2",
+       "univpll_d3_d4",
+       "univpll_d3_d8"
+};
+
+/* Parent clock names for the "camtg_sel" mux (see top_muxes). */
+static const char * const camtg_parents[] = {
+       "clk26m",
+       "univpll_192m_d8",
+       "univpll_d3_d8",
+       "univpll_192m_d4",
+       "univpll_d3_d16",
+       "csw_f26m_ck_d2",
+       "univpll_192m_d16",
+       "univpll_192m_d32"
+};
+
+/* Parent clock names for the "camtg2_sel" mux; same list as camtg_parents. */
+static const char * const camtg2_parents[] = {
+       "clk26m",
+       "univpll_192m_d8",
+       "univpll_d3_d8",
+       "univpll_192m_d4",
+       "univpll_d3_d16",
+       "csw_f26m_ck_d2",
+       "univpll_192m_d16",
+       "univpll_192m_d32"
+};
+
+/* Parent clock names for the "camtg3_sel" mux; same list as camtg_parents. */
+static const char * const camtg3_parents[] = {
+       "clk26m",
+       "univpll_192m_d8",
+       "univpll_d3_d8",
+       "univpll_192m_d4",
+       "univpll_d3_d16",
+       "csw_f26m_ck_d2",
+       "univpll_192m_d16",
+       "univpll_192m_d32"
+};
+
+/* Parent clock names for the "camtg4_sel" mux; same list as camtg_parents. */
+static const char * const camtg4_parents[] = {
+       "clk26m",
+       "univpll_192m_d8",
+       "univpll_d3_d8",
+       "univpll_192m_d4",
+       "univpll_d3_d16",
+       "csw_f26m_ck_d2",
+       "univpll_192m_d16",
+       "univpll_192m_d32"
+};
+
+/* Parent clock names for the "uart_sel" mux (see top_muxes). */
+static const char * const uart_parents[] = {
+       "clk26m",
+       "univpll_d3_d8"
+};
+
+/* Parent clock names for the "spi_sel" mux (see top_muxes). */
+static const char * const spi_parents[] = {
+       "clk26m",
+       "mainpll_d5_d2",
+       "mainpll_d3_d4",
+       "msdcpll_d4"
+};
+
+/* Parent clock names for the "msdc50_hclk_sel" mux (see top_muxes). */
+static const char * const msdc50_hclk_parents[] = {
+       "clk26m",
+       "mainpll_d2_d2",
+       "mainpll_d3_d2"
+};
+
+/* Parent clock names for the "msdc50_0_sel" mux (see top_muxes). */
+static const char * const msdc50_0_parents[] = {
+       "clk26m",
+       "msdcpll_ck",
+       "msdcpll_d2",
+       "univpll_d2_d4",
+       "mainpll_d3_d2",
+       "univpll_d2_d2"
+};
+
+/* Parent clock names for the "msdc30_1_sel" mux (see top_muxes). */
+static const char * const msdc30_1_parents[] = {
+       "clk26m",
+       "univpll_d3_d2",
+       "mainpll_d3_d2",
+       "mainpll_d7",
+       "msdcpll_d2"
+};
+
+/* Parent clock names for the "audio_sel" mux (see top_muxes). */
+static const char * const audio_parents[] = {
+       "clk26m",
+       "mainpll_d5_d4",
+       "mainpll_d7_d4",
+       "mainpll_d2_d16"
+};
+
+/* Parent clock names for the "aud_intbus_sel" mux (see top_muxes). */
+static const char * const aud_intbus_parents[] = {
+       "clk26m",
+       "mainpll_d2_d4",
+       "mainpll_d7_d2"
+};
+
+/*
+ * Parent clock names for the "fpwrap_ulposc_sel" mux (see top_muxes).
+ * This is the only parent table here whose first entry is not "clk26m".
+ */
+static const char * const fpwrap_ulposc_parents[] = {
+       "osc_d10",
+       "clk26m",
+       "osc_d4",
+       "osc_d8",
+       "osc_d16"
+};
+
+/* Parent clock names for the "atb_sel" mux (see top_muxes). */
+static const char * const atb_parents[] = {
+       "clk26m",
+       "mainpll_d2_d2",
+       "mainpll_d5"
+};
+
+/* Parent clock names for the "sspm_sel" mux (see top_muxes). */
+static const char * const sspm_parents[] = {
+       "clk26m",
+       "univpll_d2_d4",
+       "mainpll_d2_d2",
+       "univpll_d2_d2",
+       "mainpll_d3"
+};
+
+/* Parent clock names for the "dpi0_sel" mux (see top_muxes). */
+static const char * const dpi0_parents[] = {
+       "clk26m",
+       "tvdpll_d2",
+       "tvdpll_d4",
+       "tvdpll_d8",
+       "tvdpll_d16"
+};
+
+/* Parent clock names for the "scam_sel" mux (see top_muxes). */
+static const char * const scam_parents[] = {
+       "clk26m",
+       "mainpll_d5_d2"
+};
+
+/* Parent clock names for the "disppwm_sel" mux (see top_muxes). */
+static const char * const disppwm_parents[] = {
+       "clk26m",
+       "univpll_d3_d4",
+       "osc_d2",
+       "osc_d4",
+       "osc_d16"
+};
+
+/* Parent clock names for the "usb_top_sel" mux (see top_muxes). */
+static const char * const usb_top_parents[] = {
+       "clk26m",
+       "univpll_d5_d4",
+       "univpll_d3_d4",
+       "univpll_d5_d2"
+};
+
+/*
+ * Parent clock names for the "ssusb_top_xhci_sel" mux; same list as
+ * usb_top_parents.
+ */
+static const char * const ssusb_top_xhci_parents[] = {
+       "clk26m",
+       "univpll_d5_d4",
+       "univpll_d3_d4",
+       "univpll_d5_d2"
+};
+
+/* Parent clock names for the "spm_sel" mux (see top_muxes). */
+static const char * const spm_parents[] = {
+       "clk26m",
+       "osc_d8",
+       "mainpll_d2_d8"
+};
+
+/* Parent clock names for the "i2c_sel" mux (see top_muxes). */
+static const char * const i2c_parents[] = {
+       "clk26m",
+       "mainpll_d2_d8",
+       "univpll_d5_d2"
+};
+
+/*
+ * Parent clock names for the "seninf_sel" mux (see top_muxes).
+ *
+ * NOTE(review): this table has 8 entries, but the top_muxes entry passes
+ * 2 in the position where muxes with 8-entry tables elsewhere pass 3
+ * (presumably the selector field width) - confirm all 8 are reachable.
+ */
+static const char * const seninf_parents[] = {
+       "clk26m",
+       "univpll_d7",
+       "univpll_d3_d2",
+       "univpll_d2_d2",
+       "mainpll_d3",
+       "mmpll_d4_d2",
+       "mmpll_d7",
+       "mmpll_d6"
+};
+
+/*
+ * Parent clock names for the "seninf1_sel" mux; same list as
+ * seninf_parents.
+ *
+ * NOTE(review): 8 entries, but the top_muxes entry passes 2 where other
+ * 8-entry muxes pass 3 (presumably the field width) - confirm.
+ */
+static const char * const seninf1_parents[] = {
+       "clk26m",
+       "univpll_d7",
+       "univpll_d3_d2",
+       "univpll_d2_d2",
+       "mainpll_d3",
+       "mmpll_d4_d2",
+       "mmpll_d7",
+       "mmpll_d6"
+};
+
+/*
+ * Parent clock names for the "seninf2_sel" mux; same list as
+ * seninf_parents.
+ *
+ * NOTE(review): 8 entries, but the top_muxes entry passes 2 where other
+ * 8-entry muxes pass 3 (presumably the field width) - confirm.
+ */
+static const char * const seninf2_parents[] = {
+       "clk26m",
+       "univpll_d7",
+       "univpll_d3_d2",
+       "univpll_d2_d2",
+       "mainpll_d3",
+       "mmpll_d4_d2",
+       "mmpll_d7",
+       "mmpll_d6"
+};
+
+/* Parent clock names for the "dxcc_sel" mux (see top_muxes). */
+static const char * const dxcc_parents[] = {
+       "clk26m",
+       "mainpll_d2_d2",
+       "mainpll_d2_d4",
+       "mainpll_d2_d8"
+};
+
+/* Parent clock names for the "aud_eng1_sel" mux (see top_muxes). */
+static const char * const aud_engen1_parents[] = {
+       "clk26m",
+       "apll1_d2",
+       "apll1_d4",
+       "apll1_d8"
+};
+
+/* Parent clock names for the "aud_eng2_sel" mux (see top_muxes). */
+static const char * const aud_engen2_parents[] = {
+       "clk26m",
+       "apll2_d2",
+       "apll2_d4",
+       "apll2_d8"
+};
+
+/* Parent clock names for the "faes_ufsfde_sel" mux (see top_muxes). */
+static const char * const faes_ufsfde_parents[] = {
+       "clk26m",
+       "mainpll_d2",
+       "mainpll_d2_d2",
+       "mainpll_d3",
+       "mainpll_d2_d4",
+       "univpll_d3"
+};
+
+/* Parent clock names for the "fufs_sel" mux (see top_muxes). */
+static const char * const fufs_parents[] = {
+       "clk26m",
+       "mainpll_d2_d4",
+       "mainpll_d2_d8",
+       "mainpll_d2_d16"
+};
+
+/* Parent clock names for the "aud_1_sel" mux (see top_muxes). */
+static const char * const aud_1_parents[] = {
+       "clk26m",
+       "apll1_ck"
+};
+
+/* Parent clock names for the "aud_2_sel" mux (see top_muxes). */
+static const char * const aud_2_parents[] = {
+       "clk26m",
+       "apll2_ck"
+};
+
+/* Parent clock names for the "adsp_sel" mux (see top_muxes). */
+static const char * const adsp_parents[] = {
+       "clk26m",
+       "mainpll_d3",
+       "univpll_d2_d4",
+       "univpll_d2",
+       "mmpll_d4",
+       "adsppll_d4",
+       "adsppll_d6"
+};
+
+/* Parent clock names for the "dpmaif_sel" mux (see top_muxes). */
+static const char * const dpmaif_parents[] = {
+       "clk26m",
+       "univpll_d2_d4",
+       "mainpll_d3",
+       "mainpll_d2_d2",
+       "univpll_d2_d2",
+       "univpll_d3"
+};
+
+/*
+ * Parent clock names for the "venc_sel" mux (see top_muxes); "venc_sel"
+ * in turn feeds the VENC gates of the mt6779 vencsys driver.
+ */
+static const char * const venc_parents[] = {
+       "clk26m",
+       "mmpll_d7",
+       "mainpll_d3",
+       "univpll_d2_d2",
+       "mainpll_d2_d2",
+       "univpll_d3",
+       "mmpll_d6",
+       "mainpll_d5",
+       "mainpll_d3_d2",
+       "mmpll_d4_d2",
+       "univpll_d2_d4",
+       "mmpll_d5",
+       "univpll_192m_d2"
+
+};
+
+/* Parent clock names for the "vdec_sel" mux (see top_muxes). */
+static const char * const vdec_parents[] = {
+       "clk26m",
+       "univpll_d2_d4",
+       "mainpll_d3",
+       "univpll_d2_d2",
+       "mainpll_d2_d2",
+       "univpll_d3",
+       "univpll_d5",
+       "univpll_d5_d2",
+       "mainpll_d2",
+       "univpll_d2",
+       "univpll_192m_d2"
+};
+
+/* Parent clock names for the "camtm_sel" mux (see top_muxes). */
+static const char * const camtm_parents[] = {
+       "clk26m",
+       "univpll_d7",
+       "univpll_d3_d2",
+       "univpll_d2_d2"
+};
+
+/* Parent clock names for the "pwm_sel" mux (see top_muxes). */
+static const char * const pwm_parents[] = {
+       "clk26m",
+       "univpll_d2_d8"
+};
+
+/* Parent clock names for the "audio_h_sel" mux (see top_muxes). */
+static const char * const audio_h_parents[] = {
+       "clk26m",
+       "univpll_d7",
+       "apll1_ck",
+       "apll2_ck"
+};
+
+/* Parent clock names for the "camtg5_sel" mux; same list as camtg_parents. */
+static const char * const camtg5_parents[] = {
+       "clk26m",
+       "univpll_192m_d8",
+       "univpll_d3_d8",
+       "univpll_192m_d4",
+       "univpll_d3_d16",
+       "csw_f26m_ck_d2",
+       "univpll_192m_d16",
+       "univpll_192m_d32"
+};
+
+/*
+ * Top-level clock muxes, grouped by the CLK_CFG_n register they live in.
+ *
+ * The numeric arguments of each entry are presumably (confirm against
+ * clk-mux.h): mux, set and clr register offsets, selector shift, selector
+ * width, gate bit, update register offset and update bit.
+ *
+ * CRITICAL CLOCK:
+ * axi_sel is the main bus clock of whole SOC.
+ * spm_sel is the clock of the always-on co-processor.
+ * sspm_sel is the clock of the always-on co-processor.
+ * These three are the only entries declared with CLK_IS_CRITICAL.
+ */
+static const struct mtk_mux top_muxes[] = {
+       /* CLK_CFG_0 */
+       MUX_GATE_CLR_SET_UPD_FLAGS(CLK_TOP_AXI, "axi_sel", axi_parents,
+                                  0x20, 0x24, 0x28, 0, 2, 7,
+                                  0x004, 0, CLK_IS_CRITICAL),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_MM, "mm_sel", mm_parents,
+                            0x20, 0x24, 0x28, 8, 3, 15, 0x004, 1),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_SCP, "scp_sel", scp_parents,
+                            0x20, 0x24, 0x28, 16, 3, 23, 0x004, 2),
+       /* CLK_CFG_1 */
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_IMG, "img_sel", img_parents,
+                            0x30, 0x34, 0x38, 0, 3, 7, 0x004, 4),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_IPE, "ipe_sel", ipe_parents,
+                            0x30, 0x34, 0x38, 8, 3, 15, 0x004, 5),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_DPE, "dpe_sel", dpe_parents,
+                            0x30, 0x34, 0x38, 16, 3, 23, 0x004, 6),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_CAM, "cam_sel", cam_parents,
+                            0x30, 0x34, 0x38, 24, 4, 31, 0x004, 7),
+       /* CLK_CFG_2 */
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_CCU, "ccu_sel", ccu_parents,
+                            0x40, 0x44, 0x48, 0, 4, 7, 0x004, 8),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_DSP, "dsp_sel", dsp_parents,
+                            0x40, 0x44, 0x48, 8, 4, 15, 0x004, 9),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_DSP1, "dsp1_sel", dsp1_parents,
+                            0x40, 0x44, 0x48, 16, 4, 23, 0x004, 10),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_DSP2, "dsp2_sel", dsp2_parents,
+                            0x40, 0x44, 0x48, 24, 4, 31, 0x004, 11),
+       /* CLK_CFG_3 */
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_DSP3, "dsp3_sel", dsp3_parents,
+                            0x50, 0x54, 0x58, 0, 4, 7, 0x004, 12),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_IPU_IF, "ipu_if_sel", ipu_if_parents,
+                            0x50, 0x54, 0x58, 8, 4, 15, 0x004, 13),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_MFG, "mfg_sel", mfg_parents,
+                            0x50, 0x54, 0x58, 16, 2, 23, 0x004, 14),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_F52M_MFG, "f52m_mfg_sel",
+                            f52m_mfg_parents, 0x50, 0x54, 0x58,
+                            24, 2, 31, 0x004, 15),
+       /* CLK_CFG_4 */
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_CAMTG, "camtg_sel", camtg_parents,
+                            0x60, 0x64, 0x68, 0, 3, 7, 0x004, 16),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_CAMTG2, "camtg2_sel", camtg2_parents,
+                            0x60, 0x64, 0x68, 8, 3, 15, 0x004, 17),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_CAMTG3, "camtg3_sel", camtg3_parents,
+                            0x60, 0x64, 0x68, 16, 3, 23, 0x004, 18),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_CAMTG4, "camtg4_sel", camtg4_parents,
+                            0x60, 0x64, 0x68, 24, 3, 31, 0x004, 19),
+       /* CLK_CFG_5 */
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_UART, "uart_sel", uart_parents,
+                            0x70, 0x74, 0x78, 0, 1, 7, 0x004, 20),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_SPI, "spi_sel", spi_parents,
+                            0x70, 0x74, 0x78, 8, 2, 15, 0x004, 21),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_MSDC50_0_HCLK, "msdc50_hclk_sel",
+                            msdc50_hclk_parents, 0x70, 0x74, 0x78,
+                            16, 2, 23, 0x004, 22),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_MSDC50_0, "msdc50_0_sel",
+                            msdc50_0_parents, 0x70, 0x74, 0x78,
+                            24, 3, 31, 0x004, 23),
+       /* CLK_CFG_6 */
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_MSDC30_1, "msdc30_1_sel",
+                            msdc30_1_parents, 0x80, 0x84, 0x88,
+                            0, 3, 7, 0x004, 24),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_AUD, "audio_sel", audio_parents,
+                            0x80, 0x84, 0x88, 8, 2, 15, 0x004, 25),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_AUD_INTBUS, "aud_intbus_sel",
+                            aud_intbus_parents, 0x80, 0x84, 0x88,
+                            16, 2, 23, 0x004, 26),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_FPWRAP_ULPOSC, "fpwrap_ulposc_sel",
+                            fpwrap_ulposc_parents, 0x80, 0x84, 0x88,
+                            24, 3, 31, 0x004, 27),
+       /* CLK_CFG_7 */
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_ATB, "atb_sel", atb_parents,
+                            0x90, 0x94, 0x98, 0, 2, 7, 0x004, 28),
+       MUX_GATE_CLR_SET_UPD_FLAGS(CLK_TOP_SSPM, "sspm_sel", sspm_parents,
+                                  0x90, 0x94, 0x98, 8, 3, 15,
+                                  0x004, 29, CLK_IS_CRITICAL),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_DPI0, "dpi0_sel", dpi0_parents,
+                            0x90, 0x94, 0x98, 16, 3, 23, 0x004, 30),
+       /*
+        * NOTE(review): scam_sel ends with 0x004, 0 - the same pair as
+        * axi_sel - while the next entries continue at 0x008, 1. Possibly
+        * 0x008, 0 was intended; confirm against the datasheet.
+        */
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_SCAM, "scam_sel", scam_parents,
+                            0x90, 0x94, 0x98, 24, 1, 31, 0x004, 0),
+       /* CLK_CFG_8 */
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_DISP_PWM, "disppwm_sel",
+                            disppwm_parents, 0xa0, 0xa4, 0xa8,
+                            0, 3, 7, 0x008, 1),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_USB_TOP, "usb_top_sel",
+                            usb_top_parents, 0xa0, 0xa4, 0xa8,
+                            8, 2, 15, 0x008, 2),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_SSUSB_TOP_XHCI, "ssusb_top_xhci_sel",
+                            ssusb_top_xhci_parents, 0xa0, 0xa4, 0xa8,
+                            16, 2, 23, 0x008, 3),
+       MUX_GATE_CLR_SET_UPD_FLAGS(CLK_TOP_SPM, "spm_sel", spm_parents,
+                                  0xa0, 0xa4, 0xa8, 24, 2, 31,
+                                  0x008, 4, CLK_IS_CRITICAL),
+       /* CLK_CFG_9 */
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_I2C, "i2c_sel", i2c_parents,
+                            0xb0, 0xb4, 0xb8, 0, 2, 7, 0x008, 5),
+       /*
+        * NOTE(review): the three seninf*_parents tables have 8 entries
+        * each, but the value after the shift is 2 here, where other
+        * 8-entry muxes use 3 - confirm all parents are selectable.
+        */
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_SENINF, "seninf_sel", seninf_parents,
+                            0xb0, 0xb4, 0xb8, 8, 2, 15, 0x008, 6),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_SENINF1, "seninf1_sel",
+                            seninf1_parents, 0xb0, 0xb4, 0xb8,
+                            16, 2, 23, 0x008, 7),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_SENINF2, "seninf2_sel",
+                            seninf2_parents, 0xb0, 0xb4, 0xb8,
+                            24, 2, 31, 0x008, 8),
+       /* CLK_CFG_10 */
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_DXCC, "dxcc_sel", dxcc_parents,
+                            0xc0, 0xc4, 0xc8, 0, 2, 7, 0x008, 9),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_AUD_ENG1, "aud_eng1_sel",
+                            aud_engen1_parents, 0xc0, 0xc4, 0xc8,
+                            8, 2, 15, 0x008, 10),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_AUD_ENG2, "aud_eng2_sel",
+                            aud_engen2_parents, 0xc0, 0xc4, 0xc8,
+                            16, 2, 23, 0x008, 11),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_FAES_UFSFDE, "faes_ufsfde_sel",
+                            faes_ufsfde_parents, 0xc0, 0xc4, 0xc8,
+                            24, 3, 31,
+                            0x008, 12),
+       /* CLK_CFG_11 */
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_FUFS, "fufs_sel", fufs_parents,
+                            0xd0, 0xd4, 0xd8, 0, 2, 7, 0x008, 13),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_AUD_1, "aud_1_sel", aud_1_parents,
+                            0xd0, 0xd4, 0xd8, 8, 1, 15, 0x008, 14),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_AUD_2, "aud_2_sel", aud_2_parents,
+                            0xd0, 0xd4, 0xd8, 16, 1, 23, 0x008, 15),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_ADSP, "adsp_sel", adsp_parents,
+                            0xd0, 0xd4, 0xd8, 24, 3, 31, 0x008, 16),
+       /* CLK_CFG_12 */
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_DPMAIF, "dpmaif_sel", dpmaif_parents,
+                            0xe0, 0xe4, 0xe8, 0, 3, 7, 0x008, 17),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_VENC, "venc_sel", venc_parents,
+                            0xe0, 0xe4, 0xe8, 8, 4, 15, 0x008, 18),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_VDEC, "vdec_sel", vdec_parents,
+                            0xe0, 0xe4, 0xe8, 16, 4, 23, 0x008, 19),
+       /*
+        * NOTE(review): camtm_sel ends with 0x004, 20 - the same pair as
+        * uart_sel - while its neighbours use 0x008 with 17-19 and 21.
+        * Possibly 0x008, 20 was intended; confirm against the datasheet.
+        */
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_CAMTM, "camtm_sel", camtm_parents,
+                            0xe0, 0xe4, 0xe8, 24, 2, 31, 0x004, 20),
+       /* CLK_CFG_13 */
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_PWM, "pwm_sel", pwm_parents,
+                            0xf0, 0xf4, 0xf8, 0, 1, 7, 0x008, 21),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_AUD_H, "audio_h_sel",
+                            audio_h_parents, 0xf0, 0xf4, 0xf8,
+                            8, 2, 15, 0x008, 22),
+       MUX_GATE_CLR_SET_UPD(CLK_TOP_CAMTG5, "camtg5_sel", camtg5_parents,
+                            0xf0, 0xf4, 0xf8, 24, 3, 31, 0x008, 24),
+};
+
+/* Parent clock names for the "i2s0_m_ck_sel" mux (see top_aud_muxes). */
+static const char * const i2s0_m_ck_parents[] = {
+       "aud_1_sel",
+       "aud_2_sel"
+};
+
+/* Parent clock names for the "i2s1_m_ck_sel" mux (see top_aud_muxes). */
+static const char * const i2s1_m_ck_parents[] = {
+       "aud_1_sel",
+       "aud_2_sel"
+};
+
+/* Parent clock names for the "i2s2_m_ck_sel" mux (see top_aud_muxes). */
+static const char * const i2s2_m_ck_parents[] = {
+       "aud_1_sel",
+       "aud_2_sel"
+};
+
+/* Parent clock names for the "i2s3_m_ck_sel" mux (see top_aud_muxes). */
+static const char * const i2s3_m_ck_parents[] = {
+       "aud_1_sel",
+       "aud_2_sel"
+};
+
+/* Parent clock names for the "i2s4_m_ck_sel" mux (see top_aud_muxes). */
+static const char * const i2s4_m_ck_parents[] = {
+       "aud_1_sel",
+       "aud_2_sel"
+};
+
+/* Parent clock names for the "i2s5_m_ck_sel" mux (see top_aud_muxes). */
+static const char * const i2s5_m_ck_parents[] = {
+       "aud_1_sel",
+       "aud_2_sel"
+};
+
+/*
+ * I2S master-clock muxes, each choosing between "aud_1_sel" and
+ * "aud_2_sel". I2S0-I2S4 use 0x320 with consecutive values 8-12; I2S5
+ * uses 0x328 with 20. The trailing numbers are presumably register
+ * offset, shift and width - confirm against the MUX macro.
+ */
+static const struct mtk_composite top_aud_muxes[] = {
+       MUX(CLK_TOP_I2S0_M_SEL, "i2s0_m_ck_sel", i2s0_m_ck_parents,
+           0x320, 8, 1),
+       MUX(CLK_TOP_I2S1_M_SEL, "i2s1_m_ck_sel", i2s1_m_ck_parents,
+           0x320, 9, 1),
+       MUX(CLK_TOP_I2S2_M_SEL, "i2s2_m_ck_sel", i2s2_m_ck_parents,
+           0x320, 10, 1),
+       MUX(CLK_TOP_I2S3_M_SEL, "i2s3_m_ck_sel", i2s3_m_ck_parents,
+           0x320, 11, 1),
+       MUX(CLK_TOP_I2S4_M_SEL, "i2s4_m_ck_sel", i2s4_m_ck_parents,
+           0x320, 12, 1),
+       MUX(CLK_TOP_I2S5_M_SEL, "i2s5_m_ck_sel", i2s5_m_ck_parents,
+           0x328, 20, 1),
+};
+
+/*
+ * Audio APLL12 divider+gate clocks. div0-div5 are children of the
+ * matching i2sN_m_ck_sel mux; "apll12_divb" is a child of "apll12_div4".
+ * All dividers pass 8 as the second-to-last value except div5, which
+ * passes 4 (presumably the divider field width - confirm against the
+ * DIV_GATE macro).
+ *
+ * NOTE(review): unlike the other tables in this file this array is not
+ * declared const - confirm whether that is required by its consumer.
+ */
+static struct mtk_composite top_aud_divs[] = {
+       DIV_GATE(CLK_TOP_APLL12_DIV0, "apll12_div0", "i2s0_m_ck_sel",
+                0x320, 2, 0x324, 8, 0),
+       DIV_GATE(CLK_TOP_APLL12_DIV1, "apll12_div1", "i2s1_m_ck_sel",
+                0x320, 3, 0x324, 8, 8),
+       DIV_GATE(CLK_TOP_APLL12_DIV2, "apll12_div2", "i2s2_m_ck_sel",
+                0x320, 4, 0x324, 8, 16),
+       DIV_GATE(CLK_TOP_APLL12_DIV3, "apll12_div3", "i2s3_m_ck_sel",
+                0x320, 5, 0x324, 8, 24),
+       DIV_GATE(CLK_TOP_APLL12_DIV4, "apll12_div4", "i2s4_m_ck_sel",
+                0x320, 6, 0x328, 8, 0),
+       DIV_GATE(CLK_TOP_APLL12_DIVB, "apll12_divb", "apll12_div4",
+                0x320, 7, 0x328, 8, 8),
+       DIV_GATE(CLK_TOP_APLL12_DIV5, "apll12_div5", "i2s5_m_ck_sel",
+                0x328, 16, 0x328, 4, 28),
+};
+
+/* Set/clear/status register offsets of the gate bank used by GATE_INFRA0. */
+static const struct mtk_gate_regs infra0_cg_regs = {
+       .set_ofs = 0x80,
+       .clr_ofs = 0x84,
+       .sta_ofs = 0x90,
+};
+
+/* Set/clear/status register offsets of the gate bank used by GATE_INFRA1. */
+static const struct mtk_gate_regs infra1_cg_regs = {
+       .set_ofs = 0x88,
+       .clr_ofs = 0x8c,
+       .sta_ofs = 0x94,
+};
+
+/* Set/clear/status register offsets of the gate bank used by GATE_INFRA2. */
+static const struct mtk_gate_regs infra2_cg_regs = {
+       .set_ofs = 0xa4,
+       .clr_ofs = 0xa8,
+       .sta_ofs = 0xac,
+};
+
+/* Set/clear/status register offsets of the gate bank used by GATE_INFRA3. */
+static const struct mtk_gate_regs infra3_cg_regs = {
+       .set_ofs = 0xc0,
+       .clr_ofs = 0xc4,
+       .sta_ofs = 0xc8,
+};
+
+/*
+ * Declare one infra gate in bank 0..3: each macro forwards to GATE_MTK
+ * with the matching infraN_cg_regs bank and mtk_clk_gate_ops_setclr.
+ */
+#define GATE_INFRA0(_id, _name, _parent, _shift)               \
+       GATE_MTK(_id, _name, _parent, &infra0_cg_regs, _shift,  \
+               &mtk_clk_gate_ops_setclr)
+#define GATE_INFRA1(_id, _name, _parent, _shift)               \
+       GATE_MTK(_id, _name, _parent, &infra1_cg_regs, _shift,  \
+               &mtk_clk_gate_ops_setclr)
+#define GATE_INFRA2(_id, _name, _parent, _shift)               \
+       GATE_MTK(_id, _name, _parent, &infra2_cg_regs, _shift,  \
+               &mtk_clk_gate_ops_setclr)
+#define GATE_INFRA3(_id, _name, _parent, _shift)               \
+       GATE_MTK(_id, _name, _parent, &infra3_cg_regs, _shift,  \
+               &mtk_clk_gate_ops_setclr)
+
+static const struct mtk_gate infra_clks[] = {
+       /* INFRA0 */
+       GATE_INFRA0(CLK_INFRA_PMIC_TMR, "infra_pmic_tmr",
+                   "axi_sel", 0),
+       GATE_INFRA0(CLK_INFRA_PMIC_AP, "infra_pmic_ap",
+                   "axi_sel", 1),
+       GATE_INFRA0(CLK_INFRA_PMIC_MD, "infra_pmic_md",
+                   "axi_sel", 2),
+       GATE_INFRA0(CLK_INFRA_PMIC_CONN, "infra_pmic_conn",
+                   "axi_sel", 3),
+       GATE_INFRA0(CLK_INFRA_SCPSYS, "infra_scp",
+                   "axi_sel", 4),
+       GATE_INFRA0(CLK_INFRA_SEJ, "infra_sej",
+                   "f_f26m_ck", 5),
+       GATE_INFRA0(CLK_INFRA_APXGPT, "infra_apxgpt",
+                   "axi_sel", 6),
+       GATE_INFRA0(CLK_INFRA_ICUSB, "infra_icusb",
+                   "axi_sel", 8),
+       GATE_INFRA0(CLK_INFRA_GCE, "infra_gce",
+                   "axi_sel", 9),
+       GATE_INFRA0(CLK_INFRA_THERM, "infra_therm",
+                   "axi_sel", 10),
+       GATE_INFRA0(CLK_INFRA_I2C0, "infra_i2c0",
+                   "i2c_sel", 11),
+       GATE_INFRA0(CLK_INFRA_I2C1, "infra_i2c1",
+                   "i2c_sel", 12),
+       GATE_INFRA0(CLK_INFRA_I2C2, "infra_i2c2",
+                   "i2c_sel", 13),
+       GATE_INFRA0(CLK_INFRA_I2C3, "infra_i2c3",
+                   "i2c_sel", 14),
+       GATE_INFRA0(CLK_INFRA_PWM_HCLK, "infra_pwm_hclk",
+                   "pwm_sel", 15),
+       GATE_INFRA0(CLK_INFRA_PWM1, "infra_pwm1",
+                   "pwm_sel", 16),
+       GATE_INFRA0(CLK_INFRA_PWM2, "infra_pwm2",
+                   "pwm_sel", 17),
+       GATE_INFRA0(CLK_INFRA_PWM3, "infra_pwm3",
+                   "pwm_sel", 18),
+       GATE_INFRA0(CLK_INFRA_PWM4, "infra_pwm4",
+                   "pwm_sel", 19),
+       GATE_INFRA0(CLK_INFRA_PWM, "infra_pwm",
+                   "pwm_sel", 21),
+       GATE_INFRA0(CLK_INFRA_UART1, "infra_uart1",
+                   "uart_sel", 23),
+       GATE_INFRA0(CLK_INFRA_UART2, "infra_uart2",
+                   "uart_sel", 24),
+       GATE_INFRA0(CLK_INFRA_UART3, "infra_uart3",
+                   "uart_sel", 25),
+       GATE_INFRA0(CLK_INFRA_GCE_26M, "infra_gce_26m",
+                   "axi_sel", 27),
+       GATE_INFRA0(CLK_INFRA_CQ_DMA_FPC, "infra_cqdma_fpc",
+                   "axi_sel", 28),
+       GATE_INFRA0(CLK_INFRA_BTIF, "infra_btif",
+                   "axi_sel", 31),
+       /* INFRA1 */
+       GATE_INFRA1(CLK_INFRA_SPI0, "infra_spi0",
+                   "spi_sel", 1),
+       GATE_INFRA1(CLK_INFRA_MSDC0, "infra_msdc0",
+                   "msdc50_hclk_sel", 2),
+       GATE_INFRA1(CLK_INFRA_MSDC1, "infra_msdc1",
+                   "axi_sel", 4),
+       GATE_INFRA1(CLK_INFRA_MSDC2, "infra_msdc2",
+                   "axi_sel", 5),
+       GATE_INFRA1(CLK_INFRA_MSDC0_SCK, "infra_msdc0_sck",
+                   "msdc50_0_sel", 6),
+       GATE_INFRA1(CLK_INFRA_DVFSRC, "infra_dvfsrc",
+                   "f_f26m_ck", 7),
+       GATE_INFRA1(CLK_INFRA_GCPU, "infra_gcpu",
+                   "axi_sel", 8),
+       GATE_INFRA1(CLK_INFRA_TRNG, "infra_trng",
+                   "axi_sel", 9),
+       GATE_INFRA1(CLK_INFRA_AUXADC, "infra_auxadc",
+                   "f_f26m_ck", 10),
+       GATE_INFRA1(CLK_INFRA_CPUM, "infra_cpum",
+                   "axi_sel", 11),
+       GATE_INFRA1(CLK_INFRA_CCIF1_AP, "infra_ccif1_ap",
+                   "axi_sel", 12),
+       GATE_INFRA1(CLK_INFRA_CCIF1_MD, "infra_ccif1_md",
+                   "axi_sel", 13),
+       GATE_INFRA1(CLK_INFRA_AUXADC_MD, "infra_auxadc_md",
+                   "f_f26m_ck", 14),
+       GATE_INFRA1(CLK_INFRA_MSDC1_SCK, "infra_msdc1_sck",
+                   "msdc30_1_sel", 16),
+       GATE_INFRA1(CLK_INFRA_MSDC2_SCK, "infra_msdc2_sck",
+                   "msdc30_2_sel", 17),
+       GATE_INFRA1(CLK_INFRA_AP_DMA, "infra_apdma",
+                   "axi_sel", 18),
+       GATE_INFRA1(CLK_INFRA_XIU, "infra_xiu",
+                   "axi_sel", 19),
+       GATE_INFRA1(CLK_INFRA_DEVICE_APC, "infra_device_apc",
+                   "axi_sel", 20),
+       GATE_INFRA1(CLK_INFRA_CCIF_AP, "infra_ccif_ap",
+                   "axi_sel", 23),
+       GATE_INFRA1(CLK_INFRA_DEBUGSYS, "infra_debugsys",
+                   "axi_sel", 24),
+       GATE_INFRA1(CLK_INFRA_AUD, "infra_audio",
+                   "axi_sel", 25),
+       GATE_INFRA1(CLK_INFRA_CCIF_MD, "infra_ccif_md",
+                   "axi_sel", 26),
+       GATE_INFRA1(CLK_INFRA_DXCC_SEC_CORE, "infra_dxcc_sec_core",
+                   "dxcc_sel", 27),
+       GATE_INFRA1(CLK_INFRA_DXCC_AO, "infra_dxcc_ao",
+                   "dxcc_sel", 28),
+       GATE_INFRA1(CLK_INFRA_DEVMPU_BCLK, "infra_devmpu_bclk",
+                   "axi_sel", 30),
+       GATE_INFRA1(CLK_INFRA_DRAMC_F26M, "infra_dramc_f26m",
+                   "f_f26m_ck", 31),
+       /* INFRA2 */
+       GATE_INFRA2(CLK_INFRA_IRTX, "infra_irtx",
+                   "f_f26m_ck", 0),
+       GATE_INFRA2(CLK_INFRA_USB, "infra_usb",
+                   "usb_top_sel", 1),
+       GATE_INFRA2(CLK_INFRA_DISP_PWM, "infra_disppwm",
+                   "axi_sel", 2),
+       GATE_INFRA2(CLK_INFRA_AUD_26M_BCLK,
+                   "infracfg_ao_audio_26m_bclk", "f_f26m_ck", 4),
+       GATE_INFRA2(CLK_INFRA_SPI1, "infra_spi1",
+                   "spi_sel", 6),
+       GATE_INFRA2(CLK_INFRA_I2C4, "infra_i2c4",
+                   "i2c_sel", 7),
+       GATE_INFRA2(CLK_INFRA_MODEM_TEMP_SHARE, "infra_md_tmp_share",
+                   "f_f26m_ck", 8),
+       GATE_INFRA2(CLK_INFRA_SPI2, "infra_spi2",
+                   "spi_sel", 9),
+       GATE_INFRA2(CLK_INFRA_SPI3, "infra_spi3",
+                   "spi_sel", 10),
+       GATE_INFRA2(CLK_INFRA_UNIPRO_SCK, "infra_unipro_sck",
+                   "fufs_sel", 11),
+       GATE_INFRA2(CLK_INFRA_UNIPRO_TICK, "infra_unipro_tick",
+                   "fufs_sel", 12),
+       GATE_INFRA2(CLK_INFRA_UFS_MP_SAP_BCLK, "infra_ufs_mp_sap_bck",
+                   "fufs_sel", 13),
+       GATE_INFRA2(CLK_INFRA_MD32_BCLK, "infra_md32_bclk",
+                   "axi_sel", 14),
+       GATE_INFRA2(CLK_INFRA_UNIPRO_MBIST, "infra_unipro_mbist",
+                   "axi_sel", 16),
+       GATE_INFRA2(CLK_INFRA_SSPM_BUS_HCLK, "infra_sspm_bus_hclk",
+                   "axi_sel", 17),
+       GATE_INFRA2(CLK_INFRA_I2C5, "infra_i2c5",
+                   "i2c_sel", 18),
+       GATE_INFRA2(CLK_INFRA_I2C5_ARBITER, "infra_i2c5_arbiter",
+                   "i2c_sel", 19),
+       GATE_INFRA2(CLK_INFRA_I2C5_IMM, "infra_i2c5_imm",
+                   "i2c_sel", 20),
+       GATE_INFRA2(CLK_INFRA_I2C1_ARBITER, "infra_i2c1_arbiter",
+                   "i2c_sel", 21),
+       GATE_INFRA2(CLK_INFRA_I2C1_IMM, "infra_i2c1_imm",
+                   "i2c_sel", 22),
+       GATE_INFRA2(CLK_INFRA_I2C2_ARBITER, "infra_i2c2_arbiter",
+                   "i2c_sel", 23),
+       GATE_INFRA2(CLK_INFRA_I2C2_IMM, "infra_i2c2_imm",
+                   "i2c_sel", 24),
+       GATE_INFRA2(CLK_INFRA_SPI4, "infra_spi4",
+                   "spi_sel", 25),
+       GATE_INFRA2(CLK_INFRA_SPI5, "infra_spi5",
+                   "spi_sel", 26),
+       GATE_INFRA2(CLK_INFRA_CQ_DMA, "infra_cqdma",
+                   "axi_sel", 27),
+       GATE_INFRA2(CLK_INFRA_UFS, "infra_ufs",
+                   "fufs_sel", 28),
+       GATE_INFRA2(CLK_INFRA_AES_UFSFDE, "infra_aes_ufsfde",
+                   "faes_ufsfde_sel", 29),
+       GATE_INFRA2(CLK_INFRA_UFS_TICK, "infra_ufs_tick",
+                   "fufs_sel", 30),
+       GATE_INFRA2(CLK_INFRA_SSUSB_XHCI, "infra_ssusb_xhci",
+                   "ssusb_top_xhci_sel", 31),
+       /* INFRA3 */
+       GATE_INFRA3(CLK_INFRA_MSDC0_SELF, "infra_msdc0_self",
+                   "msdc50_0_sel", 0),
+       GATE_INFRA3(CLK_INFRA_MSDC1_SELF, "infra_msdc1_self",
+                   "msdc50_0_sel", 1),
+       GATE_INFRA3(CLK_INFRA_MSDC2_SELF, "infra_msdc2_self",
+                   "msdc50_0_sel", 2),
+       GATE_INFRA3(CLK_INFRA_SSPM_26M_SELF, "infra_sspm_26m_self",
+                   "f_f26m_ck", 3),
+       GATE_INFRA3(CLK_INFRA_SSPM_32K_SELF, "infra_sspm_32k_self",
+                   "f_f26m_ck", 4),
+       GATE_INFRA3(CLK_INFRA_UFS_AXI, "infra_ufs_axi",
+                   "axi_sel", 5),
+       GATE_INFRA3(CLK_INFRA_I2C6, "infra_i2c6",
+                   "i2c_sel", 6),
+       GATE_INFRA3(CLK_INFRA_AP_MSDC0, "infra_ap_msdc0",
+                   "msdc50_hclk_sel", 7),
+       GATE_INFRA3(CLK_INFRA_MD_MSDC0, "infra_md_msdc0",
+                   "msdc50_hclk_sel", 8),
+       GATE_INFRA3(CLK_INFRA_CCIF2_AP, "infra_ccif2_ap",
+                   "axi_sel", 16),
+       GATE_INFRA3(CLK_INFRA_CCIF2_MD, "infra_ccif2_md",
+                   "axi_sel", 17),
+       GATE_INFRA3(CLK_INFRA_CCIF3_AP, "infra_ccif3_ap",
+                   "axi_sel", 18),
+       GATE_INFRA3(CLK_INFRA_CCIF3_MD, "infra_ccif3_md",
+                   "axi_sel", 19),
+       GATE_INFRA3(CLK_INFRA_SEJ_F13M, "infra_sej_f13m",
+                   "f_f26m_ck", 20),
+       GATE_INFRA3(CLK_INFRA_AES_BCLK, "infra_aes_bclk",
+                   "axi_sel", 21),
+       GATE_INFRA3(CLK_INFRA_I2C7, "infra_i2c7",
+                   "i2c_sel", 22),
+       GATE_INFRA3(CLK_INFRA_I2C8, "infra_i2c8",
+                   "i2c_sel", 23),
+       GATE_INFRA3(CLK_INFRA_FBIST2FPC, "infra_fbist2fpc",
+                   "msdc50_0_sel", 24),
+       GATE_INFRA3(CLK_INFRA_DPMAIF_CK, "infra_dpmaif",
+                   "dpmaif_sel", 26),
+       GATE_INFRA3(CLK_INFRA_FADSP, "infra_fadsp",
+                   "adsp_sel", 27),
+       GATE_INFRA3(CLK_INFRA_CCIF4_AP, "infra_ccif4_ap",
+                   "axi_sel", 28),
+       GATE_INFRA3(CLK_INFRA_CCIF4_MD, "infra_ccif4_md",
+                   "axi_sel", 29),
+       GATE_INFRA3(CLK_INFRA_SPI6, "infra_spi6",
+                   "spi_sel", 30),
+       GATE_INFRA3(CLK_INFRA_SPI7, "infra_spi7",
+                   "spi_sel", 31),
+};
+/*
+ * Gate register layout for the apmixed gates: the set, clear and status
+ * offsets all name the same register (0x20). Used by GATE_APMIXED_FLAGS()
+ * together with mtk_clk_gate_ops_no_setclr_inv.
+ */
+static const struct mtk_gate_regs apmixed_cg_regs = {
+       .set_ofs = 0x20,
+       .clr_ofs = 0x20,
+       .sta_ofs = 0x20,
+};
+/*
+ * GATE_APMIXED_FLAGS() - describe one apmixed gate with explicit clock flags.
+ * Expands to GATE_MTK_FLAGS() bound to apmixed_cg_regs and
+ * mtk_clk_gate_ops_no_setclr_inv; _shift is the gate's bit position.
+ */
+#define GATE_APMIXED_FLAGS(_id, _name, _parent, _shift, _flags)        \
+       GATE_MTK_FLAGS(_id, _name, _parent, &apmixed_cg_regs,           \
+               _shift, &mtk_clk_gate_ops_no_setclr_inv, _flags)
+/* GATE_APMIXED() - GATE_APMIXED_FLAGS() with the flags argument fixed to 0. */
+#define GATE_APMIXED(_id, _name, _parent, _shift)      \
+       GATE_APMIXED_FLAGS(_id, _name, _parent, _shift, 0)
+
+/*
+ * Gates of the apmixed block; every entry is parented to "f_f26m_ck".
+ *
+ * CRITICAL CLOCK:
+ * apmixed_appll26m is the top-most clock gate of all PLLs, which is why it
+ * is the only entry registered with CLK_IS_CRITICAL.
+ */
+static const struct mtk_gate apmixed_clks[] = {
+       GATE_APMIXED(CLK_APMIXED_SSUSB26M, "apmixed_ssusb26m",
+                    "f_f26m_ck", 4),
+       GATE_APMIXED_FLAGS(CLK_APMIXED_APPLL26M, "apmixed_appll26m",
+                          "f_f26m_ck", 5, CLK_IS_CRITICAL),
+       GATE_APMIXED(CLK_APMIXED_MIPIC0_26M, "apmixed_mipic026m",
+                    "f_f26m_ck", 6),
+       GATE_APMIXED(CLK_APMIXED_MDPLLGP26M, "apmixed_mdpll26m",
+                    "f_f26m_ck", 7),
+       GATE_APMIXED(CLK_APMIXED_MM_F26M, "apmixed_mmsys26m",
+                    "f_f26m_ck", 8),
+       GATE_APMIXED(CLK_APMIXED_UFS26M, "apmixed_ufs26m",
+                    "f_f26m_ck", 9),
+       GATE_APMIXED(CLK_APMIXED_MIPIC1_26M, "apmixed_mipic126m",
+                    "f_f26m_ck", 11),
+       GATE_APMIXED(CLK_APMIXED_MEMPLL26M, "apmixed_mempll26m",
+                    "f_f26m_ck", 13),
+       GATE_APMIXED(CLK_APMIXED_CLKSQ_LVPLL_26M, "apmixed_lvpll26m",
+                    "f_f26m_ck", 14),
+       GATE_APMIXED(CLK_APMIXED_MIPID0_26M, "apmixed_mipid026m",
+                    "f_f26m_ck", 16),
+       GATE_APMIXED(CLK_APMIXED_MIPID1_26M, "apmixed_mipid126m",
+                    "f_f26m_ck", 17),
+};
+/* Frequency bounds that PLL_B() stores in .fmax/.fmin of every PLL entry. */
+#define MT6779_PLL_FMAX                (3800UL * MHZ)
+#define MT6779_PLL_FMIN                (1500UL * MHZ)
+/*
+ * PLL_B() - designated initializer for one PLL table entry (see plls[],
+ * an array of struct mtk_pll_data). Every argument is copied to the field
+ * of the same name; .fmax and .fmin are not arguments and always take the
+ * MT6779 limits above. _div_table may be NULL.
+ */
+#define PLL_B(_id, _name, _reg, _pwr_reg, _en_mask, _flags,            \
+                       _rst_bar_mask, _pcwbits, _pcwibits, _pd_reg,    \
+                       _pd_shift, _tuner_reg,  _tuner_en_reg,          \
+                       _tuner_en_bit, _pcw_reg, _pcw_shift,            \
+                       _pcw_chg_reg, _div_table) {                     \
+               .id = _id,                                              \
+               .name = _name,                                          \
+               .reg = _reg,                                            \
+               .pwr_reg = _pwr_reg,                                    \
+               .en_mask = _en_mask,                                    \
+               .flags = _flags,                                        \
+               .rst_bar_mask = _rst_bar_mask,                          \
+               .fmax = MT6779_PLL_FMAX,                                \
+               .fmin = MT6779_PLL_FMIN,                                \
+               .pcwbits = _pcwbits,                                    \
+               .pcwibits = _pcwibits,                                  \
+               .pd_reg = _pd_reg,                                      \
+               .pd_shift = _pd_shift,                                  \
+               .tuner_reg = _tuner_reg,                                \
+               .tuner_en_reg = _tuner_en_reg,                          \
+               .tuner_en_bit = _tuner_en_bit,                          \
+               .pcw_reg = _pcw_reg,                                    \
+               .pcw_shift = _pcw_shift,                                \
+               .pcw_chg_reg = _pcw_chg_reg,                            \
+               .div_table = _div_table,                                \
+       }
+/* PLL() - PLL_B() for entries without a divider table (.div_table = NULL). */
+#define PLL(_id, _name, _reg, _pwr_reg, _en_mask, _flags,              \
+                       _rst_bar_mask, _pcwbits, _pcwibits, _pd_reg,    \
+                       _pd_shift, _tuner_reg, _tuner_en_reg,           \
+                       _tuner_en_bit, _pcw_reg, _pcw_shift,            \
+                       _pcw_chg_reg)                                   \
+               PLL_B(_id, _name, _reg, _pwr_reg, _en_mask, _flags,     \
+                       _rst_bar_mask, _pcwbits, _pcwibits, _pd_reg,    \
+                       _pd_shift, _tuner_reg, _tuner_en_reg,           \
+                       _tuner_en_bit, _pcw_reg, _pcw_shift,            \
+                       _pcw_chg_reg, NULL)
+/*
+ * PLLs registered by clk_mt6779_apmixed_probe(). Arguments follow the PLL()
+ * order: id, name, reg, pwr_reg, en_mask, flags, rst_bar_mask, pcwbits,
+ * pcwibits, pd_reg, pd_shift, tuner_reg, tuner_en_reg, tuner_en_bit,
+ * pcw_reg, pcw_shift, pcw_chg_reg.
+ *
+ * Only apll1/apll2 use 32 pcw bits, a non-zero tuner_en_reg and a non-zero
+ * pcw_chg_reg; every other entry uses 22 pcw bits and passes 0 for the
+ * tuner and pcw_chg fields.
+ */
+static const struct mtk_pll_data plls[] = {
+       PLL(CLK_APMIXED_ARMPLL_LL, "armpll_ll", 0x0200, 0x020C, BIT(0),
+           PLL_AO, 0, 22, 8, 0x0204, 24, 0, 0, 0, 0x0204, 0, 0),
+       PLL(CLK_APMIXED_ARMPLL_BL, "armpll_bl", 0x0210, 0x021C, BIT(0),
+           PLL_AO, 0, 22, 8, 0x0214, 24, 0, 0, 0, 0x0214, 0, 0),
+       PLL(CLK_APMIXED_CCIPLL, "ccipll", 0x02A0, 0x02AC, BIT(0),
+           PLL_AO, 0, 22, 8, 0x02A4, 24, 0, 0, 0, 0x02A4, 0, 0),
+       PLL(CLK_APMIXED_MAINPLL, "mainpll", 0x0230, 0x023C, BIT(0),
+           (HAVE_RST_BAR), BIT(24), 22, 8, 0x0234, 24, 0, 0, 0,
+           0x0234, 0, 0),
+       PLL(CLK_APMIXED_UNIV2PLL, "univ2pll", 0x0240, 0x024C, BIT(0),
+           (HAVE_RST_BAR), BIT(24), 22, 8, 0x0244, 24,
+           0, 0, 0, 0x0244, 0, 0),
+       PLL(CLK_APMIXED_MFGPLL, "mfgpll", 0x0250, 0x025C, BIT(0),
+           0, 0, 22, 8, 0x0254, 24, 0, 0, 0, 0x0254, 0, 0),
+       PLL(CLK_APMIXED_MSDCPLL, "msdcpll", 0x0260, 0x026C, BIT(0),
+           0, 0, 22, 8, 0x0264, 24, 0, 0, 0, 0x0264, 0, 0),
+       PLL(CLK_APMIXED_TVDPLL, "tvdpll", 0x0270, 0x027C, BIT(0),
+           0, 0, 22, 8, 0x0274, 24, 0, 0, 0, 0x0274, 0, 0),
+       PLL(CLK_APMIXED_ADSPPLL, "adsppll", 0x02b0, 0x02bC, BIT(0),
+           (HAVE_RST_BAR), BIT(23), 22, 8, 0x02b4, 24,
+           0, 0, 0, 0x02b4, 0, 0),
+       PLL(CLK_APMIXED_MMPLL, "mmpll", 0x0280, 0x028C, BIT(0),
+           (HAVE_RST_BAR), BIT(23), 22, 8, 0x0284, 24,
+           0, 0, 0, 0x0284, 0, 0),
+       PLL(CLK_APMIXED_APLL1, "apll1", 0x02C0, 0x02D0, BIT(0),
+           0, 0, 32, 8, 0x02C0, 1, 0, 0x14, 0, 0x02C4, 0, 0x2C0),
+       PLL(CLK_APMIXED_APLL2, "apll2", 0x02D4, 0x02E4, BIT(0),
+           0, 0, 32, 8, 0x02D4, 1, 0, 0x14, 1, 0x02D8, 0, 0x02D4),
+};
+
+/*
+ * Probe the MT6779 apmixed block: register its PLLs and gates, then expose
+ * them through a onecell clock provider on the device's OF node.
+ *
+ * Returns 0 on success, -ENOMEM if the clock table cannot be allocated, or
+ * the error returned by of_clk_add_provider().
+ */
+static int clk_mt6779_apmixed_probe(struct platform_device *pdev)
+{
+       struct clk_onecell_data *clk_data;
+       struct device_node *node = pdev->dev.of_node;
+
+       clk_data = mtk_alloc_clk_data(CLK_APMIXED_NR_CLK);
+       /* Allocation can fail; the registration helpers dereference clk_data. */
+       if (!clk_data)
+               return -ENOMEM;
+
+       mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data);
+
+       mtk_clk_register_gates(node, apmixed_clks,
+                              ARRAY_SIZE(apmixed_clks), clk_data);
+
+       return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+/*
+ * Probe the MT6779 topckgen block: map its registers, register the fixed
+ * clocks, fixed factors, muxes and audio composites, then add a onecell
+ * clock provider for the device's OF node.
+ *
+ * Returns 0 on success, the devm_ioremap_resource() error if the register
+ * window cannot be mapped, -ENOMEM if the clock table cannot be allocated,
+ * or the error returned by of_clk_add_provider().
+ */
+static int clk_mt6779_top_probe(struct platform_device *pdev)
+{
+       struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       void __iomem *base;
+       struct clk_onecell_data *clk_data;
+       struct device_node *node = pdev->dev.of_node;
+
+       base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(base))
+               return PTR_ERR(base);
+
+       clk_data = mtk_alloc_clk_data(CLK_TOP_NR_CLK);
+       /* Allocation can fail; the registration helpers dereference clk_data. */
+       if (!clk_data)
+               return -ENOMEM;
+
+       mtk_clk_register_fixed_clks(top_fixed_clks, ARRAY_SIZE(top_fixed_clks),
+                                   clk_data);
+
+       mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data);
+
+       mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes),
+                              node, &mt6779_clk_lock, clk_data);
+
+       mtk_clk_register_composites(top_aud_muxes, ARRAY_SIZE(top_aud_muxes),
+                                   base, &mt6779_clk_lock, clk_data);
+
+       mtk_clk_register_composites(top_aud_divs, ARRAY_SIZE(top_aud_divs),
+                                   base, &mt6779_clk_lock, clk_data);
+
+       return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+/*
+ * Probe the MT6779 infracfg_ao block: register the infra gates and expose
+ * them through a onecell clock provider on the device's OF node.
+ *
+ * Returns 0 on success, -ENOMEM if the clock table cannot be allocated, or
+ * the error returned by of_clk_add_provider().
+ */
+static int clk_mt6779_infra_probe(struct platform_device *pdev)
+{
+       struct clk_onecell_data *clk_data;
+       struct device_node *node = pdev->dev.of_node;
+
+       clk_data = mtk_alloc_clk_data(CLK_INFRA_NR_CLK);
+       /* Allocation can fail; mtk_clk_register_gates() dereferences clk_data. */
+       if (!clk_data)
+               return -ENOMEM;
+
+       mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks),
+                              clk_data);
+
+       return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+/*
+ * Compatible strings handled by this driver. Each entry's .data holds the
+ * block-specific probe function that clk_mt6779_probe() retrieves with
+ * of_device_get_match_data() and calls.
+ */
+static const struct of_device_id of_match_clk_mt6779[] = {
+       {
+               .compatible = "mediatek,mt6779-apmixed",
+               .data = clk_mt6779_apmixed_probe,
+       }, {
+               .compatible = "mediatek,mt6779-topckgen",
+               .data = clk_mt6779_top_probe,
+       }, {
+               .compatible = "mediatek,mt6779-infracfg_ao",
+               .data = clk_mt6779_infra_probe,
+       }, {
+               /* sentinel */
+       }
+};
+
+/*
+ * Common platform probe: look up the block-specific probe function stored
+ * as OF match data and run it.
+ *
+ * Returns -EINVAL when no match data is present; otherwise returns the
+ * block probe's result, logging an error first if it is non-zero.
+ */
+static int clk_mt6779_probe(struct platform_device *pdev)
+{
+       int (*block_probe)(struct platform_device *pdev);
+       int ret;
+
+       block_probe = of_device_get_match_data(&pdev->dev);
+       if (block_probe == NULL)
+               return -EINVAL;
+
+       ret = block_probe(pdev);
+       if (ret == 0)
+               return 0;
+
+       dev_err(&pdev->dev,
+               "could not register clock provider: %s: %d\n",
+               pdev->name, ret);
+
+       return ret;
+}
+/*
+ * Platform driver matched through of_match_clk_mt6779; every matching
+ * device enters via clk_mt6779_probe(). No .remove callback is set.
+ */
+static struct platform_driver clk_mt6779_drv = {
+       .probe = clk_mt6779_probe,
+       .driver = {
+               .name = "clk-mt6779",
+               .of_match_table = of_match_clk_mt6779,
+       },
+};
+/* Register clk_mt6779_drv; invoked through the arch_initcall() hook below. */
+static int __init clk_mt6779_init(void)
+{
+       return platform_driver_register(&clk_mt6779_drv);
+}
+
+arch_initcall(clk_mt6779_init);
index 99a6b02..37b4162 100644 (file)
@@ -5,6 +5,7 @@
 
 #include <linux/clk-provider.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 
 #include "clk-mtk.h"
 #include "clk-gate.h"
@@ -30,10 +31,12 @@ static int clk_mt8183_mfg_probe(struct platform_device *pdev)
        struct clk_onecell_data *clk_data;
        struct device_node *node = pdev->dev.of_node;
 
+       pm_runtime_enable(&pdev->dev);
+
        clk_data = mtk_alloc_clk_data(CLK_MFG_NR_CLK);
 
-       mtk_clk_register_gates(node, mfg_clks, ARRAY_SIZE(mfg_clks),
-                       clk_data);
+       mtk_clk_register_gates_with_dev(node, mfg_clks, ARRAY_SIZE(mfg_clks),
+                       clk_data, &pdev->dev);
 
        return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
 }
index 73b7e23..51c8d5c 100644 (file)
@@ -17,6 +17,9 @@
 
 #include <dt-bindings/clock/mt8183-clk.h>
 
+/* Infra global controller reset set register */
+#define INFRA_RST0_SET_OFFSET          0x120
+
 static DEFINE_SPINLOCK(mt8183_clk_lock);
 
 static const struct mtk_fixed_clk top_fixed_clks[] = {
@@ -1001,6 +1004,20 @@ static const struct mtk_gate infra_clks[] = {
                "msdc50_0_sel", 24),
 };
 
+static const struct mtk_gate_regs peri_cg_regs = {
+       .set_ofs = 0x20c,       /* set, clr and sta all name the same register */
+       .clr_ofs = 0x20c,
+       .sta_ofs = 0x20c,
+};
+/*
+ * GATE_PERI() - describe one pericfg gate: GATE_MTK() bound to peri_cg_regs
+ * and mtk_clk_gate_ops_no_setclr_inv; _shift is the gate's bit position.
+ */
+#define GATE_PERI(_id, _name, _parent, _shift)                 \
+       GATE_MTK(_id, _name, _parent, &peri_cg_regs, _shift,    \
+               &mtk_clk_gate_ops_no_setclr_inv)
+/* Gates registered by clk_mt8183_peri_probe(): a single gate at bit 31. */
+static const struct mtk_gate peri_clks[] = {
+       GATE_PERI(CLK_PERI_AXI, "peri_axi", "axi_sel", 31),
+};
+
 static const struct mtk_gate_regs apmixed_cg_regs = {
        .set_ofs = 0x20,
        .clr_ofs = 0x20,
@@ -1207,12 +1224,36 @@ static int clk_mt8183_infra_probe(struct platform_device *pdev)
 {
        struct clk_onecell_data *clk_data;
        struct device_node *node = pdev->dev.of_node;
+       int r;
 
        clk_data = mtk_alloc_clk_data(CLK_INFRA_NR_CLK);
 
        mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks),
                clk_data);
 
+       r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+       if (r) {
+               dev_err(&pdev->dev,
+                       "%s(): could not register clock provider: %d\n",
+                       __func__, r);
+               return r;
+       }
+
+       mtk_register_reset_controller_set_clr(node, 4, INFRA_RST0_SET_OFFSET);
+
+       return r;
+}
+
+/*
+ * Probe the MT8183 pericfg block: register the peri gates and expose them
+ * through a onecell clock provider on the device's OF node.
+ *
+ * Returns 0 on success, -ENOMEM if the clock table cannot be allocated, or
+ * the error returned by of_clk_add_provider().
+ */
+static int clk_mt8183_peri_probe(struct platform_device *pdev)
+{
+       struct clk_onecell_data *clk_data;
+       struct device_node *node = pdev->dev.of_node;
+
+       clk_data = mtk_alloc_clk_data(CLK_PERI_NR_CLK);
+       /* Allocation can fail; mtk_clk_register_gates() dereferences clk_data. */
+       if (!clk_data)
+               return -ENOMEM;
+
+       mtk_clk_register_gates(node, peri_clks, ARRAY_SIZE(peri_clks),
+                              clk_data);
+
        return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
 }
 
@@ -1246,6 +1287,9 @@ static const struct of_device_id of_match_clk_mt8183[] = {
                .compatible = "mediatek,mt8183-infracfg",
                .data = clk_mt8183_infra_probe,
        }, {
+               .compatible = "mediatek,mt8183-pericfg",
+               .data = clk_mt8183_peri_probe,
+       }, {
                .compatible = "mediatek,mt8183-mcucfg",
                .data = clk_mt8183_mcu_probe,
        }, {
index d28790c..cec1c8a 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/delay.h>
 #include <linux/clkdev.h>
 #include <linux/mfd/syscon.h>
+#include <linux/device.h>
 
 #include "clk-mtk.h"
 #include "clk-gate.h"
@@ -93,9 +94,10 @@ void mtk_clk_register_factors(const struct mtk_fixed_factor *clks,
        }
 }
 
-int mtk_clk_register_gates(struct device_node *node,
+int mtk_clk_register_gates_with_dev(struct device_node *node,
                const struct mtk_gate *clks,
-               int num, struct clk_onecell_data *clk_data)
+               int num, struct clk_onecell_data *clk_data,
+               struct device *dev)
 {
        int i;
        struct clk *clk;
@@ -122,7 +124,7 @@ int mtk_clk_register_gates(struct device_node *node,
                                gate->regs->set_ofs,
                                gate->regs->clr_ofs,
                                gate->regs->sta_ofs,
-                               gate->shift, gate->ops, gate->flags);
+                               gate->shift, gate->ops, gate->flags, dev);
 
                if (IS_ERR(clk)) {
                        pr_err("Failed to register clk %s: %ld\n",
@@ -136,6 +138,14 @@ int mtk_clk_register_gates(struct device_node *node,
        return 0;
 }
 
+/*
+ * Register @num gates from @clks without an associated struct device.
+ * Thin wrapper that forwards to mtk_clk_register_gates_with_dev() with a
+ * NULL device and returns its result.
+ */
+int mtk_clk_register_gates(struct device_node *node,
+               const struct mtk_gate *clks,
+               int num, struct clk_onecell_data *clk_data)
+{
+       struct device *no_dev = NULL;
+
+       return mtk_clk_register_gates_with_dev(node, clks, num, clk_data,
+                                              no_dev);
+}
+
 struct clk *mtk_clk_register_composite(const struct mtk_composite *mc,
                void __iomem *base, spinlock_t *lock)
 {
index 733a11d..c3d6756 100644 (file)
@@ -169,6 +169,11 @@ int mtk_clk_register_gates(struct device_node *node,
                        const struct mtk_gate *clks, int num,
                        struct clk_onecell_data *clk_data);
 
+int mtk_clk_register_gates_with_dev(struct device_node *node,
+               const struct mtk_gate *clks,
+               int num, struct clk_onecell_data *clk_data,
+               struct device *dev);
+
 struct mtk_clk_divider {
        int id;
        const char *name;
@@ -240,4 +245,7 @@ struct clk *mtk_clk_register_ref2usb_tx(const char *name,
 void mtk_register_reset_controller(struct device_node *np,
                        unsigned int num_regs, int regofs);
 
+void mtk_register_reset_controller_set_clr(struct device_node *np,
+       unsigned int num_regs, int regofs);
+
 #endif /* __DRV_CLK_MTK_H */
index d8376b9..cb939c0 100644 (file)
@@ -19,6 +19,24 @@ struct mtk_reset {
        struct reset_controller_dev rcdev;
 };
 
+/*
+ * Assert reset line @id on a controller that uses separate set/clear
+ * registers.
+ *
+ * Reset lines are grouped 32 per bank; bank (id / 32) starts at
+ * data->regofs + bank * 0x10 and this function writes to the bank's first
+ * register. The value written selects the line inside the bank, so it must
+ * be BIT(id % 32): writing a constant 1 would always hit line 0 of the bank.
+ *
+ * Returns the result of regmap_write().
+ */
+static int mtk_reset_assert_set_clr(struct reset_controller_dev *rcdev,
+       unsigned long id)
+{
+       struct mtk_reset *data = container_of(rcdev, struct mtk_reset, rcdev);
+       unsigned int reg = data->regofs + ((id / 32) << 4);
+
+       return regmap_write(data->regmap, reg, BIT(id % 32));
+}
+
+/*
+ * Deassert reset line @id on a controller that uses separate set/clear
+ * registers.
+ *
+ * Reset lines are grouped 32 per bank; bank (id / 32) starts at
+ * data->regofs + bank * 0x10 and this function writes to the register at
+ * offset 0x4 inside the bank. The value written selects the line inside the
+ * bank, so it must be BIT(id % 32): writing a constant 1 would always hit
+ * line 0 of the bank.
+ *
+ * Returns the result of regmap_write().
+ */
+static int mtk_reset_deassert_set_clr(struct reset_controller_dev *rcdev,
+       unsigned long id)
+{
+       struct mtk_reset *data = container_of(rcdev, struct mtk_reset, rcdev);
+       unsigned int reg = data->regofs + ((id / 32) << 4) + 0x4;
+
+       return regmap_write(data->regmap, reg, BIT(id % 32));
+}
+
 static int mtk_reset_assert(struct reset_controller_dev *rcdev,
                              unsigned long id)
 {
@@ -49,14 +67,32 @@ static int mtk_reset(struct reset_controller_dev *rcdev,
        return mtk_reset_deassert(rcdev, id);
 }
 
+/*
+ * Pulse reset line @id: assert it and, only if that succeeded, deassert it.
+ * Returns the first non-zero error, or the deassert result.
+ */
+static int mtk_reset_set_clr(struct reset_controller_dev *rcdev,
+       unsigned long id)
+{
+       int err = mtk_reset_assert_set_clr(rcdev, id);
+
+       return err ? err : mtk_reset_deassert_set_clr(rcdev, id);
+}
+
 static const struct reset_control_ops mtk_reset_ops = {
        .assert = mtk_reset_assert,
        .deassert = mtk_reset_deassert,
        .reset = mtk_reset,
 };
 
-void mtk_register_reset_controller(struct device_node *np,
-                       unsigned int num_regs, int regofs)
+static const struct reset_control_ops mtk_reset_ops_set_clr = {
+       .assert = mtk_reset_assert_set_clr,     /* writes the bank's first register */
+       .deassert = mtk_reset_deassert_set_clr, /* writes the register at bank + 0x4 */
+       .reset = mtk_reset_set_clr,             /* assert followed by deassert */
+};
+
+static void mtk_register_reset_controller_common(struct device_node *np,
+                       unsigned int num_regs, int regofs,
+                       const struct reset_control_ops *reset_ops)
 {
        struct mtk_reset *data;
        int ret;
@@ -77,7 +113,7 @@ void mtk_register_reset_controller(struct device_node *np,
        data->regofs = regofs;
        data->rcdev.owner = THIS_MODULE;
        data->rcdev.nr_resets = num_regs * 32;
-       data->rcdev.ops = &mtk_reset_ops;
+       data->rcdev.ops = reset_ops;
        data->rcdev.of_node = np;
 
        ret = reset_controller_register(&data->rcdev);
@@ -87,3 +123,17 @@ void mtk_register_reset_controller(struct device_node *np,
                return;
        }
 }
+
+/*
+ * Register a reset controller for @np that uses the mtk_reset_ops
+ * callbacks. @num_regs and @regofs are passed through unchanged to
+ * mtk_register_reset_controller_common().
+ */
+void mtk_register_reset_controller(struct device_node *np,
+       unsigned int num_regs, int regofs)
+{
+       const struct reset_control_ops *ops = &mtk_reset_ops;
+
+       mtk_register_reset_controller_common(np, num_regs, regofs, ops);
+}
+
+/*
+ * Register a reset controller for @np that uses the set/clear register
+ * callbacks in mtk_reset_ops_set_clr. @num_regs and @regofs are passed
+ * through unchanged to mtk_register_reset_controller_common().
+ */
+void mtk_register_reset_controller_set_clr(struct device_node *np,
+       unsigned int num_regs, int regofs)
+{
+       const struct reset_control_ops *ops = &mtk_reset_ops_set_clr;
+
+       mtk_register_reset_controller_common(np, num_regs, regofs, ops);
+}
index 741df7e..18b23cd 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/reset.h>
+#include <linux/reset-controller.h>
 #include <linux/slab.h>
 
 #include "axg-audio.h"
@@ -918,6 +919,84 @@ static int devm_clk_get_enable(struct device *dev, char *id)
        return 0;
 }
 
+struct axg_audio_reset_data {
+       struct reset_controller_dev rstc;       /* embedded; callbacks recover this struct via container_of() */
+       struct regmap *map;                     /* regmap through which the reset bits are accessed */
+       unsigned int offset;                    /* added to the computed register offset of every reset line */
+};
+
+/*
+ * Translate reset line @id into a register offset and a bit position.
+ *
+ * Each register carries (regmap stride * BITS_PER_BYTE) reset lines.
+ * @reg receives rst->offset plus the stride-aligned offset of the register
+ * holding @id; @bit receives the line's position inside that register.
+ */
+static void axg_audio_reset_reg_and_bit(struct axg_audio_reset_data *rst,
+                                       unsigned long id,
+                                       unsigned int *reg,
+                                       unsigned int *bit)
+{
+       unsigned int stride = regmap_get_reg_stride(rst->map);
+       unsigned int lines_per_reg = stride * BITS_PER_BYTE;
+       unsigned long reg_index = id / lines_per_reg;
+
+       *reg = reg_index * stride;
+       *reg += rst->offset;
+       *bit = id % lines_per_reg;
+}
+
+/*
+ * Set (@assert true) or clear (@assert false) the bit of reset line @id.
+ *
+ * Returns the result of regmap_update_bits() so that a failed register
+ * access is reported to the reset framework instead of being masked as
+ * success.
+ */
+static int axg_audio_reset_update(struct reset_controller_dev *rcdev,
+                               unsigned long id, bool assert)
+{
+       struct axg_audio_reset_data *rst =
+               container_of(rcdev, struct axg_audio_reset_data, rstc);
+       unsigned int offset, bit;
+
+       axg_audio_reset_reg_and_bit(rst, id, &offset, &bit);
+
+       return regmap_update_bits(rst->map, offset, BIT(bit),
+                                 assert ? BIT(bit) : 0);
+}
+
+/*
+ * Report the state of reset line @id.
+ *
+ * Returns 1 if the line's bit is set, 0 if it is clear, or the error code
+ * from regmap_read() if the register cannot be read (in which case the
+ * value buffer is never inspected).
+ */
+static int axg_audio_reset_status(struct reset_controller_dev *rcdev,
+                               unsigned long id)
+{
+       struct axg_audio_reset_data *rst =
+               container_of(rcdev, struct axg_audio_reset_data, rstc);
+       unsigned int val, offset, bit;
+       int ret;
+
+       axg_audio_reset_reg_and_bit(rst, id, &offset, &bit);
+
+       /* val is only valid when the read succeeded. */
+       ret = regmap_read(rst->map, offset, &val);
+       if (ret)
+               return ret;
+
+       return !!(val & BIT(bit));
+}
+/* .assert callback: axg_audio_reset_update() with assert = true; returns its result. */
+static int axg_audio_reset_assert(struct reset_controller_dev *rcdev,
+                               unsigned long id)
+{
+       return axg_audio_reset_update(rcdev, id, true);
+}
+/* .deassert callback: axg_audio_reset_update() with assert = false; returns its result. */
+static int axg_audio_reset_deassert(struct reset_controller_dev *rcdev,
+                               unsigned long id)
+{
+       return axg_audio_reset_update(rcdev, id, false);
+}
+
+/*
+ * Pulse reset line @id: assert it and, only if that succeeded, deassert it.
+ * Returns the first non-zero error, or the deassert result.
+ */
+static int axg_audio_reset_toggle(struct reset_controller_dev *rcdev,
+                               unsigned long id)
+{
+       int err = axg_audio_reset_assert(rcdev, id);
+
+       return err ? err : axg_audio_reset_deassert(rcdev, id);
+}
+/* Reset controller callbacks installed on rst->rstc.ops by axg_audio_clkc_probe(). */
+static const struct reset_control_ops axg_audio_rstc_ops = {
+       .assert = axg_audio_reset_assert,
+       .deassert = axg_audio_reset_deassert,
+       .reset = axg_audio_reset_toggle,
+       .status = axg_audio_reset_status,
+};
+
 static const struct regmap_config axg_audio_regmap_cfg = {
        .reg_bits       = 32,
        .val_bits       = 32,
@@ -927,12 +1006,15 @@ static const struct regmap_config axg_audio_regmap_cfg = {
 
 struct audioclk_data {
        struct clk_hw_onecell_data *hw_onecell_data;
+       unsigned int reset_offset;
+       unsigned int reset_num;
 };
 
 static int axg_audio_clkc_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        const struct audioclk_data *data;
+       struct axg_audio_reset_data *rst;
        struct regmap *map;
        struct resource *res;
        void __iomem *regs;
@@ -971,21 +1053,43 @@ static int axg_audio_clkc_probe(struct platform_device *pdev)
 
        /* Take care to skip the registered input clocks */
        for (i = AUD_CLKID_DDR_ARB; i < data->hw_onecell_data->num; i++) {
+               const char *name;
+
                hw = data->hw_onecell_data->hws[i];
                /* array might be sparse */
                if (!hw)
                        continue;
 
+               name = hw->init->name;
+
                ret = devm_clk_hw_register(dev, hw);
                if (ret) {
-                       dev_err(dev, "failed to register clock %s\n",
-                               hw->init->name);
+                       dev_err(dev, "failed to register clock %s\n", name);
                        return ret;
                }
        }
 
-       return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get,
-                                          data->hw_onecell_data);
+       ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get,
+                                       data->hw_onecell_data);
+       if (ret)
+               return ret;
+
+       /* Stop here if there is no reset */
+       if (!data->reset_num)
+               return 0;
+
+       rst = devm_kzalloc(dev, sizeof(*rst), GFP_KERNEL);
+       if (!rst)
+               return -ENOMEM;
+
+       rst->map = map;
+       rst->offset = data->reset_offset;
+       rst->rstc.nr_resets = data->reset_num;
+       rst->rstc.ops = &axg_audio_rstc_ops;
+       rst->rstc.of_node = dev->of_node;
+       rst->rstc.owner = THIS_MODULE;
+
+       return devm_reset_controller_register(dev, &rst->rstc);
 }
 
 static const struct audioclk_data axg_audioclk_data = {
@@ -994,6 +1098,8 @@ static const struct audioclk_data axg_audioclk_data = {
 
 static const struct audioclk_data g12a_audioclk_data = {
        .hw_onecell_data = &g12a_audio_hw_onecell_data,
+       .reset_offset = AUDIO_SW_RESET,
+       .reset_num = 26,
 };
 
 static const struct of_device_id clkc_match_table[] = {
index 5d972d5..c00e28b 100644 (file)
@@ -22,6 +22,7 @@
 #define AUDIO_MCLK_F_CTRL      0x018
 #define AUDIO_MST_PAD_CTRL0    0x01c
 #define AUDIO_MST_PAD_CTRL1    0x020
+#define AUDIO_SW_RESET         0x024
 #define AUDIO_MST_A_SCLK_CTRL0 0x040
 #define AUDIO_MST_A_SCLK_CTRL1 0x044
 #define AUDIO_MST_B_SCLK_CTRL0 0x048
index c3f0ffc..ea4c791 100644 (file)
@@ -676,6 +676,226 @@ static struct clk_regmap g12b_cpub_clk = {
        },
 };
 
+static struct clk_regmap sm1_gp1_pll;
+
+/* Datasheet names this field as "premux0" */
+static struct clk_regmap sm1_dsu_clk_premux0 = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_SYS_CPU_CLK_CNTL5,
+               .mask = 0x3,
+               .shift = 0,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "dsu_clk_dyn0_sel",
+               .ops = &clk_regmap_mux_ro_ops,
+               .parent_data = (const struct clk_parent_data []) {
+                       { .fw_name = "xtal", },
+                       { .hw = &g12a_fclk_div2.hw },
+                       { .hw = &g12a_fclk_div3.hw },
+                       { .hw = &sm1_gp1_pll.hw },
+               },
+               .num_parents = 4,
+       },
+};
+
+/* Datasheet names this field as "premux1" */
+static struct clk_regmap sm1_dsu_clk_premux1 = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_SYS_CPU_CLK_CNTL5,
+               .mask = 0x3,
+               .shift = 16,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "dsu_clk_dyn1_sel",
+               .ops = &clk_regmap_mux_ro_ops,
+               .parent_data = (const struct clk_parent_data []) {
+                       { .fw_name = "xtal", },
+                       { .hw = &g12a_fclk_div2.hw },
+                       { .hw = &g12a_fclk_div3.hw },
+                       { .hw = &sm1_gp1_pll.hw },
+               },
+               .num_parents = 4,
+       },
+};
+
+/* Datasheet names this field as "Mux0_divn_tcnt" */
+static struct clk_regmap sm1_dsu_clk_mux0_div = {
+       .data = &(struct clk_regmap_div_data){
+               .offset = HHI_SYS_CPU_CLK_CNTL5,
+               .shift = 4,
+               .width = 6,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "dsu_clk_dyn0_div",
+               .ops = &clk_regmap_divider_ro_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &sm1_dsu_clk_premux0.hw
+               },
+               .num_parents = 1,
+       },
+};
+
+/* Datasheet names this field as "postmux0" */
+static struct clk_regmap sm1_dsu_clk_postmux0 = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_SYS_CPU_CLK_CNTL5,
+               .mask = 0x1,
+               .shift = 2,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "dsu_clk_dyn0",
+               .ops = &clk_regmap_mux_ro_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &sm1_dsu_clk_premux0.hw,
+                       &sm1_dsu_clk_mux0_div.hw,
+               },
+               .num_parents = 2,
+       },
+};
+
+/* Datasheet names this field as "Mux1_divn_tcnt" */
+static struct clk_regmap sm1_dsu_clk_mux1_div = {
+       .data = &(struct clk_regmap_div_data){
+               .offset = HHI_SYS_CPU_CLK_CNTL5,
+               .shift = 20,
+               .width = 6,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "dsu_clk_dyn1_div",
+               .ops = &clk_regmap_divider_ro_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &sm1_dsu_clk_premux1.hw
+               },
+               .num_parents = 1,
+       },
+};
+
+/* Datasheet names this field as "postmux1" */
+static struct clk_regmap sm1_dsu_clk_postmux1 = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_SYS_CPU_CLK_CNTL5,
+               .mask = 0x1,
+               .shift = 18,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "dsu_clk_dyn1",
+               .ops = &clk_regmap_mux_ro_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &sm1_dsu_clk_premux1.hw,
+                       &sm1_dsu_clk_mux1_div.hw,
+               },
+               .num_parents = 2,
+       },
+};
+
+/* Datasheet names this field as "Final_dyn_mux_sel" */
+static struct clk_regmap sm1_dsu_clk_dyn = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_SYS_CPU_CLK_CNTL5,
+               .mask = 0x1,
+               .shift = 10,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "dsu_clk_dyn",
+               .ops = &clk_regmap_mux_ro_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &sm1_dsu_clk_postmux0.hw,
+                       &sm1_dsu_clk_postmux1.hw,
+               },
+               .num_parents = 2,
+       },
+};
+
+/* Datasheet names this field as "Final_mux_sel" */
+static struct clk_regmap sm1_dsu_final_clk = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_SYS_CPU_CLK_CNTL5,
+               .mask = 0x1,
+               .shift = 11,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "dsu_clk_final",
+               .ops = &clk_regmap_mux_ro_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &sm1_dsu_clk_dyn.hw,
+                       &g12a_sys_pll.hw,
+               },
+               .num_parents = 2,
+       },
+};
+
+/* Datasheet names this field as "Cpu_clk_sync_mux_sel" bit 0 */
+static struct clk_regmap sm1_cpu1_clk = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_SYS_CPU_CLK_CNTL6,
+               .mask = 0x1,
+               .shift = 24,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "cpu1_clk",
+               .ops = &clk_regmap_mux_ro_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &g12a_cpu_clk.hw,
+                       /* This CPU also has a dedicated clock tree */
+               },
+               .num_parents = 1,
+       },
+};
+
+/* Datasheet names this field as "Cpu_clk_sync_mux_sel" bit 1 */
+static struct clk_regmap sm1_cpu2_clk = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_SYS_CPU_CLK_CNTL6,
+               .mask = 0x1,
+               .shift = 25,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "cpu2_clk",
+               .ops = &clk_regmap_mux_ro_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &g12a_cpu_clk.hw,
+                       /* This CPU also has a dedicated clock tree */
+               },
+               .num_parents = 1,
+       },
+};
+
+/* Datasheet names this field as "Cpu_clk_sync_mux_sel" bit 2 */
+static struct clk_regmap sm1_cpu3_clk = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_SYS_CPU_CLK_CNTL6,
+               .mask = 0x1,
+               .shift = 26,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "cpu3_clk",
+               .ops = &clk_regmap_mux_ro_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &g12a_cpu_clk.hw,
+                       /* This CPU also has a dedicated clock tree */
+               },
+               .num_parents = 1,
+       },
+};
+
+/* Datasheet names this field as "Cpu_clk_sync_mux_sel" bit 4 */
+static struct clk_regmap sm1_dsu_clk = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_SYS_CPU_CLK_CNTL6,
+               .mask = 0x1,
+               .shift = 27,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "dsu_clk",
+               .ops = &clk_regmap_mux_ro_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &g12a_cpu_clk.hw,
+                       &sm1_dsu_final_clk.hw,
+               },
+               .num_parents = 2,
+       },
+};
+
 static int g12a_cpu_clk_mux_notifier_cb(struct notifier_block *nb,
                                        unsigned long event, void *data)
 {
@@ -1443,6 +1663,69 @@ static struct clk_regmap g12a_gp0_pll = {
        },
 };
 
+static struct clk_regmap sm1_gp1_pll_dco = {
+       .data = &(struct meson_clk_pll_data){
+               .en = {
+                       .reg_off = HHI_GP1_PLL_CNTL0,
+                       .shift   = 28,
+                       .width   = 1,
+               },
+               .m = {
+                       .reg_off = HHI_GP1_PLL_CNTL0,
+                       .shift   = 0,
+                       .width   = 8,
+               },
+               .n = {
+                       .reg_off = HHI_GP1_PLL_CNTL0,
+                       .shift   = 10,
+                       .width   = 5,
+               },
+               .frac = {
+                       .reg_off = HHI_GP1_PLL_CNTL1,
+                       .shift   = 0,
+                       .width   = 17,
+               },
+               .l = {
+                       .reg_off = HHI_GP1_PLL_CNTL0,
+                       .shift   = 31,
+                       .width   = 1,
+               },
+               .rst = {
+                       .reg_off = HHI_GP1_PLL_CNTL0,
+                       .shift   = 29,
+                       .width   = 1,
+               },
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "gp1_pll_dco",
+               .ops = &meson_clk_pll_ro_ops,
+               .parent_data = &(const struct clk_parent_data) {
+                       .fw_name = "xtal",
+               },
+               .num_parents = 1,
+               /* This clock feeds the DSU, avoid disabling it */
+               .flags = CLK_IS_CRITICAL,
+       },
+};
+
+static struct clk_regmap sm1_gp1_pll = {
+       .data = &(struct clk_regmap_div_data){
+               .offset = HHI_GP1_PLL_CNTL0,
+               .shift = 16,
+               .width = 3,
+               .flags = (CLK_DIVIDER_POWER_OF_TWO |
+                         CLK_DIVIDER_ROUND_CLOSEST),
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "gp1_pll",
+               .ops = &clk_regmap_divider_ro_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &sm1_gp1_pll_dco.hw
+               },
+               .num_parents = 1,
+       },
+};
+
 /*
  * Internal hifi pll emulation configuration parameters
  */
@@ -4121,6 +4404,240 @@ static struct clk_hw_onecell_data g12b_hw_onecell_data = {
        .num = NR_CLKS,
 };
 
+static struct clk_hw_onecell_data sm1_hw_onecell_data = {
+       .hws = {
+               [CLKID_SYS_PLL]                 = &g12a_sys_pll.hw,
+               [CLKID_FIXED_PLL]               = &g12a_fixed_pll.hw,
+               [CLKID_FCLK_DIV2]               = &g12a_fclk_div2.hw,
+               [CLKID_FCLK_DIV3]               = &g12a_fclk_div3.hw,
+               [CLKID_FCLK_DIV4]               = &g12a_fclk_div4.hw,
+               [CLKID_FCLK_DIV5]               = &g12a_fclk_div5.hw,
+               [CLKID_FCLK_DIV7]               = &g12a_fclk_div7.hw,
+               [CLKID_FCLK_DIV2P5]             = &g12a_fclk_div2p5.hw,
+               [CLKID_GP0_PLL]                 = &g12a_gp0_pll.hw,
+               [CLKID_MPEG_SEL]                = &g12a_mpeg_clk_sel.hw,
+               [CLKID_MPEG_DIV]                = &g12a_mpeg_clk_div.hw,
+               [CLKID_CLK81]                   = &g12a_clk81.hw,
+               [CLKID_MPLL0]                   = &g12a_mpll0.hw,
+               [CLKID_MPLL1]                   = &g12a_mpll1.hw,
+               [CLKID_MPLL2]                   = &g12a_mpll2.hw,
+               [CLKID_MPLL3]                   = &g12a_mpll3.hw,
+               [CLKID_DDR]                     = &g12a_ddr.hw,
+               [CLKID_DOS]                     = &g12a_dos.hw,
+               [CLKID_AUDIO_LOCKER]            = &g12a_audio_locker.hw,
+               [CLKID_MIPI_DSI_HOST]           = &g12a_mipi_dsi_host.hw,
+               [CLKID_ETH_PHY]                 = &g12a_eth_phy.hw,
+               [CLKID_ISA]                     = &g12a_isa.hw,
+               [CLKID_PL301]                   = &g12a_pl301.hw,
+               [CLKID_PERIPHS]                 = &g12a_periphs.hw,
+               [CLKID_SPICC0]                  = &g12a_spicc_0.hw,
+               [CLKID_I2C]                     = &g12a_i2c.hw,
+               [CLKID_SANA]                    = &g12a_sana.hw,
+               [CLKID_SD]                      = &g12a_sd.hw,
+               [CLKID_RNG0]                    = &g12a_rng0.hw,
+               [CLKID_UART0]                   = &g12a_uart0.hw,
+               [CLKID_SPICC1]                  = &g12a_spicc_1.hw,
+               [CLKID_HIU_IFACE]               = &g12a_hiu_reg.hw,
+               [CLKID_MIPI_DSI_PHY]            = &g12a_mipi_dsi_phy.hw,
+               [CLKID_ASSIST_MISC]             = &g12a_assist_misc.hw,
+               [CLKID_SD_EMMC_A]               = &g12a_emmc_a.hw,
+               [CLKID_SD_EMMC_B]               = &g12a_emmc_b.hw,
+               [CLKID_SD_EMMC_C]               = &g12a_emmc_c.hw,
+               [CLKID_AUDIO_CODEC]             = &g12a_audio_codec.hw,
+               [CLKID_AUDIO]                   = &g12a_audio.hw,
+               [CLKID_ETH]                     = &g12a_eth_core.hw,
+               [CLKID_DEMUX]                   = &g12a_demux.hw,
+               [CLKID_AUDIO_IFIFO]             = &g12a_audio_ififo.hw,
+               [CLKID_ADC]                     = &g12a_adc.hw,
+               [CLKID_UART1]                   = &g12a_uart1.hw,
+               [CLKID_G2D]                     = &g12a_g2d.hw,
+               [CLKID_RESET]                   = &g12a_reset.hw,
+               [CLKID_PCIE_COMB]               = &g12a_pcie_comb.hw,
+               [CLKID_PARSER]                  = &g12a_parser.hw,
+               [CLKID_USB]                     = &g12a_usb_general.hw,
+               [CLKID_PCIE_PHY]                = &g12a_pcie_phy.hw,
+               [CLKID_AHB_ARB0]                = &g12a_ahb_arb0.hw,
+               [CLKID_AHB_DATA_BUS]            = &g12a_ahb_data_bus.hw,
+               [CLKID_AHB_CTRL_BUS]            = &g12a_ahb_ctrl_bus.hw,
+               [CLKID_HTX_HDCP22]              = &g12a_htx_hdcp22.hw,
+               [CLKID_HTX_PCLK]                = &g12a_htx_pclk.hw,
+               [CLKID_BT656]                   = &g12a_bt656.hw,
+               [CLKID_USB1_DDR_BRIDGE]         = &g12a_usb1_to_ddr.hw,
+               [CLKID_MMC_PCLK]                = &g12a_mmc_pclk.hw,
+               [CLKID_UART2]                   = &g12a_uart2.hw,
+               [CLKID_VPU_INTR]                = &g12a_vpu_intr.hw,
+               [CLKID_GIC]                     = &g12a_gic.hw,
+               [CLKID_SD_EMMC_A_CLK0_SEL]      = &g12a_sd_emmc_a_clk0_sel.hw,
+               [CLKID_SD_EMMC_A_CLK0_DIV]      = &g12a_sd_emmc_a_clk0_div.hw,
+               [CLKID_SD_EMMC_A_CLK0]          = &g12a_sd_emmc_a_clk0.hw,
+               [CLKID_SD_EMMC_B_CLK0_SEL]      = &g12a_sd_emmc_b_clk0_sel.hw,
+               [CLKID_SD_EMMC_B_CLK0_DIV]      = &g12a_sd_emmc_b_clk0_div.hw,
+               [CLKID_SD_EMMC_B_CLK0]          = &g12a_sd_emmc_b_clk0.hw,
+               [CLKID_SD_EMMC_C_CLK0_SEL]      = &g12a_sd_emmc_c_clk0_sel.hw,
+               [CLKID_SD_EMMC_C_CLK0_DIV]      = &g12a_sd_emmc_c_clk0_div.hw,
+               [CLKID_SD_EMMC_C_CLK0]          = &g12a_sd_emmc_c_clk0.hw,
+               [CLKID_MPLL0_DIV]               = &g12a_mpll0_div.hw,
+               [CLKID_MPLL1_DIV]               = &g12a_mpll1_div.hw,
+               [CLKID_MPLL2_DIV]               = &g12a_mpll2_div.hw,
+               [CLKID_MPLL3_DIV]               = &g12a_mpll3_div.hw,
+               [CLKID_FCLK_DIV2_DIV]           = &g12a_fclk_div2_div.hw,
+               [CLKID_FCLK_DIV3_DIV]           = &g12a_fclk_div3_div.hw,
+               [CLKID_FCLK_DIV4_DIV]           = &g12a_fclk_div4_div.hw,
+               [CLKID_FCLK_DIV5_DIV]           = &g12a_fclk_div5_div.hw,
+               [CLKID_FCLK_DIV7_DIV]           = &g12a_fclk_div7_div.hw,
+               [CLKID_FCLK_DIV2P5_DIV]         = &g12a_fclk_div2p5_div.hw,
+               [CLKID_HIFI_PLL]                = &g12a_hifi_pll.hw,
+               [CLKID_VCLK2_VENCI0]            = &g12a_vclk2_venci0.hw,
+               [CLKID_VCLK2_VENCI1]            = &g12a_vclk2_venci1.hw,
+               [CLKID_VCLK2_VENCP0]            = &g12a_vclk2_vencp0.hw,
+               [CLKID_VCLK2_VENCP1]            = &g12a_vclk2_vencp1.hw,
+               [CLKID_VCLK2_VENCT0]            = &g12a_vclk2_venct0.hw,
+               [CLKID_VCLK2_VENCT1]            = &g12a_vclk2_venct1.hw,
+               [CLKID_VCLK2_OTHER]             = &g12a_vclk2_other.hw,
+               [CLKID_VCLK2_ENCI]              = &g12a_vclk2_enci.hw,
+               [CLKID_VCLK2_ENCP]              = &g12a_vclk2_encp.hw,
+               [CLKID_DAC_CLK]                 = &g12a_dac_clk.hw,
+               [CLKID_AOCLK]                   = &g12a_aoclk_gate.hw,
+               [CLKID_IEC958]                  = &g12a_iec958_gate.hw,
+               [CLKID_ENC480P]                 = &g12a_enc480p.hw,
+               [CLKID_RNG1]                    = &g12a_rng1.hw,
+               [CLKID_VCLK2_ENCT]              = &g12a_vclk2_enct.hw,
+               [CLKID_VCLK2_ENCL]              = &g12a_vclk2_encl.hw,
+               [CLKID_VCLK2_VENCLMMC]          = &g12a_vclk2_venclmmc.hw,
+               [CLKID_VCLK2_VENCL]             = &g12a_vclk2_vencl.hw,
+               [CLKID_VCLK2_OTHER1]            = &g12a_vclk2_other1.hw,
+               [CLKID_FIXED_PLL_DCO]           = &g12a_fixed_pll_dco.hw,
+               [CLKID_SYS_PLL_DCO]             = &g12a_sys_pll_dco.hw,
+               [CLKID_GP0_PLL_DCO]             = &g12a_gp0_pll_dco.hw,
+               [CLKID_HIFI_PLL_DCO]            = &g12a_hifi_pll_dco.hw,
+               [CLKID_DMA]                     = &g12a_dma.hw,
+               [CLKID_EFUSE]                   = &g12a_efuse.hw,
+               [CLKID_ROM_BOOT]                = &g12a_rom_boot.hw,
+               [CLKID_RESET_SEC]               = &g12a_reset_sec.hw,
+               [CLKID_SEC_AHB_APB3]            = &g12a_sec_ahb_apb3.hw,
+               [CLKID_MPLL_PREDIV]             = &g12a_mpll_prediv.hw,
+               [CLKID_VPU_0_SEL]               = &g12a_vpu_0_sel.hw,
+               [CLKID_VPU_0_DIV]               = &g12a_vpu_0_div.hw,
+               [CLKID_VPU_0]                   = &g12a_vpu_0.hw,
+               [CLKID_VPU_1_SEL]               = &g12a_vpu_1_sel.hw,
+               [CLKID_VPU_1_DIV]               = &g12a_vpu_1_div.hw,
+               [CLKID_VPU_1]                   = &g12a_vpu_1.hw,
+               [CLKID_VPU]                     = &g12a_vpu.hw,
+               [CLKID_VAPB_0_SEL]              = &g12a_vapb_0_sel.hw,
+               [CLKID_VAPB_0_DIV]              = &g12a_vapb_0_div.hw,
+               [CLKID_VAPB_0]                  = &g12a_vapb_0.hw,
+               [CLKID_VAPB_1_SEL]              = &g12a_vapb_1_sel.hw,
+               [CLKID_VAPB_1_DIV]              = &g12a_vapb_1_div.hw,
+               [CLKID_VAPB_1]                  = &g12a_vapb_1.hw,
+               [CLKID_VAPB_SEL]                = &g12a_vapb_sel.hw,
+               [CLKID_VAPB]                    = &g12a_vapb.hw,
+               [CLKID_HDMI_PLL_DCO]            = &g12a_hdmi_pll_dco.hw,
+               [CLKID_HDMI_PLL_OD]             = &g12a_hdmi_pll_od.hw,
+               [CLKID_HDMI_PLL_OD2]            = &g12a_hdmi_pll_od2.hw,
+               [CLKID_HDMI_PLL]                = &g12a_hdmi_pll.hw,
+               [CLKID_VID_PLL]                 = &g12a_vid_pll_div.hw,
+               [CLKID_VID_PLL_SEL]             = &g12a_vid_pll_sel.hw,
+               [CLKID_VID_PLL_DIV]             = &g12a_vid_pll.hw,
+               [CLKID_VCLK_SEL]                = &g12a_vclk_sel.hw,
+               [CLKID_VCLK2_SEL]               = &g12a_vclk2_sel.hw,
+               [CLKID_VCLK_INPUT]              = &g12a_vclk_input.hw,
+               [CLKID_VCLK2_INPUT]             = &g12a_vclk2_input.hw,
+               [CLKID_VCLK_DIV]                = &g12a_vclk_div.hw,
+               [CLKID_VCLK2_DIV]               = &g12a_vclk2_div.hw,
+               [CLKID_VCLK]                    = &g12a_vclk.hw,
+               [CLKID_VCLK2]                   = &g12a_vclk2.hw,
+               [CLKID_VCLK_DIV1]               = &g12a_vclk_div1.hw,
+               [CLKID_VCLK_DIV2_EN]            = &g12a_vclk_div2_en.hw,
+               [CLKID_VCLK_DIV4_EN]            = &g12a_vclk_div4_en.hw,
+               [CLKID_VCLK_DIV6_EN]            = &g12a_vclk_div6_en.hw,
+               [CLKID_VCLK_DIV12_EN]           = &g12a_vclk_div12_en.hw,
+               [CLKID_VCLK2_DIV1]              = &g12a_vclk2_div1.hw,
+               [CLKID_VCLK2_DIV2_EN]           = &g12a_vclk2_div2_en.hw,
+               [CLKID_VCLK2_DIV4_EN]           = &g12a_vclk2_div4_en.hw,
+               [CLKID_VCLK2_DIV6_EN]           = &g12a_vclk2_div6_en.hw,
+               [CLKID_VCLK2_DIV12_EN]          = &g12a_vclk2_div12_en.hw,
+               [CLKID_VCLK_DIV2]               = &g12a_vclk_div2.hw,
+               [CLKID_VCLK_DIV4]               = &g12a_vclk_div4.hw,
+               [CLKID_VCLK_DIV6]               = &g12a_vclk_div6.hw,
+               [CLKID_VCLK_DIV12]              = &g12a_vclk_div12.hw,
+               [CLKID_VCLK2_DIV2]              = &g12a_vclk2_div2.hw,
+               [CLKID_VCLK2_DIV4]              = &g12a_vclk2_div4.hw,
+               [CLKID_VCLK2_DIV6]              = &g12a_vclk2_div6.hw,
+               [CLKID_VCLK2_DIV12]             = &g12a_vclk2_div12.hw,
+               [CLKID_CTS_ENCI_SEL]            = &g12a_cts_enci_sel.hw,
+               [CLKID_CTS_ENCP_SEL]            = &g12a_cts_encp_sel.hw,
+               [CLKID_CTS_VDAC_SEL]            = &g12a_cts_vdac_sel.hw,
+               [CLKID_HDMI_TX_SEL]             = &g12a_hdmi_tx_sel.hw,
+               [CLKID_CTS_ENCI]                = &g12a_cts_enci.hw,
+               [CLKID_CTS_ENCP]                = &g12a_cts_encp.hw,
+               [CLKID_CTS_VDAC]                = &g12a_cts_vdac.hw,
+               [CLKID_HDMI_TX]                 = &g12a_hdmi_tx.hw,
+               [CLKID_HDMI_SEL]                = &g12a_hdmi_sel.hw,
+               [CLKID_HDMI_DIV]                = &g12a_hdmi_div.hw,
+               [CLKID_HDMI]                    = &g12a_hdmi.hw,
+               [CLKID_MALI_0_SEL]              = &g12a_mali_0_sel.hw,
+               [CLKID_MALI_0_DIV]              = &g12a_mali_0_div.hw,
+               [CLKID_MALI_0]                  = &g12a_mali_0.hw,
+               [CLKID_MALI_1_SEL]              = &g12a_mali_1_sel.hw,
+               [CLKID_MALI_1_DIV]              = &g12a_mali_1_div.hw,
+               [CLKID_MALI_1]                  = &g12a_mali_1.hw,
+               [CLKID_MALI]                    = &g12a_mali.hw,
+               [CLKID_MPLL_50M_DIV]            = &g12a_mpll_50m_div.hw,
+               [CLKID_MPLL_50M]                = &g12a_mpll_50m.hw,
+               [CLKID_SYS_PLL_DIV16_EN]        = &g12a_sys_pll_div16_en.hw,
+               [CLKID_SYS_PLL_DIV16]           = &g12a_sys_pll_div16.hw,
+               [CLKID_CPU_CLK_DYN0_SEL]        = &g12a_cpu_clk_premux0.hw,
+               [CLKID_CPU_CLK_DYN0_DIV]        = &g12a_cpu_clk_mux0_div.hw,
+               [CLKID_CPU_CLK_DYN0]            = &g12a_cpu_clk_postmux0.hw,
+               [CLKID_CPU_CLK_DYN1_SEL]        = &g12a_cpu_clk_premux1.hw,
+               [CLKID_CPU_CLK_DYN1_DIV]        = &g12a_cpu_clk_mux1_div.hw,
+               [CLKID_CPU_CLK_DYN1]            = &g12a_cpu_clk_postmux1.hw,
+               [CLKID_CPU_CLK_DYN]             = &g12a_cpu_clk_dyn.hw,
+               [CLKID_CPU_CLK]                 = &g12a_cpu_clk.hw,
+               [CLKID_CPU_CLK_DIV16_EN]        = &g12a_cpu_clk_div16_en.hw,
+               [CLKID_CPU_CLK_DIV16]           = &g12a_cpu_clk_div16.hw,
+               [CLKID_CPU_CLK_APB_DIV]         = &g12a_cpu_clk_apb_div.hw,
+               [CLKID_CPU_CLK_APB]             = &g12a_cpu_clk_apb.hw,
+               [CLKID_CPU_CLK_ATB_DIV]         = &g12a_cpu_clk_atb_div.hw,
+               [CLKID_CPU_CLK_ATB]             = &g12a_cpu_clk_atb.hw,
+               [CLKID_CPU_CLK_AXI_DIV]         = &g12a_cpu_clk_axi_div.hw,
+               [CLKID_CPU_CLK_AXI]             = &g12a_cpu_clk_axi.hw,
+               [CLKID_CPU_CLK_TRACE_DIV]       = &g12a_cpu_clk_trace_div.hw,
+               [CLKID_CPU_CLK_TRACE]           = &g12a_cpu_clk_trace.hw,
+               [CLKID_PCIE_PLL_DCO]            = &g12a_pcie_pll_dco.hw,
+               [CLKID_PCIE_PLL_DCO_DIV2]       = &g12a_pcie_pll_dco_div2.hw,
+               [CLKID_PCIE_PLL_OD]             = &g12a_pcie_pll_od.hw,
+               [CLKID_PCIE_PLL]                = &g12a_pcie_pll.hw,
+               [CLKID_VDEC_1_SEL]              = &g12a_vdec_1_sel.hw,
+               [CLKID_VDEC_1_DIV]              = &g12a_vdec_1_div.hw,
+               [CLKID_VDEC_1]                  = &g12a_vdec_1.hw,
+               [CLKID_VDEC_HEVC_SEL]           = &g12a_vdec_hevc_sel.hw,
+               [CLKID_VDEC_HEVC_DIV]           = &g12a_vdec_hevc_div.hw,
+               [CLKID_VDEC_HEVC]               = &g12a_vdec_hevc.hw,
+               [CLKID_VDEC_HEVCF_SEL]          = &g12a_vdec_hevcf_sel.hw,
+               [CLKID_VDEC_HEVCF_DIV]          = &g12a_vdec_hevcf_div.hw,
+               [CLKID_VDEC_HEVCF]              = &g12a_vdec_hevcf.hw,
+               [CLKID_TS_DIV]                  = &g12a_ts_div.hw,
+               [CLKID_TS]                      = &g12a_ts.hw,
+               [CLKID_GP1_PLL_DCO]             = &sm1_gp1_pll_dco.hw,
+               [CLKID_GP1_PLL]                 = &sm1_gp1_pll.hw,
+               [CLKID_DSU_CLK_DYN0_SEL]        = &sm1_dsu_clk_premux0.hw,
+               [CLKID_DSU_CLK_DYN0_DIV]        = &sm1_dsu_clk_mux0_div.hw,
+               [CLKID_DSU_CLK_DYN0]            = &sm1_dsu_clk_postmux0.hw,
+               [CLKID_DSU_CLK_DYN1_SEL]        = &sm1_dsu_clk_premux1.hw,
+               [CLKID_DSU_CLK_DYN1_DIV]        = &sm1_dsu_clk_mux1_div.hw,
+               [CLKID_DSU_CLK_DYN1]            = &sm1_dsu_clk_postmux1.hw,
+               [CLKID_DSU_CLK_DYN]             = &sm1_dsu_clk_dyn.hw,
+               [CLKID_DSU_CLK_FINAL]           = &sm1_dsu_final_clk.hw,
+               [CLKID_DSU_CLK]                 = &sm1_dsu_clk.hw,
+               [CLKID_CPU1_CLK]                = &sm1_cpu1_clk.hw,
+               [CLKID_CPU2_CLK]                = &sm1_cpu2_clk.hw,
+               [CLKID_CPU3_CLK]                = &sm1_cpu3_clk.hw,
+               [NR_CLKS]                       = NULL,
+       },
+       .num = NR_CLKS,
+};
+
 /* Convenience table to populate regmap in .probe */
 static struct clk_regmap *const g12a_clk_regmaps[] = {
        &g12a_clk81,
@@ -4336,6 +4853,20 @@ static struct clk_regmap *const g12a_clk_regmaps[] = {
        &g12b_cpub_clk_axi,
        &g12b_cpub_clk_trace_sel,
        &g12b_cpub_clk_trace,
+       &sm1_gp1_pll_dco,
+       &sm1_gp1_pll,
+       &sm1_dsu_clk_premux0,
+       &sm1_dsu_clk_premux1,
+       &sm1_dsu_clk_mux0_div,
+       &sm1_dsu_clk_postmux0,
+       &sm1_dsu_clk_mux1_div,
+       &sm1_dsu_clk_postmux1,
+       &sm1_dsu_clk_dyn,
+       &sm1_dsu_final_clk,
+       &sm1_dsu_clk,
+       &sm1_cpu1_clk,
+       &sm1_cpu2_clk,
+       &sm1_cpu3_clk,
 };
 
 static const struct reg_sequence g12a_init_regs[] = {
@@ -4532,6 +5063,15 @@ static const struct meson_g12a_data g12b_clkc_data = {
        .dvfs_setup = meson_g12b_dvfs_setup,
 };
 
+static const struct meson_g12a_data sm1_clkc_data = {
+       .eeclkc_data = {
+               .regmap_clks = g12a_clk_regmaps,
+               .regmap_clk_num = ARRAY_SIZE(g12a_clk_regmaps),
+               .hw_onecell_data = &sm1_hw_onecell_data,
+       },
+       .dvfs_setup = meson_g12a_dvfs_setup,
+};
+
 static const struct of_device_id clkc_match_table[] = {
        {
                .compatible = "amlogic,g12a-clkc",
@@ -4541,6 +5081,10 @@ static const struct of_device_id clkc_match_table[] = {
                .compatible = "amlogic,g12b-clkc",
                .data = &g12b_clkc_data.eeclkc_data
        },
+       {
+               .compatible = "amlogic,sm1-clkc",
+               .data = &sm1_clkc_data.eeclkc_data
+       },
        {}
 };
 
index 559a34c..9df4068 100644 (file)
 #define HHI_GP0_PLL_CNTL5              0x054
 #define HHI_GP0_PLL_CNTL6              0x058
 #define HHI_GP0_PLL_STS                        0x05C
+#define HHI_GP1_PLL_CNTL0              0x060
+#define HHI_GP1_PLL_CNTL1              0x064
+#define HHI_GP1_PLL_CNTL2              0x068
+#define HHI_GP1_PLL_CNTL3              0x06C
+#define HHI_GP1_PLL_CNTL4              0x070
+#define HHI_GP1_PLL_CNTL5              0x074
+#define HHI_GP1_PLL_CNTL6              0x078
+#define HHI_GP1_PLL_STS                        0x07C
 #define HHI_PCIE_PLL_CNTL0             0x098
 #define HHI_PCIE_PLL_CNTL1             0x09C
 #define HHI_PCIE_PLL_CNTL2             0x0A0
 #define HHI_SYS_CPUB_CLK_CNTL1         0x200
 #define HHI_SYS_CPUB_CLK_CNTL          0x208
 #define HHI_VPU_CLKB_CNTL              0x20C
+#define HHI_SYS_CPU_CLK_CNTL2          0x210
+#define HHI_SYS_CPU_CLK_CNTL3          0x214
+#define HHI_SYS_CPU_CLK_CNTL4          0x218
+#define HHI_SYS_CPU_CLK_CNTL5          0x21c
+#define HHI_SYS_CPU_CLK_CNTL6          0x220
 #define HHI_GEN_CLK_CNTL               0x228
 #define HHI_VDIN_MEAS_CLK_CNTL         0x250
 #define HHI_MIPIDSI_PHY_CLK_CNTL       0x254
 #define CLKID_CPUB_CLK_AXI                     239
 #define CLKID_CPUB_CLK_TRACE_SEL               240
 #define CLKID_CPUB_CLK_TRACE                   241
+#define CLKID_GP1_PLL_DCO                      242
+#define CLKID_DSU_CLK_DYN0_SEL                 244
+#define CLKID_DSU_CLK_DYN0_DIV                 245
+#define CLKID_DSU_CLK_DYN0                     246
+#define CLKID_DSU_CLK_DYN1_SEL                 247
+#define CLKID_DSU_CLK_DYN1_DIV                 248
+#define CLKID_DSU_CLK_DYN1                     249
+#define CLKID_DSU_CLK_DYN                      250
+#define CLKID_DSU_CLK_FINAL                    251
 
-#define NR_CLKS                                        242
+#define NR_CLKS                                        256
 
 /* include the CLKIDs that have been made part of the DT binding */
 #include <dt-bindings/clock/g12a-clkc.h>
index b09f6de..415e690 100644 (file)
@@ -8,6 +8,9 @@ config MVEBU_CLK_CPU
 config MVEBU_CLK_COREDIV
        bool
 
+config ARMADA_AP_CP_HELPER
+       bool
+
 config ARMADA_370_CLK
        bool
        select MVEBU_CLK_COMMON
@@ -35,9 +38,14 @@ config ARMADA_XP_CLK
 
 config ARMADA_AP806_SYSCON
        bool
+       select ARMADA_AP_CP_HELPER
+
+config ARMADA_AP_CPU_CLK
+       bool
 
 config ARMADA_CP110_SYSCON
        bool
+       select ARMADA_AP_CP_HELPER
 
 config DOVE_CLK
        bool
index 93ac368..04464ce 100644 (file)
@@ -2,6 +2,7 @@
 obj-$(CONFIG_MVEBU_CLK_COMMON) += common.o
 obj-$(CONFIG_MVEBU_CLK_CPU)    += clk-cpu.o
 obj-$(CONFIG_MVEBU_CLK_COREDIV)        += clk-corediv.o
+obj-$(CONFIG_ARMADA_AP_CP_HELPER) += armada_ap_cp_helper.o
 
 obj-$(CONFIG_ARMADA_370_CLK)   += armada-370.o
 obj-$(CONFIG_ARMADA_375_CLK)   += armada-375.o
@@ -12,6 +13,7 @@ obj-$(CONFIG_ARMADA_37XX_CLK) += armada-37xx-tbg.o
 obj-$(CONFIG_ARMADA_37XX_CLK)  += armada-37xx-periph.o
 obj-$(CONFIG_ARMADA_XP_CLK)    += armada-xp.o mv98dx3236.o
 obj-$(CONFIG_ARMADA_AP806_SYSCON) += ap806-system-controller.o
+obj-$(CONFIG_ARMADA_AP_CPU_CLK) += ap-cpu-clk.o
 obj-$(CONFIG_ARMADA_CP110_SYSCON) += cp110-system-controller.o
 obj-$(CONFIG_DOVE_CLK)         += dove.o dove-divider.o
 obj-$(CONFIG_KIRKWOOD_CLK)     += kirkwood.o
diff --git a/drivers/clk/mvebu/ap-cpu-clk.c b/drivers/clk/mvebu/ap-cpu-clk.c
new file mode 100644 (file)
index 0000000..af5e5ac
--- /dev/null
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Marvell Armada AP CPU Clock Controller
+ *
+ * Copyright (C) 2018 Marvell
+ *
+ * Omri Itach <omrii@marvell.com>
+ * Gregory Clement <gregory.clement@bootlin.com>
+ */
+
+#define pr_fmt(fmt) "ap-cpu-clk: " fmt
+
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include "armada_ap_cp_helper.h"
+
+#define AP806_CPU_CLUSTER0             0
+#define AP806_CPU_CLUSTER1             1
+#define AP806_CPUS_PER_CLUSTER         2
+#define APN806_CPU1_MASK               0x1
+
+#define APN806_CLUSTER_NUM_OFFSET      8
+#define APN806_CLUSTER_NUM_MASK                BIT(APN806_CLUSTER_NUM_OFFSET)
+
+#define APN806_MAX_DIVIDER             32
+
+/**
+ * struct cpu_dfs_regs: CPU DFS register mapping
+ * @divider_reg: full integer ratio from PLL frequency to CPU clock frequency
+ * @force_reg: request to force new ratio regardless of relation to other clocks
+ * @ratio_reg: central request to switch ratios
+ */
+struct cpu_dfs_regs {
+       unsigned int divider_reg;
+       unsigned int force_reg;
+       unsigned int ratio_reg;
+       unsigned int ratio_state_reg;   /* polled for the "stable" bit */
+       unsigned int divider_mask;      /* divider field in divider_reg */
+       unsigned int cluster_offset;    /* register stride per cluster */
+       unsigned int force_mask;        /* bits set in force_reg */
+       int divider_offset;             /* shift of the divider field */
+       int divider_ratio;              /* non-zero: 2nd field = divider * ratio */
+       int ratio_offset;               /* request bit index in ratio_reg */
+       int ratio_state_offset;         /* stable bit index for cluster 0 */
+       int ratio_state_cluster_offset; /* stable bit stride per cluster */
+};
+
+/* AP806 CPU DFS register mapping */
+#define AP806_CA72MP2_0_PLL_CR_0_REG_OFFSET            0x278
+#define AP806_CA72MP2_0_PLL_CR_1_REG_OFFSET            0x280
+#define AP806_CA72MP2_0_PLL_CR_2_REG_OFFSET            0x284
+#define AP806_CA72MP2_0_PLL_SR_REG_OFFSET              0xC94
+
+#define AP806_CA72MP2_0_PLL_CR_CLUSTER_OFFSET          0x14
+#define AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_OFFSET                0
+#define AP806_PLL_CR_CPU_CLK_DIV_RATIO                 0
+#define AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_MASK \
+                       (0x3f << AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_OFFSET)
+#define AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_OFFSET     24
+#define AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_MASK \
+                       (0x1 << AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_OFFSET)
+#define AP806_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET     16
+#define AP806_CA72MP2_0_PLL_RATIO_STABLE_OFFSET        0
+#define AP806_CA72MP2_0_PLL_RATIO_STATE                        11
+
+#define STATUS_POLL_PERIOD_US          1
+#define STATUS_POLL_TIMEOUT_US         1000000
+
+#define to_ap_cpu_clk(_hw) container_of(_hw, struct ap_cpu_clk, hw)
+
+static const struct cpu_dfs_regs ap806_dfs_regs = {
+       .divider_reg = AP806_CA72MP2_0_PLL_CR_0_REG_OFFSET,
+       .force_reg = AP806_CA72MP2_0_PLL_CR_1_REG_OFFSET,
+       .ratio_reg = AP806_CA72MP2_0_PLL_CR_2_REG_OFFSET,
+       .ratio_state_reg = AP806_CA72MP2_0_PLL_SR_REG_OFFSET,
+       .divider_mask = AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_MASK,
+       .cluster_offset = AP806_CA72MP2_0_PLL_CR_CLUSTER_OFFSET,
+       .force_mask = AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_MASK,
+       .divider_offset = AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_OFFSET,
+       .divider_ratio = AP806_PLL_CR_CPU_CLK_DIV_RATIO,
+       .ratio_offset = AP806_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET,
+       .ratio_state_offset = AP806_CA72MP2_0_PLL_RATIO_STABLE_OFFSET,
+       .ratio_state_cluster_offset = AP806_CA72MP2_0_PLL_RATIO_STABLE_OFFSET,
+};
+
+/* AP807 CPU DFS register mapping */
+#define AP807_DEVICE_GENERAL_CONTROL_10_REG_OFFSET             0x278
+#define AP807_DEVICE_GENERAL_CONTROL_11_REG_OFFSET             0x27c
+#define AP807_DEVICE_GENERAL_STATUS_6_REG_OFFSET               0xc98
+#define AP807_CA72MP2_0_PLL_CR_CLUSTER_OFFSET                  0x8
+#define AP807_PLL_CR_0_CPU_CLK_DIV_RATIO_OFFSET                        18
+#define AP807_PLL_CR_0_CPU_CLK_DIV_RATIO_MASK \
+               (0x3f << AP807_PLL_CR_0_CPU_CLK_DIV_RATIO_OFFSET)
+#define AP807_PLL_CR_1_CPU_CLK_DIV_RATIO_OFFSET                        12
+#define AP807_PLL_CR_1_CPU_CLK_DIV_RATIO_MASK \
+               (0x3f << AP807_PLL_CR_1_CPU_CLK_DIV_RATIO_OFFSET)
+#define AP807_PLL_CR_CPU_CLK_DIV_RATIO                         3
+#define AP807_PLL_CR_0_CPU_CLK_RELOAD_FORCE_OFFSET             0
+#define AP807_PLL_CR_0_CPU_CLK_RELOAD_FORCE_MASK \
+               (0x3 << AP807_PLL_CR_0_CPU_CLK_RELOAD_FORCE_OFFSET)
+#define AP807_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET             6
+#define        AP807_CA72MP2_0_PLL_CLKDIV_RATIO_STABLE_OFFSET          20
+#define AP807_CA72MP2_0_PLL_CLKDIV_RATIO_STABLE_CLUSTER_OFFSET 3
+
+static const struct cpu_dfs_regs ap807_dfs_regs = {
+       .divider_reg = AP807_DEVICE_GENERAL_CONTROL_10_REG_OFFSET,
+       .force_reg = AP807_DEVICE_GENERAL_CONTROL_11_REG_OFFSET,
+       .ratio_reg = AP807_DEVICE_GENERAL_CONTROL_11_REG_OFFSET,
+       .ratio_state_reg = AP807_DEVICE_GENERAL_STATUS_6_REG_OFFSET,
+       .divider_mask = AP807_PLL_CR_0_CPU_CLK_DIV_RATIO_MASK,
+       .cluster_offset = AP807_CA72MP2_0_PLL_CR_CLUSTER_OFFSET,
+       .force_mask = AP807_PLL_CR_0_CPU_CLK_RELOAD_FORCE_MASK,
+       .divider_offset = AP807_PLL_CR_0_CPU_CLK_DIV_RATIO_OFFSET,
+       .divider_ratio = AP807_PLL_CR_CPU_CLK_DIV_RATIO,
+       .ratio_offset = AP807_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET,
+       .ratio_state_offset = AP807_CA72MP2_0_PLL_CLKDIV_RATIO_STABLE_OFFSET,
+       .ratio_state_cluster_offset =
+               AP807_CA72MP2_0_PLL_CLKDIV_RATIO_STABLE_CLUSTER_OFFSET
+};
+
+/*
+ * struct ap_cpu_clk: CPU cluster clock controller instance
+ * @cluster: Cluster clock controller index
+ * @clk_name: Cluster clock controller name
+ * @dev : Cluster clock device
+ * @hw: HW specific structure of Cluster clock controller
+ * @pll_cr_base: CA72MP2 Register base (Device Sample at Reset register)
+ */
+struct ap_cpu_clk {
+       unsigned int cluster;
+       const char *clk_name;
+       struct device *dev;
+       struct clk_hw hw;
+       struct regmap *pll_cr_base;
+       const struct cpu_dfs_regs *pll_regs;    /* DFS layout from OF match data */
+};
+
+/* Current cluster rate: parent_rate divided by the programmed divider field */
+static unsigned long ap_cpu_clk_recalc_rate(struct clk_hw *hw,
+                                           unsigned long parent_rate)
+{
+       struct ap_cpu_clk *clk = to_ap_cpu_clk(hw);
+       unsigned int cpu_clkdiv_reg, cpu_clkdiv_ratio = 0;
+
+       cpu_clkdiv_reg = clk->pll_regs->divider_reg +
+               (clk->cluster * clk->pll_regs->cluster_offset);
+       regmap_read(clk->pll_cr_base, cpu_clkdiv_reg, &cpu_clkdiv_ratio);
+       cpu_clkdiv_ratio &= clk->pll_regs->divider_mask;
+       cpu_clkdiv_ratio >>= clk->pll_regs->divider_offset;
+       /* A failed read or a zero divider field must not divide by zero */
+       return cpu_clkdiv_ratio ? parent_rate / cpu_clkdiv_ratio : 0;
+}
+
+/* Program the divider for parent_rate / rate and wait until it is stable */
+static int ap_cpu_clk_set_rate(struct clk_hw *hw, unsigned long rate,
+                              unsigned long parent_rate)
+{
+       struct ap_cpu_clk *clk = to_ap_cpu_clk(hw);
+       const struct cpu_dfs_regs *regs = clk->pll_regs;
+       /* Every cluster has its own copy of the control registers */
+       unsigned int cluster_off = clk->cluster * regs->cluster_offset;
+       unsigned int cpu_clkdiv_reg = regs->divider_reg + cluster_off;
+       unsigned int cpu_force_reg = regs->force_reg + cluster_off;
+       unsigned int cpu_ratio_reg = regs->ratio_reg + cluster_off;
+       unsigned int reg = 0, divider = parent_rate / rate, stable_bit;
+       int ret;
+
+       /* Replace the divider field with the requested integer ratio */
+       regmap_read(clk->pll_cr_base, cpu_clkdiv_reg, &reg);
+       reg &= ~(regs->divider_mask);
+       reg |= (divider << regs->divider_offset);
+
+       /*
+        * AP807 CPU divider has two channels with ratio 1:3, so its
+        * divider_ratio is 3 and a second field is written with
+        * divider * 3. In the case of the AP806, divider_ratio is 0.
+        */
+       if (regs->divider_ratio) {
+               reg &= ~(AP807_PLL_CR_1_CPU_CLK_DIV_RATIO_MASK);
+               reg |= ((divider * regs->divider_ratio) <<
+                               AP807_PLL_CR_1_CPU_CLK_DIV_RATIO_OFFSET);
+       }
+       regmap_write(clk->pll_cr_base, cpu_clkdiv_reg, reg);
+
+       /* Set the force bit(s), then raise the request to switch ratios */
+       regmap_update_bits(clk->pll_cr_base, cpu_force_reg,
+                          regs->force_mask, regs->force_mask);
+       regmap_update_bits(clk->pll_cr_base, cpu_ratio_reg,
+                          BIT(regs->ratio_offset), BIT(regs->ratio_offset));
+
+       /* Poll the state register until this cluster's stable bit is set */
+       stable_bit = BIT(regs->ratio_state_offset +
+                        clk->cluster * regs->ratio_state_cluster_offset);
+       ret = regmap_read_poll_timeout(clk->pll_cr_base,
+                                      regs->ratio_state_reg, reg,
+                                      reg & stable_bit, STATUS_POLL_PERIOD_US,
+                                      STATUS_POLL_TIMEOUT_US);
+       if (ret)
+               return ret;
+
+       /* Clear the switch request once the ratio is reported stable */
+       regmap_update_bits(clk->pll_cr_base, cpu_ratio_reg,
+                          BIT(regs->ratio_offset), 0);
+
+       return 0;
+}
+
+static long ap_cpu_clk_round_rate(struct clk_hw *hw, unsigned long rate,
+                                 unsigned long *parent_rate)
+{
+       /* rate == 0 or rate > *parent_rate must not lead to a zero divider */
+       unsigned long divider = rate ? *parent_rate / rate : APN806_MAX_DIVIDER;
+
+       divider = clamp_t(unsigned long, divider, 1, APN806_MAX_DIVIDER);
+       return *parent_rate / divider;
+}
+
+static const struct clk_ops ap_cpu_clk_ops = {
+       .recalc_rate    = ap_cpu_clk_recalc_rate,
+       .round_rate     = ap_cpu_clk_round_rate,
+       .set_rate       = ap_cpu_clk_set_rate,
+};
+
+static int ap_cpu_clock_probe(struct platform_device *pdev)
+{
+       int ret, nclusters = 1, cluster_index = 0;
+       struct device *dev = &pdev->dev;
+       struct device_node *dn, *np = dev->of_node;
+       struct clk_hw_onecell_data *ap_cpu_data;
+       struct ap_cpu_clk *ap_cpu_clk;
+       struct regmap *regmap;
+
+       regmap = syscon_node_to_regmap(np->parent);
+       if (IS_ERR(regmap)) {
+               pr_err("cannot get pll_cr_base regmap\n");
+               return PTR_ERR(regmap);
+       }
+
+       /*
+        * AP806 has 4 cpus and DFS is controlled per cluster (2 CPUs per
+        * cluster): cpu0 and cpu1 are fixed to cluster0 while cpu2 and cpu3
+        * are fixed to cluster1, whether they are enabled or not. cpu0 is
+        * the boot cpu, so cluster0 must exist; cluster1 only exists (and
+        * the number of clusters is 2) if cpu2 or cpu3 is enabled.
+        */
+       for_each_of_cpu_node(dn) {
+               u32 cpu;
+
+               ret = of_property_read_u32(dn, "reg", &cpu);
+               if (WARN_ON(ret))
+                       goto err_put_node;
+
+               /* If cpu2 or cpu3 is enabled */
+               if (cpu & APN806_CLUSTER_NUM_MASK) {
+                       nclusters = 2;
+                       of_node_put(dn);        /* break skips the iterator's put */
+                       break;
+               }
+       }
+       /* DFS is controlled per cluster, so allocate structs per cluster */
+       ap_cpu_clk = devm_kcalloc(dev, nclusters, sizeof(*ap_cpu_clk),
+                                 GFP_KERNEL);
+       if (!ap_cpu_clk)
+               return -ENOMEM;
+
+       ap_cpu_data = devm_kzalloc(dev, sizeof(*ap_cpu_data) +
+                               sizeof(struct clk_hw *) * nclusters,
+                               GFP_KERNEL);
+       if (!ap_cpu_data)
+               return -ENOMEM;
+
+       for_each_of_cpu_node(dn) {
+               char clk_name[] = "cpu-cluster-0";      /* writable copy */
+               struct clk_init_data init = { };        /* no stray flags */
+               const char *parent_name;
+               struct clk *parent;
+               u32 cpu;
+
+               ret = of_property_read_u32(dn, "reg", &cpu);
+               if (WARN_ON(ret))
+                       goto err_put_node;
+
+               cluster_index = cpu & APN806_CLUSTER_NUM_MASK;
+               cluster_index >>= APN806_CLUSTER_NUM_OFFSET;
+
+               /* Initialize once for one cluster */
+               if (ap_cpu_data->hws[cluster_index])
+                       continue;
+
+               parent = of_clk_get(np, cluster_index);
+               if (IS_ERR(parent)) {
+                       dev_err(dev, "Could not get the clock parent\n");
+                       ret = PTR_ERR(parent);
+                       goto err_put_node;
+               }
+               parent_name =  __clk_get_name(parent);
+               clk_name[12] += cluster_index;
+               ap_cpu_clk[cluster_index].clk_name =
+                       ap_cp_unique_name(dev, np->parent, clk_name);
+               ap_cpu_clk[cluster_index].cluster = cluster_index;
+               ap_cpu_clk[cluster_index].pll_cr_base = regmap;
+               ap_cpu_clk[cluster_index].hw.init = &init;
+               ap_cpu_clk[cluster_index].dev = dev;
+               ap_cpu_clk[cluster_index].pll_regs = of_device_get_match_data(&pdev->dev);
+
+               init.name = ap_cpu_clk[cluster_index].clk_name;
+               init.ops = &ap_cpu_clk_ops;
+               init.num_parents = 1;
+               init.parent_names = &parent_name;
+
+               ret = devm_clk_hw_register(dev, &ap_cpu_clk[cluster_index].hw);
+               if (ret)
+                       goto err_put_node;
+               ap_cpu_data->hws[cluster_index] = &ap_cpu_clk[cluster_index].hw;
+       }
+
+       ap_cpu_data->num = nclusters;   /* matches the hws[] allocation */
+
+       ret = of_clk_add_hw_provider(np, of_clk_hw_onecell_get, ap_cpu_data);
+       if (ret)
+               dev_err(dev, "failed to register OF clock provider\n");
+
+       return ret;
+
+err_put_node:
+       of_node_put(dn);        /* reference held by for_each_of_cpu_node() */
+       return ret;
+}
+
+static const struct of_device_id ap_cpu_clock_of_match[] = {
+       {
+               .compatible = "marvell,ap806-cpu-clock",
+               .data = &ap806_dfs_regs,
+       },
+       {
+               .compatible = "marvell,ap807-cpu-clock",
+               .data = &ap807_dfs_regs,
+       },
+       { }
+};
+
+static struct platform_driver ap_cpu_clock_driver = {
+       .probe = ap_cpu_clock_probe,
+       .driver         = {
+               .name   = "marvell-ap-cpu-clock",
+               .of_match_table = ap_cpu_clock_of_match,
+               .suppress_bind_attrs = true,
+       },
+};
+builtin_platform_driver(ap_cpu_clock_driver);
index ea54a87..948bd1e 100644 (file)
 
 #define pr_fmt(fmt) "ap806-system-controller: " fmt
 
+#include "armada_ap_cp_helper.h"
 #include <linux/clk-provider.h>
 #include <linux/mfd/syscon.h>
 #include <linux/init.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 
 #define AP806_SAR_REG                  0x400
 #define AP806_SAR_CLKFREQ_MODE_MASK    0x1f
 
-#define AP806_CLK_NUM                  5
+#define AP806_CLK_NUM                  6
 
 static struct clk *ap806_clks[AP806_CLK_NUM];
 
@@ -30,86 +30,149 @@ static struct clk_onecell_data ap806_clk_data = {
        .clk_num = AP806_CLK_NUM,
 };
 
-static char *ap806_unique_name(struct device *dev, struct device_node *np,
-                              char *name)
+static int ap806_get_sar_clocks(unsigned int freq_mode,
+                               unsigned int *cpuclk_freq,
+                               unsigned int *dclk_freq)
 {
-       const __be32 *reg;
-       u64 addr;
-
-       reg = of_get_property(np, "reg", NULL);
-       addr = of_translate_address(np, reg);
-       return devm_kasprintf(dev, GFP_KERNEL, "%llx-%s",
-                       (unsigned long long)addr, name);
-}
-
-static int ap806_syscon_common_probe(struct platform_device *pdev,
-                                    struct device_node *syscon_node)
-{
-       unsigned int freq_mode, cpuclk_freq;
-       const char *name, *fixedclk_name;
-       struct device *dev = &pdev->dev;
-       struct device_node *np = dev->of_node;
-       struct regmap *regmap;
-       u32 reg;
-       int ret;
-
-       regmap = syscon_node_to_regmap(syscon_node);
-       if (IS_ERR(regmap)) {
-               dev_err(dev, "cannot get regmap\n");
-               return PTR_ERR(regmap);
-       }
-
-       ret = regmap_read(regmap, AP806_SAR_REG, &reg);
-       if (ret) {
-               dev_err(dev, "cannot read from regmap\n");
-               return ret;
-       }
-
-       freq_mode = reg & AP806_SAR_CLKFREQ_MODE_MASK;
        switch (freq_mode) {
        case 0x0:
+               *cpuclk_freq = 2000;
+               *dclk_freq = 600;
+               break;
        case 0x1:
-               cpuclk_freq = 2000;
+               *cpuclk_freq = 2000;
+               *dclk_freq = 525;
                break;
        case 0x6:
+               *cpuclk_freq = 1800;
+               *dclk_freq = 600;
+               break;
        case 0x7:
-               cpuclk_freq = 1800;
+               *cpuclk_freq = 1800;
+               *dclk_freq = 525;
                break;
        case 0x4:
+               *cpuclk_freq = 1600;
+               *dclk_freq = 400;
+               break;
        case 0xB:
+               *cpuclk_freq = 1600;
+               *dclk_freq = 450;
+               break;
        case 0xD:
-               cpuclk_freq = 1600;
+               *cpuclk_freq = 1600;
+               *dclk_freq = 525;
                break;
        case 0x1a:
-               cpuclk_freq = 1400;
+               *cpuclk_freq = 1400;
+               *dclk_freq = 400;
                break;
        case 0x14:
+               *cpuclk_freq = 1300;
+               *dclk_freq = 400;
+               break;
        case 0x17:
-               cpuclk_freq = 1300;
+               *cpuclk_freq = 1300;
+               *dclk_freq = 325;
                break;
        case 0x19:
-               cpuclk_freq = 1200;
+               *cpuclk_freq = 1200;
+               *dclk_freq = 400;
                break;
        case 0x13:
+               *cpuclk_freq = 1000;
+               *dclk_freq = 325;
+               break;
        case 0x1d:
-               cpuclk_freq = 1000;
+               *cpuclk_freq = 1000;
+               *dclk_freq = 400;
                break;
        case 0x1c:
-               cpuclk_freq = 800;
+               *cpuclk_freq = 800;
+               *dclk_freq = 400;
                break;
        case 0x1b:
-               cpuclk_freq = 600;
+               *cpuclk_freq = 600;
+               *dclk_freq = 400;
                break;
        default:
-               dev_err(dev, "invalid SAR value\n");
                return -EINVAL;
        }
 
+       return 0;
+}
+
+static int ap807_get_sar_clocks(unsigned int freq_mode,
+                               unsigned int *cpuclk_freq,
+                               unsigned int *dclk_freq)
+{
+       switch (freq_mode) {    /* masked SAR register value; rates in MHz */
+       case 0x0:
+               *cpuclk_freq = 2000;
+               *dclk_freq = 1200;
+               break;
+       case 0x6:
+               *cpuclk_freq = 2200;
+               *dclk_freq = 1200;
+               break;
+       case 0xD:
+               *cpuclk_freq = 1600;
+               *dclk_freq = 1200;
+               break;
+       default:        /* unknown mode: outputs are left untouched */
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int ap806_syscon_common_probe(struct platform_device *pdev,
+                                    struct device_node *syscon_node)
+{
+       unsigned int freq_mode, cpuclk_freq, dclk_freq;
+       const char *name, *fixedclk_name;
+       struct device *dev = &pdev->dev;
+       struct device_node *np = dev->of_node;
+       struct regmap *regmap;
+       u32 reg;
+       int ret;
+
+       regmap = syscon_node_to_regmap(syscon_node);
+       if (IS_ERR(regmap)) {
+               dev_err(dev, "cannot get regmap\n");
+               return PTR_ERR(regmap);
+       }
+
+       ret = regmap_read(regmap, AP806_SAR_REG, &reg);
+       if (ret) {
+               dev_err(dev, "cannot read from regmap\n");
+               return ret;
+       }
+
+       freq_mode = reg & AP806_SAR_CLKFREQ_MODE_MASK;
+
+       if (of_device_is_compatible(pdev->dev.of_node,
+                                   "marvell,ap806-clock")) {
+               ret = ap806_get_sar_clocks(freq_mode, &cpuclk_freq, &dclk_freq);
+       } else if (of_device_is_compatible(pdev->dev.of_node,
+                                          "marvell,ap807-clock")) {
+               ret = ap807_get_sar_clocks(freq_mode, &cpuclk_freq, &dclk_freq);
+       } else {
+               dev_err(dev, "compatible not supported\n");
+               return -EINVAL;
+       }
+
+       if (ret) {
+               dev_err(dev, "invalid Sample at Reset value\n");
+               return ret;
+       }
+
        /* Convert to hertz */
        cpuclk_freq *= 1000 * 1000;
+       dclk_freq *= 1000 * 1000;
 
        /* CPU clocks depend on the Sample At Reset configuration */
-       name = ap806_unique_name(dev, syscon_node, "cpu-cluster-0");
+       name = ap_cp_unique_name(dev, syscon_node, "pll-cluster-0");
        ap806_clks[0] = clk_register_fixed_rate(dev, name, NULL,
                                                0, cpuclk_freq);
        if (IS_ERR(ap806_clks[0])) {
@@ -117,7 +180,7 @@ static int ap806_syscon_common_probe(struct platform_device *pdev,
                goto fail0;
        }
 
-       name = ap806_unique_name(dev, syscon_node, "cpu-cluster-1");
+       name = ap_cp_unique_name(dev, syscon_node, "pll-cluster-1");
        ap806_clks[1] = clk_register_fixed_rate(dev, name, NULL, 0,
                                                cpuclk_freq);
        if (IS_ERR(ap806_clks[1])) {
@@ -126,7 +189,7 @@ static int ap806_syscon_common_probe(struct platform_device *pdev,
        }
 
        /* Fixed clock is always 1200 Mhz */
-       fixedclk_name = ap806_unique_name(dev, syscon_node, "fixed");
+       fixedclk_name = ap_cp_unique_name(dev, syscon_node, "fixed");
        ap806_clks[2] = clk_register_fixed_rate(dev, fixedclk_name, NULL,
                                                0, 1200 * 1000 * 1000);
        if (IS_ERR(ap806_clks[2])) {
@@ -135,7 +198,7 @@ static int ap806_syscon_common_probe(struct platform_device *pdev,
        }
 
        /* MSS Clock is fixed clock divided by 6 */
-       name = ap806_unique_name(dev, syscon_node, "mss");
+       name = ap_cp_unique_name(dev, syscon_node, "mss");
        ap806_clks[3] = clk_register_fixed_factor(NULL, name, fixedclk_name,
                                                  0, 1, 6);
        if (IS_ERR(ap806_clks[3])) {
@@ -144,7 +207,7 @@ static int ap806_syscon_common_probe(struct platform_device *pdev,
        }
 
        /* SDIO(/eMMC) Clock is fixed clock divided by 3 */
-       name = ap806_unique_name(dev, syscon_node, "sdio");
+       name = ap_cp_unique_name(dev, syscon_node, "sdio");
        ap806_clks[4] = clk_register_fixed_factor(NULL, name,
                                                  fixedclk_name,
                                                  0, 1, 3);
@@ -153,6 +216,14 @@ static int ap806_syscon_common_probe(struct platform_device *pdev,
                goto fail4;
        }
 
+       /* AP-DCLK(HCLK) Clock is DDR clock divided by 2 */
+       name = ap_cp_unique_name(dev, syscon_node, "ap-dclk");
+       ap806_clks[5] = clk_register_fixed_rate(dev, name, NULL, 0, dclk_freq);
+       if (IS_ERR(ap806_clks[5])) {
+               ret = PTR_ERR(ap806_clks[5]);
+               goto fail5;
+       }
+
        ret = of_clk_add_provider(np, of_clk_src_onecell_get, &ap806_clk_data);
        if (ret)
                goto fail_clk_add;
@@ -160,6 +231,8 @@ static int ap806_syscon_common_probe(struct platform_device *pdev,
        return 0;
 
 fail_clk_add:
+       clk_unregister_fixed_factor(ap806_clks[5]);
+fail5:
        clk_unregister_fixed_factor(ap806_clks[4]);
 fail4:
        clk_unregister_fixed_factor(ap806_clks[3]);
@@ -206,6 +279,7 @@ builtin_platform_driver(ap806_syscon_legacy_driver);
 
 static const struct of_device_id ap806_clock_of_match[] = {
        { .compatible = "marvell,ap806-clock", },
+       { .compatible = "marvell,ap807-clock", },
        { }
 };
 
diff --git a/drivers/clk/mvebu/armada_ap_cp_helper.c b/drivers/clk/mvebu/armada_ap_cp_helper.c
new file mode 100644 (file)
index 0000000..6a930f6
--- /dev/null
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Marvell Armada AP and CP110 helper
+ *
+ * Copyright (C) 2018 Marvell
+ *
+ * Gregory Clement <gregory.clement@bootlin.com>
+ *
+ */
+
+#include "armada_ap_cp_helper.h"
+#include <linux/device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+/* Build "<translated address of np>-<name>", devm-allocated; NULL on failure */
+char *ap_cp_unique_name(struct device *dev, struct device_node *np,
+                       const char *name)
+{
+       const __be32 *reg = of_get_property(np, "reg", NULL);
+       u64 addr;
+
+       /* No clock means no name; no "reg" means nothing to translate */
+       if (!name || !reg)
+               return NULL;
+
+       addr = of_translate_address(np, reg);
+       return devm_kasprintf(dev, GFP_KERNEL, "%llx-%s",
+                             (unsigned long long)addr, name);
+}
diff --git a/drivers/clk/mvebu/armada_ap_cp_helper.h b/drivers/clk/mvebu/armada_ap_cp_helper.h
new file mode 100644 (file)
index 0000000..810af1e
--- /dev/null
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef __ARMADA_AP_CP_HELPER_H
+#define __ARMADA_AP_CP_HELPER_H
+
+struct device;
+struct device_node;
+
+char *ap_cp_unique_name(struct device *dev, struct device_node *np,
+                       const char *name);
+#endif
index b6de283..8084632 100644 (file)
 
 #define pr_fmt(fmt) "cp110-system-controller: " fmt
 
+#include "armada_ap_cp_helper.h"
 #include <linux/clk-provider.h>
 #include <linux/mfd/syscon.h>
 #include <linux/init.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
@@ -212,22 +212,6 @@ static struct clk_hw *cp110_of_clk_get(struct of_phandle_args *clkspec,
        return ERR_PTR(-EINVAL);
 }
 
-static char *cp110_unique_name(struct device *dev, struct device_node *np,
-                              const char *name)
-{
-       const __be32 *reg;
-       u64 addr;
-
-       /* Do not create a name if there is no clock */
-       if (!name)
-               return NULL;
-
-       reg = of_get_property(np, "reg", NULL);
-       addr = of_translate_address(np, reg);
-       return devm_kasprintf(dev, GFP_KERNEL, "%llx-%s",
-                             (unsigned long long)addr, name);
-}
-
 static int cp110_syscon_common_probe(struct platform_device *pdev,
                                     struct device_node *syscon_node)
 {
@@ -261,7 +245,7 @@ static int cp110_syscon_common_probe(struct platform_device *pdev,
        cp110_clk_data->num = CP110_CLK_NUM;
 
        /* Register the PLL0 which is the root of the hw tree */
-       pll0_name = cp110_unique_name(dev, syscon_node, "pll0");
+       pll0_name = ap_cp_unique_name(dev, syscon_node, "pll0");
        hw = clk_hw_register_fixed_rate(NULL, pll0_name, NULL, 0,
                                        1000 * 1000 * 1000);
        if (IS_ERR(hw)) {
@@ -272,7 +256,7 @@ static int cp110_syscon_common_probe(struct platform_device *pdev,
        cp110_clks[CP110_CORE_PLL0] = hw;
 
        /* PPv2 is PLL0/3 */
-       ppv2_name = cp110_unique_name(dev, syscon_node, "ppv2-core");
+       ppv2_name = ap_cp_unique_name(dev, syscon_node, "ppv2-core");
        hw = clk_hw_register_fixed_factor(NULL, ppv2_name, pll0_name, 0, 1, 3);
        if (IS_ERR(hw)) {
                ret = PTR_ERR(hw);
@@ -282,7 +266,7 @@ static int cp110_syscon_common_probe(struct platform_device *pdev,
        cp110_clks[CP110_CORE_PPV2] = hw;
 
        /* X2CORE clock is PLL0/2 */
-       x2core_name = cp110_unique_name(dev, syscon_node, "x2core");
+       x2core_name = ap_cp_unique_name(dev, syscon_node, "x2core");
        hw = clk_hw_register_fixed_factor(NULL, x2core_name, pll0_name,
                                          0, 1, 2);
        if (IS_ERR(hw)) {
@@ -293,7 +277,7 @@ static int cp110_syscon_common_probe(struct platform_device *pdev,
        cp110_clks[CP110_CORE_X2CORE] = hw;
 
        /* Core clock is X2CORE/2 */
-       core_name = cp110_unique_name(dev, syscon_node, "core");
+       core_name = ap_cp_unique_name(dev, syscon_node, "core");
        hw = clk_hw_register_fixed_factor(NULL, core_name, x2core_name,
                                          0, 1, 2);
        if (IS_ERR(hw)) {
@@ -303,7 +287,7 @@ static int cp110_syscon_common_probe(struct platform_device *pdev,
 
        cp110_clks[CP110_CORE_CORE] = hw;
        /* NAND can be either PLL0/2.5 or core clock */
-       nand_name = cp110_unique_name(dev, syscon_node, "nand-core");
+       nand_name = ap_cp_unique_name(dev, syscon_node, "nand-core");
        if (nand_clk_ctrl & NF_CLOCK_SEL_400_MASK)
                hw = clk_hw_register_fixed_factor(NULL, nand_name,
                                                   pll0_name, 0, 2, 5);
@@ -318,7 +302,7 @@ static int cp110_syscon_common_probe(struct platform_device *pdev,
        cp110_clks[CP110_CORE_NAND] = hw;
 
        /* SDIO clock is PLL0/2.5 */
-       sdio_name = cp110_unique_name(dev, syscon_node, "sdio-core");
+       sdio_name = ap_cp_unique_name(dev, syscon_node, "sdio-core");
        hw = clk_hw_register_fixed_factor(NULL, sdio_name,
                                          pll0_name, 0, 2, 5);
        if (IS_ERR(hw)) {
@@ -330,7 +314,7 @@ static int cp110_syscon_common_probe(struct platform_device *pdev,
 
        /* create the unique name for all the gate clocks */
        for (i = 0; i < ARRAY_SIZE(gate_base_names); i++)
-               gate_name[i] =  cp110_unique_name(dev, syscon_node,
+               gate_name[i] =  ap_cp_unique_name(dev, syscon_node,
                                                  gate_base_names[i]);
 
        for (i = 0; i < ARRAY_SIZE(gate_base_names); i++) {
index e1ff83c..32dbb4f 100644 (file)
@@ -21,7 +21,6 @@ if COMMON_CLK_QCOM
 
 config QCOM_A53PLL
        tristate "MSM8916 A53 PLL"
-       default ARCH_QCOM
        help
          Support for the A53 PLL on MSM8916 devices. It provides
          the CPU with frequencies above 1GHz.
@@ -31,7 +30,6 @@ config QCOM_A53PLL
 config QCOM_CLK_APCS_MSM8916
        tristate "MSM8916 APCS Clock Controller"
        depends on QCOM_APCS_IPC || COMPILE_TEST
-       default ARCH_QCOM
        help
          Support for the APCS Clock Controller on msm8916 devices. The
          APCS is managing the mux and divider which feeds the CPUs.
@@ -292,6 +290,13 @@ config SDM_LPASSCC_845
          Say Y if you want to use the LPASS branch clocks of the LPASS clock
          controller to reset the LPASS subsystem.
 
+config SM_GCC_8150
+       tristate "SM8150 Global Clock Controller"
+       help
+         Support for the global clock controller on SM8150 devices.
+         Say Y if you want to use peripheral devices such as UART,
+         SPI, I2C, USB, SD/UFS, PCIe etc.
+
 config SPMI_PMIC_CLKDIV
        tristate "SPMI PMIC clkdiv Support"
        depends on SPMI || COMPILE_TEST
index f0768fb..4a813b4 100644 (file)
@@ -50,6 +50,7 @@ obj-$(CONFIG_SDM_GCC_845) += gcc-sdm845.o
 obj-$(CONFIG_SDM_GPUCC_845) += gpucc-sdm845.o
 obj-$(CONFIG_SDM_LPASSCC_845) += lpasscc-sdm845.o
 obj-$(CONFIG_SDM_VIDEOCC_845) += videocc-sdm845.o
+obj-$(CONFIG_SM_GCC_8150) += gcc-sm8150.o
 obj-$(CONFIG_SPMI_PMIC_CLKDIV) += clk-spmi-pmic-div.o
 obj-$(CONFIG_KPSS_XCC) += kpss-xcc.o
 obj-$(CONFIG_QCOM_HFPLL) += hfpll.o
index 0ced4a5..055318f 100644 (file)
@@ -32,6 +32,7 @@
 # define PLL_LOCK_DET          BIT(31)
 
 #define PLL_L_VAL(p)           ((p)->offset + (p)->regs[PLL_OFF_L_VAL])
+#define PLL_CAL_L_VAL(p)       ((p)->offset + (p)->regs[PLL_OFF_CAL_L_VAL])
 #define PLL_ALPHA_VAL(p)       ((p)->offset + (p)->regs[PLL_OFF_ALPHA_VAL])
 #define PLL_ALPHA_VAL_U(p)     ((p)->offset + (p)->regs[PLL_OFF_ALPHA_VAL_U])
 
 # define PLL_VCO_MASK          0x3
 
 #define PLL_USER_CTL_U(p)      ((p)->offset + (p)->regs[PLL_OFF_USER_CTL_U])
+#define PLL_USER_CTL_U1(p)     ((p)->offset + (p)->regs[PLL_OFF_USER_CTL_U1])
 
 #define PLL_CONFIG_CTL(p)      ((p)->offset + (p)->regs[PLL_OFF_CONFIG_CTL])
 #define PLL_CONFIG_CTL_U(p)    ((p)->offset + (p)->regs[PLL_OFF_CONFIG_CTL_U])
+#define PLL_CONFIG_CTL_U1(p)   ((p)->offset + (p)->regs[PLL_OFF_CONFIG_CTL_U1])
 #define PLL_TEST_CTL(p)                ((p)->offset + (p)->regs[PLL_OFF_TEST_CTL])
 #define PLL_TEST_CTL_U(p)      ((p)->offset + (p)->regs[PLL_OFF_TEST_CTL_U])
 #define PLL_STATUS(p)          ((p)->offset + (p)->regs[PLL_OFF_STATUS])
 #define PLL_OPMODE(p)          ((p)->offset + (p)->regs[PLL_OFF_OPMODE])
 #define PLL_FRAC(p)            ((p)->offset + (p)->regs[PLL_OFF_FRAC])
+#define PLL_CAL_VAL(p)         ((p)->offset + (p)->regs[PLL_OFF_CAL_VAL])
 
 const u8 clk_alpha_pll_regs[][PLL_OFF_MAX_REGS] = {
        [CLK_ALPHA_PLL_TYPE_DEFAULT] =  {
@@ -96,6 +100,22 @@ const u8 clk_alpha_pll_regs[][PLL_OFF_MAX_REGS] = {
                [PLL_OFF_OPMODE] = 0x2c,
                [PLL_OFF_FRAC] = 0x38,
        },
+       [CLK_ALPHA_PLL_TYPE_TRION] = {
+               [PLL_OFF_L_VAL] = 0x04,
+               [PLL_OFF_CAL_L_VAL] = 0x08,
+               [PLL_OFF_USER_CTL] = 0x0c,
+               [PLL_OFF_USER_CTL_U] = 0x10,
+               [PLL_OFF_USER_CTL_U1] = 0x14,
+               [PLL_OFF_CONFIG_CTL] = 0x18,
+               [PLL_OFF_CONFIG_CTL_U] = 0x1c,
+               [PLL_OFF_CONFIG_CTL_U1] = 0x20,
+               [PLL_OFF_TEST_CTL] = 0x24,
+               [PLL_OFF_TEST_CTL_U] = 0x28,
+               [PLL_OFF_STATUS] = 0x30,
+               [PLL_OFF_OPMODE] = 0x38,
+               [PLL_OFF_ALPHA_VAL] = 0x40,
+               [PLL_OFF_CAL_VAL] = 0x44,
+       },
 };
 EXPORT_SYMBOL_GPL(clk_alpha_pll_regs);
 
@@ -120,6 +140,10 @@ EXPORT_SYMBOL_GPL(clk_alpha_pll_regs);
 #define FABIA_PLL_OUT_MASK     0x7
 #define FABIA_PLL_RATE_MARGIN  500
 
+#define TRION_PLL_STANDBY      0x0
+#define TRION_PLL_RUN          0x1
+#define TRION_PLL_OUT_MASK     0x7
+
 #define pll_alpha_width(p)                                     \
                ((PLL_ALPHA_VAL_U(p) - PLL_ALPHA_VAL(p) == 4) ? \
                                 ALPHA_REG_BITWIDTH : ALPHA_REG_16BIT_WIDTH)
@@ -730,6 +754,130 @@ static long alpha_pll_huayra_round_rate(struct clk_hw *hw, unsigned long rate,
        return alpha_huayra_pll_round_rate(rate, *prate, &l, &a);
 }
 
+/*
+ * Report whether a Trion PLL is running with its global output enabled.
+ *
+ * Reads the MODE and OPMODE registers; if either read fails the PLL is
+ * reported as disabled. Returns 1 only when OPMODE has TRION_PLL_RUN set
+ * and MODE has PLL_OUTCTRL set, 0 otherwise.
+ */
+static int trion_pll_is_enabled(struct clk_alpha_pll *pll,
+                               struct regmap *regmap)
+{
+       u32 mode, opmode;
+       int err;
+
+       err = regmap_read(regmap, PLL_MODE(pll), &mode);
+       err |= regmap_read(regmap, PLL_OPMODE(pll), &opmode);
+       if (err)
+               return 0;
+
+       if (!(opmode & TRION_PLL_RUN))
+               return 0;
+
+       return !!(mode & PLL_OUTCTRL);
+}
+
+/* clk_ops .is_enabled hook: query the PLL through its own regmap. */
+static int clk_trion_pll_is_enabled(struct clk_hw *hw)
+{
+       struct clk_alpha_pll *trion = to_clk_alpha_pll(hw);
+       struct regmap *map = trion->clkr.regmap;
+
+       return trion_pll_is_enabled(trion, map);
+}
+
+/*
+ * clk_ops .enable hook for a Trion PLL.
+ *
+ * When MODE has PLL_VOTE_FSM_ENA set, the PLL is only voted for through
+ * clk_enable_regmap() and the function waits for it to become active.
+ * Otherwise OPMODE is switched to TRION_PLL_RUN, the lock is awaited, the
+ * outputs selected by TRION_PLL_OUT_MASK are enabled in USER_CTL and
+ * finally PLL_OUTCTRL is set in MODE.
+ *
+ * Returns 0 on success or the first error reported by a regmap access or
+ * by one of the wait helpers.
+ */
+static int clk_trion_pll_enable(struct clk_hw *hw)
+{
+       struct clk_alpha_pll *pll = to_clk_alpha_pll(hw);
+       struct regmap *regmap = pll->clkr.regmap;
+       u32 val;
+       int ret;
+
+       ret = regmap_read(regmap, PLL_MODE(pll), &val);
+       if (ret)
+               return ret;
+
+       /* If in FSM mode, just vote for it */
+       if (val & PLL_VOTE_FSM_ENA) {
+               ret = clk_enable_regmap(hw);
+               if (ret)
+                       return ret;
+               return wait_for_pll_enable_active(pll);
+       }
+
+       /*
+        * Set operation mode to RUN. Propagate a failed write right away
+        * instead of letting it surface later as a lock timeout.
+        */
+       ret = regmap_write(regmap, PLL_OPMODE(pll), TRION_PLL_RUN);
+       if (ret)
+               return ret;
+
+       ret = wait_for_pll_enable_lock(pll);
+       if (ret)
+               return ret;
+
+       /* Enable the PLL outputs */
+       ret = regmap_update_bits(regmap, PLL_USER_CTL(pll),
+                                TRION_PLL_OUT_MASK, TRION_PLL_OUT_MASK);
+       if (ret)
+               return ret;
+
+       /* Enable the global PLL outputs */
+       return regmap_update_bits(regmap, PLL_MODE(pll),
+                                PLL_OUTCTRL, PLL_OUTCTRL);
+}
+
+/*
+ * clk_ops .disable hook for a Trion PLL.
+ *
+ * When MODE has PLL_VOTE_FSM_ENA set, only the vote is dropped through
+ * clk_disable_regmap(). Otherwise the enable sequence is undone in reverse:
+ * clear PLL_OUTCTRL, clear the TRION_PLL_OUT_MASK outputs in USER_CTL, then
+ * write TRION_PLL_STANDBY to OPMODE. The function returns silently as soon
+ * as one of the checked regmap accesses fails.
+ */
+static void clk_trion_pll_disable(struct clk_hw *hw)
+{
+       struct clk_alpha_pll *pll = to_clk_alpha_pll(hw);
+       struct regmap *regmap = pll->clkr.regmap;
+       u32 val;
+       int ret;
+
+       ret = regmap_read(regmap, PLL_MODE(pll), &val);
+       if (ret)
+               return;
+
+       /* If in FSM mode, just unvote it */
+       if (val & PLL_VOTE_FSM_ENA) {
+               clk_disable_regmap(hw);
+               return;
+       }
+
+       /* Disable the global PLL output */
+       ret = regmap_update_bits(regmap, PLL_MODE(pll), PLL_OUTCTRL, 0);
+       if (ret)
+               return;
+
+       /* Disable the PLL outputs */
+       ret = regmap_update_bits(regmap, PLL_USER_CTL(pll),
+                                TRION_PLL_OUT_MASK, 0);
+       if (ret)
+               return;
+
+       /* Place the PLL mode in STANDBY */
+       regmap_write(regmap, PLL_OPMODE(pll), TRION_PLL_STANDBY);
+       /* Leave the PLL_RESET_N bit set in MODE; results are not checked. */
+       regmap_update_bits(regmap, PLL_MODE(pll), PLL_RESET_N, PLL_RESET_N);
+}
+
+/*
+ * clk_ops .recalc_rate hook for a Trion PLL.
+ *
+ * Reads the L value and the ALPHA value registers and converts them to a
+ * rate with alpha_pll_calc_rate() using ALPHA_REG_16BIT_WIDTH.
+ *
+ * Returns 0 when either register cannot be read, so that uninitialised
+ * stack values are never turned into a bogus rate.
+ */
+static unsigned long
+clk_trion_pll_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
+{
+       struct clk_alpha_pll *pll = to_clk_alpha_pll(hw);
+       struct regmap *regmap = pll->clkr.regmap;
+       u32 l, frac;
+       u64 prate = parent_rate;
+
+       if (regmap_read(regmap, PLL_L_VAL(pll), &l) ||
+           regmap_read(regmap, PLL_ALPHA_VAL(pll), &frac))
+               return 0;
+
+       return alpha_pll_calc_rate(prate, l, frac, ALPHA_REG_16BIT_WIDTH);
+}
+
+/*
+ * clk_ops .round_rate hook for a Trion PLL.
+ *
+ * The requested rate is first rounded by alpha_pll_round_rate() with
+ * ALPHA_REG_16BIT_WIDTH. If the PLL has no VCO table, or
+ * alpha_pll_find_vco() finds an entry for the rounded rate, that rate is
+ * returned as is. Otherwise it is clamped between the min_freq of the
+ * first VCO entry and the max_freq of the last one.
+ */
+static long clk_trion_pll_round_rate(struct clk_hw *hw, unsigned long rate,
+                                    unsigned long *prate)
+{
+       struct clk_alpha_pll *pll = to_clk_alpha_pll(hw);
+       unsigned long lowest, highest;
+       u64 alpha;
+       u32 l_val;
+
+       rate = alpha_pll_round_rate(rate, *prate, &l_val, &alpha,
+                                   ALPHA_REG_16BIT_WIDTH);
+
+       if (!pll->vco_table)
+               return rate;
+       if (alpha_pll_find_vco(pll, rate))
+               return rate;
+
+       lowest = pll->vco_table[0].min_freq;
+       highest = pll->vco_table[pll->num_vco - 1].max_freq;
+
+       return clamp(rate, lowest, highest);
+}
+
 const struct clk_ops clk_alpha_pll_ops = {
        .enable = clk_alpha_pll_enable,
        .disable = clk_alpha_pll_disable,
@@ -760,6 +908,15 @@ const struct clk_ops clk_alpha_pll_hwfsm_ops = {
 };
 EXPORT_SYMBOL_GPL(clk_alpha_pll_hwfsm_ops);
 
+/*
+ * Operations for a Trion PLL: enable/disable, state query and rate
+ * recalculation/rounding. No .set_rate callback is provided.
+ */
+const struct clk_ops clk_trion_fixed_pll_ops = {
+       .enable = clk_trion_pll_enable,
+       .disable = clk_trion_pll_disable,
+       .is_enabled = clk_trion_pll_is_enabled,
+       .recalc_rate = clk_trion_pll_recalc_rate,
+       .round_rate = clk_trion_pll_round_rate,
+};
+EXPORT_SYMBOL_GPL(clk_trion_fixed_pll_ops);
+
 static unsigned long
 clk_alpha_pll_postdiv_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
 {
@@ -832,7 +989,7 @@ static int clk_alpha_pll_postdiv_set_rate(struct clk_hw *hw, unsigned long rate,
        int div;
 
        /* 16 -> 0xf, 8 -> 0x7, 4 -> 0x3, 2 -> 0x1, 1 -> 0x0 */
-       div = DIV_ROUND_UP_ULL((u64)parent_rate, rate) - 1;
+       div = DIV_ROUND_UP_ULL(parent_rate, rate) - 1;
 
        return regmap_update_bits(pll->clkr.regmap, PLL_USER_CTL(pll),
                                  PLL_POST_DIV_MASK(pll) << PLL_POST_DIV_SHIFT,
@@ -1036,11 +1193,6 @@ static unsigned long clk_alpha_pll_postdiv_fabia_recalc_rate(struct clk_hw *hw,
        u32 i, div = 1, val;
        int ret;
 
-       if (!pll->post_div_table) {
-               pr_err("Missing the post_div_table for the PLL\n");
-               return -EINVAL;
-       }
-
        ret = regmap_read(pll->clkr.regmap, PLL_USER_CTL(pll), &val);
        if (ret)
                return ret;
@@ -1058,16 +1210,71 @@ static unsigned long clk_alpha_pll_postdiv_fabia_recalc_rate(struct clk_hw *hw,
        return (parent_rate / div);
 }
 
-static long clk_alpha_pll_postdiv_fabia_round_rate(struct clk_hw *hw,
-                               unsigned long rate, unsigned long *prate)
+static unsigned long
+clk_trion_pll_postdiv_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
 {
        struct clk_alpha_pll_postdiv *pll = to_clk_alpha_pll_postdiv(hw);
+       struct regmap *regmap = pll->clkr.regmap;
+       u32 i, div = 1, val;
 
-       if (!pll->post_div_table) {
-               pr_err("Missing the post_div_table for the PLL\n");
-               return -EINVAL;
+       regmap_read(regmap, PLL_USER_CTL(pll), &val);
+
+       val >>= pll->post_div_shift;
+       val &= PLL_POST_DIV_MASK(pll);
+
+       for (i = 0; i < pll->num_post_div; i++) {
+               if (pll->post_div_table[i].val == val) {
+                       div = pll->post_div_table[i].div;
+                       break;
+               }
+       }
+
+       return (parent_rate / div);
+}
+
+/*
+ * clk_ops .round_rate hook for a Trion PLL post-divider.
+ *
+ * Delegates to divider_round_rate() with the post-divider table and
+ * register field width of this PLL, rounding to the closest rate
+ * (CLK_DIVIDER_ROUND_CLOSEST).
+ */
+static long
+clk_trion_pll_postdiv_round_rate(struct clk_hw *hw, unsigned long rate,
+                                unsigned long *prate)
+{
+       struct clk_alpha_pll_postdiv *pll = to_clk_alpha_pll_postdiv(hw);
+
+       return divider_round_rate(hw, rate, prate, pll->post_div_table,
+                                 pll->width, CLK_DIVIDER_ROUND_CLOSEST);
+}
+
+/*
+ * clk_ops .set_rate hook for a Trion PLL post-divider.
+ *
+ * Computes the divider as parent_rate / rate rounded up, looks up the
+ * matching register value in the post-divider table and writes it to the
+ * post-divider field of USER_CTL. When no table entry matches, the value 0
+ * is written.
+ *
+ * The field position is taken from pll->post_div_shift, the same shift the
+ * matching recalc_rate callback uses when reading the field back.
+ */
+static int
+clk_trion_pll_postdiv_set_rate(struct clk_hw *hw, unsigned long rate,
+                              unsigned long parent_rate)
+{
+       struct clk_alpha_pll_postdiv *pll = to_clk_alpha_pll_postdiv(hw);
+       struct regmap *regmap = pll->clkr.regmap;
+       int i, val = 0, div;
+
+       div = DIV_ROUND_UP_ULL(parent_rate, rate);
+       for (i = 0; i < pll->num_post_div; i++) {
+               if (pll->post_div_table[i].div == div) {
+                       val = pll->post_div_table[i].val;
+                       break;
+               }
        }
 
+       return regmap_update_bits(regmap, PLL_USER_CTL(pll),
+                                 PLL_POST_DIV_MASK(pll) << pll->post_div_shift,
+                                 val << pll->post_div_shift);
+}
+
+/* Rate operations for a Trion PLL post-divider; exported for SoC drivers. */
+const struct clk_ops clk_trion_pll_postdiv_ops = {
+       .recalc_rate = clk_trion_pll_postdiv_recalc_rate,
+       .round_rate = clk_trion_pll_postdiv_round_rate,
+       .set_rate = clk_trion_pll_postdiv_set_rate,
+};
+EXPORT_SYMBOL_GPL(clk_trion_pll_postdiv_ops);
+
+/*
+ * Round a rate for a Fabia post-divider by delegating to
+ * divider_round_rate() with the PLL's post_div_table and width, using
+ * CLK_DIVIDER_ROUND_CLOSEST. post_div_table is used without a NULL check.
+ */
+static long clk_alpha_pll_postdiv_fabia_round_rate(struct clk_hw *hw,
+                               unsigned long rate, unsigned long *prate)
+{
+       struct clk_alpha_pll_postdiv *pll = to_clk_alpha_pll_postdiv(hw);
+
        return divider_round_rate(hw, rate, prate, pll->post_div_table,
                                pll->width, CLK_DIVIDER_ROUND_CLOSEST);
 }
@@ -1089,12 +1296,7 @@ static int clk_alpha_pll_postdiv_fabia_set_rate(struct clk_hw *hw,
        if (val & PLL_VOTE_FSM_ENA)
                return 0;
 
-       if (!pll->post_div_table) {
-               pr_err("Missing the post_div_table for the PLL\n");
-               return -EINVAL;
-       }
-
-       div = DIV_ROUND_UP_ULL((u64)parent_rate, rate);
+       div = DIV_ROUND_UP_ULL(parent_rate, rate);
        for (i = 0; i < pll->num_post_div; i++) {
                if (pll->post_div_table[i].div == div) {
                        val = pll->post_div_table[i].val;
index 66755f0..15f27f4 100644 (file)
@@ -13,22 +13,27 @@ enum {
        CLK_ALPHA_PLL_TYPE_HUAYRA,
        CLK_ALPHA_PLL_TYPE_BRAMMO,
        CLK_ALPHA_PLL_TYPE_FABIA,
+       CLK_ALPHA_PLL_TYPE_TRION,
        CLK_ALPHA_PLL_TYPE_MAX,
 };
 
 enum {
        PLL_OFF_L_VAL,
+       PLL_OFF_CAL_L_VAL,
        PLL_OFF_ALPHA_VAL,
        PLL_OFF_ALPHA_VAL_U,
        PLL_OFF_USER_CTL,
        PLL_OFF_USER_CTL_U,
+       PLL_OFF_USER_CTL_U1,
        PLL_OFF_CONFIG_CTL,
        PLL_OFF_CONFIG_CTL_U,
+       PLL_OFF_CONFIG_CTL_U1,
        PLL_OFF_TEST_CTL,
        PLL_OFF_TEST_CTL_U,
        PLL_OFF_STATUS,
        PLL_OFF_OPMODE,
        PLL_OFF_FRAC,
+       PLL_OFF_CAL_VAL,
        PLL_OFF_MAX_REGS
 };
 
@@ -117,5 +122,7 @@ void clk_alpha_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap,
                             const struct alpha_pll_config *config);
 void clk_fabia_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap,
                                const struct alpha_pll_config *config);
+extern const struct clk_ops clk_trion_fixed_pll_ops;
+extern const struct clk_ops clk_trion_pll_postdiv_ops;
 
 #endif
index 8c02bff..b98b81e 100644 (file)
@@ -119,7 +119,7 @@ static int update_config(struct clk_rcg2 *rcg)
        }
 
        WARN(1, "%s: rcg didn't update its configuration.", name);
-       return 0;
+       return -EBUSY;
 }
 
 static int clk_rcg2_set_parent(struct clk_hw *hw, u8 index)
@@ -1105,8 +1105,6 @@ static int clk_rcg2_enable_dfs(const struct clk_rcg_dfs_data *data,
 
        rcg->freq_tbl = NULL;
 
-       pr_debug("DFS registered for clk %s\n", init->name);
-
        return 0;
 }
 
@@ -1117,12 +1115,8 @@ int qcom_cc_register_rcg_dfs(struct regmap *regmap,
 
        for (i = 0; i < len; i++) {
                ret = clk_rcg2_enable_dfs(&rcgs[i], regmap);
-               if (ret) {
-                       const char *name = rcgs[i].init->name;
-
-                       pr_err("DFS register failed for clk %s\n", name);
+               if (ret)
                        return ret;
-               }
        }
 
        return 0;
index a32bfae..96a36f6 100644 (file)
@@ -85,7 +85,10 @@ static DEFINE_MUTEX(rpmh_clk_lock);
                .hw.init = &(struct clk_init_data){                     \
                        .ops = &clk_rpmh_ops,                           \
                        .name = #_name,                                 \
-                       .parent_names = (const char *[]){ "xo_board" }, \
+                       .parent_data =  &(const struct clk_parent_data){ \
+                                       .fw_name = "xo",                \
+                                       .name = "xo_board",             \
+                       },                                              \
                        .num_parents = 1,                               \
                },                                                      \
        };                                                              \
@@ -100,7 +103,10 @@ static DEFINE_MUTEX(rpmh_clk_lock);
                .hw.init = &(struct clk_init_data){                     \
                        .ops = &clk_rpmh_ops,                           \
                        .name = #_name_active,                          \
-                       .parent_names = (const char *[]){ "xo_board" }, \
+                       .parent_data =  &(const struct clk_parent_data){ \
+                                       .fw_name = "xo",                \
+                                       .name = "xo_board",             \
+                       },                                              \
                        .num_parents = 1,                               \
                },                                                      \
        }
@@ -358,6 +364,33 @@ static const struct clk_rpmh_desc clk_rpmh_sdm845 = {
        .num_clks = ARRAY_SIZE(sdm845_rpmh_clocks),
 };
 
+/*
+ * SM8150 RPMh clock definitions. Each invocation names a clock and its
+ * "_ao" counterpart; the resulting sm8150_<name> objects are referenced by
+ * sm8150_rpmh_clocks[].
+ * NOTE(review): the remaining arguments are presumably the RPMh resource
+ * name and a divider (plus an ARC level for bi_tcxo) - confirm against the
+ * DEFINE_CLK_RPMH_ARC/VRM macro definitions.
+ */
+DEFINE_CLK_RPMH_ARC(sm8150, bi_tcxo, bi_tcxo_ao, "xo.lvl", 0x3, 2);
+DEFINE_CLK_RPMH_VRM(sm8150, ln_bb_clk2, ln_bb_clk2_ao, "lnbclka2", 2);
+DEFINE_CLK_RPMH_VRM(sm8150, ln_bb_clk3, ln_bb_clk3_ao, "lnbclka3", 2);
+DEFINE_CLK_RPMH_VRM(sm8150, rf_clk1, rf_clk1_ao, "rfclka1", 1);
+DEFINE_CLK_RPMH_VRM(sm8150, rf_clk2, rf_clk2_ao, "rfclka2", 1);
+DEFINE_CLK_RPMH_VRM(sm8150, rf_clk3, rf_clk3_ao, "rfclka3", 1);
+
+/* SM8150 RPMh clocks, indexed by the RPMH_* clock identifiers. */
+static struct clk_hw *sm8150_rpmh_clocks[] = {
+       [RPMH_CXO_CLK]          = &sm8150_bi_tcxo.hw,
+       [RPMH_CXO_CLK_A]        = &sm8150_bi_tcxo_ao.hw,
+       [RPMH_LN_BB_CLK2]       = &sm8150_ln_bb_clk2.hw,
+       [RPMH_LN_BB_CLK2_A]     = &sm8150_ln_bb_clk2_ao.hw,
+       [RPMH_LN_BB_CLK3]       = &sm8150_ln_bb_clk3.hw,
+       [RPMH_LN_BB_CLK3_A]     = &sm8150_ln_bb_clk3_ao.hw,
+       [RPMH_RF_CLK1]          = &sm8150_rf_clk1.hw,
+       [RPMH_RF_CLK1_A]        = &sm8150_rf_clk1_ao.hw,
+       [RPMH_RF_CLK2]          = &sm8150_rf_clk2.hw,
+       [RPMH_RF_CLK2_A]        = &sm8150_rf_clk2_ao.hw,
+       [RPMH_RF_CLK3]          = &sm8150_rf_clk3.hw,
+       [RPMH_RF_CLK3_A]        = &sm8150_rf_clk3_ao.hw,
+};
+
+/* Match data for "qcom,sm8150-rpmh-clk": the SM8150 clock table and its size. */
+static const struct clk_rpmh_desc clk_rpmh_sm8150 = {
+       .clks = sm8150_rpmh_clocks,
+       .num_clks = ARRAY_SIZE(sm8150_rpmh_clocks),
+};
+
 static struct clk_hw *of_clk_rpmh_hw_get(struct of_phandle_args *clkspec,
                                         void *data)
 {
@@ -386,6 +419,7 @@ static int clk_rpmh_probe(struct platform_device *pdev)
        hw_clks = desc->clks;
 
        for (i = 0; i < desc->num_clks; i++) {
+               const char *name = hw_clks[i]->init->name;
                u32 res_addr;
                size_t aux_data_len;
                const struct bcm_db *data;
@@ -416,8 +450,7 @@ static int clk_rpmh_probe(struct platform_device *pdev)
 
                ret = devm_clk_hw_register(&pdev->dev, hw_clks[i]);
                if (ret) {
-                       dev_err(&pdev->dev, "failed to register %s\n",
-                               hw_clks[i]->init->name);
+                       dev_err(&pdev->dev, "failed to register %s\n", name);
                        return ret;
                }
        }
@@ -437,6 +470,7 @@ static int clk_rpmh_probe(struct platform_device *pdev)
 
 static const struct of_device_id clk_rpmh_match_table[] = {
        { .compatible = "qcom,sdm845-rpmh-clk", .data = &clk_rpmh_sdm845},
+       { .compatible = "qcom,sm8150-rpmh-clk", .data = &clk_rpmh_sm8150},
        { }
 };
 MODULE_DEVICE_TABLE(of, clk_rpmh_match_table);
index a6b2f86..28ddc74 100644 (file)
@@ -306,4 +306,24 @@ int qcom_cc_probe(struct platform_device *pdev, const struct qcom_cc_desc *desc)
 }
 EXPORT_SYMBOL_GPL(qcom_cc_probe);
 
+/*
+ * Probe a clock controller using the memory resource at @index.
+ *
+ * Maps the IORESOURCE_MEM resource number @index of @pdev, creates an MMIO
+ * regmap over it with desc->config and hands over to
+ * qcom_cc_really_probe().
+ *
+ * Returns the result of qcom_cc_really_probe(), or the error code encoded
+ * in the pointer returned by devm_ioremap_resource() or
+ * devm_regmap_init_mmio() when mapping or regmap creation fails.
+ */
+int qcom_cc_probe_by_index(struct platform_device *pdev, int index,
+                          const struct qcom_cc_desc *desc)
+{
+       struct regmap *regmap;
+       struct resource *res;
+       void __iomem *base;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, index);
+       base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(base))
+               return PTR_ERR(base);
+
+       regmap = devm_regmap_init_mmio(&pdev->dev, base, desc->config);
+       if (IS_ERR(regmap))
+               return PTR_ERR(regmap);
+
+       return qcom_cc_really_probe(pdev, desc, regmap);
+}
+EXPORT_SYMBOL_GPL(qcom_cc_probe_by_index);
+
 MODULE_LICENSE("GPL v2");
index 1e2a8bd..bb39a7e 100644 (file)
@@ -61,5 +61,7 @@ extern int qcom_cc_really_probe(struct platform_device *pdev,
                                struct regmap *regmap);
 extern int qcom_cc_probe(struct platform_device *pdev,
                         const struct qcom_cc_desc *desc);
+extern int qcom_cc_probe_by_index(struct platform_device *pdev, int index,
+                                 const struct qcom_cc_desc *desc);
 
 #endif
index 39ade58..e01f5f5 100644 (file)
@@ -1108,7 +1108,7 @@ static struct clk_rcg2 sdcc2_apps_clk_src = {
                .name = "sdcc2_apps_clk_src",
                .parent_names = gcc_xo_gpll0_gpll2_gpll0_out_main_div2,
                .num_parents = 4,
-               .ops = &clk_rcg2_ops,
+               .ops = &clk_rcg2_floor_ops,
        },
 };
 
index 0336882..091acd5 100644 (file)
@@ -1042,7 +1042,7 @@ static struct clk_rcg2 sdcc2_apps_clk_src = {
                .name = "sdcc2_apps_clk_src",
                .parent_names = gcc_parent_names_4,
                .num_parents = 4,
-               .ops = &clk_rcg2_ops,
+               .ops = &clk_rcg2_floor_ops,
        },
 };
 
@@ -1066,7 +1066,7 @@ static struct clk_rcg2 sdcc4_apps_clk_src = {
                .name = "sdcc4_apps_clk_src",
                .parent_names = gcc_parent_names_1,
                .num_parents = 3,
-               .ops = &clk_rcg2_ops,
+               .ops = &clk_rcg2_floor_ops,
        },
 };
 
index 29cf464..bd32212 100644 (file)
@@ -1057,7 +1057,7 @@ static struct clk_rcg2 sdcc1_apps_clk_src = {
                .name = "sdcc1_apps_clk_src",
                .parent_names = gcc_parent_names_13,
                .num_parents = 5,
-               .ops = &clk_rcg2_ops,
+               .ops = &clk_rcg2_floor_ops,
        },
 };
 
@@ -1103,7 +1103,7 @@ static struct clk_rcg2 sdcc2_apps_clk_src = {
                .name = "sdcc2_apps_clk_src",
                .parent_names = gcc_parent_names_14,
                .num_parents = 4,
-               .ops = &clk_rcg2_ops,
+               .ops = &clk_rcg2_floor_ops,
        },
 };
 
@@ -2604,6 +2604,32 @@ static struct clk_branch gcc_usb_hs_system_clk = {
        },
 };
 
+/*
+ * Branch clock whose halt status and enable bit (bit 0) share register
+ * 0x1e004. No parent is listed in the init data.
+ */
+static struct clk_branch gcc_wdsp_q6ss_ahbs_clk = {
+       .halt_reg = 0x1e004,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x1e004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_wdsp_q6ss_ahbs_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+/*
+ * Branch clock whose halt status and enable bit (bit 0) share register
+ * 0x1e008. No parent is listed in the init data.
+ */
+static struct clk_branch gcc_wdsp_q6ss_axim_clk = {
+       .halt_reg = 0x1e008,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x1e008,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_wdsp_q6ss_axim_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
 static struct clk_hw *gcc_qcs404_hws[] = {
        &cxo.hw,
 };
@@ -2749,6 +2775,9 @@ static struct clk_regmap *gcc_qcs404_clocks[] = {
        [GCC_QDSS_DAP_CLK] = &gcc_qdss_dap_clk.clkr,
        [GCC_DCC_CLK] = &gcc_dcc_clk.clkr,
        [GCC_DCC_XO_CLK] = &gcc_dcc_xo_clk.clkr,
+       [GCC_WCSS_Q6_AHB_CLK] = &gcc_wdsp_q6ss_ahbs_clk.clkr,
+       [GCC_WCSS_Q6_AXIM_CLK] =  &gcc_wdsp_q6ss_axim_clk.clkr,
+
 };
 
 static const struct qcom_reset_map gcc_qcs404_resets[] = {
@@ -2774,6 +2803,7 @@ static const struct qcom_reset_map gcc_qcs404_resets[] = {
        [GCC_PCIE_0_SLEEP_ARES] = { 0x3e040, 1 },
        [GCC_PCIE_0_PIPE_ARES] = { 0x3e040, 0 },
        [GCC_EMAC_BCR] = { 0x4e000 },
+       [GCC_WDSP_RESTART] = {0x19000},
 };
 
 static const struct regmap_config gcc_qcs404_regmap_config = {
index 8827db2..bf57308 100644 (file)
@@ -787,7 +787,7 @@ static struct clk_rcg2 sdcc2_apps_clk_src = {
                .name = "sdcc2_apps_clk_src",
                .parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div_gpll4,
                .num_parents = 4,
-               .ops = &clk_rcg2_ops,
+               .ops = &clk_rcg2_floor_ops,
        },
 };
 
index 7131dcf..95be125 100644 (file)
@@ -685,7 +685,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
                .name = "gcc_sdcc2_apps_clk_src",
                .parent_names = gcc_parent_names_10,
                .num_parents = 5,
-               .ops = &clk_rcg2_ops,
+               .ops = &clk_rcg2_floor_ops,
        },
 };
 
@@ -709,7 +709,7 @@ static struct clk_rcg2 gcc_sdcc4_apps_clk_src = {
                .name = "gcc_sdcc4_apps_clk_src",
                .parent_names = gcc_parent_names_0,
                .num_parents = 4,
-               .ops = &clk_rcg2_ops,
+               .ops = &clk_rcg2_floor_ops,
        },
 };
 
diff --git a/drivers/clk/qcom/gcc-sm8150.c b/drivers/clk/qcom/gcc-sm8150.c
new file mode 100644 (file)
index 0000000..2087721
--- /dev/null
@@ -0,0 +1,3588 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2019, The Linux Foundation. All rights reserved.
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/clk-provider.h>
+#include <linux/regmap.h>
+#include <linux/reset-controller.h>
+
+#include <dt-bindings/clock/qcom,gcc-sm8150.h>
+
+#include "common.h"
+#include "clk-alpha-pll.h"
+#include "clk-branch.h"
+#include "clk-pll.h"
+#include "clk-rcg.h"
+#include "clk-regmap.h"
+#include "reset.h"
+
+/*
+ * Identifiers for the possible clock sources. They are used as the first
+ * member of the gcc_parent_map_N entries and as the source argument of the
+ * F() rows in the frequency tables.
+ */
+enum {
+       P_BI_TCXO,
+       P_AUD_REF_CLK,
+       P_CORE_BI_PLL_TEST_SE,
+       P_GPLL0_OUT_EVEN,
+       P_GPLL0_OUT_MAIN,
+       P_GPLL7_OUT_MAIN,
+       P_GPLL9_OUT_MAIN,
+       P_SLEEP_CLK,
+};
+
+/*
+ * Single VCO entry shared by the Trion PLLs in this file.
+ * NOTE(review): fields are presumably { min_freq, max_freq, val } in Hz -
+ * confirm against struct pll_vco.
+ */
+static const struct pll_vco trion_vco[] = {
+       { 249600000, 2000000000, 0 },
+};
+
+/*
+ * GPLL0: Trion-type alpha PLL at offset 0x0, fed by "bi_tcxo".
+ * Uses enable register 0x52000, bit 0.
+ */
+static struct clk_alpha_pll gpll0 = {
+       .offset = 0x0,
+       .vco_table = trion_vco,
+       .num_vco = ARRAY_SIZE(trion_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_TRION],
+       .clkr = {
+               .enable_reg = 0x52000,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gpll0",
+                       .parent_data = &(const struct clk_parent_data){
+                               .fw_name = "bi_tcxo",
+                               .name = "bi_tcxo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_trion_fixed_pll_ops,
+               },
+       },
+};
+
+/*
+ * Register value -> divider mapping for the Trion "even" post-divider
+ * (dividers 1, 2, 4 and 8), terminated by an empty entry.
+ */
+static const struct clk_div_table post_div_table_trion_even[] = {
+       { 0x0, 1 },
+       { 0x1, 2 },
+       { 0x3, 4 },
+       { 0x7, 8 },
+       { }
+};
+
+/*
+ * Even post-divider output of GPLL0.
+ *
+ * It shares GPLL0's register block (offset 0x0) and its divider field sits
+ * at bit 8 of USER_CTL, 4 bits wide. The clock divides GPLL0's output, so
+ * its parent is gpll0 rather than the crystal: the post-divider callbacks
+ * derive the rate as parent_rate / div, and the frequency tables treat
+ * P_GPLL0_OUT_EVEN as a 300 MHz source (for example 50 MHz with a divider
+ * of 6).
+ */
+static struct clk_alpha_pll_postdiv gpll0_out_even = {
+       .offset = 0x0,
+       .post_div_shift = 8,
+       .post_div_table = post_div_table_trion_even,
+       .num_post_div = ARRAY_SIZE(post_div_table_trion_even),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_TRION],
+       .width = 4,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gpll0_out_even",
+               .parent_data = &(const struct clk_parent_data){
+                       .hw = &gpll0.clkr.hw,
+               },
+               .num_parents = 1,
+               .ops = &clk_trion_pll_postdiv_ops,
+       },
+};
+
+/*
+ * GPLL7: Trion-type alpha PLL at offset 0x1a000, fed by "bi_tcxo".
+ * Uses enable register 0x52000, bit 7.
+ */
+static struct clk_alpha_pll gpll7 = {
+       .offset = 0x1a000,
+       .vco_table = trion_vco,
+       .num_vco = ARRAY_SIZE(trion_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_TRION],
+       .clkr = {
+               .enable_reg = 0x52000,
+               .enable_mask = BIT(7),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gpll7",
+                       .parent_data = &(const struct clk_parent_data){
+                               .fw_name = "bi_tcxo",
+                               .name = "bi_tcxo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_trion_fixed_pll_ops,
+               },
+       },
+};
+
+/*
+ * GPLL9: Trion-type alpha PLL at offset 0x1c000, fed by "bi_tcxo".
+ * Uses enable register 0x52000, bit 9.
+ */
+static struct clk_alpha_pll gpll9 = {
+       .offset = 0x1c000,
+       .vco_table = trion_vco,
+       .num_vco = ARRAY_SIZE(trion_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_TRION],
+       .clkr = {
+               .enable_reg = 0x52000,
+               .enable_mask = BIT(9),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gpll9",
+                       .parent_data = &(const struct clk_parent_data){
+                               .fw_name = "bi_tcxo",
+                               .name = "bi_tcxo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_trion_fixed_pll_ops,
+               },
+       },
+};
+
+/*
+ * Parent set 0: TCXO, GPLL0 main, GPLL0 even, PLL test input.
+ * gcc_parents_0 lists the clocks in the same order as gcc_parent_map_0.
+ * NOTE(review): the second member of each map entry is presumably the
+ * hardware mux select value - confirm against struct parent_map.
+ */
+static const struct parent_map gcc_parent_map_0[] = {
+       { P_BI_TCXO, 0 },
+       { P_GPLL0_OUT_MAIN, 1 },
+       { P_GPLL0_OUT_EVEN, 6 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parents_0[] = {
+       { .fw_name = "bi_tcxo", .name = "bi_tcxo" },
+       { .hw = &gpll0.clkr.hw },
+       { .hw = &gpll0_out_even.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se" },
+};
+
+/*
+ * Parent set 1: TCXO, GPLL0 main, sleep clock, GPLL0 even, PLL test input.
+ * gcc_parents_1 lists the clocks in the same order as gcc_parent_map_1.
+ */
+static const struct parent_map gcc_parent_map_1[] = {
+       { P_BI_TCXO, 0 },
+       { P_GPLL0_OUT_MAIN, 1 },
+       { P_SLEEP_CLK, 5 },
+       { P_GPLL0_OUT_EVEN, 6 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parents_1[] = {
+       { .fw_name = "bi_tcxo", .name = "bi_tcxo" },
+       { .hw = &gpll0.clkr.hw },
+       { .fw_name = "sleep_clk", .name = "sleep_clk" },
+       { .hw = &gpll0_out_even.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se" },
+};
+
+/*
+ * Parent set 2: TCXO, sleep clock, PLL test input.
+ * gcc_parents_2 lists the clocks in the same order as gcc_parent_map_2.
+ */
+static const struct parent_map gcc_parent_map_2[] = {
+       { P_BI_TCXO, 0 },
+       { P_SLEEP_CLK, 5 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parents_2[] = {
+       { .fw_name = "bi_tcxo", .name = "bi_tcxo" },
+       { .fw_name = "sleep_clk", .name = "sleep_clk" },
+       { .fw_name = "core_bi_pll_test_se" },
+};
+
+/*
+ * Parent set 3: TCXO, GPLL0 main, PLL test input.
+ * gcc_parents_3 lists the clocks in the same order as gcc_parent_map_3.
+ */
+static const struct parent_map gcc_parent_map_3[] = {
+       { P_BI_TCXO, 0 },
+       { P_GPLL0_OUT_MAIN, 1 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parents_3[] = {
+       { .fw_name = "bi_tcxo", .name = "bi_tcxo" },
+       { .hw = &gpll0.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se"},
+};
+
+/*
+ * Parent set 4: TCXO and PLL test input only.
+ * gcc_parents_4 lists the clocks in the same order as gcc_parent_map_4.
+ */
+static const struct parent_map gcc_parent_map_4[] = {
+       { P_BI_TCXO, 0 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parents_4[] = {
+       { .fw_name = "bi_tcxo", .name = "bi_tcxo" },
+       { .fw_name = "core_bi_pll_test_se" },
+};
+
+/*
+ * Parent set 5: TCXO, GPLL0 main, GPLL7 main, GPLL0 even, PLL test input.
+ * gcc_parents_5 lists the clocks in the same order as gcc_parent_map_5.
+ */
+static const struct parent_map gcc_parent_map_5[] = {
+       { P_BI_TCXO, 0 },
+       { P_GPLL0_OUT_MAIN, 1 },
+       { P_GPLL7_OUT_MAIN, 3 },
+       { P_GPLL0_OUT_EVEN, 6 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parents_5[] = {
+       { .fw_name = "bi_tcxo", .name = "bi_tcxo" },
+       { .hw = &gpll0.clkr.hw },
+       { .hw = &gpll7.clkr.hw },
+       { .hw = &gpll0_out_even.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se" },
+};
+
+/*
+ * Parent set 6: TCXO, GPLL0 main, GPLL9 main, GPLL0 even, PLL test input.
+ * gcc_parents_6 lists the clocks in the same order as gcc_parent_map_6.
+ */
+static const struct parent_map gcc_parent_map_6[] = {
+       { P_BI_TCXO, 0 },
+       { P_GPLL0_OUT_MAIN, 1 },
+       { P_GPLL9_OUT_MAIN, 2 },
+       { P_GPLL0_OUT_EVEN, 6 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parents_6[] = {
+       { .fw_name = "bi_tcxo", .name = "bi_tcxo" },
+       { .hw = &gpll0.clkr.hw },
+       { .hw = &gpll9.clkr.hw },
+       { .hw = &gpll0_out_even.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se" },
+};
+
+/*
+ * Parent set 7: TCXO, GPLL0 main, audio reference clock, GPLL0 even,
+ * PLL test input.
+ * gcc_parents_7 lists the clocks in the same order as gcc_parent_map_7.
+ */
+static const struct parent_map gcc_parent_map_7[] = {
+       { P_BI_TCXO, 0 },
+       { P_GPLL0_OUT_MAIN, 1 },
+       { P_AUD_REF_CLK, 2 },
+       { P_GPLL0_OUT_EVEN, 6 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parents_7[] = {
+       { .fw_name = "bi_tcxo", .name = "bi_tcxo" },
+       { .hw = &gpll0.clkr.hw },
+       { .fw_name = "aud_ref_clk", .name = "aud_ref_clk" },
+       { .hw = &gpll0_out_even.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se" },
+};
+
+/*
+ * Frequency table for gcc_cpuss_ahb_clk_src, terminated by an empty entry.
+ * NOTE(review): F() arguments are presumably (rate in Hz, source, divider,
+ * M, N) - confirm against the F() macro definition.
+ */
+static const struct freq_tbl ftbl_gcc_cpuss_ahb_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(50000000, P_GPLL0_OUT_MAIN, 12, 0, 0),
+       F(100000000, P_GPLL0_OUT_MAIN, 6, 0, 0),
+       { }
+};
+
+/*
+ * RCG at command register 0x48014 with a 5-bit divider field and no M/N
+ * counter (mnd_width 0). It selects among parent set 0 and follows
+ * ftbl_gcc_cpuss_ahb_clk_src. The parent count is derived from the parent
+ * array so the two cannot drift apart.
+ */
+static struct clk_rcg2 gcc_cpuss_ahb_clk_src = {
+       .cmd_rcgr = 0x48014,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_cpuss_ahb_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_cpuss_ahb_clk_src",
+               .parent_data = gcc_parents_0,
+               .num_parents = ARRAY_SIZE(gcc_parents_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+/*
+ * Rate table used by gcc_emac_ptp_clk_src; the empty entry ends the table.
+ * F() is defined elsewhere - its arguments are presumably (rate in Hz,
+ * source, divider, m, n); confirm against the macro.
+ */
+static const struct freq_tbl ftbl_gcc_emac_ptp_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0),
+       F(125000000, P_GPLL7_OUT_MAIN, 4, 0, 0),
+       F(250000000, P_GPLL7_OUT_MAIN, 2, 0, 0),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_emac_ptp_clk_src" (cmd_rcgr 0x6038).
+ * Rates come from ftbl_gcc_emac_ptp_clk_src; parents are given by
+ * gcc_parents_5 together with gcc_parent_map_5.
+ */
+static struct clk_rcg2 gcc_emac_ptp_clk_src = {
+       .cmd_rcgr = 0x6038,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_5,
+       .freq_tbl = ftbl_gcc_emac_ptp_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_emac_ptp_clk_src",
+               .parent_data = gcc_parents_5,
+               /* Derived from the array so the count cannot drift from it. */
+               .num_parents = ARRAY_SIZE(gcc_parents_5),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+/*
+ * Rate table used by gcc_emac_rgmii_clk_src; the empty entry ends the
+ * table. F() is defined elsewhere - its arguments are presumably (rate in
+ * Hz, source, divider, m, n); confirm against the macro.
+ */
+static const struct freq_tbl ftbl_gcc_emac_rgmii_clk_src[] = {
+       F(2500000, P_BI_TCXO, 1, 25, 192),
+       F(5000000, P_BI_TCXO, 1, 25, 96),
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(25000000, P_GPLL0_OUT_EVEN, 12, 0, 0),
+       F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0),
+       F(125000000, P_GPLL7_OUT_MAIN, 4, 0, 0),
+       F(250000000, P_GPLL7_OUT_MAIN, 2, 0, 0),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_emac_rgmii_clk_src" (cmd_rcgr 0x601c).
+ * Rates come from ftbl_gcc_emac_rgmii_clk_src; parents are given by
+ * gcc_parents_5 together with gcc_parent_map_5.
+ */
+static struct clk_rcg2 gcc_emac_rgmii_clk_src = {
+       .cmd_rcgr = 0x601c,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_5,
+       .freq_tbl = ftbl_gcc_emac_rgmii_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_emac_rgmii_clk_src",
+               .parent_data = gcc_parents_5,
+               /* Derived from the array so the count cannot drift from it. */
+               .num_parents = ARRAY_SIZE(gcc_parents_5),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+/*
+ * Rate table shared by gcc_gp1_clk_src, gcc_gp2_clk_src and
+ * gcc_gp3_clk_src; the empty entry ends the table. F() is defined
+ * elsewhere - its arguments are presumably (rate in Hz, source, divider,
+ * m, n); confirm against the macro.
+ */
+static const struct freq_tbl ftbl_gcc_gp1_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(25000000, P_GPLL0_OUT_EVEN, 12, 0, 0),
+       F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0),
+       F(100000000, P_GPLL0_OUT_MAIN, 6, 0, 0),
+       F(200000000, P_GPLL0_OUT_MAIN, 3, 0, 0),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_gp1_clk_src" (cmd_rcgr 0x64004). Rates
+ * come from ftbl_gcc_gp1_clk_src; its five parents are given by
+ * gcc_parents_1 together with gcc_parent_map_1.
+ */
+static struct clk_rcg2 gcc_gp1_clk_src = {
+       .cmd_rcgr = 0x64004,
+       .hid_width = 5,
+       .mnd_width = 8,
+       .freq_tbl = ftbl_gcc_gp1_clk_src,
+       .parent_map = gcc_parent_map_1,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_gp1_clk_src",
+                       .parent_data = gcc_parents_1,
+                       .num_parents = 5,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_gp2_clk_src" (cmd_rcgr 0x65004). It reuses
+ * the gp1 rate table, ftbl_gcc_gp1_clk_src; its five parents are given by
+ * gcc_parents_1 together with gcc_parent_map_1.
+ */
+static struct clk_rcg2 gcc_gp2_clk_src = {
+       .cmd_rcgr = 0x65004,
+       .hid_width = 5,
+       .mnd_width = 8,
+       .freq_tbl = ftbl_gcc_gp1_clk_src,
+       .parent_map = gcc_parent_map_1,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_gp2_clk_src",
+                       .parent_data = gcc_parents_1,
+                       .num_parents = 5,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_gp3_clk_src" (cmd_rcgr 0x66004). It reuses
+ * the gp1 rate table, ftbl_gcc_gp1_clk_src; its five parents are given by
+ * gcc_parents_1 together with gcc_parent_map_1.
+ */
+static struct clk_rcg2 gcc_gp3_clk_src = {
+       .cmd_rcgr = 0x66004,
+       .hid_width = 5,
+       .mnd_width = 8,
+       .freq_tbl = ftbl_gcc_gp1_clk_src,
+       .parent_map = gcc_parent_map_1,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_gp3_clk_src",
+                       .parent_data = gcc_parents_1,
+                       .num_parents = 5,
+               },
+       },
+};
+
+/*
+ * Rate table used at least by gcc_pcie_0_aux_clk_src,
+ * gcc_pcie_1_aux_clk_src and gcc_ufs_phy_phy_aux_clk_src; the empty entry
+ * ends the table. F() is defined elsewhere - its arguments are presumably
+ * (rate in Hz, source, divider, m, n); confirm against the macro.
+ */
+static const struct freq_tbl ftbl_gcc_pcie_0_aux_clk_src[] = {
+       F(9600000, P_BI_TCXO, 2, 0, 0),
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_pcie_0_aux_clk_src" (cmd_rcgr 0x6b02c).
+ * Rates come from ftbl_gcc_pcie_0_aux_clk_src; its three parents are given
+ * by gcc_parents_2 together with gcc_parent_map_2.
+ */
+static struct clk_rcg2 gcc_pcie_0_aux_clk_src = {
+       .cmd_rcgr = 0x6b02c,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_pcie_0_aux_clk_src,
+       .parent_map = gcc_parent_map_2,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_pcie_0_aux_clk_src",
+                       .parent_data = gcc_parents_2,
+                       .num_parents = 3,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_pcie_1_aux_clk_src" (cmd_rcgr 0x8d02c).
+ * It reuses ftbl_gcc_pcie_0_aux_clk_src; its three parents are given by
+ * gcc_parents_2 together with gcc_parent_map_2.
+ */
+static struct clk_rcg2 gcc_pcie_1_aux_clk_src = {
+       .cmd_rcgr = 0x8d02c,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_pcie_0_aux_clk_src,
+       .parent_map = gcc_parent_map_2,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_pcie_1_aux_clk_src",
+                       .parent_data = gcc_parents_2,
+                       .num_parents = 3,
+               },
+       },
+};
+
+/*
+ * Rate table used by gcc_pcie_phy_refgen_clk_src; the empty entry ends the
+ * table. F() is defined elsewhere - its arguments are presumably (rate in
+ * Hz, source, divider, m, n); confirm against the macro.
+ */
+static const struct freq_tbl ftbl_gcc_pcie_phy_refgen_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(100000000, P_GPLL0_OUT_MAIN, 6, 0, 0),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_pcie_phy_refgen_clk_src" (cmd_rcgr
+ * 0x6f014). Rates come from ftbl_gcc_pcie_phy_refgen_clk_src; its four
+ * parents are given by gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_pcie_phy_refgen_clk_src = {
+       .cmd_rcgr = 0x6f014,
+       .hid_width = 5,
+       .mnd_width = 0,
+       .freq_tbl = ftbl_gcc_pcie_phy_refgen_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_pcie_phy_refgen_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * Rate table used by gcc_pdm2_clk_src; the empty entry ends the table.
+ * F() is defined elsewhere - its arguments are presumably (rate in Hz,
+ * source, divider, m, n); confirm against the macro.
+ */
+static const struct freq_tbl ftbl_gcc_pdm2_clk_src[] = {
+       F(9600000, P_BI_TCXO, 2, 0, 0),
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(60000000, P_GPLL0_OUT_MAIN, 10, 0, 0),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_pdm2_clk_src" (cmd_rcgr 0x33010). Rates
+ * come from ftbl_gcc_pdm2_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_pdm2_clk_src = {
+       .cmd_rcgr = 0x33010,
+       .hid_width = 5,
+       .mnd_width = 0,
+       .freq_tbl = ftbl_gcc_pdm2_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_pdm2_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * Rate table used by gcc_qspi_core_clk_src; the empty entry ends the
+ * table. F() is defined elsewhere - its arguments are presumably (rate in
+ * Hz, source, divider, m, n); confirm against the macro.
+ */
+static const struct freq_tbl ftbl_gcc_qspi_core_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(75000000, P_GPLL0_OUT_EVEN, 4, 0, 0),
+       F(150000000, P_GPLL0_OUT_MAIN, 4, 0, 0),
+       F(300000000, P_GPLL0_OUT_MAIN, 2, 0, 0),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qspi_core_clk_src" (cmd_rcgr 0x4b008).
+ * Rates come from ftbl_gcc_qspi_core_clk_src; its four parents are given
+ * by gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qspi_core_clk_src = {
+       .cmd_rcgr = 0x4b008,
+       .hid_width = 5,
+       .mnd_width = 0,
+       .freq_tbl = ftbl_gcc_qspi_core_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qspi_core_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * Rate table shared by every gcc_qupv3_wrap{0,1,2}_s*_clk_src defined
+ * below; the empty entry ends the table. F() is defined elsewhere - its
+ * arguments are presumably (rate in Hz, source, divider, m, n); confirm
+ * against the macro. Note that two rows pass a fractional divider (2.5).
+ */
+static const struct freq_tbl ftbl_gcc_qupv3_wrap0_s0_clk_src[] = {
+       F(7372800, P_GPLL0_OUT_EVEN, 1, 384, 15625),
+       F(14745600, P_GPLL0_OUT_EVEN, 1, 768, 15625),
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(29491200, P_GPLL0_OUT_EVEN, 1, 1536, 15625),
+       F(32000000, P_GPLL0_OUT_EVEN, 1, 8, 75),
+       F(48000000, P_GPLL0_OUT_EVEN, 1, 4, 25),
+       F(64000000, P_GPLL0_OUT_EVEN, 1, 16, 75),
+       F(80000000, P_GPLL0_OUT_EVEN, 1, 4, 15),
+       F(96000000, P_GPLL0_OUT_EVEN, 1, 8, 25),
+       F(100000000, P_GPLL0_OUT_EVEN, 3, 0, 0),
+       F(102400000, P_GPLL0_OUT_EVEN, 1, 128, 375),
+       F(112000000, P_GPLL0_OUT_EVEN, 1, 28, 75),
+       F(117964800, P_GPLL0_OUT_EVEN, 1, 6144, 15625),
+       F(120000000, P_GPLL0_OUT_EVEN, 2.5, 0, 0),
+       F(128000000, P_GPLL0_OUT_MAIN, 1, 16, 75),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap0_s0_clk_src" (cmd_rcgr 0x17148).
+ * Rates come from ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are
+ * given by gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap0_s0_clk_src = {
+       .cmd_rcgr = 0x17148,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap0_s0_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap0_s1_clk_src" (cmd_rcgr 0x17278).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap0_s1_clk_src = {
+       .cmd_rcgr = 0x17278,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap0_s1_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap0_s2_clk_src" (cmd_rcgr 0x173a8).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap0_s2_clk_src = {
+       .cmd_rcgr = 0x173a8,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap0_s2_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap0_s3_clk_src" (cmd_rcgr 0x174d8).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap0_s3_clk_src = {
+       .cmd_rcgr = 0x174d8,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap0_s3_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap0_s4_clk_src" (cmd_rcgr 0x17608).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap0_s4_clk_src = {
+       .cmd_rcgr = 0x17608,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap0_s4_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap0_s5_clk_src" (cmd_rcgr 0x17738).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap0_s5_clk_src = {
+       .cmd_rcgr = 0x17738,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap0_s5_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap0_s6_clk_src" (cmd_rcgr 0x17868).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap0_s6_clk_src = {
+       .cmd_rcgr = 0x17868,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap0_s6_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap0_s7_clk_src" (cmd_rcgr 0x17998).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap0_s7_clk_src = {
+       .cmd_rcgr = 0x17998,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap0_s7_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap1_s0_clk_src" (cmd_rcgr 0x18148).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap1_s0_clk_src = {
+       .cmd_rcgr = 0x18148,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap1_s0_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap1_s1_clk_src" (cmd_rcgr 0x18278).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap1_s1_clk_src = {
+       .cmd_rcgr = 0x18278,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap1_s1_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap1_s2_clk_src" (cmd_rcgr 0x183a8).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap1_s2_clk_src = {
+       .cmd_rcgr = 0x183a8,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap1_s2_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap1_s3_clk_src" (cmd_rcgr 0x184d8).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap1_s3_clk_src = {
+       .cmd_rcgr = 0x184d8,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap1_s3_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap1_s4_clk_src" (cmd_rcgr 0x18608).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap1_s4_clk_src = {
+       .cmd_rcgr = 0x18608,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap1_s4_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap1_s5_clk_src" (cmd_rcgr 0x18738).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap1_s5_clk_src = {
+       .cmd_rcgr = 0x18738,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap1_s5_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap2_s0_clk_src" (cmd_rcgr 0x1e148).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap2_s0_clk_src = {
+       .cmd_rcgr = 0x1e148,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap2_s0_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap2_s1_clk_src" (cmd_rcgr 0x1e278).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap2_s1_clk_src = {
+       .cmd_rcgr = 0x1e278,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap2_s1_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap2_s2_clk_src" (cmd_rcgr 0x1e3a8).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap2_s2_clk_src = {
+       .cmd_rcgr = 0x1e3a8,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap2_s2_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap2_s3_clk_src" (cmd_rcgr 0x1e4d8).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap2_s3_clk_src = {
+       .cmd_rcgr = 0x1e4d8,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap2_s3_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap2_s4_clk_src" (cmd_rcgr 0x1e608).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap2_s4_clk_src = {
+       .cmd_rcgr = 0x1e608,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap2_s4_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_qupv3_wrap2_s5_clk_src" (cmd_rcgr 0x1e738).
+ * It reuses ftbl_gcc_qupv3_wrap0_s0_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_qupv3_wrap2_s5_clk_src = {
+       .cmd_rcgr = 0x1e738,
+       .hid_width = 5,
+       .mnd_width = 16,
+       .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_qupv3_wrap2_s5_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * Rate table used by gcc_sdcc2_apps_clk_src; the empty entry ends the
+ * table. F() is defined elsewhere - its arguments are presumably (rate in
+ * Hz, source, divider, m, n); confirm against the macro.
+ */
+static const struct freq_tbl ftbl_gcc_sdcc2_apps_clk_src[] = {
+       F(400000, P_BI_TCXO, 12, 1, 4),
+       F(9600000, P_BI_TCXO, 2, 0, 0),
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(25000000, P_GPLL0_OUT_MAIN, 12, 1, 2),
+       F(50000000, P_GPLL0_OUT_MAIN, 12, 0, 0),
+       F(100000000, P_GPLL0_OUT_MAIN, 6, 0, 0),
+       F(202000000, P_GPLL9_OUT_MAIN, 4, 0, 0),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_sdcc2_apps_clk_src" (cmd_rcgr 0x1400c).
+ * Rates come from ftbl_gcc_sdcc2_apps_clk_src; parents are given by
+ * gcc_parents_6 together with gcc_parent_map_6. Unlike most sources in
+ * this file it uses clk_rcg2_floor_ops rather than clk_rcg2_ops.
+ */
+static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
+       .cmd_rcgr = 0x1400c,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_6,
+       .freq_tbl = ftbl_gcc_sdcc2_apps_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_sdcc2_apps_clk_src",
+               .parent_data = gcc_parents_6,
+               /* Derived from the array so the count cannot drift from it. */
+               .num_parents = ARRAY_SIZE(gcc_parents_6),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_floor_ops,
+       },
+};
+
+/*
+ * Rate table used by gcc_sdcc4_apps_clk_src; the empty entry ends the
+ * table. F() is defined elsewhere - its arguments are presumably (rate in
+ * Hz, source, divider, m, n); confirm against the macro.
+ */
+static const struct freq_tbl ftbl_gcc_sdcc4_apps_clk_src[] = {
+       F(400000, P_BI_TCXO, 12, 1, 4),
+       F(9600000, P_BI_TCXO, 2, 0, 0),
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(25000000, P_GPLL0_OUT_MAIN, 12, 1, 2),
+       F(50000000, P_GPLL0_OUT_MAIN, 12, 0, 0),
+       F(100000000, P_GPLL0_OUT_MAIN, 6, 0, 0),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_sdcc4_apps_clk_src" (cmd_rcgr 0x1600c).
+ * Rates come from ftbl_gcc_sdcc4_apps_clk_src; its three parents are given
+ * by gcc_parents_3 together with gcc_parent_map_3. It uses
+ * clk_rcg2_floor_ops rather than clk_rcg2_ops.
+ */
+static struct clk_rcg2 gcc_sdcc4_apps_clk_src = {
+       .cmd_rcgr = 0x1600c,
+       .hid_width = 5,
+       .mnd_width = 8,
+       .freq_tbl = ftbl_gcc_sdcc4_apps_clk_src,
+       .parent_map = gcc_parent_map_3,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_floor_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_sdcc4_apps_clk_src",
+                       .parent_data = gcc_parents_3,
+                       .num_parents = 3,
+               },
+       },
+};
+
+/*
+ * Single-rate table used by gcc_tsif_ref_clk_src; the empty entry ends the
+ * table. F() is defined elsewhere - its arguments are presumably (rate in
+ * Hz, source, divider, m, n); confirm against the macro.
+ */
+static const struct freq_tbl ftbl_gcc_tsif_ref_clk_src[] = {
+       F(105495, P_BI_TCXO, 2, 1, 91),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_tsif_ref_clk_src" (cmd_rcgr 0x36010).
+ * Rates come from ftbl_gcc_tsif_ref_clk_src; parents are given by
+ * gcc_parents_7 together with gcc_parent_map_7.
+ */
+static struct clk_rcg2 gcc_tsif_ref_clk_src = {
+       .cmd_rcgr = 0x36010,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_7,
+       .freq_tbl = ftbl_gcc_tsif_ref_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_tsif_ref_clk_src",
+               .parent_data = gcc_parents_7,
+               /* Derived from the array so the count cannot drift from it. */
+               .num_parents = ARRAY_SIZE(gcc_parents_7),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+/*
+ * Rate table used by gcc_ufs_card_axi_clk_src; the empty entry ends the
+ * table. F() is defined elsewhere - its arguments are presumably (rate in
+ * Hz, source, divider, m, n); confirm against the macro. The last row
+ * passes a fractional divider (2.5).
+ */
+static const struct freq_tbl ftbl_gcc_ufs_card_axi_clk_src[] = {
+       F(25000000, P_GPLL0_OUT_EVEN, 12, 0, 0),
+       F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0),
+       F(100000000, P_GPLL0_OUT_MAIN, 6, 0, 0),
+       F(200000000, P_GPLL0_OUT_MAIN, 3, 0, 0),
+       F(240000000, P_GPLL0_OUT_MAIN, 2.5, 0, 0),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_ufs_card_axi_clk_src" (cmd_rcgr 0x75020).
+ * Rates come from ftbl_gcc_ufs_card_axi_clk_src; its four parents are
+ * given by gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_ufs_card_axi_clk_src = {
+       .cmd_rcgr = 0x75020,
+       .hid_width = 5,
+       .mnd_width = 8,
+       .freq_tbl = ftbl_gcc_ufs_card_axi_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_ufs_card_axi_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * Rate table used at least by gcc_ufs_card_ice_core_clk_src,
+ * gcc_ufs_phy_ice_core_clk_src and gcc_ufs_phy_unipro_core_clk_src; the
+ * empty entry ends the table. F() is defined elsewhere - its arguments are
+ * presumably (rate in Hz, source, divider, m, n); confirm against the
+ * macro.
+ */
+static const struct freq_tbl ftbl_gcc_ufs_card_ice_core_clk_src[] = {
+       F(37500000, P_GPLL0_OUT_EVEN, 8, 0, 0),
+       F(75000000, P_GPLL0_OUT_EVEN, 4, 0, 0),
+       F(150000000, P_GPLL0_OUT_MAIN, 4, 0, 0),
+       F(300000000, P_GPLL0_OUT_MAIN, 2, 0, 0),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_ufs_card_ice_core_clk_src" (cmd_rcgr
+ * 0x75060). Rates come from ftbl_gcc_ufs_card_ice_core_clk_src; its four
+ * parents are given by gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_ufs_card_ice_core_clk_src = {
+       .cmd_rcgr = 0x75060,
+       .hid_width = 5,
+       .mnd_width = 0,
+       .freq_tbl = ftbl_gcc_ufs_card_ice_core_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_ufs_card_ice_core_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * Single-rate table used at least by gcc_ufs_card_phy_aux_clk_src,
+ * gcc_usb3_prim_phy_aux_clk_src and gcc_usb3_sec_phy_aux_clk_src; the
+ * empty entry ends the table. F() is defined elsewhere - its arguments are
+ * presumably (rate in Hz, source, divider, m, n); confirm against the
+ * macro.
+ */
+static const struct freq_tbl ftbl_gcc_ufs_card_phy_aux_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_ufs_card_phy_aux_clk_src" (cmd_rcgr
+ * 0x75094). Rates come from ftbl_gcc_ufs_card_phy_aux_clk_src; its two
+ * parents are given by gcc_parents_4 together with gcc_parent_map_4.
+ */
+static struct clk_rcg2 gcc_ufs_card_phy_aux_clk_src = {
+       .cmd_rcgr = 0x75094,
+       .hid_width = 5,
+       .mnd_width = 0,
+       .freq_tbl = ftbl_gcc_ufs_card_phy_aux_clk_src,
+       .parent_map = gcc_parent_map_4,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_ufs_card_phy_aux_clk_src",
+                       .parent_data = gcc_parents_4,
+                       .num_parents = 2,
+               },
+       },
+};
+
+/*
+ * Rate table used by gcc_ufs_card_unipro_core_clk_src; the empty entry
+ * ends the table. F() is defined elsewhere - its arguments are presumably
+ * (rate in Hz, source, divider, m, n); confirm against the macro.
+ */
+static const struct freq_tbl ftbl_gcc_ufs_card_unipro_core_clk_src[] = {
+       F(37500000, P_GPLL0_OUT_EVEN, 8, 0, 0),
+       F(75000000, P_GPLL0_OUT_MAIN, 8, 0, 0),
+       F(150000000, P_GPLL0_OUT_MAIN, 4, 0, 0),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_ufs_card_unipro_core_clk_src" (cmd_rcgr
+ * 0x75078). Rates come from ftbl_gcc_ufs_card_unipro_core_clk_src; its
+ * four parents are given by gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_ufs_card_unipro_core_clk_src = {
+       .cmd_rcgr = 0x75078,
+       .hid_width = 5,
+       .mnd_width = 0,
+       .freq_tbl = ftbl_gcc_ufs_card_unipro_core_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_ufs_card_unipro_core_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * Rate table used by gcc_ufs_phy_axi_clk_src; the empty entry ends the
+ * table. F() is defined elsewhere - its arguments are presumably (rate in
+ * Hz, source, divider, m, n); confirm against the macro.
+ */
+static const struct freq_tbl ftbl_gcc_ufs_phy_axi_clk_src[] = {
+       F(25000000, P_GPLL0_OUT_EVEN, 12, 0, 0),
+       F(37500000, P_GPLL0_OUT_EVEN, 8, 0, 0),
+       F(75000000, P_GPLL0_OUT_EVEN, 4, 0, 0),
+       F(150000000, P_GPLL0_OUT_MAIN, 4, 0, 0),
+       F(300000000, P_GPLL0_OUT_MAIN, 2, 0, 0),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_ufs_phy_axi_clk_src" (cmd_rcgr 0x77020).
+ * Rates come from ftbl_gcc_ufs_phy_axi_clk_src; its four parents are given
+ * by gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_ufs_phy_axi_clk_src = {
+       .cmd_rcgr = 0x77020,
+       .hid_width = 5,
+       .mnd_width = 8,
+       .freq_tbl = ftbl_gcc_ufs_phy_axi_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_ufs_phy_axi_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_ufs_phy_ice_core_clk_src" (cmd_rcgr
+ * 0x77060). It reuses the card table ftbl_gcc_ufs_card_ice_core_clk_src;
+ * its four parents are given by gcc_parents_0 together with
+ * gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_ufs_phy_ice_core_clk_src = {
+       .cmd_rcgr = 0x77060,
+       .hid_width = 5,
+       .mnd_width = 0,
+       .freq_tbl = ftbl_gcc_ufs_card_ice_core_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_ufs_phy_ice_core_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_ufs_phy_phy_aux_clk_src" (cmd_rcgr
+ * 0x77094). Its two parents are given by gcc_parents_4 together with
+ * gcc_parent_map_4.
+ *
+ * NOTE(review): the rate table is ftbl_gcc_pcie_0_aux_clk_src, whereas the
+ * card counterpart uses ftbl_gcc_ufs_card_phy_aux_clk_src - confirm this is
+ * intended.
+ */
+static struct clk_rcg2 gcc_ufs_phy_phy_aux_clk_src = {
+       .cmd_rcgr = 0x77094,
+       .hid_width = 5,
+       .mnd_width = 0,
+       .freq_tbl = ftbl_gcc_pcie_0_aux_clk_src,
+       .parent_map = gcc_parent_map_4,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_ufs_phy_phy_aux_clk_src",
+                       .parent_data = gcc_parents_4,
+                       .num_parents = 2,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_ufs_phy_unipro_core_clk_src" (cmd_rcgr
+ * 0x77078). Its four parents are given by gcc_parents_0 together with
+ * gcc_parent_map_0.
+ *
+ * NOTE(review): the rate table is ftbl_gcc_ufs_card_ice_core_clk_src, not
+ * the unipro table used by the card counterpart - confirm this is
+ * intended.
+ */
+static struct clk_rcg2 gcc_ufs_phy_unipro_core_clk_src = {
+       .cmd_rcgr = 0x77078,
+       .hid_width = 5,
+       .mnd_width = 0,
+       .freq_tbl = ftbl_gcc_ufs_card_ice_core_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_ufs_phy_unipro_core_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * Rate table used at least by gcc_usb30_prim_master_clk_src and
+ * gcc_usb30_sec_master_clk_src; the empty entry ends the table. F() is
+ * defined elsewhere - its arguments are presumably (rate in Hz, source,
+ * divider, m, n); confirm against the macro. Several rows pass fractional
+ * dividers (4.5, 2.5).
+ */
+static const struct freq_tbl ftbl_gcc_usb30_prim_master_clk_src[] = {
+       F(33333333, P_GPLL0_OUT_EVEN, 9, 0, 0),
+       F(66666667, P_GPLL0_OUT_EVEN, 4.5, 0, 0),
+       F(133333333, P_GPLL0_OUT_MAIN, 4.5, 0, 0),
+       F(200000000, P_GPLL0_OUT_MAIN, 3, 0, 0),
+       F(240000000, P_GPLL0_OUT_MAIN, 2.5, 0, 0),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_usb30_prim_master_clk_src" (cmd_rcgr
+ * 0xf01c). Rates come from ftbl_gcc_usb30_prim_master_clk_src; its four
+ * parents are given by gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_usb30_prim_master_clk_src = {
+       .cmd_rcgr = 0xf01c,
+       .hid_width = 5,
+       .mnd_width = 8,
+       .freq_tbl = ftbl_gcc_usb30_prim_master_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_usb30_prim_master_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * Rate table used at least by gcc_usb30_prim_mock_utmi_clk_src and
+ * gcc_usb30_sec_mock_utmi_clk_src; the empty entry ends the table. F() is
+ * defined elsewhere - its arguments are presumably (rate in Hz, source,
+ * divider, m, n); confirm against the macro.
+ */
+static const struct freq_tbl ftbl_gcc_usb30_prim_mock_utmi_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(20000000, P_GPLL0_OUT_EVEN, 15, 0, 0),
+       F(60000000, P_GPLL0_OUT_EVEN, 5, 0, 0),
+       { }
+};
+
+/*
+ * clk_rcg2 instance named "gcc_usb30_prim_mock_utmi_clk_src" (cmd_rcgr
+ * 0xf034). Rates come from ftbl_gcc_usb30_prim_mock_utmi_clk_src; its four
+ * parents are given by gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_usb30_prim_mock_utmi_clk_src = {
+       .cmd_rcgr = 0xf034,
+       .hid_width = 5,
+       .mnd_width = 0,
+       .freq_tbl = ftbl_gcc_usb30_prim_mock_utmi_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_usb30_prim_mock_utmi_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_usb30_sec_master_clk_src" (cmd_rcgr
+ * 0x1001c). It reuses the primary table
+ * ftbl_gcc_usb30_prim_master_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_usb30_sec_master_clk_src = {
+       .cmd_rcgr = 0x1001c,
+       .hid_width = 5,
+       .mnd_width = 8,
+       .freq_tbl = ftbl_gcc_usb30_prim_master_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_usb30_sec_master_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_usb30_sec_mock_utmi_clk_src" (cmd_rcgr
+ * 0x10034). It reuses the primary table
+ * ftbl_gcc_usb30_prim_mock_utmi_clk_src; its four parents are given by
+ * gcc_parents_0 together with gcc_parent_map_0.
+ */
+static struct clk_rcg2 gcc_usb30_sec_mock_utmi_clk_src = {
+       .cmd_rcgr = 0x10034,
+       .hid_width = 5,
+       .mnd_width = 0,
+       .freq_tbl = ftbl_gcc_usb30_prim_mock_utmi_clk_src,
+       .parent_map = gcc_parent_map_0,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_usb30_sec_mock_utmi_clk_src",
+                       .parent_data = gcc_parents_0,
+                       .num_parents = 4,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_usb3_prim_phy_aux_clk_src" (cmd_rcgr
+ * 0xf060). It reuses ftbl_gcc_ufs_card_phy_aux_clk_src; its three parents
+ * are given by gcc_parents_2 together with gcc_parent_map_2.
+ */
+static struct clk_rcg2 gcc_usb3_prim_phy_aux_clk_src = {
+       .cmd_rcgr = 0xf060,
+       .hid_width = 5,
+       .mnd_width = 0,
+       .freq_tbl = ftbl_gcc_ufs_card_phy_aux_clk_src,
+       .parent_map = gcc_parent_map_2,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_usb3_prim_phy_aux_clk_src",
+                       .parent_data = gcc_parents_2,
+                       .num_parents = 3,
+               },
+       },
+};
+
+/*
+ * clk_rcg2 instance named "gcc_usb3_sec_phy_aux_clk_src" (cmd_rcgr
+ * 0x10060). It reuses ftbl_gcc_ufs_card_phy_aux_clk_src; its three parents
+ * are given by gcc_parents_2 together with gcc_parent_map_2.
+ */
+static struct clk_rcg2 gcc_usb3_sec_phy_aux_clk_src = {
+       .cmd_rcgr = 0x10060,
+       .hid_width = 5,
+       .mnd_width = 0,
+       .freq_tbl = ftbl_gcc_ufs_card_phy_aux_clk_src,
+       .parent_map = gcc_parent_map_2,
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_rcg2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_usb3_sec_phy_aux_clk_src",
+                       .parent_data = gcc_parents_2,
+                       .num_parents = 3,
+               },
+       },
+};
+
+/*
+ * clk_branch named "gcc_aggre_noc_pcie_tbu_clk". Register 0x90018 serves
+ * as both halt and enable register (enable mask BIT(0)); no parent is
+ * listed for it here.
+ */
+static struct clk_branch gcc_aggre_noc_pcie_tbu_clk = {
+       .halt_check = BRANCH_HALT,
+       .halt_reg = 0x90018,
+       .clkr = {
+               .enable_mask = BIT(0),
+               .enable_reg = 0x90018,
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_branch2_ops,
+                       .name = "gcc_aggre_noc_pcie_tbu_clk",
+               },
+       },
+};
+
+/*
+ * clk_branch named "gcc_aggre_ufs_card_axi_clk", fed by
+ * gcc_ufs_card_axi_clk_src. Register 0x750c0 is used for halt, hwcg
+ * (bit 1) and enable (mask BIT(0)).
+ */
+static struct clk_branch gcc_aggre_ufs_card_axi_clk = {
+       .hwcg_bit = 1,
+       .hwcg_reg = 0x750c0,
+       .halt_check = BRANCH_HALT,
+       .halt_reg = 0x750c0,
+       .clkr = {
+               .enable_mask = BIT(0),
+               .enable_reg = 0x750c0,
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_branch2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_aggre_ufs_card_axi_clk",
+                       .num_parents = 1,
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_ufs_card_axi_clk_src.clkr.hw,
+                       },
+               },
+       },
+};
+
+/*
+ * clk_branch named "gcc_aggre_ufs_card_axi_hw_ctl_clk", a child of
+ * gcc_aggre_ufs_card_axi_clk. It lives in the same register (0x750c0) as
+ * its parent but is enabled through BIT(1) and uses
+ * clk_branch_simple_ops.
+ */
+static struct clk_branch gcc_aggre_ufs_card_axi_hw_ctl_clk = {
+       .hwcg_bit = 1,
+       .hwcg_reg = 0x750c0,
+       .halt_check = BRANCH_HALT,
+       .halt_reg = 0x750c0,
+       .clkr = {
+               .enable_mask = BIT(1),
+               .enable_reg = 0x750c0,
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_branch_simple_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_aggre_ufs_card_axi_hw_ctl_clk",
+                       .num_parents = 1,
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_aggre_ufs_card_axi_clk.clkr.hw,
+                       },
+               },
+       },
+};
+
+/*
+ * clk_branch named "gcc_aggre_ufs_phy_axi_clk", fed by
+ * gcc_ufs_phy_axi_clk_src. Register 0x770c0 is used for halt, hwcg
+ * (bit 1) and enable (mask BIT(0)).
+ */
+static struct clk_branch gcc_aggre_ufs_phy_axi_clk = {
+       .hwcg_bit = 1,
+       .hwcg_reg = 0x770c0,
+       .halt_check = BRANCH_HALT,
+       .halt_reg = 0x770c0,
+       .clkr = {
+               .enable_mask = BIT(0),
+               .enable_reg = 0x770c0,
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_branch2_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_aggre_ufs_phy_axi_clk",
+                       .num_parents = 1,
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_ufs_phy_axi_clk_src.clkr.hw,
+                       },
+               },
+       },
+};
+
+/*
+ * clk_branch named "gcc_aggre_ufs_phy_axi_hw_ctl_clk", a child of
+ * gcc_aggre_ufs_phy_axi_clk. It lives in the same register (0x770c0) as
+ * its parent but is enabled through BIT(1) and uses
+ * clk_branch_simple_ops.
+ */
+static struct clk_branch gcc_aggre_ufs_phy_axi_hw_ctl_clk = {
+       .hwcg_bit = 1,
+       .hwcg_reg = 0x770c0,
+       .halt_check = BRANCH_HALT,
+       .halt_reg = 0x770c0,
+       .clkr = {
+               .enable_mask = BIT(1),
+               .enable_reg = 0x770c0,
+               .hw.init = &(struct clk_init_data){
+                       .ops = &clk_branch_simple_ops,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .name = "gcc_aggre_ufs_phy_axi_hw_ctl_clk",
+                       .num_parents = 1,
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_aggre_ufs_phy_axi_clk.clkr.hw,
+                       },
+               },
+       },
+};
+
+static struct clk_branch gcc_aggre_usb3_prim_axi_clk = { /* branch gate on gcc_usb30_prim_master_clk_src */
+       .halt_reg = 0xf07c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xf07c, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_aggre_usb3_prim_axi_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_usb30_prim_master_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* forward rate requests to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_aggre_usb3_sec_axi_clk = { /* branch gate on gcc_usb30_sec_master_clk_src */
+       .halt_reg = 0x1007c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x1007c, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_aggre_usb3_sec_axi_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_usb30_sec_master_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* forward rate requests to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_boot_rom_ahb_clk = { /* voted branch gate, no parent listed */
+       .halt_reg = 0x38004,
+       .halt_check = BRANCH_HALT_VOTED,
+       .hwcg_reg = 0x38004, /* same register as halt_reg */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x52004, /* separate from halt_reg; other clocks use other bits of 0x52004 */
+               .enable_mask = BIT(10),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_boot_rom_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+/*
+ * Clock ON depends on external parent 'config noc', so can't poll;
+ * use a delay instead, and also mark as critical for camss boot.
+ */
+static struct clk_branch gcc_camera_ahb_clk = { /* critical branch gate, no parent listed */
+       .halt_reg = 0xb008,
+       .halt_check = BRANCH_HALT_DELAY, /* delay rather than poll: depends on external 'config noc' */
+       .hwcg_reg = 0xb008, /* same register as halt_reg and enable_reg */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0xb008,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_camera_ahb_clk",
+                       .flags = CLK_IS_CRITICAL, /* needed for camss boot */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_camera_hf_axi_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0xb030,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb030, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_camera_hf_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_camera_sf_axi_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0xb034,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb034, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_camera_sf_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+/* XO critical input to camss, so no need to poll */
+static struct clk_branch gcc_camera_xo_clk = { /* critical branch gate, no parent listed */
+       .halt_reg = 0xb044,
+       .halt_check = BRANCH_HALT_DELAY, /* halt status is not polled for this clock */
+       .clkr = {
+               .enable_reg = 0xb044, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_camera_xo_clk",
+                       .flags = CLK_IS_CRITICAL, /* XO input to camss is critical */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_cfg_noc_usb3_prim_axi_clk = { /* branch gate on gcc_usb30_prim_master_clk_src */
+       .halt_reg = 0xf078,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xf078, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_cfg_noc_usb3_prim_axi_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_usb30_prim_master_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* forward rate requests to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_cfg_noc_usb3_sec_axi_clk = { /* branch gate on gcc_usb30_sec_master_clk_src */
+       .halt_reg = 0x10078,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x10078, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_cfg_noc_usb3_sec_axi_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_usb30_sec_master_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* forward rate requests to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_cpuss_ahb_clk = { /* critical voted branch gate on gcc_cpuss_ahb_clk_src */
+       .halt_reg = 0x48000,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x52004, /* separate from halt_reg; other clocks use other bits of 0x52004 */
+               .enable_mask = BIT(21),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_cpuss_ahb_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_cpuss_ahb_clk_src.clkr.hw },
+                       .num_parents = 1,
+                        /* required for cpuss */
+                       .flags = CLK_IS_CRITICAL | CLK_SET_RATE_PARENT, /* critical, and rate requests go to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_cpuss_dvm_bus_clk = { /* critical branch gate, no parent listed */
+       .halt_reg = 0x48190,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x48190, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_cpuss_dvm_bus_clk",
+                        /* required for cpuss */
+                       .flags = CLK_IS_CRITICAL,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_cpuss_gnoc_clk = { /* critical voted branch gate, no parent listed */
+       .halt_reg = 0x48004,
+       .halt_check = BRANCH_HALT_VOTED,
+       .hwcg_reg = 0x48004, /* same register as halt_reg */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x52004, /* separate from halt_reg; other clocks use other bits of 0x52004 */
+               .enable_mask = BIT(22),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_cpuss_gnoc_clk",
+                        /* required for cpuss */
+                       .flags = CLK_IS_CRITICAL,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_cpuss_rbcpr_clk = { /* branch gate, no parent listed, not marked critical */
+       .halt_reg = 0x48008,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x48008, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_cpuss_rbcpr_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_ddrss_gpu_axi_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0x71154,
+       .halt_check = BRANCH_VOTED, /* NOTE(review): bare BRANCH_VOTED, not BRANCH_HALT_VOTED as elsewhere - confirm intended */
+       .clkr = {
+               .enable_reg = 0x71154, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ddrss_gpu_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+/*
+ * Clock ON depends on external parent 'config noc', so can't poll;
+ * use a delay instead, and also mark as critical for disp boot.
+ */
+static struct clk_branch gcc_disp_ahb_clk = { /* critical branch gate, no parent listed */
+       .halt_reg = 0xb00c,
+       .halt_check = BRANCH_HALT_DELAY, /* delay rather than poll: depends on external 'config noc' */
+       .hwcg_reg = 0xb00c, /* same register as halt_reg and enable_reg */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0xb00c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_disp_ahb_clk",
+                       .flags = CLK_IS_CRITICAL, /* needed for disp boot */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_disp_hf_axi_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0xb038,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb038, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_disp_hf_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_disp_sf_axi_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0xb03c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb03c, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_disp_sf_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+/* XO critical input to disp, so no need to poll */
+static struct clk_branch gcc_disp_xo_clk = { /* critical branch gate, no parent listed */
+       .halt_reg = 0xb048,
+       .halt_check = BRANCH_HALT_DELAY, /* halt status is not polled for this clock */
+       .clkr = {
+               .enable_reg = 0xb048, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_disp_xo_clk",
+                       .flags = CLK_IS_CRITICAL, /* XO input to disp is critical */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_emac_axi_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0x6010,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x6010, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_emac_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_emac_ptp_clk = { /* branch gate on gcc_emac_ptp_clk_src */
+       .halt_reg = 0x6034,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x6034, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_emac_ptp_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_emac_ptp_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* forward rate requests to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_emac_rgmii_clk = { /* branch gate on gcc_emac_rgmii_clk_src */
+       .halt_reg = 0x6018,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x6018, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_emac_rgmii_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_emac_rgmii_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* forward rate requests to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_emac_slv_ahb_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0x6014,
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x6014, /* halt, hwcg and enable all use register 0x6014 */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x6014,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_emac_slv_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_gp1_clk = { /* branch gate on gcc_gp1_clk_src */
+       .halt_reg = 0x64000,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x64000, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_gp1_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_gp1_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* forward rate requests to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_gp2_clk = { /* branch gate on gcc_gp2_clk_src */
+       .halt_reg = 0x65000,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x65000, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_gp2_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_gp2_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* forward rate requests to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_gp3_clk = { /* branch gate on gcc_gp3_clk_src */
+       .halt_reg = 0x66000,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x66000, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_gp3_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_gp3_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* forward rate requests to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_gpu_cfg_ahb_clk = { /* critical branch gate, no parent listed */
+       .halt_reg = 0x71004,
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x71004, /* halt, hwcg and enable all use register 0x71004 */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x71004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_gpu_cfg_ahb_clk",
+                        /* required for gpu */
+                       .flags = CLK_IS_CRITICAL,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_gpu_iref_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0x8c010,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8c010, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_gpu_iref_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_gpu_memnoc_gfx_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0x7100c,
+       .halt_check = BRANCH_VOTED, /* NOTE(review): bare BRANCH_VOTED, not BRANCH_HALT_VOTED as elsewhere - confirm intended */
+       .clkr = {
+               .enable_reg = 0x7100c, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_gpu_memnoc_gfx_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_gpu_snoc_dvm_gfx_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0x71018,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x71018, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_gpu_snoc_dvm_gfx_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_npu_at_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0x4d010,
+       .halt_check = BRANCH_VOTED, /* NOTE(review): bare BRANCH_VOTED, not BRANCH_HALT_VOTED as elsewhere - confirm intended */
+       .clkr = {
+               .enable_reg = 0x4d010, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_npu_at_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_npu_axi_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0x4d008,
+       .halt_check = BRANCH_VOTED, /* NOTE(review): bare BRANCH_VOTED, not BRANCH_HALT_VOTED as elsewhere - confirm intended */
+       .clkr = {
+               .enable_reg = 0x4d008, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_npu_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_npu_cfg_ahb_clk = { /* critical branch gate, no parent listed */
+       .halt_reg = 0x4d004,
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x4d004, /* halt, hwcg and enable all use register 0x4d004 */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x4d004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_npu_cfg_ahb_clk",
+                        /* required for npu */
+                       .flags = CLK_IS_CRITICAL,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_npu_trig_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0x4d00c,
+       .halt_check = BRANCH_VOTED, /* NOTE(review): bare BRANCH_VOTED, not BRANCH_HALT_VOTED as elsewhere - confirm intended */
+       .clkr = {
+               .enable_reg = 0x4d00c, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_npu_trig_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie0_phy_refgen_clk = { /* branch gate on gcc_pcie_phy_refgen_clk_src */
+       .halt_reg = 0x6f02c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x6f02c, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie0_phy_refgen_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_pcie_phy_refgen_clk_src.clkr.hw }, /* same parent as gcc_pcie1_phy_refgen_clk */
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* forward rate requests to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie1_phy_refgen_clk = { /* branch gate on gcc_pcie_phy_refgen_clk_src */
+       .halt_reg = 0x6f030,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x6f030, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie1_phy_refgen_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_pcie_phy_refgen_clk_src.clkr.hw }, /* same parent as gcc_pcie0_phy_refgen_clk */
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* forward rate requests to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_0_aux_clk = { /* voted branch gate on gcc_pcie_0_aux_clk_src */
+       .halt_reg = 0x6b020,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* separate from halt_reg; other clocks use other bits of 0x5200c */
+               .enable_mask = BIT(3),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_0_aux_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_pcie_0_aux_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* forward rate requests to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_0_cfg_ahb_clk = { /* voted branch gate, no parent listed */
+       .halt_reg = 0x6b01c,
+       .halt_check = BRANCH_HALT_VOTED,
+       .hwcg_reg = 0x6b01c, /* same register as halt_reg */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x5200c, /* separate from halt_reg; other clocks use other bits of 0x5200c */
+               .enable_mask = BIT(2),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_0_cfg_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_0_clkref_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0x8c00c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8c00c, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_0_clkref_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_0_mstr_axi_clk = { /* voted branch gate, no parent listed */
+       .halt_reg = 0x6b018,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* separate from halt_reg; other clocks use other bits of 0x5200c */
+               .enable_mask = BIT(1),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_0_mstr_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+/* Clock ON depends on external parent 'PIPE' clock, so don't poll */
+static struct clk_branch gcc_pcie_0_pipe_clk = { /* branch gate, no parent listed in this table */
+       .halt_reg = 0x6b024,
+       .halt_check = BRANCH_HALT_DELAY, /* no polling: clock ON depends on the external PIPE clock */
+       .clkr = {
+               .enable_reg = 0x5200c, /* separate from halt_reg; other clocks use other bits of 0x5200c */
+               .enable_mask = BIT(4),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_0_pipe_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_0_slv_axi_clk = { /* voted branch gate, no parent listed */
+       .halt_reg = 0x6b014,
+       .halt_check = BRANCH_HALT_VOTED,
+       .hwcg_reg = 0x6b014, /* same register as halt_reg */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x5200c, /* separate from halt_reg; other clocks use other bits of 0x5200c */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_0_slv_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_0_slv_q2a_axi_clk = { /* voted branch gate, no parent listed */
+       .halt_reg = 0x6b010,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* separate from halt_reg; other clocks use other bits of 0x5200c */
+               .enable_mask = BIT(5),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_0_slv_q2a_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_1_aux_clk = { /* voted branch gate on gcc_pcie_1_aux_clk_src */
+       .halt_reg = 0x8d020,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x52004, /* separate from halt_reg; other clocks use other bits of 0x52004 */
+               .enable_mask = BIT(29),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_1_aux_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_pcie_1_aux_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* forward rate requests to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_1_cfg_ahb_clk = { /* voted branch gate, no parent listed */
+       .halt_reg = 0x8d01c,
+       .halt_check = BRANCH_HALT_VOTED,
+       .hwcg_reg = 0x8d01c, /* same register as halt_reg */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x52004, /* separate from halt_reg; other clocks use other bits of 0x52004 */
+               .enable_mask = BIT(28),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_1_cfg_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_1_clkref_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0x8c02c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8c02c, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_1_clkref_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_1_mstr_axi_clk = { /* voted branch gate, no parent listed */
+       .halt_reg = 0x8d018,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x52004, /* separate from halt_reg; other clocks use other bits of 0x52004 */
+               .enable_mask = BIT(27),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_1_mstr_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+/* Clock ON depends on external parent 'PIPE' clock, so don't poll */
+static struct clk_branch gcc_pcie_1_pipe_clk = { /* branch gate, no parent listed in this table */
+       .halt_reg = 0x8d024,
+       .halt_check = BRANCH_HALT_DELAY, /* no polling: clock ON depends on the external PIPE clock */
+       .clkr = {
+               .enable_reg = 0x52004, /* separate from halt_reg; other clocks use other bits of 0x52004 */
+               .enable_mask = BIT(30),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_1_pipe_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_1_slv_axi_clk = { /* voted branch gate, no parent listed */
+       .halt_reg = 0x8d014,
+       .halt_check = BRANCH_HALT_VOTED,
+       .hwcg_reg = 0x8d014, /* same register as halt_reg */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x52004, /* separate from halt_reg; other clocks use other bits of 0x52004 */
+               .enable_mask = BIT(26),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_1_slv_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_1_slv_q2a_axi_clk = { /* voted branch gate, no parent listed */
+       .halt_reg = 0x8d010,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x52004, /* separate from halt_reg; other clocks use other bits of 0x52004 */
+               .enable_mask = BIT(25),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_1_slv_q2a_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_phy_aux_clk = { /* branch gate on gcc_pcie_0_aux_clk_src */
+       .halt_reg = 0x6f004,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x6f004, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_phy_aux_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_pcie_0_aux_clk_src.clkr.hw }, /* NOTE(review): same source as gcc_pcie_0_aux_clk - confirm intended */
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* forward rate requests to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pdm2_clk = { /* branch gate on gcc_pdm2_clk_src */
+       .halt_reg = 0x3300c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x3300c, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pdm2_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_pdm2_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* forward rate requests to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pdm_ahb_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0x33004,
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x33004, /* halt, hwcg and enable all use register 0x33004 */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x33004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pdm_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pdm_xo4_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0x33008,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x33008, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pdm_xo4_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_prng_ahb_clk = { /* voted branch gate, no parent listed */
+       .halt_reg = 0x34004,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x52004, /* separate from halt_reg; other clocks use other bits of 0x52004 */
+               .enable_mask = BIT(13),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_prng_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qmip_camera_nrt_ahb_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0xb018,
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0xb018, /* halt, hwcg and enable all use register 0xb018 */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0xb018,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qmip_camera_nrt_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qmip_camera_rt_ahb_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0xb01c,
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0xb01c, /* halt, hwcg and enable all use register 0xb01c */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0xb01c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qmip_camera_rt_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qmip_disp_ahb_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0xb020,
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0xb020, /* halt, hwcg and enable all use register 0xb020 */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0xb020,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qmip_disp_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qmip_video_cvp_ahb_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0xb010,
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0xb010, /* halt, hwcg and enable all use register 0xb010 */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0xb010,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qmip_video_cvp_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qmip_video_vcodec_ahb_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0xb014,
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0xb014, /* halt, hwcg and enable all use register 0xb014 */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0xb014,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qmip_video_vcodec_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qspi_cnoc_periph_ahb_clk = { /* branch gate, no parent listed */
+       .halt_reg = 0x4b000,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x4b000, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qspi_cnoc_periph_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qspi_core_clk = { /* branch gate on gcc_qspi_core_clk_src */
+       .halt_reg = 0x4b004,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x4b004, /* enable bit shares the halt_reg register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qspi_core_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qspi_core_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* forward rate requests to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s0_clk = { /* voted branch gate on gcc_qupv3_wrap0_s0_clk_src */
+       .halt_reg = 0x17144,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* separate from halt_reg; s0..s3 use BIT(10)..BIT(13) of 0x5200c */
+               .enable_mask = BIT(10),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap0_s0_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap0_s0_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* forward rate requests to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s1_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap0_s1_clk_src. */
+       .halt_reg = 0x17274,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(11),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap0_s1_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap0_s1_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s2_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap0_s2_clk_src. */
+       .halt_reg = 0x173a4,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(12),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap0_s2_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap0_s2_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s3_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap0_s3_clk_src. */
+       .halt_reg = 0x174d4,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(13),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap0_s3_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap0_s3_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s4_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap0_s4_clk_src. */
+       .halt_reg = 0x17604,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(14),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap0_s4_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap0_s4_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s5_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap0_s5_clk_src. */
+       .halt_reg = 0x17734,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(15),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap0_s5_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap0_s5_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s6_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap0_s6_clk_src. */
+       .halt_reg = 0x17864,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(16),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap0_s6_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap0_s6_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s7_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap0_s7_clk_src. */
+       .halt_reg = 0x17994,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(17),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap0_s7_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap0_s7_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s0_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap1_s0_clk_src. */
+       .halt_reg = 0x18144,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(22),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap1_s0_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap1_s0_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s1_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap1_s1_clk_src. */
+       .halt_reg = 0x18274,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(23),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap1_s1_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap1_s1_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s2_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap1_s2_clk_src. */
+       .halt_reg = 0x183a4,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(24),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap1_s2_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap1_s2_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s3_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap1_s3_clk_src. */
+       .halt_reg = 0x184d4,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(25),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap1_s3_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap1_s3_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s4_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap1_s4_clk_src. */
+       .halt_reg = 0x18604,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(26),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap1_s4_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap1_s4_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s5_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap1_s5_clk_src. */
+       .halt_reg = 0x18734,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(27),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap1_s5_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap1_s5_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap2_s0_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap2_s0_clk_src. */
+       .halt_reg = 0x1e144,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x52014, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(4),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap2_s0_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap2_s0_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap2_s1_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap2_s1_clk_src. */
+       .halt_reg = 0x1e274,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x52014, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(5),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap2_s1_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap2_s1_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap2_s2_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap2_s2_clk_src. */
+       .halt_reg = 0x1e3a4,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x52014, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(6),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap2_s2_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap2_s2_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap2_s3_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap2_s3_clk_src. */
+       .halt_reg = 0x1e4d4,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x52014, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(7),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap2_s3_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap2_s3_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap2_s4_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap2_s4_clk_src. */
+       .halt_reg = 0x1e604,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x52014, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(8),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap2_s4_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap2_s4_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap2_s5_clk = { /* Branch clock descriptor; single parent is gcc_qupv3_wrap2_s5_clk_src. */
+       .halt_reg = 0x1e734,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x52014, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(9),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap2_s5_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_qupv3_wrap2_s5_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap_0_m_ahb_clk = { /* Branch clock descriptor; init data lists no parent. */
+       .halt_reg = 0x17004,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(6),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap_0_m_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap_0_s_ahb_clk = { /* Branch clock descriptor; init data lists no parent. */
+       .halt_reg = 0x17008,
+       .halt_check = BRANCH_HALT_VOTED,
+       .hwcg_reg = 0x17008, /* same offset as halt_reg */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x5200c, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(7),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap_0_s_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap_1_m_ahb_clk = { /* Branch clock descriptor; init data lists no parent. */
+       .halt_reg = 0x18004,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x5200c, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(20),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap_1_m_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap_1_s_ahb_clk = { /* Branch clock descriptor; init data lists no parent. */
+       .halt_reg = 0x18008,
+       .halt_check = BRANCH_HALT_VOTED,
+       .hwcg_reg = 0x18008, /* same offset as halt_reg */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x5200c, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(21),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap_1_s_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap_2_m_ahb_clk = { /* Branch clock descriptor; init data lists no parent. */
+       .halt_reg = 0x1e004,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x52014, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(2),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap_2_m_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_qupv3_wrap_2_s_ahb_clk = { /* Branch clock descriptor; init data lists no parent. */
+       .halt_reg = 0x1e008,
+       .halt_check = BRANCH_HALT_VOTED,
+       .hwcg_reg = 0x1e008, /* same offset as halt_reg */
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x52014, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(1),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qupv3_wrap_2_s_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_sdcc2_ahb_clk = { /* Branch clock descriptor; init data lists no parent. */
+       .halt_reg = 0x14008, /* same offset as .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x14008,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_sdcc2_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_sdcc2_apps_clk = { /* Branch clock descriptor; single parent is gcc_sdcc2_apps_clk_src. */
+       .halt_reg = 0x14004, /* same offset as .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x14004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_sdcc2_apps_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_sdcc2_apps_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_sdcc4_ahb_clk = { /* Branch clock descriptor; init data lists no parent. */
+       .halt_reg = 0x16008, /* same offset as .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x16008,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_sdcc4_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_sdcc4_apps_clk = { /* Branch clock descriptor; single parent is gcc_sdcc4_apps_clk_src. */
+       .halt_reg = 0x16004, /* same offset as .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x16004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_sdcc4_apps_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_sdcc4_apps_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_sys_noc_cpuss_ahb_clk = { /* Branch clock descriptor; single parent is gcc_cpuss_ahb_clk_src. */
+       .halt_reg = 0x4819c,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x52004, /* differs from halt_reg: the enable bit sits in another register */
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_sys_noc_cpuss_ahb_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_cpuss_ahb_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       /* required for cpuss */
+                       .flags = CLK_IS_CRITICAL | CLK_SET_RATE_PARENT, /* marked critical in addition to parent rate propagation */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_tsif_ahb_clk = { /* Branch clock descriptor; init data lists no parent. */
+       .halt_reg = 0x36004, /* same offset as .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x36004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_tsif_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_tsif_inactivity_timers_clk = { /* Branch clock descriptor; init data lists no parent. */
+       .halt_reg = 0x3600c, /* same offset as .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x3600c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_tsif_inactivity_timers_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_tsif_ref_clk = { /* Branch clock descriptor; single parent is gcc_tsif_ref_clk_src. */
+       .halt_reg = 0x36008, /* same offset as .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x36008,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_tsif_ref_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_tsif_ref_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_card_ahb_clk = { /* Branch clock descriptor; init data lists no parent. */
+       .halt_reg = 0x75014, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x75014,
+       .hwcg_bit = 1, /* bit 1 of the same register whose bit 0 is the enable mask */
+       .clkr = {
+               .enable_reg = 0x75014,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_card_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_card_axi_clk = { /* Branch clock descriptor; single parent is gcc_ufs_card_axi_clk_src. */
+       .halt_reg = 0x75010, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x75010,
+       .hwcg_bit = 1, /* bit 1 of the same register whose bit 0 is the enable mask */
+       .clkr = {
+               .enable_reg = 0x75010,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_card_axi_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_ufs_card_axi_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_card_axi_hw_ctl_clk = { /* Descriptor parented on the gcc_ufs_card_axi_clk branch; uses that clock's register 0x75010. */
+       .halt_reg = 0x75010, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x75010,
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x75010,
+               .enable_mask = BIT(1), /* same bit index as hwcg_bit; NOTE(review): presumably the hw-control bit, confirm */
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_card_axi_hw_ctl_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_ufs_card_axi_clk.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch_simple_ops, /* simple ops here, not clk_branch2_ops */
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_card_clkref_clk = { /* Branch clock descriptor; init data lists no parent. */
+       .halt_reg = 0x8c004, /* same offset as .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8c004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_card_clkref_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_card_ice_core_clk = { /* Branch clock descriptor; single parent is gcc_ufs_card_ice_core_clk_src. */
+       .halt_reg = 0x7505c, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x7505c,
+       .hwcg_bit = 1, /* bit 1 of the same register whose bit 0 is the enable mask */
+       .clkr = {
+               .enable_reg = 0x7505c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_card_ice_core_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_ufs_card_ice_core_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_card_ice_core_hw_ctl_clk = { /* Descriptor parented on the gcc_ufs_card_ice_core_clk branch; uses that clock's register 0x7505c. */
+       .halt_reg = 0x7505c, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x7505c,
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x7505c,
+               .enable_mask = BIT(1), /* same bit index as hwcg_bit; NOTE(review): presumably the hw-control bit, confirm */
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_card_ice_core_hw_ctl_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_ufs_card_ice_core_clk.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch_simple_ops, /* simple ops here, not clk_branch2_ops */
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_card_phy_aux_clk = { /* Branch clock descriptor; single parent is gcc_ufs_card_phy_aux_clk_src. */
+       .halt_reg = 0x75090, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x75090,
+       .hwcg_bit = 1, /* bit 1 of the same register whose bit 0 is the enable mask */
+       .clkr = {
+               .enable_reg = 0x75090,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_card_phy_aux_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_ufs_card_phy_aux_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_card_phy_aux_hw_ctl_clk = { /* Descriptor parented on the gcc_ufs_card_phy_aux_clk branch; uses that clock's register 0x75090. */
+       .halt_reg = 0x75090, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x75090,
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x75090,
+               .enable_mask = BIT(1), /* same bit index as hwcg_bit; NOTE(review): presumably the hw-control bit, confirm */
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_card_phy_aux_hw_ctl_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_ufs_card_phy_aux_clk.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch_simple_ops, /* simple ops here, not clk_branch2_ops */
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_card_unipro_core_clk = { /* Branch clock descriptor; single parent is gcc_ufs_card_unipro_core_clk_src. */
+       .halt_reg = 0x75058, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x75058,
+       .hwcg_bit = 1, /* bit 1 of the same register whose bit 0 is the enable mask */
+       .clkr = {
+               .enable_reg = 0x75058,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_card_unipro_core_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_ufs_card_unipro_core_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_card_unipro_core_hw_ctl_clk = { /* Descriptor parented on the gcc_ufs_card_unipro_core_clk branch; uses that clock's register 0x75058. */
+       .halt_reg = 0x75058, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x75058,
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x75058,
+               .enable_mask = BIT(1), /* same bit index as hwcg_bit; NOTE(review): presumably the hw-control bit, confirm */
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_card_unipro_core_hw_ctl_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_ufs_card_unipro_core_clk.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch_simple_ops, /* simple ops here, not clk_branch2_ops */
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_mem_clkref_clk = { /* Branch clock descriptor; init data lists no parent. */
+       .halt_reg = 0x8c000, /* same offset as .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8c000,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_mem_clkref_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_phy_ahb_clk = { /* Branch clock descriptor; init data lists no parent. */
+       .halt_reg = 0x77014, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x77014,
+       .hwcg_bit = 1, /* bit 1 of the same register whose bit 0 is the enable mask */
+       .clkr = {
+               .enable_reg = 0x77014,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_phy_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_phy_axi_clk = { /* Branch clock descriptor; single parent is gcc_ufs_phy_axi_clk_src. */
+       .halt_reg = 0x77010, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x77010,
+       .hwcg_bit = 1, /* bit 1 of the same register whose bit 0 is the enable mask */
+       .clkr = {
+               .enable_reg = 0x77010,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_phy_axi_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_ufs_phy_axi_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_phy_axi_hw_ctl_clk = { /* Descriptor parented on the gcc_ufs_phy_axi_clk branch; uses that clock's register 0x77010. */
+       .halt_reg = 0x77010, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x77010,
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x77010,
+               .enable_mask = BIT(1), /* same bit index as hwcg_bit; NOTE(review): presumably the hw-control bit, confirm */
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_phy_axi_hw_ctl_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_ufs_phy_axi_clk.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch_simple_ops, /* simple ops here, not clk_branch2_ops */
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_phy_ice_core_clk = { /* Branch clock descriptor; single parent is gcc_ufs_phy_ice_core_clk_src. */
+       .halt_reg = 0x7705c, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x7705c,
+       .hwcg_bit = 1, /* bit 1 of the same register whose bit 0 is the enable mask */
+       .clkr = {
+               .enable_reg = 0x7705c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_phy_ice_core_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_ufs_phy_ice_core_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_phy_ice_core_hw_ctl_clk = { /* Descriptor parented on the gcc_ufs_phy_ice_core_clk branch; uses that clock's register 0x7705c. */
+       .halt_reg = 0x7705c, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x7705c,
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x7705c,
+               .enable_mask = BIT(1), /* same bit index as hwcg_bit; NOTE(review): presumably the hw-control bit, confirm */
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_phy_ice_core_hw_ctl_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_ufs_phy_ice_core_clk.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch_simple_ops, /* simple ops here, not clk_branch2_ops */
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_phy_phy_aux_clk = { /* Branch clock descriptor; single parent is gcc_ufs_phy_phy_aux_clk_src. */
+       .halt_reg = 0x77090, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x77090,
+       .hwcg_bit = 1, /* bit 1 of the same register whose bit 0 is the enable mask */
+       .clkr = {
+               .enable_reg = 0x77090,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_phy_phy_aux_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_ufs_phy_phy_aux_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_phy_phy_aux_hw_ctl_clk = { /* Descriptor parented on the gcc_ufs_phy_phy_aux_clk branch; uses that clock's register 0x77090. */
+       .halt_reg = 0x77090, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x77090,
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x77090,
+               .enable_mask = BIT(1), /* same bit index as hwcg_bit; NOTE(review): presumably the hw-control bit, confirm */
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_phy_phy_aux_hw_ctl_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_ufs_phy_phy_aux_clk.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch_simple_ops, /* simple ops here, not clk_branch2_ops */
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_phy_unipro_core_clk = { /* Branch clock descriptor; single parent is gcc_ufs_phy_unipro_core_clk_src. */
+       .halt_reg = 0x77058, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x77058,
+       .hwcg_bit = 1, /* bit 1 of the same register whose bit 0 is the enable mask */
+       .clkr = {
+               .enable_reg = 0x77058,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_phy_unipro_core_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_ufs_phy_unipro_core_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_ufs_phy_unipro_core_hw_ctl_clk = { /* Descriptor parented on the gcc_ufs_phy_unipro_core_clk branch; uses that clock's register 0x77058. */
+       .halt_reg = 0x77058, /* same offset as hwcg_reg and .clkr.enable_reg */
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x77058,
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x77058,
+               .enable_mask = BIT(1), /* same bit index as hwcg_bit; NOTE(review): presumably the hw-control bit, confirm */
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ufs_phy_unipro_core_hw_ctl_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_ufs_phy_unipro_core_clk.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT, /* rate changes may propagate to the parent */
+                       .ops = &clk_branch_simple_ops, /* simple ops here, not clk_branch2_ops */
+               },
+       },
+};
+
+static struct clk_branch gcc_usb30_prim_master_clk = {
+       .halt_reg = 0xf010,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xf010,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb30_prim_master_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_usb30_prim_master_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb30_prim_mock_utmi_clk = {
+       .halt_reg = 0xf018,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xf018,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb30_prim_mock_utmi_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_usb30_prim_mock_utmi_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb30_prim_sleep_clk = {
+       .halt_reg = 0xf014,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xf014,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb30_prim_sleep_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb30_sec_master_clk = {
+       .halt_reg = 0x10010,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x10010,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb30_sec_master_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_usb30_sec_master_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb30_sec_mock_utmi_clk = {
+       .halt_reg = 0x10018,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x10018,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb30_sec_mock_utmi_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_usb30_sec_mock_utmi_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb30_sec_sleep_clk = {
+       .halt_reg = 0x10014,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x10014,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb30_sec_sleep_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb3_prim_clkref_clk = {
+       .halt_reg = 0x8c008,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8c008,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb3_prim_clkref_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb3_prim_phy_aux_clk = {
+       .halt_reg = 0xf050,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xf050,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb3_prim_phy_aux_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_usb3_prim_phy_aux_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb3_prim_phy_com_aux_clk = {
+       .halt_reg = 0xf054,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xf054,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb3_prim_phy_com_aux_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_usb3_prim_phy_aux_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb3_sec_clkref_clk = {
+       .halt_reg = 0x8c028,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8c028,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb3_sec_clkref_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb3_sec_phy_aux_clk = {
+       .halt_reg = 0x10050,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x10050,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb3_sec_phy_aux_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_usb3_sec_phy_aux_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb3_sec_phy_com_aux_clk = {
+       .halt_reg = 0x10054,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x10054,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb3_sec_phy_com_aux_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                                     &gcc_usb3_sec_phy_aux_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+/*
+ * Clock ON depends on external parent 'config noc', so can't poll the
+ * halt status; use a delay instead and also mark as critical for video boot
+ */
+static struct clk_branch gcc_video_ahb_clk = {
+       .halt_reg = 0xb004,
+       .halt_check = BRANCH_HALT_DELAY,
+       .hwcg_reg = 0xb004,
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0xb004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_video_ahb_clk",
+                       .flags = CLK_IS_CRITICAL,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_video_axi0_clk = {
+       .halt_reg = 0xb024,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb024,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_video_axi0_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_video_axi1_clk = {
+       .halt_reg = 0xb028,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb028,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_video_axi1_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_video_axic_clk = {
+       .halt_reg = 0xb02c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb02c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_video_axic_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+/* XO is a critical input to video, so no need to poll */
+static struct clk_branch gcc_video_xo_clk = {
+       .halt_reg = 0xb040,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0xb040,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_video_xo_clk",
+                       .flags = CLK_IS_CRITICAL,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_regmap *gcc_sm8150_clocks[] = {
+       [GCC_AGGRE_NOC_PCIE_TBU_CLK] = &gcc_aggre_noc_pcie_tbu_clk.clkr,
+       [GCC_AGGRE_UFS_CARD_AXI_CLK] = &gcc_aggre_ufs_card_axi_clk.clkr,
+       [GCC_AGGRE_UFS_CARD_AXI_HW_CTL_CLK] =
+               &gcc_aggre_ufs_card_axi_hw_ctl_clk.clkr,
+       [GCC_AGGRE_UFS_PHY_AXI_CLK] = &gcc_aggre_ufs_phy_axi_clk.clkr,
+       [GCC_AGGRE_UFS_PHY_AXI_HW_CTL_CLK] =
+               &gcc_aggre_ufs_phy_axi_hw_ctl_clk.clkr,
+       [GCC_AGGRE_USB3_PRIM_AXI_CLK] = &gcc_aggre_usb3_prim_axi_clk.clkr,
+       [GCC_AGGRE_USB3_SEC_AXI_CLK] = &gcc_aggre_usb3_sec_axi_clk.clkr,
+       [GCC_BOOT_ROM_AHB_CLK] = &gcc_boot_rom_ahb_clk.clkr,
+       [GCC_CAMERA_AHB_CLK] = &gcc_camera_ahb_clk.clkr,
+       [GCC_CAMERA_HF_AXI_CLK] = &gcc_camera_hf_axi_clk.clkr,
+       [GCC_CAMERA_SF_AXI_CLK] = &gcc_camera_sf_axi_clk.clkr,
+       [GCC_CAMERA_XO_CLK] = &gcc_camera_xo_clk.clkr,
+       [GCC_CFG_NOC_USB3_PRIM_AXI_CLK] = &gcc_cfg_noc_usb3_prim_axi_clk.clkr,
+       [GCC_CFG_NOC_USB3_SEC_AXI_CLK] = &gcc_cfg_noc_usb3_sec_axi_clk.clkr,
+       [GCC_CPUSS_AHB_CLK] = &gcc_cpuss_ahb_clk.clkr,
+       [GCC_CPUSS_AHB_CLK_SRC] = &gcc_cpuss_ahb_clk_src.clkr,
+       [GCC_CPUSS_DVM_BUS_CLK] = &gcc_cpuss_dvm_bus_clk.clkr,
+       [GCC_CPUSS_GNOC_CLK] = &gcc_cpuss_gnoc_clk.clkr,
+       [GCC_CPUSS_RBCPR_CLK] = &gcc_cpuss_rbcpr_clk.clkr,
+       [GCC_DDRSS_GPU_AXI_CLK] = &gcc_ddrss_gpu_axi_clk.clkr,
+       [GCC_DISP_AHB_CLK] = &gcc_disp_ahb_clk.clkr,
+       [GCC_DISP_HF_AXI_CLK] = &gcc_disp_hf_axi_clk.clkr,
+       [GCC_DISP_SF_AXI_CLK] = &gcc_disp_sf_axi_clk.clkr,
+       [GCC_DISP_XO_CLK] = &gcc_disp_xo_clk.clkr,
+       [GCC_EMAC_AXI_CLK] = &gcc_emac_axi_clk.clkr,
+       [GCC_EMAC_PTP_CLK] = &gcc_emac_ptp_clk.clkr,
+       [GCC_EMAC_PTP_CLK_SRC] = &gcc_emac_ptp_clk_src.clkr,
+       [GCC_EMAC_RGMII_CLK] = &gcc_emac_rgmii_clk.clkr,
+       [GCC_EMAC_RGMII_CLK_SRC] = &gcc_emac_rgmii_clk_src.clkr,
+       [GCC_EMAC_SLV_AHB_CLK] = &gcc_emac_slv_ahb_clk.clkr,
+       [GCC_GP1_CLK] = &gcc_gp1_clk.clkr,
+       [GCC_GP1_CLK_SRC] = &gcc_gp1_clk_src.clkr,
+       [GCC_GP2_CLK] = &gcc_gp2_clk.clkr,
+       [GCC_GP2_CLK_SRC] = &gcc_gp2_clk_src.clkr,
+       [GCC_GP3_CLK] = &gcc_gp3_clk.clkr,
+       [GCC_GP3_CLK_SRC] = &gcc_gp3_clk_src.clkr,
+       [GCC_GPU_CFG_AHB_CLK] = &gcc_gpu_cfg_ahb_clk.clkr,
+       [GCC_GPU_IREF_CLK] = &gcc_gpu_iref_clk.clkr,
+       [GCC_GPU_MEMNOC_GFX_CLK] = &gcc_gpu_memnoc_gfx_clk.clkr,
+       [GCC_GPU_SNOC_DVM_GFX_CLK] = &gcc_gpu_snoc_dvm_gfx_clk.clkr,
+       [GCC_NPU_AT_CLK] = &gcc_npu_at_clk.clkr,
+       [GCC_NPU_AXI_CLK] = &gcc_npu_axi_clk.clkr,
+       [GCC_NPU_CFG_AHB_CLK] = &gcc_npu_cfg_ahb_clk.clkr,
+       [GCC_NPU_TRIG_CLK] = &gcc_npu_trig_clk.clkr,
+       [GCC_PCIE0_PHY_REFGEN_CLK] = &gcc_pcie0_phy_refgen_clk.clkr,
+       [GCC_PCIE1_PHY_REFGEN_CLK] = &gcc_pcie1_phy_refgen_clk.clkr,
+       [GCC_PCIE_0_AUX_CLK] = &gcc_pcie_0_aux_clk.clkr,
+       [GCC_PCIE_0_AUX_CLK_SRC] = &gcc_pcie_0_aux_clk_src.clkr,
+       [GCC_PCIE_0_CFG_AHB_CLK] = &gcc_pcie_0_cfg_ahb_clk.clkr,
+       [GCC_PCIE_0_CLKREF_CLK] = &gcc_pcie_0_clkref_clk.clkr,
+       [GCC_PCIE_0_MSTR_AXI_CLK] = &gcc_pcie_0_mstr_axi_clk.clkr,
+       [GCC_PCIE_0_PIPE_CLK] = &gcc_pcie_0_pipe_clk.clkr,
+       [GCC_PCIE_0_SLV_AXI_CLK] = &gcc_pcie_0_slv_axi_clk.clkr,
+       [GCC_PCIE_0_SLV_Q2A_AXI_CLK] = &gcc_pcie_0_slv_q2a_axi_clk.clkr,
+       [GCC_PCIE_1_AUX_CLK] = &gcc_pcie_1_aux_clk.clkr,
+       [GCC_PCIE_1_AUX_CLK_SRC] = &gcc_pcie_1_aux_clk_src.clkr,
+       [GCC_PCIE_1_CFG_AHB_CLK] = &gcc_pcie_1_cfg_ahb_clk.clkr,
+       [GCC_PCIE_1_CLKREF_CLK] = &gcc_pcie_1_clkref_clk.clkr,
+       [GCC_PCIE_1_MSTR_AXI_CLK] = &gcc_pcie_1_mstr_axi_clk.clkr,
+       [GCC_PCIE_1_PIPE_CLK] = &gcc_pcie_1_pipe_clk.clkr,
+       [GCC_PCIE_1_SLV_AXI_CLK] = &gcc_pcie_1_slv_axi_clk.clkr,
+       [GCC_PCIE_1_SLV_Q2A_AXI_CLK] = &gcc_pcie_1_slv_q2a_axi_clk.clkr,
+       [GCC_PCIE_PHY_AUX_CLK] = &gcc_pcie_phy_aux_clk.clkr,
+       [GCC_PCIE_PHY_REFGEN_CLK_SRC] = &gcc_pcie_phy_refgen_clk_src.clkr,
+       [GCC_PDM2_CLK] = &gcc_pdm2_clk.clkr,
+       [GCC_PDM2_CLK_SRC] = &gcc_pdm2_clk_src.clkr,
+       [GCC_PDM_AHB_CLK] = &gcc_pdm_ahb_clk.clkr,
+       [GCC_PDM_XO4_CLK] = &gcc_pdm_xo4_clk.clkr,
+       [GCC_PRNG_AHB_CLK] = &gcc_prng_ahb_clk.clkr,
+       [GCC_QMIP_CAMERA_NRT_AHB_CLK] = &gcc_qmip_camera_nrt_ahb_clk.clkr,
+       [GCC_QMIP_CAMERA_RT_AHB_CLK] = &gcc_qmip_camera_rt_ahb_clk.clkr,
+       [GCC_QMIP_DISP_AHB_CLK] = &gcc_qmip_disp_ahb_clk.clkr,
+       [GCC_QMIP_VIDEO_CVP_AHB_CLK] = &gcc_qmip_video_cvp_ahb_clk.clkr,
+       [GCC_QMIP_VIDEO_VCODEC_AHB_CLK] = &gcc_qmip_video_vcodec_ahb_clk.clkr,
+       [GCC_QSPI_CNOC_PERIPH_AHB_CLK] = &gcc_qspi_cnoc_periph_ahb_clk.clkr,
+       [GCC_QSPI_CORE_CLK] = &gcc_qspi_core_clk.clkr,
+       [GCC_QSPI_CORE_CLK_SRC] = &gcc_qspi_core_clk_src.clkr,
+       [GCC_QUPV3_WRAP0_S0_CLK] = &gcc_qupv3_wrap0_s0_clk.clkr,
+       [GCC_QUPV3_WRAP0_S0_CLK_SRC] = &gcc_qupv3_wrap0_s0_clk_src.clkr,
+       [GCC_QUPV3_WRAP0_S1_CLK] = &gcc_qupv3_wrap0_s1_clk.clkr,
+       [GCC_QUPV3_WRAP0_S1_CLK_SRC] = &gcc_qupv3_wrap0_s1_clk_src.clkr,
+       [GCC_QUPV3_WRAP0_S2_CLK] = &gcc_qupv3_wrap0_s2_clk.clkr,
+       [GCC_QUPV3_WRAP0_S2_CLK_SRC] = &gcc_qupv3_wrap0_s2_clk_src.clkr,
+       [GCC_QUPV3_WRAP0_S3_CLK] = &gcc_qupv3_wrap0_s3_clk.clkr,
+       [GCC_QUPV3_WRAP0_S3_CLK_SRC] = &gcc_qupv3_wrap0_s3_clk_src.clkr,
+       [GCC_QUPV3_WRAP0_S4_CLK] = &gcc_qupv3_wrap0_s4_clk.clkr,
+       [GCC_QUPV3_WRAP0_S4_CLK_SRC] = &gcc_qupv3_wrap0_s4_clk_src.clkr,
+       [GCC_QUPV3_WRAP0_S5_CLK] = &gcc_qupv3_wrap0_s5_clk.clkr,
+       [GCC_QUPV3_WRAP0_S5_CLK_SRC] = &gcc_qupv3_wrap0_s5_clk_src.clkr,
+       [GCC_QUPV3_WRAP0_S6_CLK] = &gcc_qupv3_wrap0_s6_clk.clkr,
+       [GCC_QUPV3_WRAP0_S6_CLK_SRC] = &gcc_qupv3_wrap0_s6_clk_src.clkr,
+       [GCC_QUPV3_WRAP0_S7_CLK] = &gcc_qupv3_wrap0_s7_clk.clkr,
+       [GCC_QUPV3_WRAP0_S7_CLK_SRC] = &gcc_qupv3_wrap0_s7_clk_src.clkr,
+       [GCC_QUPV3_WRAP1_S0_CLK] = &gcc_qupv3_wrap1_s0_clk.clkr,
+       [GCC_QUPV3_WRAP1_S0_CLK_SRC] = &gcc_qupv3_wrap1_s0_clk_src.clkr,
+       [GCC_QUPV3_WRAP1_S1_CLK] = &gcc_qupv3_wrap1_s1_clk.clkr,
+       [GCC_QUPV3_WRAP1_S1_CLK_SRC] = &gcc_qupv3_wrap1_s1_clk_src.clkr,
+       [GCC_QUPV3_WRAP1_S2_CLK] = &gcc_qupv3_wrap1_s2_clk.clkr,
+       [GCC_QUPV3_WRAP1_S2_CLK_SRC] = &gcc_qupv3_wrap1_s2_clk_src.clkr,
+       [GCC_QUPV3_WRAP1_S3_CLK] = &gcc_qupv3_wrap1_s3_clk.clkr,
+       [GCC_QUPV3_WRAP1_S3_CLK_SRC] = &gcc_qupv3_wrap1_s3_clk_src.clkr,
+       [GCC_QUPV3_WRAP1_S4_CLK] = &gcc_qupv3_wrap1_s4_clk.clkr,
+       [GCC_QUPV3_WRAP1_S4_CLK_SRC] = &gcc_qupv3_wrap1_s4_clk_src.clkr,
+       [GCC_QUPV3_WRAP1_S5_CLK] = &gcc_qupv3_wrap1_s5_clk.clkr,
+       [GCC_QUPV3_WRAP1_S5_CLK_SRC] = &gcc_qupv3_wrap1_s5_clk_src.clkr,
+       [GCC_QUPV3_WRAP2_S0_CLK] = &gcc_qupv3_wrap2_s0_clk.clkr,
+       [GCC_QUPV3_WRAP2_S0_CLK_SRC] = &gcc_qupv3_wrap2_s0_clk_src.clkr,
+       [GCC_QUPV3_WRAP2_S1_CLK] = &gcc_qupv3_wrap2_s1_clk.clkr,
+       [GCC_QUPV3_WRAP2_S1_CLK_SRC] = &gcc_qupv3_wrap2_s1_clk_src.clkr,
+       [GCC_QUPV3_WRAP2_S2_CLK] = &gcc_qupv3_wrap2_s2_clk.clkr,
+       [GCC_QUPV3_WRAP2_S2_CLK_SRC] = &gcc_qupv3_wrap2_s2_clk_src.clkr,
+       [GCC_QUPV3_WRAP2_S3_CLK] = &gcc_qupv3_wrap2_s3_clk.clkr,
+       [GCC_QUPV3_WRAP2_S3_CLK_SRC] = &gcc_qupv3_wrap2_s3_clk_src.clkr,
+       [GCC_QUPV3_WRAP2_S4_CLK] = &gcc_qupv3_wrap2_s4_clk.clkr,
+       [GCC_QUPV3_WRAP2_S4_CLK_SRC] = &gcc_qupv3_wrap2_s4_clk_src.clkr,
+       [GCC_QUPV3_WRAP2_S5_CLK] = &gcc_qupv3_wrap2_s5_clk.clkr,
+       [GCC_QUPV3_WRAP2_S5_CLK_SRC] = &gcc_qupv3_wrap2_s5_clk_src.clkr,
+       [GCC_QUPV3_WRAP_0_M_AHB_CLK] = &gcc_qupv3_wrap_0_m_ahb_clk.clkr,
+       [GCC_QUPV3_WRAP_0_S_AHB_CLK] = &gcc_qupv3_wrap_0_s_ahb_clk.clkr,
+       [GCC_QUPV3_WRAP_1_M_AHB_CLK] = &gcc_qupv3_wrap_1_m_ahb_clk.clkr,
+       [GCC_QUPV3_WRAP_1_S_AHB_CLK] = &gcc_qupv3_wrap_1_s_ahb_clk.clkr,
+       [GCC_QUPV3_WRAP_2_M_AHB_CLK] = &gcc_qupv3_wrap_2_m_ahb_clk.clkr,
+       [GCC_QUPV3_WRAP_2_S_AHB_CLK] = &gcc_qupv3_wrap_2_s_ahb_clk.clkr,
+       [GCC_SDCC2_AHB_CLK] = &gcc_sdcc2_ahb_clk.clkr,
+       [GCC_SDCC2_APPS_CLK] = &gcc_sdcc2_apps_clk.clkr,
+       [GCC_SDCC2_APPS_CLK_SRC] = &gcc_sdcc2_apps_clk_src.clkr,
+       [GCC_SDCC4_AHB_CLK] = &gcc_sdcc4_ahb_clk.clkr,
+       [GCC_SDCC4_APPS_CLK] = &gcc_sdcc4_apps_clk.clkr,
+       [GCC_SDCC4_APPS_CLK_SRC] = &gcc_sdcc4_apps_clk_src.clkr,
+       [GCC_SYS_NOC_CPUSS_AHB_CLK] = &gcc_sys_noc_cpuss_ahb_clk.clkr,
+       [GCC_TSIF_AHB_CLK] = &gcc_tsif_ahb_clk.clkr,
+       [GCC_TSIF_INACTIVITY_TIMERS_CLK] = &gcc_tsif_inactivity_timers_clk.clkr,
+       [GCC_TSIF_REF_CLK] = &gcc_tsif_ref_clk.clkr,
+       [GCC_TSIF_REF_CLK_SRC] = &gcc_tsif_ref_clk_src.clkr,
+       [GCC_UFS_CARD_AHB_CLK] = &gcc_ufs_card_ahb_clk.clkr,
+       [GCC_UFS_CARD_AXI_CLK] = &gcc_ufs_card_axi_clk.clkr,
+       [GCC_UFS_CARD_AXI_CLK_SRC] = &gcc_ufs_card_axi_clk_src.clkr,
+       [GCC_UFS_CARD_AXI_HW_CTL_CLK] = &gcc_ufs_card_axi_hw_ctl_clk.clkr,
+       [GCC_UFS_CARD_CLKREF_CLK] = &gcc_ufs_card_clkref_clk.clkr,
+       [GCC_UFS_CARD_ICE_CORE_CLK] = &gcc_ufs_card_ice_core_clk.clkr,
+       [GCC_UFS_CARD_ICE_CORE_CLK_SRC] = &gcc_ufs_card_ice_core_clk_src.clkr,
+       [GCC_UFS_CARD_ICE_CORE_HW_CTL_CLK] =
+               &gcc_ufs_card_ice_core_hw_ctl_clk.clkr,
+       [GCC_UFS_CARD_PHY_AUX_CLK] = &gcc_ufs_card_phy_aux_clk.clkr,
+       [GCC_UFS_CARD_PHY_AUX_CLK_SRC] = &gcc_ufs_card_phy_aux_clk_src.clkr,
+       [GCC_UFS_CARD_PHY_AUX_HW_CTL_CLK] =
+               &gcc_ufs_card_phy_aux_hw_ctl_clk.clkr,
+       [GCC_UFS_CARD_UNIPRO_CORE_CLK] = &gcc_ufs_card_unipro_core_clk.clkr,
+       [GCC_UFS_CARD_UNIPRO_CORE_CLK_SRC] =
+               &gcc_ufs_card_unipro_core_clk_src.clkr,
+       [GCC_UFS_CARD_UNIPRO_CORE_HW_CTL_CLK] =
+               &gcc_ufs_card_unipro_core_hw_ctl_clk.clkr,
+       [GCC_UFS_MEM_CLKREF_CLK] = &gcc_ufs_mem_clkref_clk.clkr,
+       [GCC_UFS_PHY_AHB_CLK] = &gcc_ufs_phy_ahb_clk.clkr,
+       [GCC_UFS_PHY_AXI_CLK] = &gcc_ufs_phy_axi_clk.clkr,
+       [GCC_UFS_PHY_AXI_CLK_SRC] = &gcc_ufs_phy_axi_clk_src.clkr,
+       [GCC_UFS_PHY_AXI_HW_CTL_CLK] = &gcc_ufs_phy_axi_hw_ctl_clk.clkr,
+       [GCC_UFS_PHY_ICE_CORE_CLK] = &gcc_ufs_phy_ice_core_clk.clkr,
+       [GCC_UFS_PHY_ICE_CORE_CLK_SRC] = &gcc_ufs_phy_ice_core_clk_src.clkr,
+       [GCC_UFS_PHY_ICE_CORE_HW_CTL_CLK] =
+               &gcc_ufs_phy_ice_core_hw_ctl_clk.clkr,
+       [GCC_UFS_PHY_PHY_AUX_CLK] = &gcc_ufs_phy_phy_aux_clk.clkr,
+       [GCC_UFS_PHY_PHY_AUX_CLK_SRC] = &gcc_ufs_phy_phy_aux_clk_src.clkr,
+       [GCC_UFS_PHY_PHY_AUX_HW_CTL_CLK] = &gcc_ufs_phy_phy_aux_hw_ctl_clk.clkr,
+       [GCC_UFS_PHY_UNIPRO_CORE_CLK] = &gcc_ufs_phy_unipro_core_clk.clkr,
+       [GCC_UFS_PHY_UNIPRO_CORE_CLK_SRC] =
+               &gcc_ufs_phy_unipro_core_clk_src.clkr,
+       [GCC_UFS_PHY_UNIPRO_CORE_HW_CTL_CLK] =
+               &gcc_ufs_phy_unipro_core_hw_ctl_clk.clkr,
+       [GCC_USB30_PRIM_MASTER_CLK] = &gcc_usb30_prim_master_clk.clkr,
+       [GCC_USB30_PRIM_MASTER_CLK_SRC] = &gcc_usb30_prim_master_clk_src.clkr,
+       [GCC_USB30_PRIM_MOCK_UTMI_CLK] = &gcc_usb30_prim_mock_utmi_clk.clkr,
+       [GCC_USB30_PRIM_MOCK_UTMI_CLK_SRC] =
+               &gcc_usb30_prim_mock_utmi_clk_src.clkr,
+       [GCC_USB30_PRIM_SLEEP_CLK] = &gcc_usb30_prim_sleep_clk.clkr,
+       [GCC_USB30_SEC_MASTER_CLK] = &gcc_usb30_sec_master_clk.clkr,
+       [GCC_USB30_SEC_MASTER_CLK_SRC] = &gcc_usb30_sec_master_clk_src.clkr,
+       [GCC_USB30_SEC_MOCK_UTMI_CLK] = &gcc_usb30_sec_mock_utmi_clk.clkr,
+       [GCC_USB30_SEC_MOCK_UTMI_CLK_SRC] =
+               &gcc_usb30_sec_mock_utmi_clk_src.clkr,
+       [GCC_USB30_SEC_SLEEP_CLK] = &gcc_usb30_sec_sleep_clk.clkr,
+       [GCC_USB3_PRIM_CLKREF_CLK] = &gcc_usb3_prim_clkref_clk.clkr,
+       [GCC_USB3_PRIM_PHY_AUX_CLK] = &gcc_usb3_prim_phy_aux_clk.clkr,
+       [GCC_USB3_PRIM_PHY_AUX_CLK_SRC] = &gcc_usb3_prim_phy_aux_clk_src.clkr,
+       [GCC_USB3_PRIM_PHY_COM_AUX_CLK] = &gcc_usb3_prim_phy_com_aux_clk.clkr,
+       [GCC_USB3_SEC_CLKREF_CLK] = &gcc_usb3_sec_clkref_clk.clkr,
+       [GCC_USB3_SEC_PHY_AUX_CLK] = &gcc_usb3_sec_phy_aux_clk.clkr,
+       [GCC_USB3_SEC_PHY_AUX_CLK_SRC] = &gcc_usb3_sec_phy_aux_clk_src.clkr,
+       [GCC_USB3_SEC_PHY_COM_AUX_CLK] = &gcc_usb3_sec_phy_com_aux_clk.clkr,
+       [GCC_VIDEO_AHB_CLK] = &gcc_video_ahb_clk.clkr,
+       [GCC_VIDEO_AXI0_CLK] = &gcc_video_axi0_clk.clkr,
+       [GCC_VIDEO_AXI1_CLK] = &gcc_video_axi1_clk.clkr,
+       [GCC_VIDEO_AXIC_CLK] = &gcc_video_axic_clk.clkr,
+       [GCC_VIDEO_XO_CLK] = &gcc_video_xo_clk.clkr,
+       [GPLL0] = &gpll0.clkr,
+       [GPLL0_OUT_EVEN] = &gpll0_out_even.clkr,
+       [GPLL7] = &gpll7.clkr,
+       [GPLL9] = &gpll9.clkr,
+};
+
+static const struct qcom_reset_map gcc_sm8150_resets[] = {
+       [GCC_EMAC_BCR] = { 0x6000 },
+       [GCC_GPU_BCR] = { 0x71000 },
+       [GCC_MMSS_BCR] = { 0xb000 },
+       [GCC_NPU_BCR] = { 0x4d000 },
+       [GCC_PCIE_0_BCR] = { 0x6b000 },
+       [GCC_PCIE_0_PHY_BCR] = { 0x6c01c },
+       [GCC_PCIE_1_BCR] = { 0x8d000 },
+       [GCC_PCIE_1_PHY_BCR] = { 0x8e01c },
+       [GCC_PCIE_PHY_BCR] = { 0x6f000 },
+       [GCC_PDM_BCR] = { 0x33000 },
+       [GCC_PRNG_BCR] = { 0x34000 },
+       [GCC_QSPI_BCR] = { 0x24008 },
+       [GCC_QUPV3_WRAPPER_0_BCR] = { 0x17000 },
+       [GCC_QUPV3_WRAPPER_1_BCR] = { 0x18000 },
+       [GCC_QUPV3_WRAPPER_2_BCR] = { 0x1e000 },
+       [GCC_QUSB2PHY_PRIM_BCR] = { 0x12000 },
+       [GCC_QUSB2PHY_SEC_BCR] = { 0x12004 },
+       [GCC_USB3_PHY_PRIM_BCR] = { 0x50000 },
+       [GCC_USB3_DP_PHY_PRIM_BCR] = { 0x50008 },
+       [GCC_USB3_PHY_SEC_BCR] = { 0x5000c },
+       [GCC_USB3PHY_PHY_SEC_BCR] = { 0x50010 },
+       [GCC_SDCC2_BCR] = { 0x14000 },
+       [GCC_SDCC4_BCR] = { 0x16000 },
+       [GCC_TSIF_BCR] = { 0x36000 },
+       [GCC_UFS_CARD_BCR] = { 0x75000 },
+       [GCC_UFS_PHY_BCR] = { 0x77000 },
+       [GCC_USB30_PRIM_BCR] = { 0xf000 },
+       [GCC_USB30_SEC_BCR] = { 0x10000 },
+       [GCC_USB_PHY_CFG_AHB2PHY_BCR] = { 0x6a000 },
+};
+
+static const struct regmap_config gcc_sm8150_regmap_config = {
+       .reg_bits       = 32,
+       .reg_stride     = 4,
+       .val_bits       = 32,
+       .max_register   = 0x9c040,
+       .fast_io        = true,
+};
+
+static const struct qcom_cc_desc gcc_sm8150_desc = {
+       .config = &gcc_sm8150_regmap_config,
+       .clks = gcc_sm8150_clocks,
+       .num_clks = ARRAY_SIZE(gcc_sm8150_clocks),
+       .resets = gcc_sm8150_resets,
+       .num_resets = ARRAY_SIZE(gcc_sm8150_resets),
+};
+
+static const struct of_device_id gcc_sm8150_match_table[] = {
+       { .compatible = "qcom,gcc-sm8150" },
+       { }
+};
+MODULE_DEVICE_TABLE(of, gcc_sm8150_match_table);
+
+static int gcc_sm8150_probe(struct platform_device *pdev)
+{
+       struct regmap *regmap;
+
+       regmap = qcom_cc_map(pdev, &gcc_sm8150_desc);
+       if (IS_ERR(regmap))
+               return PTR_ERR(regmap);
+
+       /* Disable the GPLL0 active input to NPU and GPU via MISC registers */
+       regmap_update_bits(regmap, 0x4d110, 0x3, 0x3);
+       regmap_update_bits(regmap, 0x71028, 0x3, 0x3);
+
+       return qcom_cc_really_probe(pdev, &gcc_sm8150_desc, regmap);
+}
+
+static struct platform_driver gcc_sm8150_driver = {
+       .probe          = gcc_sm8150_probe,
+       .driver         = {
+               .name   = "gcc-sm8150",
+               .of_match_table = gcc_sm8150_match_table,
+       },
+};
+
+static int __init gcc_sm8150_init(void)
+{
+       return platform_driver_register(&gcc_sm8150_driver);
+}
+subsys_initcall(gcc_sm8150_init);
+
+static void __exit gcc_sm8150_exit(void)
+{
+       platform_driver_unregister(&gcc_sm8150_driver);
+}
+module_exit(gcc_sm8150_exit);
+
+MODULE_DESCRIPTION("QTI GCC SM8150 Driver");
+MODULE_LICENSE("GPL v2");
index e246b99..56d3e99 100644 (file)
@@ -112,25 +112,6 @@ static const struct qcom_cc_desc lpass_qdsp6ss_sdm845_desc = {
        .num_clks = ARRAY_SIZE(lpass_qdsp6ss_sdm845_clocks),
 };
 
-static int lpass_clocks_sdm845_probe(struct platform_device *pdev, int index,
-                                    const struct qcom_cc_desc *desc)
-{
-       struct regmap *regmap;
-       struct resource *res;
-       void __iomem *base;
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, index);
-       base = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(base))
-               return PTR_ERR(base);
-
-       regmap = devm_regmap_init_mmio(&pdev->dev, base, desc->config);
-       if (IS_ERR(regmap))
-               return PTR_ERR(regmap);
-
-       return qcom_cc_really_probe(pdev, desc, regmap);
-}
-
 static int lpass_cc_sdm845_probe(struct platform_device *pdev)
 {
        const struct qcom_cc_desc *desc;
@@ -139,14 +120,14 @@ static int lpass_cc_sdm845_probe(struct platform_device *pdev)
        lpass_regmap_config.name = "cc";
        desc = &lpass_cc_sdm845_desc;
 
-       ret = lpass_clocks_sdm845_probe(pdev, 0, desc);
+       ret = qcom_cc_probe_by_index(pdev, 0, desc);
        if (ret)
                return ret;
 
        lpass_regmap_config.name = "qdsp6ss";
        desc = &lpass_qdsp6ss_sdm845_desc;
 
-       return lpass_clocks_sdm845_probe(pdev, 1, desc);
+       return qcom_cc_probe_by_index(pdev, 1, desc);
 }
 
 static const struct of_device_id lpass_cc_sdm845_match_table[] = {
index aa859e6..4cfbbf5 100644 (file)
@@ -96,7 +96,7 @@ static const struct regmap_config turingcc_regmap_config = {
        .reg_bits       = 32,
        .reg_stride     = 4,
        .val_bits       = 32,
-       .max_register   = 0x30000,
+       .max_register   = 0x23004,
        .fast_io        = true,
 };
 
index 2db9093..e326e6d 100644 (file)
@@ -334,7 +334,8 @@ void __init cpg_mstp_add_clk_domain(struct device_node *np)
                return;
 
        pd->name = np->name;
-       pd->flags = GENPD_FLAG_PM_CLK | GENPD_FLAG_ACTIVE_WAKEUP;
+       pd->flags = GENPD_FLAG_PM_CLK | GENPD_FLAG_ALWAYS_ON |
+                   GENPD_FLAG_ACTIVE_WAKEUP;
        pd->attach_dev = cpg_mstp_attach_dev;
        pd->detach_dev = cpg_mstp_detach_dev;
        pm_genpd_init(pd, &pm_domain_always_on_gov, false);
index b33e138..1907ee1 100644 (file)
@@ -421,7 +421,8 @@ static int r9a06g032_add_clk_domain(struct device *dev)
                return -ENOMEM;
 
        pd->name = np->name;
-       pd->flags = GENPD_FLAG_PM_CLK | GENPD_FLAG_ACTIVE_WAKEUP;
+       pd->flags = GENPD_FLAG_PM_CLK | GENPD_FLAG_ALWAYS_ON |
+                   GENPD_FLAG_ACTIVE_WAKEUP;
        pd->attach_dev = r9a06g032_attach_dev;
        pd->detach_dev = r9a06g032_detach_dev;
        pm_genpd_init(pd, &pm_domain_always_on_gov, false);
index cc90b11..b97f5f9 100644 (file)
@@ -117,7 +117,6 @@ static int rcar_usb2_clock_sel_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct device_node *np = dev->of_node;
        struct usb2_clock_sel_priv *priv;
-       struct resource *res;
        struct clk *clk;
        struct clk_init_data init;
 
@@ -125,8 +124,7 @@ static int rcar_usb2_clock_sel_probe(struct platform_device *pdev)
        if (!priv)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       priv->base = devm_ioremap_resource(dev, res);
+       priv->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(priv->base))
                return PTR_ERR(priv->base);
 
index d4075b1..132cc96 100644 (file)
@@ -551,7 +551,8 @@ static int __init cpg_mssr_add_clk_domain(struct device *dev,
 
        genpd = &pd->genpd;
        genpd->name = np->name;
-       genpd->flags = GENPD_FLAG_PM_CLK | GENPD_FLAG_ACTIVE_WAKEUP;
+       genpd->flags = GENPD_FLAG_PM_CLK | GENPD_FLAG_ALWAYS_ON |
+                      GENPD_FLAG_ACTIVE_WAKEUP;
        genpd->attach_dev = cpg_mssr_attach_dev;
        genpd->detach_dev = cpg_mssr_detach_dev;
        pm_genpd_init(genpd, &pm_domain_always_on_gov, false);
index ff35ab4..7c5b581 100644 (file)
@@ -20,6 +20,7 @@ obj-y += clk-rk3128.o
 obj-y  += clk-rk3188.o
 obj-y  += clk-rk3228.o
 obj-y  += clk-rk3288.o
+obj-y  += clk-rk3308.o
 obj-y  += clk-rk3328.o
 obj-y  += clk-rk3368.o
 obj-y  += clk-rk3399.o
diff --git a/drivers/clk/rockchip/clk-rk3308.c b/drivers/clk/rockchip/clk-rk3308.c
new file mode 100644 (file)
index 0000000..b0baf87
--- /dev/null
@@ -0,0 +1,955 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2019 Rockchip Electronics Co. Ltd.
+ * Author: Finley Xiao <finley.xiao@rock-chips.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/syscore_ops.h>
+#include <dt-bindings/clock/rk3308-cru.h>
+#include "clk.h"
+
+#define RK3308_GRF_SOC_STATUS0         0x380
+
+enum rk3308_plls {
+       apll, dpll, vpll0, vpll1,
+};
+
+static struct rockchip_pll_rate_table rk3308_pll_rates[] = {
+       /* _rate (Hz), _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */
+       RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1188000000, 2, 99, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1100000000, 12, 550, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0),
+       RK3036_PLL_RATE(1000000000, 6, 500, 2, 1, 1, 0),
+       RK3036_PLL_RATE(984000000, 1, 82, 2, 1, 1, 0),
+       RK3036_PLL_RATE(960000000, 1, 80, 2, 1, 1, 0),
+       RK3036_PLL_RATE(936000000, 1, 78, 2, 1, 1, 0),
+       RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0),
+       RK3036_PLL_RATE(900000000, 4, 300, 2, 1, 1, 0),
+       RK3036_PLL_RATE(888000000, 1, 74, 2, 1, 1, 0),
+       RK3036_PLL_RATE(864000000, 1, 72, 2, 1, 1, 0),
+       RK3036_PLL_RATE(840000000, 1, 70, 2, 1, 1, 0),
+       RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0),
+       RK3036_PLL_RATE(800000000, 6, 400, 2, 1, 1, 0),
+       RK3036_PLL_RATE(700000000, 6, 350, 2, 1, 1, 0),
+       RK3036_PLL_RATE(696000000, 1, 58, 2, 1, 1, 0),
+       RK3036_PLL_RATE(624000000, 1, 52, 2, 1, 1, 0),
+       RK3036_PLL_RATE(600000000, 1, 75, 3, 1, 1, 0),
+       RK3036_PLL_RATE(594000000, 2, 99, 2, 1, 1, 0),
+       RK3036_PLL_RATE(504000000, 1, 63, 3, 1, 1, 0),
+       RK3036_PLL_RATE(500000000, 6, 250, 2, 1, 1, 0),
+       RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0),
+       RK3036_PLL_RATE(312000000, 1, 52, 2, 2, 1, 0),
+       RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0),
+       RK3036_PLL_RATE(96000000, 1, 64, 4, 4, 1, 0),
+       { /* sentinel */ },
+};
+
+#define RK3308_DIV_ACLKM_MASK          0x7
+#define RK3308_DIV_ACLKM_SHIFT         12
+#define RK3308_DIV_PCLK_DBG_MASK       0xf
+#define RK3308_DIV_PCLK_DBG_SHIFT      8
+
+#define RK3308_CLKSEL0(_aclk_core, _pclk_dbg)                          \
+{                                                                      \
+       .reg = RK3308_CLKSEL_CON(0),                                    \
+       .val = HIWORD_UPDATE(_aclk_core, RK3308_DIV_ACLKM_MASK,         \
+                            RK3308_DIV_ACLKM_SHIFT) |                  \
+              HIWORD_UPDATE(_pclk_dbg, RK3308_DIV_PCLK_DBG_MASK,       \
+                            RK3308_DIV_PCLK_DBG_SHIFT),                \
+}
+
+#define RK3308_CPUCLK_RATE(_prate, _aclk_core, _pclk_dbg)              \
+{                                                                      \
+       .prate = _prate,                                                \
+       .divs = {                                                       \
+               RK3308_CLKSEL0(_aclk_core, _pclk_dbg),                  \
+       },                                                              \
+}
+
+static struct rockchip_cpuclk_rate_table rk3308_cpuclk_rates[] __initdata = {
+       RK3308_CPUCLK_RATE(1608000000, 1, 7),
+       RK3308_CPUCLK_RATE(1512000000, 1, 7),
+       RK3308_CPUCLK_RATE(1488000000, 1, 5),
+       RK3308_CPUCLK_RATE(1416000000, 1, 5),
+       RK3308_CPUCLK_RATE(1392000000, 1, 5),
+       RK3308_CPUCLK_RATE(1296000000, 1, 5),
+       RK3308_CPUCLK_RATE(1200000000, 1, 5),
+       RK3308_CPUCLK_RATE(1104000000, 1, 5),
+       RK3308_CPUCLK_RATE(1008000000, 1, 5),
+       RK3308_CPUCLK_RATE(912000000, 1, 5),
+       RK3308_CPUCLK_RATE(816000000, 1, 3),
+       RK3308_CPUCLK_RATE(696000000, 1, 3),
+       RK3308_CPUCLK_RATE(600000000, 1, 3),
+       RK3308_CPUCLK_RATE(408000000, 1, 1),
+       RK3308_CPUCLK_RATE(312000000, 1, 1),
+       RK3308_CPUCLK_RATE(216000000,  1, 1),
+       RK3308_CPUCLK_RATE(96000000, 1, 1),
+};
+
+static const struct rockchip_cpuclk_reg_data rk3308_cpuclk_data = {
+       .core_reg = RK3308_CLKSEL_CON(0),
+       .div_core_shift = 0,
+       .div_core_mask = 0xf,
+       .mux_core_alt = 1,
+       .mux_core_main = 0,
+       .mux_core_shift = 6,
+       .mux_core_mask = 0x3,
+};
+
+PNAME(mux_pll_p)               = { "xin24m" };
+PNAME(mux_usb480m_p)           = { "xin24m", "usb480m_phy", "clk_rtc32k" };
+PNAME(mux_armclk_p)            = { "apll_core", "vpll0_core", "vpll1_core" };
+PNAME(mux_dpll_vpll0_p)                = { "dpll", "vpll0" };
+PNAME(mux_dpll_vpll0_xin24m_p) = { "dpll", "vpll0", "xin24m" };
+PNAME(mux_dpll_vpll0_vpll1_p)  = { "dpll", "vpll0", "vpll1" };
+PNAME(mux_dpll_vpll0_vpll1_xin24m_p)   = { "dpll", "vpll0", "vpll1", "xin24m" };
+PNAME(mux_dpll_vpll0_vpll1_usb480m_xin24m_p)   = { "dpll", "vpll0", "vpll1", "usb480m", "xin24m" };
+PNAME(mux_vpll0_vpll1_p)       = { "vpll0", "vpll1" };
+PNAME(mux_vpll0_vpll1_xin24m_p)        = { "vpll0", "vpll1", "xin24m" };
+PNAME(mux_uart0_p)             = { "clk_uart0_src", "dummy", "clk_uart0_frac" };
+PNAME(mux_uart1_p)             = { "clk_uart1_src", "dummy", "clk_uart1_frac" };
+PNAME(mux_uart2_p)             = { "clk_uart2_src", "dummy", "clk_uart2_frac" };
+PNAME(mux_uart3_p)             = { "clk_uart3_src", "dummy", "clk_uart3_frac" };
+PNAME(mux_uart4_p)             = { "clk_uart4_src", "dummy", "clk_uart4_frac" };
+PNAME(mux_timer_src_p)         = { "xin24m", "clk_rtc32k" };
+PNAME(mux_dclk_vop_p)          = { "dclk_vop_src", "dclk_vop_frac", "xin24m" };
+PNAME(mux_nandc_p)             = { "clk_nandc_div", "clk_nandc_div50" };
+PNAME(mux_sdmmc_p)             = { "clk_sdmmc_div", "clk_sdmmc_div50" };
+PNAME(mux_sdio_p)              = { "clk_sdio_div", "clk_sdio_div50" };
+PNAME(mux_emmc_p)              = { "clk_emmc_div", "clk_emmc_div50" };
+PNAME(mux_mac_p)               = { "clk_mac_src", "mac_clkin" };
+PNAME(mux_mac_rmii_sel_p)      = { "clk_mac_rx_tx_div20", "clk_mac_rx_tx_div2" };
+PNAME(mux_ddrstdby_p)          = { "clk_ddrphy1x_out", "clk_ddr_stdby_div4" };
+PNAME(mux_rtc32k_p)            = { "xin32k", "clk_pvtm_32k", "clk_rtc32k_frac", "clk_rtc32k_div" };
+PNAME(mux_usbphy_ref_p)                = { "xin24m", "clk_usbphy_ref_src" };
+PNAME(mux_wifi_src_p)          = { "clk_wifi_dpll", "clk_wifi_vpll0" };
+PNAME(mux_wifi_p)              = { "clk_wifi_osc", "clk_wifi_src" };
+PNAME(mux_pdm_p)               = { "clk_pdm_src", "clk_pdm_frac" };
+PNAME(mux_i2s0_8ch_tx_p)       = { "clk_i2s0_8ch_tx_src", "clk_i2s0_8ch_tx_frac", "mclk_i2s0_8ch_in" };
+PNAME(mux_i2s0_8ch_tx_rx_p)    = { "clk_i2s0_8ch_tx_mux", "clk_i2s0_8ch_rx_mux"};
+PNAME(mux_i2s0_8ch_tx_out_p)   = { "clk_i2s0_8ch_tx", "xin12m" };
+PNAME(mux_i2s0_8ch_rx_p)       = { "clk_i2s0_8ch_rx_src", "clk_i2s0_8ch_rx_frac", "mclk_i2s0_8ch_in" };
+PNAME(mux_i2s0_8ch_rx_tx_p)    = { "clk_i2s0_8ch_rx_mux", "clk_i2s0_8ch_tx_mux"};
+PNAME(mux_i2s1_8ch_tx_p)       = { "clk_i2s1_8ch_tx_src", "clk_i2s1_8ch_tx_frac", "mclk_i2s1_8ch_in" };
+PNAME(mux_i2s1_8ch_tx_rx_p)    = { "clk_i2s1_8ch_tx_mux", "clk_i2s1_8ch_rx_mux"};
+PNAME(mux_i2s1_8ch_tx_out_p)   = { "clk_i2s1_8ch_tx", "xin12m" };
+PNAME(mux_i2s1_8ch_rx_p)       = { "clk_i2s1_8ch_rx_src", "clk_i2s1_8ch_rx_frac", "mclk_i2s1_8ch_in" };
+PNAME(mux_i2s1_8ch_rx_tx_p)    = { "clk_i2s1_8ch_rx_mux", "clk_i2s1_8ch_tx_mux"};
+PNAME(mux_i2s2_8ch_tx_p)       = { "clk_i2s2_8ch_tx_src", "clk_i2s2_8ch_tx_frac", "mclk_i2s2_8ch_in" };
+PNAME(mux_i2s2_8ch_tx_rx_p)    = { "clk_i2s2_8ch_tx_mux", "clk_i2s2_8ch_rx_mux"};
+PNAME(mux_i2s2_8ch_tx_out_p)   = { "clk_i2s2_8ch_tx", "xin12m" };
+PNAME(mux_i2s2_8ch_rx_p)       = { "clk_i2s2_8ch_rx_src", "clk_i2s2_8ch_rx_frac", "mclk_i2s2_8ch_in" };
+PNAME(mux_i2s2_8ch_rx_tx_p)    = { "clk_i2s2_8ch_rx_mux", "clk_i2s2_8ch_tx_mux"};
+PNAME(mux_i2s3_8ch_tx_p)       = { "clk_i2s3_8ch_tx_src", "clk_i2s3_8ch_tx_frac", "mclk_i2s3_8ch_in" };
+PNAME(mux_i2s3_8ch_tx_rx_p)    = { "clk_i2s3_8ch_tx_mux", "clk_i2s3_8ch_rx_mux"};
+PNAME(mux_i2s3_8ch_tx_out_p)   = { "clk_i2s3_8ch_tx", "xin12m" };
+PNAME(mux_i2s3_8ch_rx_p)       = { "clk_i2s3_8ch_rx_src", "clk_i2s3_8ch_rx_frac", "mclk_i2s3_8ch_in" };
+PNAME(mux_i2s3_8ch_rx_tx_p)    = { "clk_i2s3_8ch_rx_mux", "clk_i2s3_8ch_tx_mux"};
+PNAME(mux_i2s0_2ch_p)          = { "clk_i2s0_2ch_src", "clk_i2s0_2ch_frac", "mclk_i2s0_2ch_in" };
+PNAME(mux_i2s0_2ch_out_p)      = { "clk_i2s0_2ch", "xin12m" };
+PNAME(mux_i2s1_2ch_p)          = { "clk_i2s1_2ch_src", "clk_i2s1_2ch_frac", "mclk_i2s1_2ch_in"};
+PNAME(mux_i2s1_2ch_out_p)      = { "clk_i2s1_2ch", "xin12m" };
+PNAME(mux_spdif_tx_src_p)      = { "clk_spdif_tx_div", "clk_spdif_tx_div50" };
+PNAME(mux_spdif_tx_p)          = { "clk_spdif_tx_src", "clk_spdif_tx_frac", "mclk_i2s0_2ch_in" };
+PNAME(mux_spdif_rx_src_p)      = { "clk_spdif_rx_div", "clk_spdif_rx_div50" };
+PNAME(mux_spdif_rx_p)          = { "clk_spdif_rx_src", "clk_spdif_rx_frac" };
+
+static struct rockchip_pll_clock rk3308_pll_clks[] __initdata = {
+       [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p,
+                    0, RK3308_PLL_CON(0),
+                    RK3308_MODE_CON, 0, 0, 0, rk3308_pll_rates),
+       [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p,
+                    0, RK3308_PLL_CON(8),
+                    RK3308_MODE_CON, 2, 1, 0, rk3308_pll_rates),
+       [vpll0] = PLL(pll_rk3328, PLL_VPLL0, "vpll0", mux_pll_p,
+                    0, RK3308_PLL_CON(16),
+                    RK3308_MODE_CON, 4, 2, 0, rk3308_pll_rates),
+       [vpll1] = PLL(pll_rk3328, PLL_VPLL1, "vpll1", mux_pll_p,
+                    0, RK3308_PLL_CON(24),
+                    RK3308_MODE_CON, 6, 3, 0, rk3308_pll_rates),
+};
+
+#define MFLAGS CLK_MUX_HIWORD_MASK
+#define DFLAGS CLK_DIVIDER_HIWORD_MASK
+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE)
+
+static struct rockchip_clk_branch rk3308_uart0_fracmux __initdata =
+       MUX(0, "clk_uart0_mux", mux_uart0_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(11), 14, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_uart1_fracmux __initdata =
+       MUX(0, "clk_uart1_mux", mux_uart1_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(14), 14, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_uart2_fracmux __initdata =
+       MUX(0, "clk_uart2_mux", mux_uart2_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(17), 14, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_uart3_fracmux __initdata =
+       MUX(0, "clk_uart3_mux", mux_uart3_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(20), 14, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_uart4_fracmux __initdata =
+       MUX(0, "clk_uart4_mux", mux_uart4_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(23), 14, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_dclk_vop_fracmux __initdata =
+       MUX(0, "dclk_vop_mux", mux_dclk_vop_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(8), 14, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_rtc32k_fracmux __initdata =
+       MUX(SCLK_RTC32K, "clk_rtc32k", mux_rtc32k_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(2), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_pdm_fracmux __initdata =
+       MUX(0, "clk_pdm_mux", mux_pdm_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(46), 15, 1, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_i2s0_8ch_tx_fracmux __initdata =
+       MUX(SCLK_I2S0_8CH_TX_MUX, "clk_i2s0_8ch_tx_mux", mux_i2s0_8ch_tx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(52), 10, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_i2s0_8ch_rx_fracmux __initdata =
+       MUX(SCLK_I2S0_8CH_RX_MUX, "clk_i2s0_8ch_rx_mux", mux_i2s0_8ch_rx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(54), 10, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_i2s1_8ch_tx_fracmux __initdata =
+       MUX(SCLK_I2S1_8CH_TX_MUX, "clk_i2s1_8ch_tx_mux", mux_i2s1_8ch_tx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(56), 10, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_i2s1_8ch_rx_fracmux __initdata =
+       MUX(SCLK_I2S1_8CH_RX_MUX, "clk_i2s1_8ch_rx_mux", mux_i2s1_8ch_rx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(58), 10, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_i2s2_8ch_tx_fracmux __initdata =
+       MUX(SCLK_I2S2_8CH_TX_MUX, "clk_i2s2_8ch_tx_mux", mux_i2s2_8ch_tx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(60), 10, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_i2s2_8ch_rx_fracmux __initdata =
+       MUX(SCLK_I2S2_8CH_RX_MUX, "clk_i2s2_8ch_rx_mux", mux_i2s2_8ch_rx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(62), 10, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_i2s3_8ch_tx_fracmux __initdata =
+       MUX(SCLK_I2S3_8CH_TX_MUX, "clk_i2s3_8ch_tx_mux", mux_i2s3_8ch_tx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(64), 10, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_i2s3_8ch_rx_fracmux __initdata =
+       MUX(SCLK_I2S3_8CH_RX_MUX, "clk_i2s3_8ch_rx_mux", mux_i2s3_8ch_rx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(66), 10, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_i2s0_2ch_fracmux __initdata =
+       MUX(0, "clk_i2s0_2ch_mux", mux_i2s0_2ch_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(68), 10, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_i2s1_2ch_fracmux __initdata =
+       MUX(0, "clk_i2s1_2ch_mux", mux_i2s1_2ch_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(70), 10, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_spdif_tx_fracmux __initdata =
+       MUX(0, "clk_spdif_tx_mux", mux_spdif_tx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(48), 14, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3308_spdif_rx_fracmux __initdata =
+       MUX(0, "clk_spdif_rx_mux", mux_spdif_rx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(50), 15, 1, MFLAGS);
+
+
+static struct rockchip_clk_branch rk3308_clk_branches[] __initdata = {
+       /*
+        * Clock-Architecture Diagram 1
+        */
+
+       MUX(USB480M, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT,
+                       RK3308_MODE_CON, 8, 2, MFLAGS),
+       FACTOR(0, "xin12m", "xin24m", 0, 1, 2),
+
+       /*
+        * Clock-Architecture Diagram 2
+        */
+
+       GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED,
+                       RK3308_CLKGATE_CON(0), 0, GFLAGS),
+       GATE(0, "vpll0_core", "vpll0", CLK_IGNORE_UNUSED,
+                       RK3308_CLKGATE_CON(0), 0, GFLAGS),
+       GATE(0, "vpll1_core", "vpll1", CLK_IGNORE_UNUSED,
+                       RK3308_CLKGATE_CON(0), 0, GFLAGS),
+       COMPOSITE_NOMUX(0, "pclk_core_dbg", "armclk", CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(0), 8, 4, DFLAGS | CLK_DIVIDER_READ_ONLY,
+                       RK3308_CLKGATE_CON(0), 2, GFLAGS),
+       COMPOSITE_NOMUX(0, "aclk_core", "armclk", CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(0), 12, 3, DFLAGS | CLK_DIVIDER_READ_ONLY,
+                       RK3308_CLKGATE_CON(0), 1, GFLAGS),
+
+       GATE(0, "clk_jtag", "jtag_clkin", CLK_IGNORE_UNUSED,
+                       RK3308_CLKGATE_CON(0), 3, GFLAGS),
+
+       GATE(SCLK_PVTM_CORE, "clk_pvtm_core", "xin24m", 0,
+                       RK3308_CLKGATE_CON(0), 4, GFLAGS),
+
+       /*
+        * Clock-Architecture Diagram 3
+        */
+
+       COMPOSITE_NODIV(ACLK_BUS_SRC, "clk_bus_src", mux_dpll_vpll0_vpll1_p, CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(5), 6, 2, MFLAGS,
+                       RK3308_CLKGATE_CON(1), 0, GFLAGS),
+       COMPOSITE_NOMUX(PCLK_BUS, "pclk_bus", "clk_bus_src", CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(6), 8, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(1), 3, GFLAGS),
+       GATE(PCLK_DDR, "pclk_ddr", "pclk_bus", CLK_IGNORE_UNUSED,
+                       RK3308_CLKGATE_CON(4), 15, GFLAGS),
+       COMPOSITE_NOMUX(HCLK_BUS, "hclk_bus", "clk_bus_src", CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(6), 0, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(1), 2, GFLAGS),
+       COMPOSITE_NOMUX(ACLK_BUS, "aclk_bus", "clk_bus_src", CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(5), 0, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(1), 1, GFLAGS),
+
+       COMPOSITE(0, "clk_uart0_src", mux_dpll_vpll0_vpll1_usb480m_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(10), 13, 3, MFLAGS, 0, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(1), 9, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_uart0_frac", "clk_uart0_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(12), 0,
+                       RK3308_CLKGATE_CON(1), 11, GFLAGS,
+                       &rk3308_uart0_fracmux),
+       GATE(SCLK_UART0, "clk_uart0", "clk_uart0_mux", 0,
+                       RK3308_CLKGATE_CON(1), 12, GFLAGS),
+
+       COMPOSITE(0, "clk_uart1_src", mux_dpll_vpll0_vpll1_usb480m_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(13), 13, 3, MFLAGS, 0, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(1), 13, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(15), 0,
+                       RK3308_CLKGATE_CON(1), 15, GFLAGS,
+                       &rk3308_uart1_fracmux),
+       GATE(SCLK_UART1, "clk_uart1", "clk_uart1_mux", 0,
+                       RK3308_CLKGATE_CON(2), 0, GFLAGS),
+
+       COMPOSITE(0, "clk_uart2_src", mux_dpll_vpll0_vpll1_usb480m_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(16), 13, 3, MFLAGS, 0, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(2), 1, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(18), 0,
+                       RK3308_CLKGATE_CON(2), 3, GFLAGS,
+                       &rk3308_uart2_fracmux),
+       GATE(SCLK_UART2, "clk_uart2", "clk_uart2_mux", CLK_SET_RATE_PARENT,
+                       RK3308_CLKGATE_CON(2), 4, GFLAGS),
+
+       COMPOSITE(0, "clk_uart3_src", mux_dpll_vpll0_vpll1_usb480m_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(19), 13, 3, MFLAGS, 0, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(2), 5, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(21), 0,
+                       RK3308_CLKGATE_CON(2), 7, GFLAGS,
+                       &rk3308_uart3_fracmux),
+       GATE(SCLK_UART3, "clk_uart3", "clk_uart3_mux", 0,
+                       RK3308_CLKGATE_CON(2), 8, GFLAGS),
+
+       COMPOSITE(0, "clk_uart4_src", mux_dpll_vpll0_vpll1_usb480m_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(22), 13, 3, MFLAGS, 0, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(2), 9, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(24), 0,
+                       RK3308_CLKGATE_CON(2), 11, GFLAGS,
+                       &rk3308_uart4_fracmux),
+       GATE(SCLK_UART4, "clk_uart4", "clk_uart4_mux", 0,
+                       RK3308_CLKGATE_CON(2), 12, GFLAGS),
+
+       COMPOSITE(SCLK_I2C0, "clk_i2c0", mux_dpll_vpll0_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(25), 14, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(2), 13, GFLAGS),
+       COMPOSITE(SCLK_I2C1, "clk_i2c1", mux_dpll_vpll0_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(26), 14, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(2), 14, GFLAGS),
+       COMPOSITE(SCLK_I2C2, "clk_i2c2", mux_dpll_vpll0_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(27), 14, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(2), 15, GFLAGS),
+       COMPOSITE(SCLK_I2C3, "clk_i2c3", mux_dpll_vpll0_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(28), 14, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(3), 0, GFLAGS),
+
+       COMPOSITE(SCLK_PWM0, "clk_pwm0", mux_dpll_vpll0_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(29), 14, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(3), 1, GFLAGS),
+       COMPOSITE(SCLK_PWM1, "clk_pwm1", mux_dpll_vpll0_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(74), 14, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(15), 0, GFLAGS),
+       COMPOSITE(SCLK_PWM2, "clk_pwm2", mux_dpll_vpll0_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(75), 14, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(15), 1, GFLAGS),
+
+       COMPOSITE(SCLK_SPI0, "clk_spi0", mux_dpll_vpll0_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(30), 14, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(3), 2, GFLAGS),
+       COMPOSITE(SCLK_SPI1, "clk_spi1", mux_dpll_vpll0_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(31), 14, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(3), 3, GFLAGS),
+       COMPOSITE(SCLK_SPI2, "clk_spi2", mux_dpll_vpll0_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(32), 14, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(3), 4, GFLAGS),
+
+       GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0,
+                       RK3308_CLKGATE_CON(3), 10, GFLAGS),
+       GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0,
+                       RK3308_CLKGATE_CON(3), 11, GFLAGS),
+       GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0,
+                       RK3308_CLKGATE_CON(3), 12, GFLAGS),
+       GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0,
+                       RK3308_CLKGATE_CON(3), 13, GFLAGS),
+       GATE(SCLK_TIMER4, "sclk_timer4", "xin24m", 0,
+                       RK3308_CLKGATE_CON(3), 14, GFLAGS),
+       GATE(SCLK_TIMER5, "sclk_timer5", "xin24m", 0,
+                       RK3308_CLKGATE_CON(3), 15, GFLAGS),
+
+       COMPOSITE_NOMUX(SCLK_TSADC, "clk_tsadc", "xin24m", 0,
+                       RK3308_CLKSEL_CON(33), 0, 11, DFLAGS,
+                       RK3308_CLKGATE_CON(3), 5, GFLAGS),
+       COMPOSITE_NOMUX(SCLK_SARADC, "clk_saradc", "xin24m", 0,
+                       RK3308_CLKSEL_CON(34), 0, 11, DFLAGS,
+                       RK3308_CLKGATE_CON(3), 6, GFLAGS),
+
+       COMPOSITE_NOMUX(SCLK_OTP, "clk_otp", "xin24m", 0,
+                       RK3308_CLKSEL_CON(35), 0, 4, DFLAGS,
+                       RK3308_CLKGATE_CON(3), 7, GFLAGS),
+       COMPOSITE_NOMUX(SCLK_OTP_USR, "clk_otp_usr", "clk_otp", 0,
+                       RK3308_CLKSEL_CON(35), 4, 2, DFLAGS,
+                       RK3308_CLKGATE_CON(3), 8, GFLAGS),
+
+       GATE(SCLK_CPU_BOOST, "clk_cpu_boost", "xin24m", CLK_IGNORE_UNUSED,
+                       RK3308_CLKGATE_CON(3), 9, GFLAGS),
+
+       COMPOSITE(SCLK_CRYPTO, "clk_crypto", mux_dpll_vpll0_vpll1_p, 0,
+                       RK3308_CLKSEL_CON(7), 6, 2, MFLAGS, 0, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(1), 4, GFLAGS),
+       COMPOSITE(SCLK_CRYPTO_APK, "clk_crypto_apk", mux_dpll_vpll0_vpll1_p, 0,
+                       RK3308_CLKSEL_CON(7), 14, 2, MFLAGS, 8, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(1), 5, GFLAGS),
+
+       COMPOSITE(0, "dclk_vop_src", mux_dpll_vpll0_vpll1_p, 0,
+                       RK3308_CLKSEL_CON(8), 10, 2, MFLAGS, 0, 8, DFLAGS,
+                       RK3308_CLKGATE_CON(1), 6, GFLAGS),
+       COMPOSITE_FRACMUX(0, "dclk_vop_frac", "dclk_vop_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(9), 0,
+                       RK3308_CLKGATE_CON(1), 7, GFLAGS,
+                       &rk3308_dclk_vop_fracmux),
+       GATE(DCLK_VOP, "dclk_vop", "dclk_vop_mux", 0,
+                       RK3308_CLKGATE_CON(1), 8, GFLAGS),
+
+       /*
+        * Clock-Architecture Diagram 4
+        */
+
+       COMPOSITE_NODIV(ACLK_PERI_SRC, "clk_peri_src", mux_dpll_vpll0_vpll1_p, CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(36), 6, 2, MFLAGS,
+                       RK3308_CLKGATE_CON(8), 0, GFLAGS),
+       COMPOSITE_NOMUX(ACLK_PERI, "aclk_peri", "clk_peri_src", CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(36), 0, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(8), 1, GFLAGS),
+       COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "clk_peri_src", CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(37), 0, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(8), 2, GFLAGS),
+       COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "clk_peri_src", CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(37), 8, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(8), 3, GFLAGS),
+
+       COMPOSITE(SCLK_NANDC_DIV, "clk_nandc_div", mux_dpll_vpll0_vpll1_p, CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(38), 6, 2, MFLAGS, 0, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(8), 4, GFLAGS),
+       COMPOSITE(SCLK_NANDC_DIV50, "clk_nandc_div50", mux_dpll_vpll0_vpll1_p, CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(38), 6, 2, MFLAGS, 0, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(8), 4, GFLAGS),
+       COMPOSITE_NODIV(SCLK_NANDC, "clk_nandc", mux_nandc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+                       RK3308_CLKSEL_CON(38), 15, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(8), 5, GFLAGS),
+
+       COMPOSITE(SCLK_SDMMC_DIV, "clk_sdmmc_div", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(39), 8, 2, MFLAGS, 0, 8, DFLAGS,
+                       RK3308_CLKGATE_CON(8), 6, GFLAGS),
+       COMPOSITE(SCLK_SDMMC_DIV50, "clk_sdmmc_div50", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(39), 8, 2, MFLAGS, 0, 8, DFLAGS,
+                       RK3308_CLKGATE_CON(8), 6, GFLAGS),
+       COMPOSITE_NODIV(SCLK_SDMMC, "clk_sdmmc", mux_sdmmc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+                       RK3308_CLKSEL_CON(39), 15, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(8), 7, GFLAGS),
+       MMC(SCLK_SDMMC_DRV,     "sdmmc_drv",    "clk_sdmmc", RK3308_SDMMC_CON0, 1),
+       MMC(SCLK_SDMMC_SAMPLE,  "sdmmc_sample", "clk_sdmmc", RK3308_SDMMC_CON1, 1),
+
+       COMPOSITE(SCLK_SDIO_DIV, "clk_sdio_div", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(40), 8, 2, MFLAGS, 0, 8, DFLAGS,
+                       RK3308_CLKGATE_CON(8), 8, GFLAGS),
+       COMPOSITE(SCLK_SDIO_DIV50, "clk_sdio_div50", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(40), 8, 2, MFLAGS, 0, 8, DFLAGS,
+                       RK3308_CLKGATE_CON(8), 8, GFLAGS),
+       COMPOSITE_NODIV(SCLK_SDIO, "clk_sdio", mux_sdio_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+                       RK3308_CLKSEL_CON(40), 15, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(8), 9, GFLAGS),
+       MMC(SCLK_SDIO_DRV,              "sdio_drv",    "clk_sdio",      RK3308_SDIO_CON0,  1),
+       MMC(SCLK_SDIO_SAMPLE,   "sdio_sample", "clk_sdio",      RK3308_SDIO_CON1,  1),
+
+       COMPOSITE(SCLK_EMMC_DIV, "clk_emmc_div", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(41), 8, 2, MFLAGS, 0, 8, DFLAGS,
+                       RK3308_CLKGATE_CON(8), 10, GFLAGS),
+       COMPOSITE(SCLK_EMMC_DIV50, "clk_emmc_div50", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(41), 8, 2, MFLAGS, 0, 8, DFLAGS,
+                       RK3308_CLKGATE_CON(8), 10, GFLAGS),
+       COMPOSITE_NODIV(SCLK_EMMC, "clk_emmc", mux_emmc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+                       RK3308_CLKSEL_CON(41), 15, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(8), 11, GFLAGS),
+       MMC(SCLK_EMMC_DRV,     "emmc_drv",     "clk_emmc",  RK3308_EMMC_CON0,  1),
+       MMC(SCLK_EMMC_SAMPLE,  "emmc_sample",  "clk_emmc",  RK3308_EMMC_CON1,  1),
+
+       COMPOSITE(SCLK_SFC, "clk_sfc", mux_dpll_vpll0_vpll1_p, 0,
+                       RK3308_CLKSEL_CON(42), 14, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(8), 12, GFLAGS),
+
+       GATE(SCLK_OTG_ADP, "clk_otg_adp", "clk_rtc32k", 0,
+                       RK3308_CLKGATE_CON(8), 13, GFLAGS),
+
+       COMPOSITE(SCLK_MAC_SRC, "clk_mac_src", mux_dpll_vpll0_vpll1_p, 0,
+                       RK3308_CLKSEL_CON(43), 6, 2, MFLAGS, 0, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(8), 14, GFLAGS),
+       MUX(SCLK_MAC, "clk_mac", mux_mac_p,  CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(43), 14, 1, MFLAGS),
+       GATE(SCLK_MAC_REF, "clk_mac_ref", "clk_mac", 0,
+                       RK3308_CLKGATE_CON(9), 1, GFLAGS),
+       GATE(SCLK_MAC_RX_TX, "clk_mac_rx_tx", "clk_mac", 0,
+                       RK3308_CLKGATE_CON(9), 0, GFLAGS),
+       FACTOR(0, "clk_mac_rx_tx_div2", "clk_mac_rx_tx", 0, 1, 2),
+       FACTOR(0, "clk_mac_rx_tx_div20", "clk_mac_rx_tx", 0, 1, 20),
+       MUX(SCLK_MAC_RMII, "clk_mac_rmii_sel", mux_mac_rmii_sel_p,  CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(43), 15, 1, MFLAGS),
+
+       COMPOSITE(SCLK_OWIRE, "clk_owire", mux_dpll_vpll0_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(44), 14, 2, MFLAGS, 8, 6, DFLAGS,
+                       RK3308_CLKGATE_CON(8), 15, GFLAGS),
+
+       /*
+        * Clock-Architecture Diagram 5
+        */
+
+       GATE(0, "clk_ddr_mon_timer", "xin24m", CLK_IGNORE_UNUSED,
+                       RK3308_CLKGATE_CON(0), 12, GFLAGS),
+
+       GATE(0, "clk_ddr_mon", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED,
+                       RK3308_CLKGATE_CON(4), 10, GFLAGS),
+       GATE(0, "clk_ddr_upctrl", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED,
+                       RK3308_CLKGATE_CON(4), 11, GFLAGS),
+       GATE(0, "clk_ddr_msch", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED,
+                       RK3308_CLKGATE_CON(4), 12, GFLAGS),
+       GATE(0, "clk_ddr_msch_peribus", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED,
+                       RK3308_CLKGATE_CON(4), 13, GFLAGS),
+
+       COMPOSITE(SCLK_DDRCLK, "clk_ddrphy4x_src", mux_dpll_vpll0_vpll1_p, CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(1), 6, 2, MFLAGS, 0, 3, DFLAGS,
+                       RK3308_CLKGATE_CON(0), 10, GFLAGS),
+       GATE(0, "clk_ddrphy4x", "clk_ddrphy4x_src", CLK_IGNORE_UNUSED,
+                       RK3308_CLKGATE_CON(0), 11, GFLAGS),
+       FACTOR_GATE(0, "clk_ddr_stdby_div4", "clk_ddrphy4x", CLK_IGNORE_UNUSED, 1, 4,
+                       RK3308_CLKGATE_CON(0), 13, GFLAGS),
+       COMPOSITE_NODIV(0, "clk_ddrstdby", mux_ddrstdby_p, CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(1), 8, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(4), 14, GFLAGS),
+
+       /*
+        * Clock-Architecture Diagram 6
+        */
+
+       GATE(PCLK_PMU, "pclk_pmu", "pclk_bus", CLK_IGNORE_UNUSED,
+                       RK3308_CLKGATE_CON(4), 5, GFLAGS),
+       GATE(SCLK_PMU, "clk_pmu", "pclk_bus", CLK_IGNORE_UNUSED,
+                       RK3308_CLKGATE_CON(4), 6, GFLAGS),
+
+       COMPOSITE_FRACMUX(0, "clk_rtc32k_frac", "xin24m", CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(3), 0,
+                       RK3308_CLKGATE_CON(4), 3, GFLAGS,
+                       &rk3308_rtc32k_fracmux),
+       MUX(0, "clk_rtc32k_div_src", mux_vpll0_vpll1_p, 0,
+                       RK3308_CLKSEL_CON(2), 10, 1, MFLAGS),
+       COMPOSITE_NOMUX(0, "clk_rtc32k_div", "clk_rtc32k_div_src", CLK_IGNORE_UNUSED | CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(4), 0, 16, DFLAGS,
+                       RK3308_CLKGATE_CON(4), 2, GFLAGS),
+
+       COMPOSITE(0, "clk_usbphy_ref_src", mux_dpll_vpll0_p, 0,
+                       RK3308_CLKSEL_CON(72), 6, 1, MFLAGS, 0, 6, DFLAGS,
+                       RK3308_CLKGATE_CON(4), 7, GFLAGS),
+       COMPOSITE_NODIV(SCLK_USBPHY_REF, "clk_usbphy_ref", mux_usbphy_ref_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(72), 7, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(4), 8, GFLAGS),
+
+       GATE(0, "clk_wifi_dpll", "dpll", 0,
+                       RK3308_CLKGATE_CON(15), 2, GFLAGS),
+       GATE(0, "clk_wifi_vpll0", "vpll0", 0,
+                       RK3308_CLKGATE_CON(15), 3, GFLAGS),
+       GATE(0, "clk_wifi_osc", "xin24m", 0,
+                       RK3308_CLKGATE_CON(15), 4, GFLAGS),
+       COMPOSITE(0, "clk_wifi_src", mux_wifi_src_p, 0,
+                       RK3308_CLKSEL_CON(44), 6, 1, MFLAGS, 0, 6, DFLAGS,
+                       RK3308_CLKGATE_CON(4), 0, GFLAGS),
+       COMPOSITE_NODIV(SCLK_WIFI, "clk_wifi", mux_wifi_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(44), 7, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(4), 1, GFLAGS),
+
+       GATE(SCLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0,
+                       RK3308_CLKGATE_CON(4), 4, GFLAGS),
+
+       /*
+        * Clock-Architecture Diagram 7
+        */
+
+       COMPOSITE_NODIV(0, "clk_audio_src", mux_vpll0_vpll1_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(45), 6, 2, MFLAGS,
+                       RK3308_CLKGATE_CON(10), 0, GFLAGS),
+       COMPOSITE_NOMUX(HCLK_AUDIO, "hclk_audio", "clk_audio_src", 0,
+                       RK3308_CLKSEL_CON(45), 0, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(10), 1, GFLAGS),
+       COMPOSITE_NOMUX(PCLK_AUDIO, "pclk_audio", "clk_audio_src", 0,
+                       RK3308_CLKSEL_CON(45), 8, 5, DFLAGS,
+                       RK3308_CLKGATE_CON(10), 2, GFLAGS),
+
+       COMPOSITE(0, "clk_pdm_src", mux_vpll0_vpll1_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(46), 8, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(10), 3, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_pdm_frac", "clk_pdm_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(47), 0,
+                       RK3308_CLKGATE_CON(10), 4, GFLAGS,
+                       &rk3308_pdm_fracmux),
+       GATE(SCLK_PDM, "clk_pdm", "clk_pdm_mux", 0,
+                       RK3308_CLKGATE_CON(10), 5, GFLAGS),
+
+       COMPOSITE(SCLK_I2S0_8CH_TX_SRC, "clk_i2s0_8ch_tx_src", mux_vpll0_vpll1_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(52), 8, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(10), 12, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_i2s0_8ch_tx_frac", "clk_i2s0_8ch_tx_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(53), 0,
+                       RK3308_CLKGATE_CON(10), 13, GFLAGS,
+                       &rk3308_i2s0_8ch_tx_fracmux),
+       COMPOSITE_NODIV(SCLK_I2S0_8CH_TX, "clk_i2s0_8ch_tx", mux_i2s0_8ch_tx_rx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(52), 12, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(10), 14, GFLAGS),
+       COMPOSITE_NODIV(SCLK_I2S0_8CH_TX_OUT, "clk_i2s0_8ch_tx_out", mux_i2s0_8ch_tx_out_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(52), 15, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(10), 15, GFLAGS),
+
+       COMPOSITE(SCLK_I2S0_8CH_RX_SRC, "clk_i2s0_8ch_rx_src", mux_vpll0_vpll1_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(54), 8, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(11), 0, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_i2s0_8ch_rx_frac", "clk_i2s0_8ch_rx_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(55), 0,
+                       RK3308_CLKGATE_CON(11), 1, GFLAGS,
+                       &rk3308_i2s0_8ch_rx_fracmux),
+       COMPOSITE_NODIV(SCLK_I2S0_8CH_RX, "clk_i2s0_8ch_rx", mux_i2s0_8ch_rx_tx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(54), 12, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(11), 2, GFLAGS),
+       GATE(SCLK_I2S0_8CH_RX_OUT, "clk_i2s0_8ch_rx_out", "clk_i2s0_8ch_rx", 0,
+                       RK3308_CLKGATE_CON(11), 3, GFLAGS),
+
+       COMPOSITE(SCLK_I2S1_8CH_TX_SRC, "clk_i2s1_8ch_tx_src", mux_vpll0_vpll1_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(56), 8, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(11), 4, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_i2s1_8ch_tx_frac", "clk_i2s1_8ch_tx_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(57), 0,
+                       RK3308_CLKGATE_CON(11), 5, GFLAGS,
+                       &rk3308_i2s1_8ch_tx_fracmux),
+       COMPOSITE_NODIV(SCLK_I2S1_8CH_TX, "clk_i2s1_8ch_tx", mux_i2s1_8ch_tx_rx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(56), 12, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(11), 6, GFLAGS),
+       COMPOSITE_NODIV(SCLK_I2S1_8CH_TX_OUT, "clk_i2s1_8ch_tx_out", mux_i2s1_8ch_tx_out_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(56), 15, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(11), 7, GFLAGS),
+
+       COMPOSITE(SCLK_I2S1_8CH_RX_SRC, "clk_i2s1_8ch_rx_src", mux_vpll0_vpll1_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(58), 8, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(11), 8, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_i2s1_8ch_rx_frac", "clk_i2s1_8ch_rx_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(59), 0,
+                       RK3308_CLKGATE_CON(11), 9, GFLAGS,
+                       &rk3308_i2s1_8ch_rx_fracmux),
+       COMPOSITE_NODIV(SCLK_I2S1_8CH_RX, "clk_i2s1_8ch_rx", mux_i2s1_8ch_rx_tx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(58), 12, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(11), 10, GFLAGS),
+       GATE(SCLK_I2S1_8CH_RX_OUT, "clk_i2s1_8ch_rx_out", "clk_i2s1_8ch_rx", 0,
+                       RK3308_CLKGATE_CON(11), 11, GFLAGS),
+
+       COMPOSITE(SCLK_I2S2_8CH_TX_SRC, "clk_i2s2_8ch_tx_src", mux_vpll0_vpll1_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(60), 8, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(11), 12, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_i2s2_8ch_tx_frac", "clk_i2s2_8ch_tx_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(61), 0,
+                       RK3308_CLKGATE_CON(11), 13, GFLAGS,
+                       &rk3308_i2s2_8ch_tx_fracmux),
+       COMPOSITE_NODIV(SCLK_I2S2_8CH_TX, "clk_i2s2_8ch_tx", mux_i2s2_8ch_tx_rx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(60), 12, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(11), 14, GFLAGS),
+       COMPOSITE_NODIV(SCLK_I2S2_8CH_TX_OUT, "clk_i2s2_8ch_tx_out", mux_i2s2_8ch_tx_out_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(60), 15, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(11), 15, GFLAGS),
+
+       COMPOSITE(SCLK_I2S2_8CH_RX_SRC, "clk_i2s2_8ch_rx_src", mux_vpll0_vpll1_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(62), 8, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(12), 0, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_i2s2_8ch_rx_frac", "clk_i2s2_8ch_rx_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(63), 0,
+                       RK3308_CLKGATE_CON(12), 1, GFLAGS,
+                       &rk3308_i2s2_8ch_rx_fracmux),
+       COMPOSITE_NODIV(SCLK_I2S2_8CH_RX, "clk_i2s2_8ch_rx", mux_i2s2_8ch_rx_tx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(62), 12, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(12), 2, GFLAGS),
+       GATE(SCLK_I2S2_8CH_RX_OUT, "clk_i2s2_8ch_rx_out", "clk_i2s2_8ch_rx", 0,
+                       RK3308_CLKGATE_CON(12), 3, GFLAGS),
+
+       COMPOSITE(SCLK_I2S3_8CH_TX_SRC, "clk_i2s3_8ch_tx_src", mux_vpll0_vpll1_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(64), 8, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(12), 4, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_i2s3_8ch_tx_frac", "clk_i2s3_8ch_tx_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(65), 0,
+                       RK3308_CLKGATE_CON(12), 5, GFLAGS,
+                       &rk3308_i2s3_8ch_tx_fracmux),
+       COMPOSITE_NODIV(SCLK_I2S3_8CH_TX, "clk_i2s3_8ch_tx", mux_i2s3_8ch_tx_rx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(64), 12, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(12), 6, GFLAGS),
+       COMPOSITE_NODIV(SCLK_I2S3_8CH_TX_OUT, "clk_i2s3_8ch_tx_out", mux_i2s3_8ch_tx_out_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(64), 15, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(12), 7, GFLAGS),
+
+       COMPOSITE(SCLK_I2S3_8CH_RX_SRC, "clk_i2s3_8ch_rx_src", mux_vpll0_vpll1_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(66), 8, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(12), 8, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_i2s3_8ch_rx_frac", "clk_i2s3_8ch_rx_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(67), 0,
+                       RK3308_CLKGATE_CON(12), 9, GFLAGS,
+                       &rk3308_i2s3_8ch_rx_fracmux),
+       COMPOSITE_NODIV(SCLK_I2S3_8CH_RX, "clk_i2s3_8ch_rx", mux_i2s3_8ch_rx_tx_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(66), 12, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(12), 10, GFLAGS),
+       GATE(SCLK_I2S3_8CH_RX_OUT, "clk_i2s3_8ch_rx_out", "clk_i2s3_8ch_rx", 0,
+                       RK3308_CLKGATE_CON(12), 11, GFLAGS),
+
+       COMPOSITE(SCLK_I2S0_2CH_SRC, "clk_i2s0_2ch_src", mux_vpll0_vpll1_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(68), 8, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(12), 12, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_i2s0_2ch_frac", "clk_i2s0_2ch_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(69), 0,
+                       RK3308_CLKGATE_CON(12), 13, GFLAGS,
+                       &rk3308_i2s0_2ch_fracmux),
+       GATE(SCLK_I2S0_2CH, "clk_i2s0_2ch", "clk_i2s0_2ch_mux", 0,
+                       RK3308_CLKGATE_CON(12), 14, GFLAGS),
+       COMPOSITE_NODIV(SCLK_I2S0_2CH_OUT, "clk_i2s0_2ch_out", mux_i2s0_2ch_out_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(68), 15, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(12), 15, GFLAGS),
+
+       COMPOSITE(SCLK_I2S1_2CH_SRC, "clk_i2s1_2ch_src", mux_vpll0_vpll1_xin24m_p, 0,
+                       RK3308_CLKSEL_CON(70), 8, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(13), 0, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_i2s1_2ch_frac", "clk_i2s1_2ch_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(71), 0,
+                       RK3308_CLKGATE_CON(13), 1, GFLAGS,
+                       &rk3308_i2s1_2ch_fracmux),
+       GATE(SCLK_I2S1_2CH, "clk_i2s1_2ch", "clk_i2s1_2ch_mux", 0,
+                       RK3308_CLKGATE_CON(13), 2, GFLAGS),
+       COMPOSITE_NODIV(SCLK_I2S1_2CH_OUT, "clk_i2s1_2ch_out", mux_i2s1_2ch_out_p, CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(70), 15, 1, MFLAGS,
+                       RK3308_CLKGATE_CON(13), 3, GFLAGS),
+
+       COMPOSITE(SCLK_SPDIF_TX_DIV, "clk_spdif_tx_div", mux_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(48), 8, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(10), 6, GFLAGS),
+       COMPOSITE(SCLK_SPDIF_TX_DIV50, "clk_spdif_tx_div50", mux_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(48), 8, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(10), 6, GFLAGS),
+       MUX(0, "clk_spdif_tx_src", mux_spdif_tx_src_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+                       RK3308_CLKSEL_CON(48), 12, 1, MFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_spdif_tx_frac", "clk_spdif_tx_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(49), 0,
+                       RK3308_CLKGATE_CON(10), 7, GFLAGS,
+                       &rk3308_spdif_tx_fracmux),
+       GATE(SCLK_SPDIF_TX, "clk_spdif_tx", "clk_spdif_tx_mux", 0,
+                       RK3308_CLKGATE_CON(10), 8, GFLAGS),
+
+       COMPOSITE(SCLK_SPDIF_RX_DIV, "clk_spdif_rx_div", mux_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(50), 8, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(10), 9, GFLAGS),
+       COMPOSITE(SCLK_SPDIF_RX_DIV50, "clk_spdif_rx_div50", mux_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED,
+                       RK3308_CLKSEL_CON(50), 8, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3308_CLKGATE_CON(10), 9, GFLAGS),
+       MUX(0, "clk_spdif_rx_src", mux_spdif_rx_src_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+                       RK3308_CLKSEL_CON(50), 14, 1, MFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_spdif_rx_frac", "clk_spdif_rx_src", CLK_SET_RATE_PARENT,
+                       RK3308_CLKSEL_CON(51), 0,
+                       RK3308_CLKGATE_CON(10), 10, GFLAGS,
+                       &rk3308_spdif_rx_fracmux),
+       GATE(SCLK_SPDIF_RX, "clk_spdif_rx", "clk_spdif_rx_mux", 0,
+                       RK3308_CLKGATE_CON(10), 11, GFLAGS),
+
+       /*
+        * Clock-Architecture Diagram 8
+        */
+
+       GATE(0, "aclk_core_niu", "aclk_core", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(0), 5, GFLAGS),
+       GATE(0, "pclk_core_dbg_niu", "aclk_core", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(0), 6, GFLAGS),
+       GATE(0, "pclk_core_dbg_daplite", "pclk_core_dbg", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(0), 7, GFLAGS),
+       GATE(0, "aclk_core_perf", "pclk_core_dbg", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(0), 8, GFLAGS),
+       GATE(0, "pclk_core_grf", "pclk_core_dbg", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(0), 9, GFLAGS),
+
+       GATE(0, "aclk_peri_niu", "aclk_peri", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(9), 2, GFLAGS),
+       GATE(0, "aclk_peribus_niu", "aclk_peri", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(9), 3, GFLAGS),
+       GATE(ACLK_MAC, "aclk_mac", "aclk_peri", 0, RK3308_CLKGATE_CON(9), 4, GFLAGS),
+
+       GATE(0, "hclk_peri_niu", "hclk_peri", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(9), 5, GFLAGS),
+       GATE(HCLK_NANDC, "hclk_nandc", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 6, GFLAGS),
+       GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 7, GFLAGS),
+       GATE(HCLK_SDIO, "hclk_sdio", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 8, GFLAGS),
+       GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 9, GFLAGS),
+       GATE(HCLK_SFC, "hclk_sfc", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 10, GFLAGS),
+       GATE(HCLK_OTG, "hclk_otg", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 11, GFLAGS),
+       GATE(HCLK_HOST, "hclk_host", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 12, GFLAGS),
+       GATE(HCLK_HOST_ARB, "hclk_host_arb", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 13, GFLAGS),
+
+       GATE(0, "pclk_peri_niu", "pclk_peri", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(9), 14, GFLAGS),
+       GATE(PCLK_MAC, "pclk_mac", "pclk_peri", 0, RK3308_CLKGATE_CON(9), 15, GFLAGS),
+
+       GATE(0, "hclk_audio_niu", "hclk_audio", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(14), 0, GFLAGS),
+       GATE(HCLK_PDM, "hclk_pdm", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 1, GFLAGS),
+       GATE(HCLK_SPDIFTX, "hclk_spdiftx", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 2, GFLAGS),
+       GATE(HCLK_SPDIFRX, "hclk_spdifrx", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 3, GFLAGS),
+       GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 4, GFLAGS),
+       GATE(HCLK_I2S1_8CH, "hclk_i2s1_8ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 5, GFLAGS),
+       GATE(HCLK_I2S2_8CH, "hclk_i2s2_8ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 6, GFLAGS),
+       GATE(HCLK_I2S3_8CH, "hclk_i2s3_8ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 7, GFLAGS),
+       GATE(HCLK_I2S0_2CH, "hclk_i2s0_2ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 8, GFLAGS),
+       GATE(HCLK_I2S1_2CH, "hclk_i2s1_2ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 9, GFLAGS),
+       GATE(HCLK_VAD, "hclk_vad", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 10, GFLAGS),
+
+       GATE(0, "pclk_audio_niu", "pclk_audio", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(14), 11, GFLAGS),
+       GATE(PCLK_ACODEC, "pclk_acodec", "pclk_audio", 0, RK3308_CLKGATE_CON(14), 12, GFLAGS),
+
+       GATE(0, "aclk_bus_niu", "aclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 0, GFLAGS),
+       GATE(0, "aclk_intmem", "aclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 1, GFLAGS),
+       GATE(ACLK_CRYPTO, "aclk_crypto", "aclk_bus", 0, RK3308_CLKGATE_CON(5), 2, GFLAGS),
+       GATE(ACLK_VOP, "aclk_vop", "aclk_bus", 0, RK3308_CLKGATE_CON(5), 3, GFLAGS),
+       GATE(0, "aclk_gic", "aclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 4, GFLAGS),
+       /* aclk_dmac0 is controlled by sgrf_clkgat_con. */
+       SGRF_GATE(ACLK_DMAC0, "aclk_dmac0", "aclk_bus"),
+       /* aclk_dmac1 is controlled by sgrf_clkgat_con. */
+       SGRF_GATE(ACLK_DMAC1, "aclk_dmac1", "aclk_bus"),
+       /* watchdog pclk is controlled by sgrf_clkgat_con. */
+       SGRF_GATE(PCLK_WDT, "pclk_wdt", "pclk_bus"),
+
+       GATE(0, "hclk_bus_niu", "hclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 5, GFLAGS),
+       GATE(0, "hclk_rom", "hclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 6, GFLAGS),
+       GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_bus", 0, RK3308_CLKGATE_CON(5), 7, GFLAGS),
+       GATE(HCLK_VOP, "hclk_vop", "hclk_bus", 0, RK3308_CLKGATE_CON(5), 8, GFLAGS),
+
+       GATE(0, "pclk_bus_niu", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 9, GFLAGS),
+       GATE(PCLK_UART0, "pclk_uart0", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 10, GFLAGS),
+       GATE(PCLK_UART1, "pclk_uart1", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 11, GFLAGS),
+       GATE(PCLK_UART2, "pclk_uart2", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 12, GFLAGS),
+       GATE(PCLK_UART3, "pclk_uart3", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 13, GFLAGS),
+       GATE(PCLK_UART4, "pclk_uart4", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 14, GFLAGS),
+       GATE(PCLK_I2C0, "pclk_i2c0", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 15, GFLAGS),
+       GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 0, GFLAGS),
+       GATE(PCLK_I2C2, "pclk_i2c2", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 1, GFLAGS),
+       GATE(PCLK_I2C3, "pclk_i2c3", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 2, GFLAGS),
+       GATE(PCLK_PWM0, "pclk_pwm0", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 3, GFLAGS),
+       GATE(PCLK_SPI0, "pclk_spi0", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 4, GFLAGS),
+       GATE(PCLK_SPI1, "pclk_spi1", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 5, GFLAGS),
+       GATE(PCLK_SPI2, "pclk_spi2", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 6, GFLAGS),
+       GATE(PCLK_SARADC, "pclk_saradc", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 7, GFLAGS),
+       GATE(PCLK_TSADC, "pclk_tsadc", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 8, GFLAGS),
+       GATE(PCLK_TIMER, "pclk_timer", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 9, GFLAGS),
+       GATE(PCLK_OTP_NS, "pclk_otp_ns", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 10, GFLAGS),
+       GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 12, GFLAGS),
+       GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 13, GFLAGS),
+       GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 14, GFLAGS),
+       GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 15, GFLAGS),
+       GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_bus", 0, RK3308_CLKGATE_CON(7), 0, GFLAGS),
+       GATE(PCLK_SGRF, "pclk_sgrf", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 1, GFLAGS),
+       GATE(PCLK_GRF, "pclk_grf", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 2, GFLAGS),
+       GATE(PCLK_USBSD_DET, "pclk_usbsd_det", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 3, GFLAGS),
+       GATE(PCLK_DDR_UPCTL, "pclk_ddr_upctl", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 4, GFLAGS),
+       GATE(PCLK_DDR_MON, "pclk_ddr_mon", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 5, GFLAGS),
+       GATE(PCLK_DDRPHY, "pclk_ddrphy", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 6, GFLAGS),
+       GATE(PCLK_DDR_STDBY, "pclk_ddr_stdby", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 7, GFLAGS),
+       GATE(PCLK_USB_GRF, "pclk_usb_grf", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 8, GFLAGS),
+       GATE(PCLK_CRU, "pclk_cru", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 9, GFLAGS),
+       GATE(PCLK_OTP_PHY, "pclk_otp_phy", "pclk_bus", 0, RK3308_CLKGATE_CON(7), 10, GFLAGS),
+       GATE(PCLK_CPU_BOOST, "pclk_cpu_boost", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 11, GFLAGS),
+       GATE(PCLK_PWM1, "pclk_pwm1", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 12, GFLAGS),
+       GATE(PCLK_PWM2, "pclk_pwm2", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 13, GFLAGS),
+       GATE(PCLK_CAN, "pclk_can", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 14, GFLAGS),
+       GATE(PCLK_OWIRE, "pclk_owire", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 15, GFLAGS),
+};
+
+/*
+ * Clock names handed to rockchip_clk_protect_critical() by rk3308_clk_init().
+ * NOTE(review): presumably these clocks are kept enabled even when no
+ * consumer holds them -- confirm against rockchip_clk_protect_critical().
+ */
+static const char *const rk3308_critical_clocks[] __initconst = {
+       "aclk_bus",
+       "hclk_bus",
+       "pclk_bus",
+       "aclk_peri",
+       "hclk_peri",
+       "pclk_peri",
+       "hclk_audio",
+       "pclk_audio",
+       "sclk_ddrc",
+};
+
+/*
+ * rk3308_clk_init - set up the RK3308 clock controller from its DT node
+ * @np: device-tree node of the CRU (bound to "rockchip,rk3308-cru" through
+ *      CLK_OF_DECLARE)
+ *
+ * Maps register region 0 of @np, obtains a provider context from
+ * rockchip_clk_init(), then registers the PLLs, the clock branches, the
+ * "armclk" CPU clock, the soft-reset controller and the restart notifier
+ * before adding the clock provider for @np.
+ *
+ * If the mapping or the context creation fails, an error is logged and the
+ * function returns without registering anything.
+ */
+static void __init rk3308_clk_init(struct device_node *np)
+{
+       struct rockchip_clk_provider *ctx;
+       void __iomem *reg_base;
+
+       reg_base = of_iomap(np, 0);
+       if (!reg_base) {
+               pr_err("%s: could not map cru region\n", __func__);
+               return;
+       }
+
+       ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
+       if (IS_ERR(ctx)) {
+               pr_err("%s: rockchip clk init failed\n", __func__);
+               /* Drop the mapping taken above; nothing else was set up yet. */
+               iounmap(reg_base);
+               return;
+       }
+
+       rockchip_clk_register_plls(ctx, rk3308_pll_clks,
+                                  ARRAY_SIZE(rk3308_pll_clks),
+                                  RK3308_GRF_SOC_STATUS0);
+       rockchip_clk_register_branches(ctx, rk3308_clk_branches,
+                                      ARRAY_SIZE(rk3308_clk_branches));
+       /* Called after the branches above have been registered. */
+       rockchip_clk_protect_critical(rk3308_critical_clocks,
+                                     ARRAY_SIZE(rk3308_critical_clocks));
+
+       rockchip_clk_register_armclk(ctx, ARMCLK, "armclk",
+                                    mux_armclk_p, ARRAY_SIZE(mux_armclk_p),
+                                    &rk3308_cpuclk_data, rk3308_cpuclk_rates,
+                                    ARRAY_SIZE(rk3308_cpuclk_rates));
+
+       /*
+        * Soft-reset registers start at RK3308_SOFTRST_CON(0).
+        * NOTE(review): 10 is presumably the number of reset registers --
+        * confirm against rockchip_register_softrst().
+        */
+       rockchip_register_softrst(np, 10, reg_base + RK3308_SOFTRST_CON(0),
+                                 ROCKCHIP_SOFTRST_HIWORD_MASK);
+
+       rockchip_register_restart_notifier(ctx, RK3308_GLB_SRST_FST, NULL);
+
+       /* Done last, once everything above has been registered. */
+       rockchip_clk_of_add_provider(np, ctx);
+}
+
+CLK_OF_DECLARE(rk3308_cru, "rockchip,rk3308-cru", rk3308_clk_init);
index 96cc6af..5947d31 100644 (file)
@@ -122,7 +122,6 @@ PNAME(mux_usb480m_pre_p)    = { "usbphy", "xin24m" };
 PNAME(mux_hdmiphy_phy_p)       = { "hdmiphy", "xin24m" };
 PNAME(mux_dclk_hdmiphy_pre_p)  = { "dclk_hdmiphy_src_gpll", "dclk_hdmiphy_src_dpll" };
 PNAME(mux_pll_src_4plls_p)     = { "dpll", "gpll", "hdmiphy", "usb480m" };
-PNAME(mux_pll_src_3plls_p)     = { "apll", "gpll", "dpll" };
 PNAME(mux_pll_src_2plls_p)     = { "dpll", "gpll" };
 PNAME(mux_pll_src_apll_gpll_p) = { "apll", "gpll" };
 PNAME(mux_aclk_peri_src_p)     = { "aclk_peri_src_gpll", "aclk_peri_src_dpll" };
index b811597..2271a84 100644 (file)
@@ -121,6 +121,19 @@ struct clk;
 #define RK3288_EMMC_CON0               0x218
 #define RK3288_EMMC_CON1               0x21c
 
+#define RK3308_PLL_CON(x)              RK2928_PLL_CON(x)
+#define RK3308_CLKSEL_CON(x)           ((x) * 0x4 + 0x100)
+#define RK3308_CLKGATE_CON(x)          ((x) * 0x4 + 0x300)
+#define RK3308_GLB_SRST_FST            0xb8
+#define RK3308_SOFTRST_CON(x)          ((x) * 0x4 + 0x400)
+#define RK3308_MODE_CON                        0xa0
+#define RK3308_SDMMC_CON0              0x480
+#define RK3308_SDMMC_CON1              0x484
+#define RK3308_SDIO_CON0               0x488
+#define RK3308_SDIO_CON1               0x48c
+#define RK3308_EMMC_CON0               0x490
+#define RK3308_EMMC_CON1               0x494
+
 #define RK3328_PLL_CON(x)              RK2928_PLL_CON(x)
 #define RK3328_CLKSEL_CON(x)           ((x) * 0x4 + 0x100)
 #define RK3328_CLKGATE_CON(x)          ((x) * 0x4 + 0x200)
index ad7951b..dcf4e25 100644 (file)
@@ -297,9 +297,10 @@ static u8 dmn_clk_get_parent(struct clk_hw *hw)
 {
        struct clk_dmn *clk = to_dmnclk(hw);
        u32 cfg = clkc_readl(clk->regofs);
+       const char *name = clk_hw_get_name(hw);
 
        /* parent of io domain can only be pll3 */
-       if (strcmp(hw->init->name, "io") == 0)
+       if (strcmp(name, "io") == 0)
                return 4;
 
        WARN_ON((cfg & (BIT(3) - 1)) > 4);
@@ -311,9 +312,10 @@ static int dmn_clk_set_parent(struct clk_hw *hw, u8 parent)
 {
        struct clk_dmn *clk = to_dmnclk(hw);
        u32 cfg = clkc_readl(clk->regofs);
+       const char *name = clk_hw_get_name(hw);
 
        /* parent of io domain can only be pll3 */
-       if (strcmp(hw->init->name, "io") == 0)
+       if (strcmp(name, "io") == 0)
                return -EINVAL;
 
        cfg &= ~(BIT(3) - 1);
@@ -353,7 +355,8 @@ static long dmn_clk_round_rate(struct clk_hw *hw, unsigned long rate,
 {
        unsigned long fin;
        unsigned ratio, wait, hold;
-       unsigned bits = (strcmp(hw->init->name, "mem") == 0) ? 3 : 4;
+       const char *name = clk_hw_get_name(hw);
+       unsigned bits = (strcmp(name, "mem") == 0) ? 3 : 4;
 
        fin = *parent_rate;
        ratio = fin / rate;
@@ -375,7 +378,8 @@ static int dmn_clk_set_rate(struct clk_hw *hw, unsigned long rate,
        struct clk_dmn *clk = to_dmnclk(hw);
        unsigned long fin;
        unsigned ratio, wait, hold, reg;
-       unsigned bits = (strcmp(hw->init->name, "mem") == 0) ? 3 : 4;
+       const char *name = clk_hw_get_name(hw);
+       unsigned bits = (strcmp(name, "mem") == 0) ? 3 : 4;
 
        fin = parent_rate;
        ratio = fin / rate;
index 3966cd4..43ecd50 100644 (file)
@@ -30,22 +30,23 @@ static u8 socfpga_clk_get_parent(struct clk_hw *hwclk)
 {
        u32 l4_src;
        u32 perpll_src;
+       const char *name = clk_hw_get_name(hwclk);
 
-       if (streq(hwclk->init->name, SOCFPGA_L4_MP_CLK)) {
+       if (streq(name, SOCFPGA_L4_MP_CLK)) {
                l4_src = readl(clk_mgr_base_addr + CLKMGR_L4SRC);
                return l4_src &= 0x1;
        }
-       if (streq(hwclk->init->name, SOCFPGA_L4_SP_CLK)) {
+       if (streq(name, SOCFPGA_L4_SP_CLK)) {
                l4_src = readl(clk_mgr_base_addr + CLKMGR_L4SRC);
                return !!(l4_src & 2);
        }
 
        perpll_src = readl(clk_mgr_base_addr + CLKMGR_PERPLL_SRC);
-       if (streq(hwclk->init->name, SOCFPGA_MMC_CLK))
+       if (streq(name, SOCFPGA_MMC_CLK))
                return perpll_src &= 0x3;
-       if (streq(hwclk->init->name, SOCFPGA_NAND_CLK) ||
-                       streq(hwclk->init->name, SOCFPGA_NAND_X_CLK))
-                       return (perpll_src >> 2) & 3;
+       if (streq(name, SOCFPGA_NAND_CLK) ||
+           streq(name, SOCFPGA_NAND_X_CLK))
+               return (perpll_src >> 2) & 3;
 
        /* QSPI clock */
        return (perpll_src >> 4) & 3;
@@ -55,24 +56,25 @@ static u8 socfpga_clk_get_parent(struct clk_hw *hwclk)
 static int socfpga_clk_set_parent(struct clk_hw *hwclk, u8 parent)
 {
        u32 src_reg;
+       const char *name = clk_hw_get_name(hwclk);
 
-       if (streq(hwclk->init->name, SOCFPGA_L4_MP_CLK)) {
+       if (streq(name, SOCFPGA_L4_MP_CLK)) {
                src_reg = readl(clk_mgr_base_addr + CLKMGR_L4SRC);
                src_reg &= ~0x1;
                src_reg |= parent;
                writel(src_reg, clk_mgr_base_addr + CLKMGR_L4SRC);
-       } else if (streq(hwclk->init->name, SOCFPGA_L4_SP_CLK)) {
+       } else if (streq(name, SOCFPGA_L4_SP_CLK)) {
                src_reg = readl(clk_mgr_base_addr + CLKMGR_L4SRC);
                src_reg &= ~0x2;
                src_reg |= (parent << 1);
                writel(src_reg, clk_mgr_base_addr + CLKMGR_L4SRC);
        } else {
                src_reg = readl(clk_mgr_base_addr + CLKMGR_PERPLL_SRC);
-               if (streq(hwclk->init->name, SOCFPGA_MMC_CLK)) {
+               if (streq(name, SOCFPGA_MMC_CLK)) {
                        src_reg &= ~0x3;
                        src_reg |= parent;
-               } else if (streq(hwclk->init->name, SOCFPGA_NAND_CLK) ||
-                       streq(hwclk->init->name, SOCFPGA_NAND_X_CLK)) {
+               } else if (streq(name, SOCFPGA_NAND_CLK) ||
+                       streq(name, SOCFPGA_NAND_X_CLK)) {
                        src_reg &= ~0xC;
                        src_reg |= (parent << 2);
                } else {/* QSPI clock */
index a8ff722..3e0c557 100644 (file)
@@ -40,11 +40,12 @@ static u8 clk_periclk_get_parent(struct clk_hw *hwclk)
 {
        struct socfpga_periph_clk *socfpgaclk = to_socfpga_periph_clk(hwclk);
        u32 clk_src;
+       const char *name = clk_hw_get_name(hwclk);
 
        clk_src = readl(socfpgaclk->hw.reg);
-       if (streq(hwclk->init->name, SOCFPGA_MPU_FREE_CLK) ||
-           streq(hwclk->init->name, SOCFPGA_NOC_FREE_CLK) ||
-           streq(hwclk->init->name, SOCFPGA_SDMMC_FREE_CLK))
+       if (streq(name, SOCFPGA_MPU_FREE_CLK) ||
+           streq(name, SOCFPGA_NOC_FREE_CLK) ||
+           streq(name, SOCFPGA_SDMMC_FREE_CLK))
                return (clk_src >> CLK_MGR_FREE_SHIFT) &
                        CLK_MGR_FREE_MASK;
        else
index e5bc8c8..9163bbb 100644 (file)
@@ -335,7 +335,7 @@ static const struct aux_clk_masks i2s_prs1_masks = {
 };
 
 /* i2s sclk (bit clock) synthesizer masks */
-static struct aux_clk_masks i2s_sclk_masks = {
+static const struct aux_clk_masks i2s_sclk_masks = {
        .eq_sel_mask = AUX_EQ_SEL_MASK,
        .eq_sel_shift = SPEAR1340_I2S_SCLK_EQ_SEL_SHIFT,
        .eq1_mask = AUX_EQ1_SEL,
index a5bdca1..9d56eac 100644 (file)
@@ -76,16 +76,17 @@ int sprd_clk_probe(struct device *dev, struct clk_hw_onecell_data *clkhw)
        struct clk_hw *hw;
 
        for (i = 0; i < clkhw->num; i++) {
+               const char *name;
 
                hw = clkhw->hws[i];
-
                if (!hw)
                        continue;
 
+               name = hw->init->name;
                ret = devm_clk_hw_register(dev, hw);
                if (ret) {
                        dev_err(dev, "Couldn't register clock %d - %s\n",
-                               i, hw->init->name);
+                               i, name);
                        return ret;
                }
        }
index 36b4402..640270f 100644 (file)
@@ -136,6 +136,7 @@ static unsigned long _sprd_pll_recalc_rate(const struct sprd_pll *pll,
                                         k2 + refin * nint * CLK_PLL_1M;
        }
 
+       kfree(cfg);
        return rate;
 }
 
@@ -222,6 +223,7 @@ static int _sprd_pll_set_rate(const struct sprd_pll *pll,
        if (!ret)
                udelay(pll->udelay);
 
+       kfree(cfg);
        return ret;
 }
 
index d18e49b..4413b6e 100644 (file)
@@ -326,6 +326,7 @@ static void __init st_of_flexgen_setup(struct device_node *np)
                return;
 
        reg = of_iomap(pnode, 0);
+       of_node_put(pnode);
        if (!reg)
                return;
 
index ca1ccdb..a156bd0 100644 (file)
@@ -67,7 +67,6 @@ struct clkgen_quadfs_data {
 };
 
 static const struct clk_ops st_quadfs_pll_c32_ops;
-static const struct clk_ops st_quadfs_fs660c32_ops;
 
 static int clk_fs660c32_dig_get_params(unsigned long input,
                unsigned long output, struct stm_fs *fs);
index d8a688b..c3952f2 100644 (file)
@@ -61,19 +61,6 @@ static const struct clk_ops stm_pll3200c32_ops;
 static const struct clk_ops stm_pll3200c32_a9_ops;
 static const struct clk_ops stm_pll4600c28_ops;
 
-static const struct clkgen_pll_data st_pll3200c32_407_a0 = {
-       /* 407 A0 */
-       .pdn_status     = CLKGEN_FIELD(0x2a0,   0x1,                    8),
-       .pdn_ctrl       = CLKGEN_FIELD(0x2a0,   0x1,                    8),
-       .locked_status  = CLKGEN_FIELD(0x2a0,   0x1,                    24),
-       .ndiv           = CLKGEN_FIELD(0x2a4,   C32_NDIV_MASK,          16),
-       .idf            = CLKGEN_FIELD(0x2a4,   C32_IDF_MASK,           0x0),
-       .num_odfs = 1,
-       .odf            = { CLKGEN_FIELD(0x2b4, C32_ODF_MASK,           0) },
-       .odf_gate       = { CLKGEN_FIELD(0x2b4, 0x1,                    6) },
-       .ops            = &stm_pll3200c32_ops,
-};
-
 static const struct clkgen_pll_data st_pll3200c32_cx_0 = {
        /* 407 C0 PLL0 */
        .pdn_status     = CLKGEN_FIELD(0x2a0,   0x1,                    8),
index aebef4a..d89353a 100644 (file)
@@ -505,7 +505,7 @@ static struct ccu_div i2s3_clk = {
                .hw.init        = CLK_HW_INIT_PARENTS("i2s3",
                                                      audio_parents,
                                                      &ccu_div_ops,
-                                                     0),
+                                                     CLK_SET_RATE_PARENT),
        },
 };
 
@@ -518,7 +518,7 @@ static struct ccu_div i2s0_clk = {
                .hw.init        = CLK_HW_INIT_PARENTS("i2s0",
                                                      audio_parents,
                                                      &ccu_div_ops,
-                                                     0),
+                                                     CLK_SET_RATE_PARENT),
        },
 };
 
@@ -531,7 +531,7 @@ static struct ccu_div i2s1_clk = {
                .hw.init        = CLK_HW_INIT_PARENTS("i2s1",
                                                      audio_parents,
                                                      &ccu_div_ops,
-                                                     0),
+                                                     CLK_SET_RATE_PARENT),
        },
 };
 
@@ -544,7 +544,7 @@ static struct ccu_div i2s2_clk = {
                .hw.init        = CLK_HW_INIT_PARENTS("i2s2",
                                                      audio_parents,
                                                      &ccu_div_ops,
-                                                     0),
+                                                     CLK_SET_RATE_PARENT),
        },
 };
 
index 9b3939f..5c779ee 100644 (file)
@@ -77,7 +77,7 @@ static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_ve_clk, "pll-ve",
                                        BIT(28),        /* lock */
                                        0);
 
-static SUNXI_CCU_NKM_WITH_GATE_LOCK(pll_ddr_clk, "pll-ddr",
+static SUNXI_CCU_NKM_WITH_GATE_LOCK(pll_ddr0_clk, "pll-ddr0",
                                    "osc24M", 0x020,
                                    8, 5,       /* N */
                                    4, 2,       /* K */
@@ -116,6 +116,14 @@ static SUNXI_CCU_NK_WITH_GATE_LOCK_POSTDIV(pll_periph1_clk, "pll-periph1",
                                           2,           /* post-div */
                                           0);
 
+static SUNXI_CCU_NM_WITH_GATE_LOCK(pll_ddr1_clk, "pll-ddr1",
+                                  "osc24M", 0x04c,
+                                  8, 7,        /* N */
+                                  0, 2,        /* M */
+                                  BIT(31),     /* gate */
+                                  BIT(28),     /* lock */
+                                  0);
+
 static const char * const cpu_parents[] = { "osc32k", "osc24M",
                                             "pll-cpu", "pll-cpu" };
 static SUNXI_CCU_MUX(cpu_clk, "cpu", cpu_parents,
@@ -227,6 +235,8 @@ static SUNXI_CCU_GATE(bus_codec_clk,        "bus-codec",    "apb1",
                      0x068, BIT(0), 0);
 static SUNXI_CCU_GATE(bus_pio_clk,     "bus-pio",      "apb1",
                      0x068, BIT(5), 0);
+static SUNXI_CCU_GATE(bus_i2s0_clk,    "bus-i2s0",     "apb1",
+                     0x068, BIT(12), 0);
 
 static SUNXI_CCU_GATE(bus_i2c0_clk,    "bus-i2c0",     "apb2",
                      0x06c, BIT(0), 0);
@@ -298,12 +308,18 @@ static SUNXI_CCU_MP_WITH_MUX_GATE(spi0_clk, "spi0", mod0_default_parents, 0x0a0,
                                  BIT(31),      /* gate */
                                  0);
 
+static const char * const i2s_parents[] = { "pll-audio-8x", "pll-audio-4x",
+                                           "pll-audio-2x", "pll-audio" };
+static SUNXI_CCU_MUX_WITH_GATE(i2s0_clk, "i2s0", i2s_parents,
+                              0x0b0, 16, 2, BIT(31), CLK_SET_RATE_PARENT);
+
 static SUNXI_CCU_GATE(usb_phy0_clk,    "usb-phy0",     "osc24M",
                      0x0cc, BIT(8), 0);
 static SUNXI_CCU_GATE(usb_ohci0_clk,   "usb-ohci0",    "osc24M",
                      0x0cc, BIT(16), 0);
 
-static const char * const dram_parents[] = { "pll-ddr", "pll-periph0-2x" };
+static const char * const dram_parents[] = { "pll-ddr0", "pll-ddr1",
+                                            "pll-periph0-2x" };
 static SUNXI_CCU_M_WITH_MUX(dram_clk, "dram", dram_parents,
                            0x0f4, 0, 4, 20, 2, CLK_IS_CRITICAL);
 
@@ -363,10 +379,11 @@ static struct ccu_common *sun8i_v3s_ccu_clks[] = {
        &pll_audio_base_clk.common,
        &pll_video_clk.common,
        &pll_ve_clk.common,
-       &pll_ddr_clk.common,
+       &pll_ddr0_clk.common,
        &pll_periph0_clk.common,
        &pll_isp_clk.common,
        &pll_periph1_clk.common,
+       &pll_ddr1_clk.common,
        &cpu_clk.common,
        &axi_clk.common,
        &ahb1_clk.common,
@@ -433,6 +450,80 @@ static const struct clk_hw *clk_parent_pll_audio[] = {
        &pll_audio_base_clk.common.hw
 };
 
+static struct ccu_common *sun8i_v3_ccu_clks[] = {
+       &pll_cpu_clk.common,
+       &pll_audio_base_clk.common,
+       &pll_video_clk.common,
+       &pll_ve_clk.common,
+       &pll_ddr0_clk.common,
+       &pll_periph0_clk.common,
+       &pll_isp_clk.common,
+       &pll_periph1_clk.common,
+       &pll_ddr1_clk.common,
+       &cpu_clk.common,
+       &axi_clk.common,
+       &ahb1_clk.common,
+       &apb1_clk.common,
+       &apb2_clk.common,
+       &ahb2_clk.common,
+       &bus_ce_clk.common,
+       &bus_dma_clk.common,
+       &bus_mmc0_clk.common,
+       &bus_mmc1_clk.common,
+       &bus_mmc2_clk.common,
+       &bus_dram_clk.common,
+       &bus_emac_clk.common,
+       &bus_hstimer_clk.common,
+       &bus_spi0_clk.common,
+       &bus_otg_clk.common,
+       &bus_ehci0_clk.common,
+       &bus_ohci0_clk.common,
+       &bus_ve_clk.common,
+       &bus_tcon0_clk.common,
+       &bus_csi_clk.common,
+       &bus_de_clk.common,
+       &bus_codec_clk.common,
+       &bus_pio_clk.common,
+       &bus_i2s0_clk.common,
+       &bus_i2c0_clk.common,
+       &bus_i2c1_clk.common,
+       &bus_uart0_clk.common,
+       &bus_uart1_clk.common,
+       &bus_uart2_clk.common,
+       &bus_ephy_clk.common,
+       &bus_dbg_clk.common,
+       &mmc0_clk.common,
+       &mmc0_sample_clk.common,
+       &mmc0_output_clk.common,
+       &mmc1_clk.common,
+       &mmc1_sample_clk.common,
+       &mmc1_output_clk.common,
+       &mmc2_clk.common,
+       &mmc2_sample_clk.common,
+       &mmc2_output_clk.common,
+       &ce_clk.common,
+       &spi0_clk.common,
+       &i2s0_clk.common,
+       &usb_phy0_clk.common,
+       &usb_ohci0_clk.common,
+       &dram_clk.common,
+       &dram_ve_clk.common,
+       &dram_csi_clk.common,
+       &dram_ohci_clk.common,
+       &dram_ehci_clk.common,
+       &de_clk.common,
+       &tcon_clk.common,
+       &csi_misc_clk.common,
+       &csi0_mclk_clk.common,
+       &csi1_sclk_clk.common,
+       &csi1_mclk_clk.common,
+       &ve_clk.common,
+       &ac_dig_clk.common,
+       &avs_clk.common,
+       &mbus_clk.common,
+       &mipi_csi_clk.common,
+};
+
 /* We hardcode the divider to 4 for now */
 static CLK_FIXED_FACTOR_HWS(pll_audio_clk, "pll-audio",
                            clk_parent_pll_audio,
@@ -460,11 +551,12 @@ static struct clk_hw_onecell_data sun8i_v3s_hw_clks = {
                [CLK_PLL_AUDIO_8X]      = &pll_audio_8x_clk.hw,
                [CLK_PLL_VIDEO]         = &pll_video_clk.common.hw,
                [CLK_PLL_VE]            = &pll_ve_clk.common.hw,
-               [CLK_PLL_DDR]           = &pll_ddr_clk.common.hw,
+               [CLK_PLL_DDR0]          = &pll_ddr0_clk.common.hw,
                [CLK_PLL_PERIPH0]       = &pll_periph0_clk.common.hw,
                [CLK_PLL_PERIPH0_2X]    = &pll_periph0_2x_clk.hw,
                [CLK_PLL_ISP]           = &pll_isp_clk.common.hw,
                [CLK_PLL_PERIPH1]       = &pll_periph1_clk.common.hw,
+               [CLK_PLL_DDR1]          = &pll_ddr1_clk.common.hw,
                [CLK_CPU]               = &cpu_clk.common.hw,
                [CLK_AXI]               = &axi_clk.common.hw,
                [CLK_AHB1]              = &ahb1_clk.common.hw,
@@ -502,6 +594,9 @@ static struct clk_hw_onecell_data sun8i_v3s_hw_clks = {
                [CLK_MMC1]              = &mmc1_clk.common.hw,
                [CLK_MMC1_SAMPLE]       = &mmc1_sample_clk.common.hw,
                [CLK_MMC1_OUTPUT]       = &mmc1_output_clk.common.hw,
+               [CLK_MMC2]              = &mmc2_clk.common.hw,
+               [CLK_MMC2_SAMPLE]       = &mmc2_sample_clk.common.hw,
+               [CLK_MMC2_OUTPUT]       = &mmc2_output_clk.common.hw,
                [CLK_CE]                = &ce_clk.common.hw,
                [CLK_SPI0]              = &spi0_clk.common.hw,
                [CLK_USB_PHY0]          = &usb_phy0_clk.common.hw,
@@ -526,6 +621,88 @@ static struct clk_hw_onecell_data sun8i_v3s_hw_clks = {
        .num    = CLK_NUMBER,
 };
 
+static struct clk_hw_onecell_data sun8i_v3_hw_clks = {
+       .hws    = {
+               [CLK_PLL_CPU]           = &pll_cpu_clk.common.hw,
+               [CLK_PLL_AUDIO_BASE]    = &pll_audio_base_clk.common.hw,
+               [CLK_PLL_AUDIO]         = &pll_audio_clk.hw,
+               [CLK_PLL_AUDIO_2X]      = &pll_audio_2x_clk.hw,
+               [CLK_PLL_AUDIO_4X]      = &pll_audio_4x_clk.hw,
+               [CLK_PLL_AUDIO_8X]      = &pll_audio_8x_clk.hw,
+               [CLK_PLL_VIDEO]         = &pll_video_clk.common.hw,
+               [CLK_PLL_VE]            = &pll_ve_clk.common.hw,
+               [CLK_PLL_DDR0]          = &pll_ddr0_clk.common.hw,
+               [CLK_PLL_PERIPH0]       = &pll_periph0_clk.common.hw,
+               [CLK_PLL_PERIPH0_2X]    = &pll_periph0_2x_clk.hw,
+               [CLK_PLL_ISP]           = &pll_isp_clk.common.hw,
+               [CLK_PLL_PERIPH1]       = &pll_periph1_clk.common.hw,
+               [CLK_PLL_DDR1]          = &pll_ddr1_clk.common.hw,
+               [CLK_CPU]               = &cpu_clk.common.hw,
+               [CLK_AXI]               = &axi_clk.common.hw,
+               [CLK_AHB1]              = &ahb1_clk.common.hw,
+               [CLK_APB1]              = &apb1_clk.common.hw,
+               [CLK_APB2]              = &apb2_clk.common.hw,
+               [CLK_AHB2]              = &ahb2_clk.common.hw,
+               [CLK_BUS_CE]            = &bus_ce_clk.common.hw,
+               [CLK_BUS_DMA]           = &bus_dma_clk.common.hw,
+               [CLK_BUS_MMC0]          = &bus_mmc0_clk.common.hw,
+               [CLK_BUS_MMC1]          = &bus_mmc1_clk.common.hw,
+               [CLK_BUS_MMC2]          = &bus_mmc2_clk.common.hw,
+               [CLK_BUS_DRAM]          = &bus_dram_clk.common.hw,
+               [CLK_BUS_EMAC]          = &bus_emac_clk.common.hw,
+               [CLK_BUS_HSTIMER]       = &bus_hstimer_clk.common.hw,
+               [CLK_BUS_SPI0]          = &bus_spi0_clk.common.hw,
+               [CLK_BUS_OTG]           = &bus_otg_clk.common.hw,
+               [CLK_BUS_EHCI0]         = &bus_ehci0_clk.common.hw,
+               [CLK_BUS_OHCI0]         = &bus_ohci0_clk.common.hw,
+               [CLK_BUS_VE]            = &bus_ve_clk.common.hw,
+               [CLK_BUS_TCON0]         = &bus_tcon0_clk.common.hw,
+               [CLK_BUS_CSI]           = &bus_csi_clk.common.hw,
+               [CLK_BUS_DE]            = &bus_de_clk.common.hw,
+               [CLK_BUS_CODEC]         = &bus_codec_clk.common.hw,
+               [CLK_BUS_PIO]           = &bus_pio_clk.common.hw,
+               [CLK_BUS_I2S0]          = &bus_i2s0_clk.common.hw,
+               [CLK_BUS_I2C0]          = &bus_i2c0_clk.common.hw,
+               [CLK_BUS_I2C1]          = &bus_i2c1_clk.common.hw,
+               [CLK_BUS_UART0]         = &bus_uart0_clk.common.hw,
+               [CLK_BUS_UART1]         = &bus_uart1_clk.common.hw,
+               [CLK_BUS_UART2]         = &bus_uart2_clk.common.hw,
+               [CLK_BUS_EPHY]          = &bus_ephy_clk.common.hw,
+               [CLK_BUS_DBG]           = &bus_dbg_clk.common.hw,
+               [CLK_MMC0]              = &mmc0_clk.common.hw,
+               [CLK_MMC0_SAMPLE]       = &mmc0_sample_clk.common.hw,
+               [CLK_MMC0_OUTPUT]       = &mmc0_output_clk.common.hw,
+               [CLK_MMC1]              = &mmc1_clk.common.hw,
+               [CLK_MMC1_SAMPLE]       = &mmc1_sample_clk.common.hw,
+               [CLK_MMC1_OUTPUT]       = &mmc1_output_clk.common.hw,
+               [CLK_MMC2]              = &mmc2_clk.common.hw,
+               [CLK_MMC2_SAMPLE]       = &mmc2_sample_clk.common.hw,
+               [CLK_MMC2_OUTPUT]       = &mmc2_output_clk.common.hw,
+               [CLK_CE]                = &ce_clk.common.hw,
+               [CLK_SPI0]              = &spi0_clk.common.hw,
+               [CLK_I2S0]              = &i2s0_clk.common.hw,
+               [CLK_USB_PHY0]          = &usb_phy0_clk.common.hw,
+               [CLK_USB_OHCI0]         = &usb_ohci0_clk.common.hw,
+               [CLK_DRAM]              = &dram_clk.common.hw,
+               [CLK_DRAM_VE]           = &dram_ve_clk.common.hw,
+               [CLK_DRAM_CSI]          = &dram_csi_clk.common.hw,
+               [CLK_DRAM_EHCI]         = &dram_ehci_clk.common.hw,
+               [CLK_DRAM_OHCI]         = &dram_ohci_clk.common.hw,
+               [CLK_DE]                = &de_clk.common.hw,
+               [CLK_TCON0]             = &tcon_clk.common.hw,
+               [CLK_CSI_MISC]          = &csi_misc_clk.common.hw,
+               [CLK_CSI0_MCLK]         = &csi0_mclk_clk.common.hw,
+               [CLK_CSI1_SCLK]         = &csi1_sclk_clk.common.hw,
+               [CLK_CSI1_MCLK]         = &csi1_mclk_clk.common.hw,
+               [CLK_VE]                = &ve_clk.common.hw,
+               [CLK_AC_DIG]            = &ac_dig_clk.common.hw,
+               [CLK_AVS]               = &avs_clk.common.hw,
+               [CLK_MBUS]              = &mbus_clk.common.hw,
+               [CLK_MIPI_CSI]          = &mipi_csi_clk.common.hw,
+       },
+       .num    = CLK_NUMBER,
+};
+
 static struct ccu_reset_map sun8i_v3s_ccu_resets[] = {
        [RST_USB_PHY0]          =  { 0x0cc, BIT(0) },
 
@@ -561,6 +738,42 @@ static struct ccu_reset_map sun8i_v3s_ccu_resets[] = {
        [RST_BUS_UART2]         =  { 0x2d8, BIT(18) },
 };
 
+static struct ccu_reset_map sun8i_v3_ccu_resets[] = {
+       [RST_USB_PHY0]          =  { 0x0cc, BIT(0) },
+
+       [RST_MBUS]              =  { 0x0fc, BIT(31) },
+
+       [RST_BUS_CE]            =  { 0x2c0, BIT(5) },
+       [RST_BUS_DMA]           =  { 0x2c0, BIT(6) },
+       [RST_BUS_MMC0]          =  { 0x2c0, BIT(8) },
+       [RST_BUS_MMC1]          =  { 0x2c0, BIT(9) },
+       [RST_BUS_MMC2]          =  { 0x2c0, BIT(10) },
+       [RST_BUS_DRAM]          =  { 0x2c0, BIT(14) },
+       [RST_BUS_EMAC]          =  { 0x2c0, BIT(17) },
+       [RST_BUS_HSTIMER]       =  { 0x2c0, BIT(19) },
+       [RST_BUS_SPI0]          =  { 0x2c0, BIT(20) },
+       [RST_BUS_OTG]           =  { 0x2c0, BIT(24) },
+       [RST_BUS_EHCI0]         =  { 0x2c0, BIT(26) },
+       [RST_BUS_OHCI0]         =  { 0x2c0, BIT(29) },
+
+       [RST_BUS_VE]            =  { 0x2c4, BIT(0) },
+       [RST_BUS_TCON0]         =  { 0x2c4, BIT(4) },
+       [RST_BUS_CSI]           =  { 0x2c4, BIT(8) },
+       [RST_BUS_DE]            =  { 0x2c4, BIT(12) },
+       [RST_BUS_DBG]           =  { 0x2c4, BIT(31) },
+
+       [RST_BUS_EPHY]          =  { 0x2c8, BIT(2) },
+
+       [RST_BUS_CODEC]         =  { 0x2d0, BIT(0) },
+       [RST_BUS_I2S0]          =  { 0x2d0, BIT(12) },
+
+       [RST_BUS_I2C0]          =  { 0x2d8, BIT(0) },
+       [RST_BUS_I2C1]          =  { 0x2d8, BIT(1) },
+       [RST_BUS_UART0]         =  { 0x2d8, BIT(16) },
+       [RST_BUS_UART1]         =  { 0x2d8, BIT(17) },
+       [RST_BUS_UART2]         =  { 0x2d8, BIT(18) },
+};
+
 static const struct sunxi_ccu_desc sun8i_v3s_ccu_desc = {
        .ccu_clks       = sun8i_v3s_ccu_clks,
        .num_ccu_clks   = ARRAY_SIZE(sun8i_v3s_ccu_clks),
@@ -571,7 +784,18 @@ static const struct sunxi_ccu_desc sun8i_v3s_ccu_desc = {
        .num_resets     = ARRAY_SIZE(sun8i_v3s_ccu_resets),
 };
 
-static void __init sun8i_v3s_ccu_setup(struct device_node *node)
+static const struct sunxi_ccu_desc sun8i_v3_ccu_desc = {
+       .ccu_clks       = sun8i_v3_ccu_clks,
+       .num_ccu_clks   = ARRAY_SIZE(sun8i_v3_ccu_clks),
+
+       .hw_clks        = &sun8i_v3_hw_clks,
+
+       .resets         = sun8i_v3_ccu_resets,
+       .num_resets     = ARRAY_SIZE(sun8i_v3_ccu_resets),
+};
+
+static void __init sun8i_v3_v3s_ccu_init(struct device_node *node,
+                                        const struct sunxi_ccu_desc *ccu_desc)
 {
        void __iomem *reg;
        u32 val;
@@ -587,7 +811,21 @@ static void __init sun8i_v3s_ccu_setup(struct device_node *node)
        val &= ~GENMASK(19, 16);
        writel(val | (3 << 16), reg + SUN8I_V3S_PLL_AUDIO_REG);
 
-       sunxi_ccu_probe(node, reg, &sun8i_v3s_ccu_desc);
+       sunxi_ccu_probe(node, reg, ccu_desc);
+}
+
+static void __init sun8i_v3s_ccu_setup(struct device_node *node)
+{
+       sun8i_v3_v3s_ccu_init(node, &sun8i_v3s_ccu_desc);
+}
+
+static void __init sun8i_v3_ccu_setup(struct device_node *node)
+{
+       sun8i_v3_v3s_ccu_init(node, &sun8i_v3_ccu_desc);
 }
+
 CLK_OF_DECLARE(sun8i_v3s_ccu, "allwinner,sun8i-v3s-ccu",
               sun8i_v3s_ccu_setup);
+
+CLK_OF_DECLARE(sun8i_v3_ccu, "allwinner,sun8i-v3-ccu",
+              sun8i_v3_ccu_setup);
index fbc1da8..b0160d3 100644 (file)
@@ -20,7 +20,7 @@
 #define CLK_PLL_AUDIO_8X       5
 #define CLK_PLL_VIDEO          6
 #define CLK_PLL_VE             7
-#define CLK_PLL_DDR            8
+#define CLK_PLL_DDR0           8
 #define CLK_PLL_PERIPH0                9
 #define CLK_PLL_PERIPH0_2X     10
 #define CLK_PLL_ISP            11
@@ -49,6 +49,8 @@
 
 /* And the GPU module clock is exported */
 
-#define CLK_NUMBER             (CLK_MIPI_CSI + 1)
+#define CLK_PLL_DDR1           74
+
+#define CLK_NUMBER             (CLK_I2S0 + 1)
 
 #endif /* _CCU_SUN8I_H3_H_ */
index 7fe3ac9..2e20e65 100644 (file)
@@ -97,14 +97,15 @@ int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
 
        for (i = 0; i < desc->hw_clks->num ; i++) {
                struct clk_hw *hw = desc->hw_clks->hws[i];
+               const char *name;
 
                if (!hw)
                        continue;
 
+               name = hw->init->name;
                ret = of_clk_hw_register(node, hw);
                if (ret) {
-                       pr_err("Couldn't register clock %d - %s\n",
-                              i, clk_hw_get_name(hw));
+                       pr_err("Couldn't register clock %d - %s\n", i, name);
                        goto err_clk_unreg;
                }
        }
index 015a657..ac5bc88 100644 (file)
@@ -140,6 +140,7 @@ static void __init omap_clk_register_apll(void *user,
        struct clk_hw_omap *clk_hw = to_clk_hw_omap(hw);
        struct dpll_data *ad = clk_hw->dpll_data;
        struct clk *clk;
+       const struct clk_init_data *init = clk_hw->hw.init;
 
        clk = of_clk_get(node, 0);
        if (IS_ERR(clk)) {
@@ -168,15 +169,15 @@ static void __init omap_clk_register_apll(void *user,
        clk = ti_clk_register_omap_hw(NULL, &clk_hw->hw, node->name);
        if (!IS_ERR(clk)) {
                of_clk_add_provider(node, of_clk_src_simple_get, clk);
-               kfree(clk_hw->hw.init->parent_names);
-               kfree(clk_hw->hw.init);
+               kfree(init->parent_names);
+               kfree(init);
                return;
        }
 
 cleanup:
        kfree(clk_hw->dpll_data);
-       kfree(clk_hw->hw.init->parent_names);
-       kfree(clk_hw->hw.init);
+       kfree(init->parent_names);
+       kfree(init);
        kfree(clk_hw);
 }
 
index dafef7e..e675e27 100644 (file)
@@ -314,6 +314,39 @@ static const struct omap_clkctrl_reg_data omap5_dss_clkctrl_regs[] __initconst =
        { 0 },
 };
 
+static const char * const omap5_gpu_core_mux_parents[] __initconst = {
+       "dpll_core_h14x2_ck",
+       "dpll_per_h14x2_ck",
+       NULL,
+};
+
+static const char * const omap5_gpu_hyd_mux_parents[] __initconst = {
+       "dpll_core_h14x2_ck",
+       "dpll_per_h14x2_ck",
+       NULL,
+};
+
+static const char * const omap5_gpu_sys_clk_parents[] __initconst = {
+       "sys_clkin",
+       NULL,
+};
+
+static const struct omap_clkctrl_div_data omap5_gpu_sys_clk_data __initconst = {
+       .max_div = 2,
+};
+
+static const struct omap_clkctrl_bit_data omap5_gpu_core_bit_data[] __initconst = {
+       { 24, TI_CLK_MUX, omap5_gpu_core_mux_parents, NULL },
+       { 25, TI_CLK_MUX, omap5_gpu_hyd_mux_parents, NULL },
+       { 26, TI_CLK_DIVIDER, omap5_gpu_sys_clk_parents, &omap5_gpu_sys_clk_data },
+       { 0 },
+};
+
+static const struct omap_clkctrl_reg_data omap5_gpu_clkctrl_regs[] __initconst = {
+       { OMAP5_GPU_CLKCTRL, omap5_gpu_core_bit_data, CLKF_SW_SUP, "gpu_cm:clk:0000:24" },
+       { 0 },
+};
+
 static const char * const omap5_mmc1_fclk_mux_parents[] __initconst = {
        "func_128m_clk",
        "dpll_per_m2x2_ck",
@@ -470,6 +503,7 @@ const struct omap_clkctrl_data omap5_clkctrl_data[] __initconst = {
        { 0x4a008e20, omap5_l3instr_clkctrl_regs },
        { 0x4a009020, omap5_l4per_clkctrl_regs },
        { 0x4a009420, omap5_dss_clkctrl_regs },
+       { 0x4a009520, omap5_gpu_clkctrl_regs },
        { 0x4a009620, omap5_l3init_clkctrl_regs },
        { 0x4ae07920, omap5_wkupaon_clkctrl_regs },
        { 0 },
index e8cee6f..087cfa7 100644 (file)
@@ -66,6 +66,7 @@ static int __init dm814x_adpll_early_init(void)
        }
 
        of_platform_populate(np, NULL, NULL, NULL);
+       of_node_put(np);
 
        return 0;
 }
index 659dadb..247510e 100644 (file)
@@ -165,6 +165,7 @@ static void __init _register_dpll(void *user,
        struct clk_hw_omap *clk_hw = to_clk_hw_omap(hw);
        struct dpll_data *dd = clk_hw->dpll_data;
        struct clk *clk;
+       const struct clk_init_data *init = hw->init;
 
        clk = of_clk_get(node, 0);
        if (IS_ERR(clk)) {
@@ -196,15 +197,15 @@ static void __init _register_dpll(void *user,
 
        if (!IS_ERR(clk)) {
                of_clk_add_provider(node, of_clk_src_simple_get, clk);
-               kfree(clk_hw->hw.init->parent_names);
-               kfree(clk_hw->hw.init);
+               kfree(init->parent_names);
+               kfree(init);
                return;
        }
 
 cleanup:
        kfree(clk_hw->dpll_data);
-       kfree(clk_hw->hw.init->parent_names);
-       kfree(clk_hw->hw.init);
+       kfree(init->parent_names);
+       kfree(init);
        kfree(clk_hw);
 }
 
@@ -291,14 +292,12 @@ static void __init of_ti_dpll_setup(struct device_node *node,
        struct dpll_data *dd = NULL;
        u8 dpll_mode = 0;
 
-       dd = kzalloc(sizeof(*dd), GFP_KERNEL);
+       dd = kmemdup(ddt, sizeof(*dd), GFP_KERNEL);
        clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL);
        init = kzalloc(sizeof(*init), GFP_KERNEL);
        if (!dd || !clk_hw || !init)
                goto cleanup;
 
-       memcpy(dd, ddt, sizeof(*dd));
-
        clk_hw->dpll_data = dd;
        clk_hw->ops = &clkhwops_omap3_dpll;
        clk_hw->hw.init = init;
index 90bb0b0..fd54d5c 100644 (file)
@@ -70,6 +70,7 @@ static void __init cm_osc_setup(struct device_node *np,
                        return;
                }
                cm_base = of_iomap(parent, 0);
+               of_node_put(parent);
                if (!cm_base) {
                        pr_err("could not remap core module base\n");
                        return;
index fd6c347..dd7045b 100644 (file)
@@ -564,6 +564,7 @@ static int __init top_clocks_init(struct device_node *np)
 {
        void __iomem *reg_base;
        int i, ret;
+       const char *name;
 
        reg_base = of_iomap(np, 0);
        if (!reg_base) {
@@ -573,11 +574,10 @@ static int __init top_clocks_init(struct device_node *np)
 
        for (i = 0; i < ARRAY_SIZE(zx296718_pll_clk); i++) {
                zx296718_pll_clk[i].reg_base += (uintptr_t)reg_base;
+               name = zx296718_pll_clk[i].hw.init->name;
                ret = clk_hw_register(NULL, &zx296718_pll_clk[i].hw);
-               if (ret) {
-                       pr_warn("top clk %s init error!\n",
-                               zx296718_pll_clk[i].hw.init->name);
-               }
+               if (ret)
+                       pr_warn("top clk %s init error!\n", name);
        }
 
        for (i = 0; i < ARRAY_SIZE(top_ffactor_clk); i++) {
@@ -585,11 +585,10 @@ static int __init top_clocks_init(struct device_node *np)
                        top_hw_onecell_data.hws[top_ffactor_clk[i].id] =
                                        &top_ffactor_clk[i].factor.hw;
 
+               name = top_ffactor_clk[i].factor.hw.init->name;
                ret = clk_hw_register(NULL, &top_ffactor_clk[i].factor.hw);
-               if (ret) {
-                       pr_warn("top clk %s init error!\n",
-                               top_ffactor_clk[i].factor.hw.init->name);
-               }
+               if (ret)
+                       pr_warn("top clk %s init error!\n", name);
        }
 
        for (i = 0; i < ARRAY_SIZE(top_mux_clk); i++) {
@@ -598,11 +597,10 @@ static int __init top_clocks_init(struct device_node *np)
                                        &top_mux_clk[i].mux.hw;
 
                top_mux_clk[i].mux.reg += (uintptr_t)reg_base;
+               name = top_mux_clk[i].mux.hw.init->name;
                ret = clk_hw_register(NULL, &top_mux_clk[i].mux.hw);
-               if (ret) {
-                       pr_warn("top clk %s init error!\n",
-                               top_mux_clk[i].mux.hw.init->name);
-               }
+               if (ret)
+                       pr_warn("top clk %s init error!\n", name);
        }
 
        for (i = 0; i < ARRAY_SIZE(top_gate_clk); i++) {
@@ -611,11 +609,10 @@ static int __init top_clocks_init(struct device_node *np)
                                        &top_gate_clk[i].gate.hw;
 
                top_gate_clk[i].gate.reg += (uintptr_t)reg_base;
+               name = top_gate_clk[i].gate.hw.init->name;
                ret = clk_hw_register(NULL, &top_gate_clk[i].gate.hw);
-               if (ret) {
-                       pr_warn("top clk %s init error!\n",
-                               top_gate_clk[i].gate.hw.init->name);
-               }
+               if (ret)
+                       pr_warn("top clk %s init error!\n", name);
        }
 
        for (i = 0; i < ARRAY_SIZE(top_div_clk); i++) {
@@ -624,11 +621,10 @@ static int __init top_clocks_init(struct device_node *np)
                                        &top_div_clk[i].div.hw;
 
                top_div_clk[i].div.reg += (uintptr_t)reg_base;
+               name = top_div_clk[i].div.hw.init->name;
                ret = clk_hw_register(NULL, &top_div_clk[i].div.hw);
-               if (ret) {
-                       pr_warn("top clk %s init error!\n",
-                               top_div_clk[i].div.hw.init->name);
-               }
+               if (ret)
+                       pr_warn("top clk %s init error!\n", name);
        }
 
        ret = of_clk_add_hw_provider(np, of_clk_hw_onecell_get,
@@ -754,6 +750,7 @@ static int __init lsp0_clocks_init(struct device_node *np)
 {
        void __iomem *reg_base;
        int i, ret;
+       const char *name;
 
        reg_base = of_iomap(np, 0);
        if (!reg_base) {
@@ -767,11 +764,10 @@ static int __init lsp0_clocks_init(struct device_node *np)
                                        &lsp0_mux_clk[i].mux.hw;
 
                lsp0_mux_clk[i].mux.reg += (uintptr_t)reg_base;
+               name = lsp0_mux_clk[i].mux.hw.init->name;
                ret = clk_hw_register(NULL, &lsp0_mux_clk[i].mux.hw);
-               if (ret) {
-                       pr_warn("lsp0 clk %s init error!\n",
-                               lsp0_mux_clk[i].mux.hw.init->name);
-               }
+               if (ret)
+                       pr_warn("lsp0 clk %s init error!\n", name);
        }
 
        for (i = 0; i < ARRAY_SIZE(lsp0_gate_clk); i++) {
@@ -780,11 +776,10 @@ static int __init lsp0_clocks_init(struct device_node *np)
                                        &lsp0_gate_clk[i].gate.hw;
 
                lsp0_gate_clk[i].gate.reg += (uintptr_t)reg_base;
+               name = lsp0_gate_clk[i].gate.hw.init->name;
                ret = clk_hw_register(NULL, &lsp0_gate_clk[i].gate.hw);
-               if (ret) {
-                       pr_warn("lsp0 clk %s init error!\n",
-                               lsp0_gate_clk[i].gate.hw.init->name);
-               }
+               if (ret)
+                       pr_warn("lsp0 clk %s init error!\n", name);
        }
 
        for (i = 0; i < ARRAY_SIZE(lsp0_div_clk); i++) {
@@ -793,11 +788,10 @@ static int __init lsp0_clocks_init(struct device_node *np)
                                        &lsp0_div_clk[i].div.hw;
 
                lsp0_div_clk[i].div.reg += (uintptr_t)reg_base;
+               name = lsp0_div_clk[i].div.hw.init->name;
                ret = clk_hw_register(NULL, &lsp0_div_clk[i].div.hw);
-               if (ret) {
-                       pr_warn("lsp0 clk %s init error!\n",
-                               lsp0_div_clk[i].div.hw.init->name);
-               }
+               if (ret)
+                       pr_warn("lsp0 clk %s init error!\n", name);
        }
 
        ret = of_clk_add_hw_provider(np, of_clk_hw_onecell_get,
@@ -862,6 +856,7 @@ static int __init lsp1_clocks_init(struct device_node *np)
 {
        void __iomem *reg_base;
        int i, ret;
+       const char *name;
 
        reg_base = of_iomap(np, 0);
        if (!reg_base) {
@@ -875,11 +870,10 @@ static int __init lsp1_clocks_init(struct device_node *np)
                                        &lsp0_mux_clk[i].mux.hw;
 
                lsp1_mux_clk[i].mux.reg += (uintptr_t)reg_base;
+               name = lsp1_mux_clk[i].mux.hw.init->name;
                ret = clk_hw_register(NULL, &lsp1_mux_clk[i].mux.hw);
-               if (ret) {
-                       pr_warn("lsp1 clk %s init error!\n",
-                               lsp1_mux_clk[i].mux.hw.init->name);
-               }
+               if (ret)
+                       pr_warn("lsp1 clk %s init error!\n", name);
        }
 
        for (i = 0; i < ARRAY_SIZE(lsp1_gate_clk); i++) {
@@ -888,11 +882,10 @@ static int __init lsp1_clocks_init(struct device_node *np)
                                        &lsp1_gate_clk[i].gate.hw;
 
                lsp1_gate_clk[i].gate.reg += (uintptr_t)reg_base;
+               name = lsp1_gate_clk[i].gate.hw.init->name;
                ret = clk_hw_register(NULL, &lsp1_gate_clk[i].gate.hw);
-               if (ret) {
-                       pr_warn("lsp1 clk %s init error!\n",
-                               lsp1_gate_clk[i].gate.hw.init->name);
-               }
+               if (ret)
+                       pr_warn("lsp1 clk %s init error!\n", name);
        }
 
        for (i = 0; i < ARRAY_SIZE(lsp1_div_clk); i++) {
@@ -901,11 +894,10 @@ static int __init lsp1_clocks_init(struct device_node *np)
                                        &lsp1_div_clk[i].div.hw;
 
                lsp1_div_clk[i].div.reg += (uintptr_t)reg_base;
+               name = lsp1_div_clk[i].div.hw.init->name;
                ret = clk_hw_register(NULL, &lsp1_div_clk[i].div.hw);
-               if (ret) {
-                       pr_warn("lsp1 clk %s init error!\n",
-                               lsp1_div_clk[i].div.hw.init->name);
-               }
+               if (ret)
+                       pr_warn("lsp1 clk %s init error!\n", name);
        }
 
        ret = of_clk_add_hw_provider(np, of_clk_hw_onecell_get,
@@ -979,6 +971,7 @@ static int __init audio_clocks_init(struct device_node *np)
 {
        void __iomem *reg_base;
        int i, ret;
+       const char *name;
 
        reg_base = of_iomap(np, 0);
        if (!reg_base) {
@@ -992,11 +985,10 @@ static int __init audio_clocks_init(struct device_node *np)
                                        &audio_mux_clk[i].mux.hw;
 
                audio_mux_clk[i].mux.reg += (uintptr_t)reg_base;
+               name = audio_mux_clk[i].mux.hw.init->name;
                ret = clk_hw_register(NULL, &audio_mux_clk[i].mux.hw);
-               if (ret) {
-                       pr_warn("audio clk %s init error!\n",
-                               audio_mux_clk[i].mux.hw.init->name);
-               }
+               if (ret)
+                       pr_warn("audio clk %s init error!\n", name);
        }
 
        for (i = 0; i < ARRAY_SIZE(audio_adiv_clk); i++) {
@@ -1005,11 +997,10 @@ static int __init audio_clocks_init(struct device_node *np)
                                        &audio_adiv_clk[i].hw;
 
                audio_adiv_clk[i].reg_base += (uintptr_t)reg_base;
+               name = audio_adiv_clk[i].hw.init->name;
                ret = clk_hw_register(NULL, &audio_adiv_clk[i].hw);
-               if (ret) {
-                       pr_warn("audio clk %s init error!\n",
-                               audio_adiv_clk[i].hw.init->name);
-               }
+               if (ret)
+                       pr_warn("audio clk %s init error!\n", name);
        }
 
        for (i = 0; i < ARRAY_SIZE(audio_div_clk); i++) {
@@ -1018,11 +1009,10 @@ static int __init audio_clocks_init(struct device_node *np)
                                        &audio_div_clk[i].div.hw;
 
                audio_div_clk[i].div.reg += (uintptr_t)reg_base;
+               name = audio_div_clk[i].div.hw.init->name;
                ret = clk_hw_register(NULL, &audio_div_clk[i].div.hw);
-               if (ret) {
-                       pr_warn("audio clk %s init error!\n",
-                               audio_div_clk[i].div.hw.init->name);
-               }
+               if (ret)
+                       pr_warn("audio clk %s init error!\n", name);
        }
 
        for (i = 0; i < ARRAY_SIZE(audio_gate_clk); i++) {
@@ -1031,11 +1021,10 @@ static int __init audio_clocks_init(struct device_node *np)
                                        &audio_gate_clk[i].gate.hw;
 
                audio_gate_clk[i].gate.reg += (uintptr_t)reg_base;
+               name = audio_gate_clk[i].gate.hw.init->name;
                ret = clk_hw_register(NULL, &audio_gate_clk[i].gate.hw);
-               if (ret) {
-                       pr_warn("audio clk %s init error!\n",
-                               audio_gate_clk[i].gate.hw.init->name);
-               }
+               if (ret)
+                       pr_warn("audio clk %s init error!\n", name);
        }
 
        ret = of_clk_add_hw_provider(np, of_clk_hw_onecell_get,
index a642c23..f35a53c 100644 (file)
@@ -685,4 +685,15 @@ config MILBEAUT_TIMER
        help
          Enables the support for Milbeaut timer driver.
 
+config INGENIC_TIMER
+       bool "Clocksource/timer using the TCU in Ingenic JZ SoCs"
+       default MACH_INGENIC
+       depends on MIPS || COMPILE_TEST
+       depends on COMMON_CLK
+       select MFD_SYSCON
+       select TIMER_OF
+       select IRQ_DOMAIN
+       help
+         Support for the timer/counter unit of the Ingenic JZ SoCs.
+
 endmenu
index 2e7936e..4dfe422 100644 (file)
@@ -80,6 +80,7 @@ obj-$(CONFIG_ASM9260_TIMER)           += asm9260_timer.o
 obj-$(CONFIG_H8300_TMR8)               += h8300_timer8.o
 obj-$(CONFIG_H8300_TMR16)              += h8300_timer16.o
 obj-$(CONFIG_H8300_TPU)                        += h8300_tpu.o
+obj-$(CONFIG_INGENIC_TIMER)            += ingenic-timer.o
 obj-$(CONFIG_CLKSRC_ST_LPC)            += clksrc_st_lpc.o
 obj-$(CONFIG_X86_NUMACHIP)             += numachip.o
 obj-$(CONFIG_ATCPIT100_TIMER)          += timer-atcpit100.o
diff --git a/drivers/clocksource/ingenic-timer.c b/drivers/clocksource/ingenic-timer.c
new file mode 100644 (file)
index 0000000..4bbdb3d
--- /dev/null
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * JZ47xx SoCs TCU clocksource and clock event (timer) driver
+ * Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net>
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/clockchips.h>
+#include <linux/clocksource.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/ingenic-tcu.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/sched_clock.h>
+
+#include <dt-bindings/clock/ingenic,tcu.h>
+
+/* Per-SoC description, selected through the .data of the OF match table. */
+struct ingenic_soc_info {
+       /* Number of TCU channels; bounds the search for non-PWM channels */
+       unsigned int num_channels;
+};
+
+/*
+ * Driver state for the TCU-based clocksource and clock event device.
+ *
+ * @map:               regmap used for every TCU register access
+ * @timer_clk:         clock of the channel backing the clock event device
+ * @cs_clk:            clock of the channel backing the clocksource
+ * @timer_channel:     channel index used for the clock event device
+ * @cs_channel:        channel index used for the clocksource
+ * @cevt:              clock event device registered for @timer_channel
+ * @cs:                clocksource registered for @cs_channel
+ * @name:              name given to the IRQ and to @cevt ("TCU")
+ * @pwm_channels_mask: bitmask of channels left for PWM use; the timer and
+ *                     clocksource channels are taken from its zero bits
+ */
+struct ingenic_tcu {
+       struct regmap *map;
+       struct clk *timer_clk, *cs_clk;
+       unsigned int timer_channel, cs_channel;
+       struct clock_event_device cevt;
+       struct clocksource cs;
+       char name[4];
+       unsigned long pwm_channels_mask;
+};
+
+static struct ingenic_tcu *ingenic_tcu;
+
+/*
+ * Read the current counter value of the clocksource channel.
+ *
+ * Takes no argument because it is also registered as the sched_clock read
+ * callback, so the driver state comes from the file-scope ingenic_tcu
+ * pointer.
+ */
+static u64 notrace ingenic_tcu_timer_read(void)
+{
+       struct ingenic_tcu *tcu = ingenic_tcu;
+       unsigned int count;
+
+       regmap_read(tcu->map, TCU_REG_TCNTc(tcu->cs_channel), &count);
+
+       return count;
+}
+
+/* clocksource ->read() callback; @cs is unused, see ingenic_tcu_timer_read(). */
+static u64 notrace ingenic_tcu_timer_cs_read(struct clocksource *cs)
+{
+       return ingenic_tcu_timer_read();
+}
+
+/* Map a clock event device back to the ingenic_tcu that embeds it as ->cevt. */
+static inline struct ingenic_tcu *to_ingenic_tcu(struct clock_event_device *evt)
+{
+       return container_of(evt, struct ingenic_tcu, cevt);
+}
+
+/*
+ * Clock event ->set_state_shutdown() callback: write the timer channel's bit
+ * to TECR. Always returns 0.
+ *
+ * NOTE(review): TECR is presumably the "enable clear" counterpart of TESR
+ * (which the clocksource setup uses to enable a channel), i.e. this stops
+ * the channel - confirm against the TCU register description.
+ */
+static int ingenic_tcu_cevt_set_state_shutdown(struct clock_event_device *evt)
+{
+       struct ingenic_tcu *tcu = to_ingenic_tcu(evt);
+
+       regmap_write(tcu->map, TCU_REG_TECR, BIT(tcu->timer_channel));
+
+       return 0;
+}
+
+/*
+ * Clock event ->set_next_event() callback: arm the timer channel for an
+ * event @next ticks from now.
+ *
+ * Returns -EINVAL if @next does not fit in 16 bits (0xffff is also the
+ * maximum delta the device is registered with), 0 otherwise.
+ */
+static int ingenic_tcu_cevt_set_next(unsigned long next,
+                                    struct clock_event_device *evt)
+{
+       struct ingenic_tcu *tcu = to_ingenic_tcu(evt);
+
+       if (next > 0xffff)
+               return -EINVAL;
+
+       /* Program the target value, restart counting from 0, then enable */
+       regmap_write(tcu->map, TCU_REG_TDFRc(tcu->timer_channel), next);
+       regmap_write(tcu->map, TCU_REG_TCNTc(tcu->timer_channel), 0);
+       regmap_write(tcu->map, TCU_REG_TESR, BIT(tcu->timer_channel));
+
+       return 0;
+}
+
+/*
+ * Interrupt handler of the timer channel. @dev_id is the clock event device
+ * that was passed to request_irq().
+ */
+static irqreturn_t ingenic_tcu_cevt_cb(int irq, void *dev_id)
+{
+       struct clock_event_device *evt = dev_id;
+       struct ingenic_tcu *tcu = to_ingenic_tcu(evt);
+
+       /*
+        * The device only advertises one-shot mode: same TECR write as the
+        * shutdown callback, done before handing the event to the core.
+        */
+       regmap_write(tcu->map, TCU_REG_TECR, BIT(tcu->timer_channel));
+
+       /* The handler may not be installed yet */
+       if (evt->event_handler)
+               evt->event_handler(evt);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Look up clock number @id of the clock provider registered for node @np.
+ *
+ * Returns whatever of_clk_get_from_provider() returns; callers check the
+ * result with IS_ERR().
+ */
+static struct clk * __init ingenic_tcu_get_clock(struct device_node *np, int id)
+{
+       /* Single-cell clock specifier: <&np id> */
+       struct of_phandle_args clkspec = {
+               .np = np,
+               .args_count = 1,
+               .args = { id },
+       };
+
+       return of_clk_get_from_provider(&clkspec);
+}
+
+/*
+ * Set up the clock event device on tcu->timer_channel: get and enable the
+ * channel clock, map and request the channel interrupt, then register the
+ * one-shot clock event device.
+ *
+ * Returns 0 on success or a negative errno; on failure everything acquired
+ * here has been released again.
+ */
+static int __init ingenic_tcu_timer_init(struct device_node *np,
+                                        struct ingenic_tcu *tcu)
+{
+       unsigned int timer_virq, channel = tcu->timer_channel;
+       struct irq_domain *domain;
+       unsigned long rate;
+       int err;
+
+       tcu->timer_clk = ingenic_tcu_get_clock(np, channel);
+       if (IS_ERR(tcu->timer_clk))
+               return PTR_ERR(tcu->timer_clk);
+
+       err = clk_prepare_enable(tcu->timer_clk);
+       if (err)
+               goto err_clk_put;
+
+       /* A zero rate cannot be used to configure the clock event device */
+       rate = clk_get_rate(tcu->timer_clk);
+       if (!rate) {
+               err = -EINVAL;
+               goto err_clk_disable;
+       }
+
+       /* The IRQ domain is looked up on the TCU node itself */
+       domain = irq_find_host(np);
+       if (!domain) {
+               err = -ENODEV;
+               goto err_clk_disable;
+       }
+
+       /* The channel index is used as the hardware IRQ number */
+       timer_virq = irq_create_mapping(domain, channel);
+       if (!timer_virq) {
+               err = -EINVAL;
+               goto err_clk_disable;
+       }
+
+       snprintf(tcu->name, sizeof(tcu->name), "TCU");
+
+       /* &tcu->cevt comes back as dev_id in ingenic_tcu_cevt_cb() */
+       err = request_irq(timer_virq, ingenic_tcu_cevt_cb, IRQF_TIMER,
+                         tcu->name, &tcu->cevt);
+       if (err)
+               goto err_irq_dispose_mapping;
+
+       tcu->cevt.cpumask = cpumask_of(smp_processor_id());
+       tcu->cevt.features = CLOCK_EVT_FEAT_ONESHOT;
+       tcu->cevt.name = tcu->name;
+       tcu->cevt.rating = 200;
+       tcu->cevt.set_state_shutdown = ingenic_tcu_cevt_set_state_shutdown;
+       tcu->cevt.set_next_event = ingenic_tcu_cevt_set_next;
+
+       /* Deltas from 10 to 0xffff ticks, matching the set_next check */
+       clockevents_config_and_register(&tcu->cevt, rate, 10, 0xffff);
+
+       return 0;
+
+       /* Unwind in reverse order of acquisition */
+err_irq_dispose_mapping:
+       irq_dispose_mapping(timer_virq);
+err_clk_disable:
+       clk_disable_unprepare(tcu->timer_clk);
+err_clk_put:
+       clk_put(tcu->timer_clk);
+       return err;
+}
+
+/*
+ * Set up the clocksource on tcu->cs_channel: get and enable the channel
+ * clock, reset and start the channel, then register a 16-bit continuous
+ * clocksource running at the clock's rate.
+ *
+ * Returns 0 on success or a negative errno; on failure the clock has been
+ * disabled and released again.
+ */
+static int __init ingenic_tcu_clocksource_init(struct device_node *np,
+                                              struct ingenic_tcu *tcu)
+{
+       unsigned int channel = tcu->cs_channel;
+       struct clocksource *cs = &tcu->cs;
+       unsigned long rate;
+       int err;
+
+       tcu->cs_clk = ingenic_tcu_get_clock(np, channel);
+       if (IS_ERR(tcu->cs_clk))
+               return PTR_ERR(tcu->cs_clk);
+
+       err = clk_prepare_enable(tcu->cs_clk);
+       if (err)
+               goto err_clk_put;
+
+       rate = clk_get_rate(tcu->cs_clk);
+       if (!rate) {
+               err = -EINVAL;
+               goto err_clk_disable;
+       }
+
+       /* Reset channel: clear every non-reserved bit of its TCSR */
+       regmap_update_bits(tcu->map, TCU_REG_TCSRc(channel),
+                          0xffff & ~TCU_TCSR_RESERVED_BITS, 0);
+
+       /* Reset counter: count from 0 with TDFR set to the 16-bit maximum */
+       regmap_write(tcu->map, TCU_REG_TDFRc(channel), 0xffff);
+       regmap_write(tcu->map, TCU_REG_TCNTc(channel), 0);
+
+       /* Enable channel */
+       regmap_write(tcu->map, TCU_REG_TESR, BIT(channel));
+
+       cs->name = "ingenic-timer";
+       cs->rating = 200;
+       cs->flags = CLOCK_SOURCE_IS_CONTINUOUS;
+       cs->mask = CLOCKSOURCE_MASK(16);
+       cs->read = ingenic_tcu_timer_cs_read;
+
+       err = clocksource_register_hz(cs, rate);
+       if (err)
+               goto err_clk_disable;
+
+       return 0;
+
+err_clk_disable:
+       clk_disable_unprepare(tcu->cs_clk);
+err_clk_put:
+       clk_put(tcu->cs_clk);
+       return err;
+}
+
+static const struct ingenic_soc_info jz4740_soc_info = {
+       .num_channels = 8,
+};
+
+static const struct ingenic_soc_info jz4725b_soc_info = {
+       .num_channels = 6,
+};
+
+static const struct of_device_id ingenic_tcu_of_match[] = {
+       { .compatible = "ingenic,jz4740-tcu", .data = &jz4740_soc_info, },
+       { .compatible = "ingenic,jz4725b-tcu", .data = &jz4725b_soc_info, },
+       { .compatible = "ingenic,jz4770-tcu", .data = &jz4740_soc_info, },
+       { /* sentinel */ }
+};
+
+/*
+ * Early (TIMER_OF_DECLARE) entry point: set up the TCU clocksource, clock
+ * event device and sched_clock for the TCU node @np.
+ *
+ * The first two channels that are not reserved for PWM (zero bits of the
+ * PWM channel mask, optionally overridden by the "ingenic,pwm-channels-mask"
+ * property) become the clock event channel and the clocksource channel, in
+ * that order.
+ *
+ * Returns 0 on success or a negative errno. On failure the file-scope
+ * ingenic_tcu pointer is reset to NULL so it never refers to freed memory.
+ */
+static int __init ingenic_tcu_init(struct device_node *np)
+{
+       const struct of_device_id *id = of_match_node(ingenic_tcu_of_match, np);
+       const struct ingenic_soc_info *soc_info;
+       struct ingenic_tcu *tcu;
+       struct regmap *map;
+       unsigned long rate;
+       u32 pwm_mask;
+       int ret;
+
+       if (!id)
+               return -ENODEV;
+       soc_info = id->data;
+
+       /* Let a platform device be created for this node later on */
+       of_node_clear_flag(np, OF_POPULATED);
+
+       map = device_node_to_regmap(np);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
+
+       tcu = kzalloc(sizeof(*tcu), GFP_KERNEL);
+       if (!tcu)
+               return -ENOMEM;
+
+       /* Enable all TCU channels for PWM use by default except channels 0/1 */
+       tcu->pwm_channels_mask = GENMASK(soc_info->num_channels - 1, 2);
+
+       /*
+        * Read the optional override into a real u32: writing through a
+        * (u32 *) cast of the unsigned long field would land in the wrong
+        * half of it on 64-bit big-endian builds.
+        */
+       if (!of_property_read_u32(np, "ingenic,pwm-channels-mask", &pwm_mask))
+               tcu->pwm_channels_mask = pwm_mask;
+
+       /* Verify that we have at least two free channels */
+       if (hweight8(tcu->pwm_channels_mask) > soc_info->num_channels - 2) {
+               pr_crit("%s: Invalid PWM channel mask: 0x%02lx\n", __func__,
+                       tcu->pwm_channels_mask);
+               ret = -EINVAL;
+               goto err_free_ingenic_tcu;
+       }
+
+       tcu->map = map;
+       /* Published before registration: the read callbacks use the global */
+       ingenic_tcu = tcu;
+
+       tcu->timer_channel = find_first_zero_bit(&tcu->pwm_channels_mask,
+                                                soc_info->num_channels);
+       tcu->cs_channel = find_next_zero_bit(&tcu->pwm_channels_mask,
+                                            soc_info->num_channels,
+                                            tcu->timer_channel + 1);
+
+       ret = ingenic_tcu_clocksource_init(np, tcu);
+       if (ret) {
+               pr_crit("%s: Unable to init clocksource: %d\n", __func__, ret);
+               goto err_free_ingenic_tcu;
+       }
+
+       ret = ingenic_tcu_timer_init(np, tcu);
+       if (ret) {
+               pr_crit("%s: Unable to init timer: %d\n", __func__, ret);
+               goto err_tcu_clocksource_cleanup;
+       }
+
+       /* Register the sched_clock at the end as there's no way to undo it */
+       rate = clk_get_rate(tcu->cs_clk);
+       sched_clock_register(ingenic_tcu_timer_read, 16, rate);
+
+       return 0;
+
+err_tcu_clocksource_cleanup:
+       clocksource_unregister(&tcu->cs);
+       clk_disable_unprepare(tcu->cs_clk);
+       clk_put(tcu->cs_clk);
+err_free_ingenic_tcu:
+       /* Never leave the global pointing at the memory freed below */
+       ingenic_tcu = NULL;
+       kfree(tcu);
+       return ret;
+}
+
+TIMER_OF_DECLARE(jz4740_tcu_intc,  "ingenic,jz4740-tcu",  ingenic_tcu_init);
+TIMER_OF_DECLARE(jz4725b_tcu_intc, "ingenic,jz4725b-tcu", ingenic_tcu_init);
+TIMER_OF_DECLARE(jz4770_tcu_intc,  "ingenic,jz4770-tcu",  ingenic_tcu_init);
+
+
+/*
+ * Bind the platform device so that the PM callbacks can reach the state set
+ * up by ingenic_tcu_init().
+ *
+ * Returns -ENODEV if ingenic_tcu is NULL (early init did not run, or failed
+ * before publishing it): the suspend/resume callbacks dereference the driver
+ * data unconditionally, so the driver must not bind without it.
+ */
+static int __init ingenic_tcu_probe(struct platform_device *pdev)
+{
+       if (!ingenic_tcu)
+               return -ENODEV;
+
+       platform_set_drvdata(pdev, ingenic_tcu);
+
+       return 0;
+}
+
+/*
+ * suspend_noirq callback: gate the clocksource and timer channel clocks.
+ * The clocks stay prepared; only clk_disable() is called. Always returns 0.
+ */
+static int __maybe_unused ingenic_tcu_suspend(struct device *dev)
+{
+       struct ingenic_tcu *tcu = dev_get_drvdata(dev);
+
+       clk_disable(tcu->cs_clk);
+       clk_disable(tcu->timer_clk);
+       return 0;
+}
+
+/*
+ * resume_noirq callback: ungate the timer clock, then the clocksource clock
+ * (reverse of the suspend order).
+ *
+ * Returns 0 on success or the clk_enable() error; if the second enable
+ * fails, the first one is rolled back.
+ */
+static int __maybe_unused ingenic_tcu_resume(struct device *dev)
+{
+       struct ingenic_tcu *tcu = dev_get_drvdata(dev);
+       int ret;
+
+       ret = clk_enable(tcu->timer_clk);
+       if (ret)
+               return ret;
+
+       ret = clk_enable(tcu->cs_clk);
+       if (ret) {
+               /* Leave both clocks gated on failure */
+               clk_disable(tcu->timer_clk);
+               return ret;
+       }
+
+       return 0;
+}
+
+static const struct dev_pm_ops __maybe_unused ingenic_tcu_pm_ops = {
+       /* _noirq: We want the TCU clocks to be gated last / ungated first */
+       .suspend_noirq = ingenic_tcu_suspend,
+       .resume_noirq  = ingenic_tcu_resume,
+};
+
+static struct platform_driver ingenic_tcu_driver = {
+       .driver = {
+               .name   = "ingenic-tcu-timer",
+#ifdef CONFIG_PM_SLEEP
+               .pm     = &ingenic_tcu_pm_ops,
+#endif
+               .of_match_table = ingenic_tcu_of_match,
+       },
+};
+builtin_platform_driver_probe(ingenic_tcu_driver, ingenic_tcu_probe);
index c70cb5f..0891ab8 100644 (file)
@@ -1078,7 +1078,7 @@ new_buf:
                        bool merge;
 
                        if (page)
-                               pg_size <<= compound_order(page);
+                               pg_size = page_size(page);
                        if (off < pg_size &&
                            skb_can_coalesce(skb, i, page, off)) {
                                merge = 1;
@@ -1105,8 +1105,7 @@ new_buf:
                                                           __GFP_NORETRY,
                                                           order);
                                        if (page)
-                                               pg_size <<=
-                                                       compound_order(page);
+                                               pg_size <<= order;
                                }
                                if (!page) {
                                        page = alloc_page(gfp);
index e0508ea..c27e716 100644 (file)
@@ -153,6 +153,24 @@ static void sec_alg_skcipher_init_context(struct crypto_skcipher *atfm,
                                       ctx->cipher_alg);
 }
 
+/*
+ * Free a chain of hardware SGL entries back to info->hw_sgl_pool.
+ *
+ * @hw_sgl:   first entry of the chain; NULL makes this a no-op
+ * @psec_sgl: DMA address of @hw_sgl
+ * @info:     device whose hw_sgl_pool the entries are returned to
+ *
+ * Each entry carries the CPU pointer (->next) and the DMA address
+ * (->next_sgl) of its successor, so an entry is always freed with its own
+ * DMA address: the caller's for the first one, the predecessor's ->next_sgl
+ * for the others.
+ */
+static void sec_free_hw_sgl(struct sec_hw_sgl *hw_sgl,
+                           dma_addr_t psec_sgl, struct sec_dev_info *info)
+{
+       struct sec_hw_sgl *sgl_current, *sgl_next;
+       dma_addr_t sgl_next_dma;
+
+       sgl_current = hw_sgl;
+       while (sgl_current) {
+               /* Save both links before the entry goes back to the pool */
+               sgl_next = sgl_current->next;
+               sgl_next_dma = sgl_current->next_sgl;
+
+               dma_pool_free(info->hw_sgl_pool, sgl_current, psec_sgl);
+
+               sgl_current = sgl_next;
+               psec_sgl = sgl_next_dma;
+       }
+}
+
 static int sec_alloc_and_fill_hw_sgl(struct sec_hw_sgl **sec_sgl,
                                     dma_addr_t *psec_sgl,
                                     struct scatterlist *sgl,
@@ -199,35 +217,12 @@ static int sec_alloc_and_fill_hw_sgl(struct sec_hw_sgl **sec_sgl,
        return 0;
 
 err_free_hw_sgls:
-       sgl_current = *sec_sgl;
-       while (sgl_current) {
-               sgl_next = sgl_current->next;
-               dma_pool_free(info->hw_sgl_pool, sgl_current,
-                             sgl_current->next_sgl);
-               sgl_current = sgl_next;
-       }
+       sec_free_hw_sgl(*sec_sgl, *psec_sgl, info);
        *psec_sgl = 0;
 
        return ret;
 }
 
-static void sec_free_hw_sgl(struct sec_hw_sgl *hw_sgl,
-                           dma_addr_t psec_sgl, struct sec_dev_info *info)
-{
-       struct sec_hw_sgl *sgl_current, *sgl_next;
-
-       if (!hw_sgl)
-               return;
-       sgl_current = hw_sgl;
-       while (sgl_current->next) {
-               sgl_next = sgl_current->next;
-               dma_pool_free(info->hw_sgl_pool, sgl_current,
-                             sgl_current->next_sgl);
-               sgl_current = sgl_next;
-       }
-       dma_pool_free(info->hw_sgl_pool, hw_sgl, psec_sgl);
-}
-
 static int sec_alg_skcipher_setkey(struct crypto_skcipher *tfm,
                                   const u8 *key, unsigned int keylen,
                                   enum sec_cipher_alg alg)
index 5a3f84d..5902354 100644 (file)
@@ -559,7 +559,7 @@ static int hisi_zip_acompress(struct acomp_req *acomp_req)
        struct hisi_zip_ctx *ctx = crypto_tfm_ctx(acomp_req->base.tfm);
        struct hisi_zip_qp_ctx *qp_ctx = &ctx->qp_ctx[QPC_COMP];
        struct hisi_zip_req *req;
-       size_t head_size;
+       int head_size;
        int ret;
 
        /* let's output compression head now */
@@ -567,7 +567,7 @@ static int hisi_zip_acompress(struct acomp_req *acomp_req)
        if (head_size < 0)
                return -ENOMEM;
 
-       req = hisi_zip_create_req(acomp_req, qp_ctx, head_size, true);
+       req = hisi_zip_create_req(acomp_req, qp_ctx, (size_t)head_size, true);
        if (IS_ERR(req))
                return PTR_ERR(req);
 
index 6e0ca75..1b2ee96 100644 (file)
@@ -785,7 +785,6 @@ static int hisi_zip_clear_vft_config(struct hisi_zip *hisi_zip)
 
 static int hisi_zip_sriov_enable(struct pci_dev *pdev, int max_vfs)
 {
-#ifdef CONFIG_PCI_IOV
        struct hisi_zip *hisi_zip = pci_get_drvdata(pdev);
        int pre_existing_vfs, num_vfs, ret;
 
@@ -815,9 +814,6 @@ static int hisi_zip_sriov_enable(struct pci_dev *pdev, int max_vfs)
        }
 
        return num_vfs;
-#else
-       return 0;
-#endif
 }
 
 static int hisi_zip_sriov_disable(struct pci_dev *pdev)
@@ -948,7 +944,8 @@ static struct pci_driver hisi_zip_pci_driver = {
        .id_table               = hisi_zip_dev_ids,
        .probe                  = hisi_zip_probe,
        .remove                 = hisi_zip_remove,
-       .sriov_configure        = hisi_zip_sriov_configure,
+       .sriov_configure        = IS_ENABLED(CONFIG_PCI_IOV) ?
+                                       hisi_zip_sriov_configure : 0,
        .err_handler            = &hisi_zip_err_handler,
 };
 
index b456b85..4ab1bde 100644 (file)
@@ -1789,32 +1789,50 @@ static struct pci_driver safexcel_pci_driver = {
 };
 #endif
 
-static int __init safexcel_init(void)
-{
-       int rc;
-
+/* Unfortunately, we have to resort to global variables here */
+#if IS_ENABLED(CONFIG_PCI)
+int pcireg_rc = -EINVAL; /* Default safe value */
+#endif
 #if IS_ENABLED(CONFIG_OF)
-               /* Register platform driver */
-               platform_driver_register(&crypto_safexcel);
+int ofreg_rc = -EINVAL; /* Default safe value */
 #endif
 
+static int __init safexcel_init(void)
+{
 #if IS_ENABLED(CONFIG_PCI)
-               /* Register PCI driver */
-               rc = pci_register_driver(&safexcel_pci_driver);
+       /* Register PCI driver */
+       pcireg_rc = pci_register_driver(&safexcel_pci_driver);
 #endif
 
-       return 0;
+#if IS_ENABLED(CONFIG_OF)
+       /* Register platform driver */
+       ofreg_rc = platform_driver_register(&crypto_safexcel);
+ #if IS_ENABLED(CONFIG_PCI)
+       /* Return success if either PCI or OF registered OK */
+       return pcireg_rc ? ofreg_rc : 0;
+ #else
+       return ofreg_rc;
+ #endif
+#else
+ #if IS_ENABLED(CONFIG_PCI)
+       return pcireg_rc;
+ #else
+       return -EINVAL;
+ #endif
+#endif
 }
 
 static void __exit safexcel_exit(void)
 {
 #if IS_ENABLED(CONFIG_OF)
-               /* Unregister platform driver */
+       /* Unregister platform driver */
+       if (!ofreg_rc)
                platform_driver_unregister(&crypto_safexcel);
 #endif
 
 #if IS_ENABLED(CONFIG_PCI)
-               /* Unregister PCI driver if successfully registered before */
+       /* Unregister PCI driver if successfully registered before */
+       if (!pcireg_rc)
                pci_unregister_driver(&safexcel_pci_driver);
 #endif
 }
index cb6c10b..56e3068 100644 (file)
@@ -3116,6 +3116,7 @@ static int talitos_remove(struct platform_device *ofdev)
                        break;
                case CRYPTO_ALG_TYPE_AEAD:
                        crypto_unregister_aead(&t_alg->algt.alg.aead);
+                       break;
                case CRYPTO_ALG_TYPE_AHASH:
                        crypto_unregister_ahash(&t_alg->algt.alg.hash);
                        break;
index 7c511e3..7af874b 100644 (file)
@@ -137,12 +137,6 @@ config DMA_BCM2835
        select DMA_ENGINE
        select DMA_VIRTUAL_CHANNELS
 
-config DMA_JZ4740
-       tristate "JZ4740 DMA support"
-       depends on MACH_JZ4740 || COMPILE_TEST
-       select DMA_ENGINE
-       select DMA_VIRTUAL_CHANNELS
-
 config DMA_JZ4780
        tristate "JZ4780 DMA support"
        depends on MIPS || COMPILE_TEST
index 5bddf6f..f5ce866 100644 (file)
@@ -22,7 +22,6 @@ obj-$(CONFIG_AXI_DMAC) += dma-axi-dmac.o
 obj-$(CONFIG_BCM_SBA_RAID) += bcm-sba-raid.o
 obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o
 obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o
-obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o
 obj-$(CONFIG_DMA_JZ4780) += dma-jz4780.o
 obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o
 obj-$(CONFIG_DMA_SUN4I) += sun4i-dma.o
diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c
deleted file mode 100644 (file)
index 39c676c..0000000
+++ /dev/null
@@ -1,623 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *  Copyright (C) 2013, Lars-Peter Clausen <lars@metafoo.de>
- *  JZ4740 DMAC support
- */
-
-#include <linux/dmaengine.h>
-#include <linux/dma-mapping.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/irq.h>
-#include <linux/clk.h>
-
-#include "virt-dma.h"
-
-#define JZ_DMA_NR_CHANS 6
-
-#define JZ_REG_DMA_SRC_ADDR(x)         (0x00 + (x) * 0x20)
-#define JZ_REG_DMA_DST_ADDR(x)         (0x04 + (x) * 0x20)
-#define JZ_REG_DMA_TRANSFER_COUNT(x)   (0x08 + (x) * 0x20)
-#define JZ_REG_DMA_REQ_TYPE(x)         (0x0C + (x) * 0x20)
-#define JZ_REG_DMA_STATUS_CTRL(x)      (0x10 + (x) * 0x20)
-#define JZ_REG_DMA_CMD(x)              (0x14 + (x) * 0x20)
-#define JZ_REG_DMA_DESC_ADDR(x)                (0x18 + (x) * 0x20)
-
-#define JZ_REG_DMA_CTRL                        0x300
-#define JZ_REG_DMA_IRQ                 0x304
-#define JZ_REG_DMA_DOORBELL            0x308
-#define JZ_REG_DMA_DOORBELL_SET                0x30C
-
-#define JZ_DMA_STATUS_CTRL_NO_DESC             BIT(31)
-#define JZ_DMA_STATUS_CTRL_DESC_INV            BIT(6)
-#define JZ_DMA_STATUS_CTRL_ADDR_ERR            BIT(4)
-#define JZ_DMA_STATUS_CTRL_TRANSFER_DONE       BIT(3)
-#define JZ_DMA_STATUS_CTRL_HALT                        BIT(2)
-#define JZ_DMA_STATUS_CTRL_COUNT_TERMINATE     BIT(1)
-#define JZ_DMA_STATUS_CTRL_ENABLE              BIT(0)
-
-#define JZ_DMA_CMD_SRC_INC                     BIT(23)
-#define JZ_DMA_CMD_DST_INC                     BIT(22)
-#define JZ_DMA_CMD_RDIL_MASK                   (0xf << 16)
-#define JZ_DMA_CMD_SRC_WIDTH_MASK              (0x3 << 14)
-#define JZ_DMA_CMD_DST_WIDTH_MASK              (0x3 << 12)
-#define JZ_DMA_CMD_INTERVAL_LENGTH_MASK                (0x7 << 8)
-#define JZ_DMA_CMD_BLOCK_MODE                  BIT(7)
-#define JZ_DMA_CMD_DESC_VALID                  BIT(4)
-#define JZ_DMA_CMD_DESC_VALID_MODE             BIT(3)
-#define JZ_DMA_CMD_VALID_IRQ_ENABLE            BIT(2)
-#define JZ_DMA_CMD_TRANSFER_IRQ_ENABLE         BIT(1)
-#define JZ_DMA_CMD_LINK_ENABLE                 BIT(0)
-
-#define JZ_DMA_CMD_FLAGS_OFFSET 22
-#define JZ_DMA_CMD_RDIL_OFFSET 16
-#define JZ_DMA_CMD_SRC_WIDTH_OFFSET 14
-#define JZ_DMA_CMD_DST_WIDTH_OFFSET 12
-#define JZ_DMA_CMD_TRANSFER_SIZE_OFFSET 8
-#define JZ_DMA_CMD_MODE_OFFSET 7
-
-#define JZ_DMA_CTRL_PRIORITY_MASK              (0x3 << 8)
-#define JZ_DMA_CTRL_HALT                       BIT(3)
-#define JZ_DMA_CTRL_ADDRESS_ERROR              BIT(2)
-#define JZ_DMA_CTRL_ENABLE                     BIT(0)
-
-enum jz4740_dma_width {
-       JZ4740_DMA_WIDTH_32BIT  = 0,
-       JZ4740_DMA_WIDTH_8BIT   = 1,
-       JZ4740_DMA_WIDTH_16BIT  = 2,
-};
-
-enum jz4740_dma_transfer_size {
-       JZ4740_DMA_TRANSFER_SIZE_4BYTE  = 0,
-       JZ4740_DMA_TRANSFER_SIZE_1BYTE  = 1,
-       JZ4740_DMA_TRANSFER_SIZE_2BYTE  = 2,
-       JZ4740_DMA_TRANSFER_SIZE_16BYTE = 3,
-       JZ4740_DMA_TRANSFER_SIZE_32BYTE = 4,
-};
-
-enum jz4740_dma_flags {
-       JZ4740_DMA_SRC_AUTOINC = 0x2,
-       JZ4740_DMA_DST_AUTOINC = 0x1,
-};
-
-enum jz4740_dma_mode {
-       JZ4740_DMA_MODE_SINGLE  = 0,
-       JZ4740_DMA_MODE_BLOCK   = 1,
-};
-
-struct jz4740_dma_sg {
-       dma_addr_t addr;
-       unsigned int len;
-};
-
-struct jz4740_dma_desc {
-       struct virt_dma_desc vdesc;
-
-       enum dma_transfer_direction direction;
-       bool cyclic;
-
-       unsigned int num_sgs;
-       struct jz4740_dma_sg sg[];
-};
-
-struct jz4740_dmaengine_chan {
-       struct virt_dma_chan vchan;
-       unsigned int id;
-       struct dma_slave_config config;
-
-       dma_addr_t fifo_addr;
-       unsigned int transfer_shift;
-
-       struct jz4740_dma_desc *desc;
-       unsigned int next_sg;
-};
-
-struct jz4740_dma_dev {
-       struct dma_device ddev;
-       void __iomem *base;
-       struct clk *clk;
-
-       struct jz4740_dmaengine_chan chan[JZ_DMA_NR_CHANS];
-};
-
-static struct jz4740_dma_dev *jz4740_dma_chan_get_dev(
-       struct jz4740_dmaengine_chan *chan)
-{
-       return container_of(chan->vchan.chan.device, struct jz4740_dma_dev,
-               ddev);
-}
-
-static struct jz4740_dmaengine_chan *to_jz4740_dma_chan(struct dma_chan *c)
-{
-       return container_of(c, struct jz4740_dmaengine_chan, vchan.chan);
-}
-
-static struct jz4740_dma_desc *to_jz4740_dma_desc(struct virt_dma_desc *vdesc)
-{
-       return container_of(vdesc, struct jz4740_dma_desc, vdesc);
-}
-
-static inline uint32_t jz4740_dma_read(struct jz4740_dma_dev *dmadev,
-       unsigned int reg)
-{
-       return readl(dmadev->base + reg);
-}
-
-static inline void jz4740_dma_write(struct jz4740_dma_dev *dmadev,
-       unsigned reg, uint32_t val)
-{
-       writel(val, dmadev->base + reg);
-}
-
-static inline void jz4740_dma_write_mask(struct jz4740_dma_dev *dmadev,
-       unsigned int reg, uint32_t val, uint32_t mask)
-{
-       uint32_t tmp;
-
-       tmp = jz4740_dma_read(dmadev, reg);
-       tmp &= ~mask;
-       tmp |= val;
-       jz4740_dma_write(dmadev, reg, tmp);
-}
-
-static struct jz4740_dma_desc *jz4740_dma_alloc_desc(unsigned int num_sgs)
-{
-       return kzalloc(sizeof(struct jz4740_dma_desc) +
-               sizeof(struct jz4740_dma_sg) * num_sgs, GFP_ATOMIC);
-}
-
-static enum jz4740_dma_width jz4740_dma_width(enum dma_slave_buswidth width)
-{
-       switch (width) {
-       case DMA_SLAVE_BUSWIDTH_1_BYTE:
-               return JZ4740_DMA_WIDTH_8BIT;
-       case DMA_SLAVE_BUSWIDTH_2_BYTES:
-               return JZ4740_DMA_WIDTH_16BIT;
-       case DMA_SLAVE_BUSWIDTH_4_BYTES:
-               return JZ4740_DMA_WIDTH_32BIT;
-       default:
-               return JZ4740_DMA_WIDTH_32BIT;
-       }
-}
-
-static enum jz4740_dma_transfer_size jz4740_dma_maxburst(u32 maxburst)
-{
-       if (maxburst <= 1)
-               return JZ4740_DMA_TRANSFER_SIZE_1BYTE;
-       else if (maxburst <= 3)
-               return JZ4740_DMA_TRANSFER_SIZE_2BYTE;
-       else if (maxburst <= 15)
-               return JZ4740_DMA_TRANSFER_SIZE_4BYTE;
-       else if (maxburst <= 31)
-               return JZ4740_DMA_TRANSFER_SIZE_16BYTE;
-
-       return JZ4740_DMA_TRANSFER_SIZE_32BYTE;
-}
-
-static int jz4740_dma_slave_config_write(struct dma_chan *c,
-                                  struct dma_slave_config *config,
-                                  enum dma_transfer_direction direction)
-{
-       struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c);
-       struct jz4740_dma_dev *dmadev = jz4740_dma_chan_get_dev(chan);
-       enum jz4740_dma_width src_width;
-       enum jz4740_dma_width dst_width;
-       enum jz4740_dma_transfer_size transfer_size;
-       enum jz4740_dma_flags flags;
-       uint32_t cmd;
-
-       switch (direction) {
-       case DMA_MEM_TO_DEV:
-               flags = JZ4740_DMA_SRC_AUTOINC;
-               transfer_size = jz4740_dma_maxburst(config->dst_maxburst);
-               chan->fifo_addr = config->dst_addr;
-               break;
-       case DMA_DEV_TO_MEM:
-               flags = JZ4740_DMA_DST_AUTOINC;
-               transfer_size = jz4740_dma_maxburst(config->src_maxburst);
-               chan->fifo_addr = config->src_addr;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       src_width = jz4740_dma_width(config->src_addr_width);
-       dst_width = jz4740_dma_width(config->dst_addr_width);
-
-       switch (transfer_size) {
-       case JZ4740_DMA_TRANSFER_SIZE_2BYTE:
-               chan->transfer_shift = 1;
-               break;
-       case JZ4740_DMA_TRANSFER_SIZE_4BYTE:
-               chan->transfer_shift = 2;
-               break;
-       case JZ4740_DMA_TRANSFER_SIZE_16BYTE:
-               chan->transfer_shift = 4;
-               break;
-       case JZ4740_DMA_TRANSFER_SIZE_32BYTE:
-               chan->transfer_shift = 5;
-               break;
-       default:
-               chan->transfer_shift = 0;
-               break;
-       }
-
-       cmd = flags << JZ_DMA_CMD_FLAGS_OFFSET;
-       cmd |= src_width << JZ_DMA_CMD_SRC_WIDTH_OFFSET;
-       cmd |= dst_width << JZ_DMA_CMD_DST_WIDTH_OFFSET;
-       cmd |= transfer_size << JZ_DMA_CMD_TRANSFER_SIZE_OFFSET;
-       cmd |= JZ4740_DMA_MODE_SINGLE << JZ_DMA_CMD_MODE_OFFSET;
-       cmd |= JZ_DMA_CMD_TRANSFER_IRQ_ENABLE;
-
-       jz4740_dma_write(dmadev, JZ_REG_DMA_CMD(chan->id), cmd);
-       jz4740_dma_write(dmadev, JZ_REG_DMA_STATUS_CTRL(chan->id), 0);
-       jz4740_dma_write(dmadev, JZ_REG_DMA_REQ_TYPE(chan->id),
-               config->slave_id);
-
-       return 0;
-}
-
-static int jz4740_dma_slave_config(struct dma_chan *c,
-                                  struct dma_slave_config *config)
-{
-       struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c);
-
-       memcpy(&chan->config, config, sizeof(*config));
-       return 0;
-}
-
-static int jz4740_dma_terminate_all(struct dma_chan *c)
-{
-       struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c);
-       struct jz4740_dma_dev *dmadev = jz4740_dma_chan_get_dev(chan);
-       unsigned long flags;
-       LIST_HEAD(head);
-
-       spin_lock_irqsave(&chan->vchan.lock, flags);
-       jz4740_dma_write_mask(dmadev, JZ_REG_DMA_STATUS_CTRL(chan->id), 0,
-                       JZ_DMA_STATUS_CTRL_ENABLE);
-       chan->desc = NULL;
-       vchan_get_all_descriptors(&chan->vchan, &head);
-       spin_unlock_irqrestore(&chan->vchan.lock, flags);
-
-       vchan_dma_desc_free_list(&chan->vchan, &head);
-
-       return 0;
-}
-
-static int jz4740_dma_start_transfer(struct jz4740_dmaengine_chan *chan)
-{
-       struct jz4740_dma_dev *dmadev = jz4740_dma_chan_get_dev(chan);
-       dma_addr_t src_addr, dst_addr;
-       struct virt_dma_desc *vdesc;
-       struct jz4740_dma_sg *sg;
-
-       jz4740_dma_write_mask(dmadev, JZ_REG_DMA_STATUS_CTRL(chan->id), 0,
-                       JZ_DMA_STATUS_CTRL_ENABLE);
-
-       if (!chan->desc) {
-               vdesc = vchan_next_desc(&chan->vchan);
-               if (!vdesc)
-                       return 0;
-               chan->desc = to_jz4740_dma_desc(vdesc);
-               chan->next_sg = 0;
-       }
-
-       if (chan->next_sg == chan->desc->num_sgs)
-               chan->next_sg = 0;
-
-       sg = &chan->desc->sg[chan->next_sg];
-
-       if (chan->desc->direction == DMA_MEM_TO_DEV) {
-               src_addr = sg->addr;
-               dst_addr = chan->fifo_addr;
-       } else {
-               src_addr = chan->fifo_addr;
-               dst_addr = sg->addr;
-       }
-       jz4740_dma_write(dmadev, JZ_REG_DMA_SRC_ADDR(chan->id), src_addr);
-       jz4740_dma_write(dmadev, JZ_REG_DMA_DST_ADDR(chan->id), dst_addr);
-       jz4740_dma_write(dmadev, JZ_REG_DMA_TRANSFER_COUNT(chan->id),
-                       sg->len >> chan->transfer_shift);
-
-       chan->next_sg++;
-
-       jz4740_dma_write_mask(dmadev, JZ_REG_DMA_STATUS_CTRL(chan->id),
-                       JZ_DMA_STATUS_CTRL_NO_DESC | JZ_DMA_STATUS_CTRL_ENABLE,
-                       JZ_DMA_STATUS_CTRL_HALT | JZ_DMA_STATUS_CTRL_NO_DESC |
-                       JZ_DMA_STATUS_CTRL_ENABLE);
-
-       jz4740_dma_write_mask(dmadev, JZ_REG_DMA_CTRL,
-                       JZ_DMA_CTRL_ENABLE,
-                       JZ_DMA_CTRL_HALT | JZ_DMA_CTRL_ENABLE);
-
-       return 0;
-}
-
-static void jz4740_dma_chan_irq(struct jz4740_dmaengine_chan *chan)
-{
-       spin_lock(&chan->vchan.lock);
-       if (chan->desc) {
-               if (chan->desc->cyclic) {
-                       vchan_cyclic_callback(&chan->desc->vdesc);
-               } else {
-                       if (chan->next_sg == chan->desc->num_sgs) {
-                               list_del(&chan->desc->vdesc.node);
-                               vchan_cookie_complete(&chan->desc->vdesc);
-                               chan->desc = NULL;
-                       }
-               }
-       }
-       jz4740_dma_start_transfer(chan);
-       spin_unlock(&chan->vchan.lock);
-}
-
-static irqreturn_t jz4740_dma_irq(int irq, void *devid)
-{
-       struct jz4740_dma_dev *dmadev = devid;
-       uint32_t irq_status;
-       unsigned int i;
-
-       irq_status = readl(dmadev->base + JZ_REG_DMA_IRQ);
-
-       for (i = 0; i < 6; ++i) {
-               if (irq_status & (1 << i)) {
-                       jz4740_dma_write_mask(dmadev,
-                               JZ_REG_DMA_STATUS_CTRL(i), 0,
-                               JZ_DMA_STATUS_CTRL_ENABLE |
-                               JZ_DMA_STATUS_CTRL_TRANSFER_DONE);
-
-                       jz4740_dma_chan_irq(&dmadev->chan[i]);
-               }
-       }
-
-       return IRQ_HANDLED;
-}
-
-static void jz4740_dma_issue_pending(struct dma_chan *c)
-{
-       struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c);
-       unsigned long flags;
-
-       spin_lock_irqsave(&chan->vchan.lock, flags);
-       if (vchan_issue_pending(&chan->vchan) && !chan->desc)
-               jz4740_dma_start_transfer(chan);
-       spin_unlock_irqrestore(&chan->vchan.lock, flags);
-}
-
-static struct dma_async_tx_descriptor *jz4740_dma_prep_slave_sg(
-       struct dma_chan *c, struct scatterlist *sgl,
-       unsigned int sg_len, enum dma_transfer_direction direction,
-       unsigned long flags, void *context)
-{
-       struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c);
-       struct jz4740_dma_desc *desc;
-       struct scatterlist *sg;
-       unsigned int i;
-
-       desc = jz4740_dma_alloc_desc(sg_len);
-       if (!desc)
-               return NULL;
-
-       for_each_sg(sgl, sg, sg_len, i) {
-               desc->sg[i].addr = sg_dma_address(sg);
-               desc->sg[i].len = sg_dma_len(sg);
-       }
-
-       desc->num_sgs = sg_len;
-       desc->direction = direction;
-       desc->cyclic = false;
-
-       jz4740_dma_slave_config_write(c, &chan->config, direction);
-
-       return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
-}
-
-static struct dma_async_tx_descriptor *jz4740_dma_prep_dma_cyclic(
-       struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len,
-       size_t period_len, enum dma_transfer_direction direction,
-       unsigned long flags)
-{
-       struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c);
-       struct jz4740_dma_desc *desc;
-       unsigned int num_periods, i;
-
-       if (buf_len % period_len)
-               return NULL;
-
-       num_periods = buf_len / period_len;
-
-       desc = jz4740_dma_alloc_desc(num_periods);
-       if (!desc)
-               return NULL;
-
-       for (i = 0; i < num_periods; i++) {
-               desc->sg[i].addr = buf_addr;
-               desc->sg[i].len = period_len;
-               buf_addr += period_len;
-       }
-
-       desc->num_sgs = num_periods;
-       desc->direction = direction;
-       desc->cyclic = true;
-
-       jz4740_dma_slave_config_write(c, &chan->config, direction);
-
-       return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
-}
-
-static size_t jz4740_dma_desc_residue(struct jz4740_dmaengine_chan *chan,
-       struct jz4740_dma_desc *desc, unsigned int next_sg)
-{
-       struct jz4740_dma_dev *dmadev = jz4740_dma_chan_get_dev(chan);
-       unsigned int residue, count;
-       unsigned int i;
-
-       residue = 0;
-
-       for (i = next_sg; i < desc->num_sgs; i++)
-               residue += desc->sg[i].len;
-
-       if (next_sg != 0) {
-               count = jz4740_dma_read(dmadev,
-                       JZ_REG_DMA_TRANSFER_COUNT(chan->id));
-               residue += count << chan->transfer_shift;
-       }
-
-       return residue;
-}
-
-static enum dma_status jz4740_dma_tx_status(struct dma_chan *c,
-       dma_cookie_t cookie, struct dma_tx_state *state)
-{
-       struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c);
-       struct virt_dma_desc *vdesc;
-       enum dma_status status;
-       unsigned long flags;
-
-       status = dma_cookie_status(c, cookie, state);
-       if (status == DMA_COMPLETE || !state)
-               return status;
-
-       spin_lock_irqsave(&chan->vchan.lock, flags);
-       vdesc = vchan_find_desc(&chan->vchan, cookie);
-       if (cookie == chan->desc->vdesc.tx.cookie) {
-               state->residue = jz4740_dma_desc_residue(chan, chan->desc,
-                               chan->next_sg);
-       } else if (vdesc) {
-               state->residue = jz4740_dma_desc_residue(chan,
-                               to_jz4740_dma_desc(vdesc), 0);
-       } else {
-               state->residue = 0;
-       }
-       spin_unlock_irqrestore(&chan->vchan.lock, flags);
-
-       return status;
-}
-
-static void jz4740_dma_free_chan_resources(struct dma_chan *c)
-{
-       vchan_free_chan_resources(to_virt_chan(c));
-}
-
-static void jz4740_dma_desc_free(struct virt_dma_desc *vdesc)
-{
-       kfree(container_of(vdesc, struct jz4740_dma_desc, vdesc));
-}
-
-#define JZ4740_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
-       BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
-
-static int jz4740_dma_probe(struct platform_device *pdev)
-{
-       struct jz4740_dmaengine_chan *chan;
-       struct jz4740_dma_dev *dmadev;
-       struct dma_device *dd;
-       unsigned int i;
-       struct resource *res;
-       int ret;
-       int irq;
-
-       dmadev = devm_kzalloc(&pdev->dev, sizeof(*dmadev), GFP_KERNEL);
-       if (!dmadev)
-               return -EINVAL;
-
-       dd = &dmadev->ddev;
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       dmadev->base = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(dmadev->base))
-               return PTR_ERR(dmadev->base);
-
-       dmadev->clk = clk_get(&pdev->dev, "dma");
-       if (IS_ERR(dmadev->clk))
-               return PTR_ERR(dmadev->clk);
-
-       clk_prepare_enable(dmadev->clk);
-
-       dma_cap_set(DMA_SLAVE, dd->cap_mask);
-       dma_cap_set(DMA_CYCLIC, dd->cap_mask);
-       dd->device_free_chan_resources = jz4740_dma_free_chan_resources;
-       dd->device_tx_status = jz4740_dma_tx_status;
-       dd->device_issue_pending = jz4740_dma_issue_pending;
-       dd->device_prep_slave_sg = jz4740_dma_prep_slave_sg;
-       dd->device_prep_dma_cyclic = jz4740_dma_prep_dma_cyclic;
-       dd->device_config = jz4740_dma_slave_config;
-       dd->device_terminate_all = jz4740_dma_terminate_all;
-       dd->src_addr_widths = JZ4740_DMA_BUSWIDTHS;
-       dd->dst_addr_widths = JZ4740_DMA_BUSWIDTHS;
-       dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
-       dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
-       dd->dev = &pdev->dev;
-       INIT_LIST_HEAD(&dd->channels);
-
-       for (i = 0; i < JZ_DMA_NR_CHANS; i++) {
-               chan = &dmadev->chan[i];
-               chan->id = i;
-               chan->vchan.desc_free = jz4740_dma_desc_free;
-               vchan_init(&chan->vchan, dd);
-       }
-
-       ret = dma_async_device_register(dd);
-       if (ret)
-               goto err_clk;
-
-       irq = platform_get_irq(pdev, 0);
-       ret = request_irq(irq, jz4740_dma_irq, 0, dev_name(&pdev->dev), dmadev);
-       if (ret)
-               goto err_unregister;
-
-       platform_set_drvdata(pdev, dmadev);
-
-       return 0;
-
-err_unregister:
-       dma_async_device_unregister(dd);
-err_clk:
-       clk_disable_unprepare(dmadev->clk);
-       return ret;
-}
-
-static void jz4740_cleanup_vchan(struct dma_device *dmadev)
-{
-       struct jz4740_dmaengine_chan *chan, *_chan;
-
-       list_for_each_entry_safe(chan, _chan,
-                               &dmadev->channels, vchan.chan.device_node) {
-               list_del(&chan->vchan.chan.device_node);
-               tasklet_kill(&chan->vchan.task);
-       }
-}
-
-
-static int jz4740_dma_remove(struct platform_device *pdev)
-{
-       struct jz4740_dma_dev *dmadev = platform_get_drvdata(pdev);
-       int irq = platform_get_irq(pdev, 0);
-
-       free_irq(irq, dmadev);
-
-       jz4740_cleanup_vchan(&dmadev->ddev);
-       dma_async_device_unregister(&dmadev->ddev);
-       clk_disable_unprepare(dmadev->clk);
-
-       return 0;
-}
-
-static struct platform_driver jz4740_dma_driver = {
-       .probe = jz4740_dma_probe,
-       .remove = jz4740_dma_remove,
-       .driver = {
-               .name = "jz4740-dma",
-       },
-};
-module_platform_driver(jz4740_dma_driver);
-
-MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
-MODULE_DESCRIPTION("JZ4740 DMA driver");
-MODULE_LICENSE("GPL v2");
index 2a2603b..417dad6 100644 (file)
@@ -466,6 +466,13 @@ config EDAC_SIFIVE
        help
          Support for error detection and correction on the SiFive SoCs.
 
+config EDAC_ARMADA_XP
+       bool "Marvell Armada XP DDR and L2 Cache ECC"
+       depends on MACH_MVEBU_V7
+       help
+         Support for error correction and detection on the Marvell Armada XP
+         DDR RAM and L2 cache controllers.
+
 config EDAC_SYNOPSYS
        tristate "Synopsys DDR Memory Controller"
        depends on ARCH_ZYNQ || ARCH_ZYNQMP
index d265ff9..d77200c 100644 (file)
@@ -80,6 +80,7 @@ obj-$(CONFIG_EDAC_THUNDERX)           += thunderx_edac.o
 
 obj-$(CONFIG_EDAC_ALTERA)              += altera_edac.o
 obj-$(CONFIG_EDAC_SIFIVE)              += sifive_edac.o
+obj-$(CONFIG_EDAC_ARMADA_XP)           += armada_xp_edac.o
 obj-$(CONFIG_EDAC_SYNOPSYS)            += synopsys_edac.o
 obj-$(CONFIG_EDAC_XGENE)               += xgene_edac.o
 obj-$(CONFIG_EDAC_TI)                  += ti_edac.o
diff --git a/drivers/edac/armada_xp_edac.c b/drivers/edac/armada_xp_edac.c
new file mode 100644 (file)
index 0000000..7f227bd
--- /dev/null
@@ -0,0 +1,635 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017 Pengutronix, Jan Luebbe <kernel@pengutronix.de>
+ */
+
+#include <linux/kernel.h>
+#include <linux/edac.h>
+#include <linux/of_platform.h>
+
+#include <asm/hardware/cache-l2x0.h>
+#include <asm/hardware/cache-aurora-l2.h>
+
+#include "edac_mc.h"
+#include "edac_device.h"
+#include "edac_module.h"
+
+/************************ EDAC MC (DDR RAM) ********************************/
+
+#define SDRAM_NUM_CS 4
+
+#define SDRAM_CONFIG_REG        0x0
+#define SDRAM_CONFIG_ECC_MASK         BIT(18)
+#define SDRAM_CONFIG_REGISTERED_MASK  BIT(17)
+#define SDRAM_CONFIG_BUS_WIDTH_MASK   BIT(15)
+
+/*
+ * SDRAM Address Control register and its per-chip-select fields. The cs
+ * argument is parenthesized in every expansion so that an expression
+ * argument (e.g. i + 1) is evaluated as a unit.
+ */
+#define SDRAM_ADDR_CTRL_REG     0x10
+#define SDRAM_ADDR_CTRL_SIZE_HIGH_OFFSET(cs) (20 + (cs))
+#define SDRAM_ADDR_CTRL_SIZE_HIGH_MASK(cs)   (0x1 << SDRAM_ADDR_CTRL_SIZE_HIGH_OFFSET(cs))
+#define SDRAM_ADDR_CTRL_ADDR_SEL_MASK(cs)    BIT(16 + (cs))
+#define SDRAM_ADDR_CTRL_SIZE_LOW_OFFSET(cs)  ((cs) * 4 + 2)
+#define SDRAM_ADDR_CTRL_SIZE_LOW_MASK(cs)    (0x3 << SDRAM_ADDR_CTRL_SIZE_LOW_OFFSET(cs))
+#define SDRAM_ADDR_CTRL_STRUCT_OFFSET(cs)    ((cs) * 4)
+#define SDRAM_ADDR_CTRL_STRUCT_MASK(cs)      (0x3 << SDRAM_ADDR_CTRL_STRUCT_OFFSET(cs))
+
+#define SDRAM_ERR_DATA_H_REG    0x40
+#define SDRAM_ERR_DATA_L_REG    0x44
+
+#define SDRAM_ERR_RECV_ECC_REG  0x48
+#define SDRAM_ERR_RECV_ECC_VALUE_MASK 0xff
+
+#define SDRAM_ERR_CALC_ECC_REG  0x4c
+#define SDRAM_ERR_CALC_ECC_ROW_OFFSET 8
+#define SDRAM_ERR_CALC_ECC_ROW_MASK   (0xffff << SDRAM_ERR_CALC_ECC_ROW_OFFSET)
+#define SDRAM_ERR_CALC_ECC_VALUE_MASK 0xff
+
+#define SDRAM_ERR_ADDR_REG      0x50
+#define SDRAM_ERR_ADDR_BANK_OFFSET    23
+#define SDRAM_ERR_ADDR_BANK_MASK      (0x7 << SDRAM_ERR_ADDR_BANK_OFFSET)
+#define SDRAM_ERR_ADDR_COL_OFFSET     8
+#define SDRAM_ERR_ADDR_COL_MASK       (0x7fff << SDRAM_ERR_ADDR_COL_OFFSET)
+#define SDRAM_ERR_ADDR_CS_OFFSET      1
+#define SDRAM_ERR_ADDR_CS_MASK        (0x3 << SDRAM_ERR_ADDR_CS_OFFSET)
+#define SDRAM_ERR_ADDR_TYPE_MASK      BIT(0)
+
+#define SDRAM_ERR_CTRL_REG      0x54
+#define SDRAM_ERR_CTRL_THR_OFFSET     16
+#define SDRAM_ERR_CTRL_THR_MASK       (0xff << SDRAM_ERR_CTRL_THR_OFFSET)
+#define SDRAM_ERR_CTRL_PROP_MASK      BIT(9)
+
+#define SDRAM_ERR_SBE_COUNT_REG 0x58
+#define SDRAM_ERR_DBE_COUNT_REG 0x5c
+
+#define SDRAM_ERR_CAUSE_ERR_REG 0xd0
+#define SDRAM_ERR_CAUSE_MSG_REG 0xd8
+#define SDRAM_ERR_CAUSE_DBE_MASK      BIT(1)
+#define SDRAM_ERR_CAUSE_SBE_MASK      BIT(0)
+
+#define SDRAM_RANK_CTRL_REG 0x1e0
+#define SDRAM_RANK_CTRL_EXIST_MASK(cs) BIT(cs)
+
+/* Per-controller private data; axp_mc_probe() obtains it from mci->pvt_info. */
+struct axp_mc_drvdata {
+       /* mapped SDRAM controller registers */
+       void __iomem *base;
+       /* width in bytes */
+       unsigned int width;
+       /* bank interleaving, per chip select (from the ADDR_SEL bits) */
+       bool cs_addr_sel[SDRAM_NUM_CS];
+
+       /* buffer for the error detail string built in axp_mc_check() */
+       char msg[128];
+};
+
+/*
+ * Rebuild an address from the chip select, bank, row and column captured by
+ * the controller.
+ *
+ * Derived from "DRAM Address Multiplexing" in the ARMADA XP Functional Spec.
+ *
+ * @drvdata: supplies the bus width in bytes and the per-CS interleaving flag
+ * @cs:      chip select, used only to look up the interleaving flag
+ * @bank:    bank number (low 3 bits used)
+ * @row:     row number
+ * @col:     column number (low 10 bits used)
+ *
+ * Any width other than 8 or 4 bytes is treated as a 16 bit bus.
+ */
+static uint32_t axp_mc_calc_address(struct axp_mc_drvdata *drvdata,
+                                   uint8_t cs, uint8_t bank, uint16_t row,
+                                   uint16_t col)
+{
+       /*
+        * Widen to 32 bit before shifting: the narrow arguments would
+        * otherwise be promoted to int and shifted into the sign bit.
+        */
+       const uint32_t r = row;
+       const uint32_t b = bank & 0x7;
+       const uint32_t c = col & 0x3ff;
+
+       if (drvdata->width == 8) {
+               /* 64 bit */
+               if (drvdata->cs_addr_sel[cs])
+                       /* bank interleaved */
+                       return ((r & 0xfff8) << 16) |
+                              (b << 16) |
+                              ((r & 0x7) << 13) |
+                              (c << 3);
+               else
+                       return ((r & 0xffff) << 16) |
+                              (b << 13) |
+                              (c << 3);
+       } else if (drvdata->width == 4) {
+               /* 32 bit */
+               if (drvdata->cs_addr_sel[cs])
+                       /* bank interleaved */
+                       return ((r & 0xfff0) << 15) |
+                              (b << 16) |
+                              ((r & 0xf) << 12) |
+                              (c << 2);
+               else
+                       return ((r & 0xffff) << 15) |
+                              (b << 12) |
+                              (c << 2);
+       } else {
+               /* 16 bit */
+               if (drvdata->cs_addr_sel[cs])
+                       /* bank interleaved */
+                       return ((r & 0xffe0) << 14) |
+                              (b << 16) |
+                              ((r & 0x1f) << 11) |
+                              (c << 1);
+               else
+                       return ((r & 0xffff) << 14) |
+                              (b << 11) |
+                              (c << 1);
+       }
+}
+
+/*
+ * Polling callback installed as mci->edac_check.
+ *
+ * Reads the error capture, counter and cause registers, writes the cause
+ * registers back to clear them and zeroes any non-zero counter. If errors
+ * were counted, the most recent one is reported with row/bank/column/chip
+ * select details and the reconstructed address; all earlier ones are
+ * reported as a bare count because no details are available for them.
+ */
+static void axp_mc_check(struct mem_ctl_info *mci)
+{
+       struct axp_mc_drvdata *drvdata = mci->pvt_info;
+       uint32_t data_h, data_l, recv_ecc, calc_ecc, addr;
+       uint32_t cnt_sbe, cnt_dbe, cause_err, cause_msg;
+       uint32_t row_val, col_val, bank_val, addr_val;
+       uint8_t syndrome_val, cs_val;
+       enum hw_event_mc_err_type type;
+       bool is_dbe;
+
+       /* data_h/data_l and the cause values are read but not used further */
+       data_h    = readl(drvdata->base + SDRAM_ERR_DATA_H_REG);
+       data_l    = readl(drvdata->base + SDRAM_ERR_DATA_L_REG);
+       recv_ecc  = readl(drvdata->base + SDRAM_ERR_RECV_ECC_REG);
+       calc_ecc  = readl(drvdata->base + SDRAM_ERR_CALC_ECC_REG);
+       addr      = readl(drvdata->base + SDRAM_ERR_ADDR_REG);
+       cnt_sbe   = readl(drvdata->base + SDRAM_ERR_SBE_COUNT_REG);
+       cnt_dbe   = readl(drvdata->base + SDRAM_ERR_DBE_COUNT_REG);
+       cause_err = readl(drvdata->base + SDRAM_ERR_CAUSE_ERR_REG);
+       cause_msg = readl(drvdata->base + SDRAM_ERR_CAUSE_MSG_REG);
+
+       /* clear cause registers */
+       writel(~(SDRAM_ERR_CAUSE_DBE_MASK | SDRAM_ERR_CAUSE_SBE_MASK),
+              drvdata->base + SDRAM_ERR_CAUSE_ERR_REG);
+       writel(~(SDRAM_ERR_CAUSE_DBE_MASK | SDRAM_ERR_CAUSE_SBE_MASK),
+              drvdata->base + SDRAM_ERR_CAUSE_MSG_REG);
+
+       /* clear error counter registers */
+       if (cnt_sbe)
+               writel(0, drvdata->base + SDRAM_ERR_SBE_COUNT_REG);
+       if (cnt_dbe)
+               writel(0, drvdata->base + SDRAM_ERR_DBE_COUNT_REG);
+
+       if (!cnt_sbe && !cnt_dbe)
+               return;
+
+       /* type bit of the captured address: clear = SBE, set = DBE */
+       is_dbe = !!(addr & SDRAM_ERR_ADDR_TYPE_MASK);
+       type = is_dbe ? HW_EVENT_ERR_UNCORRECTED : HW_EVENT_ERR_CORRECTED;
+
+       /* the captured error is reported with details below, not in bulk */
+       if (!is_dbe) {
+               if (cnt_sbe)
+                       cnt_sbe--;
+               else
+                       dev_warn(mci->pdev, "inconsistent SBE count detected\n");
+       } else {
+               if (cnt_dbe)
+                       cnt_dbe--;
+               else
+                       dev_warn(mci->pdev, "inconsistent DBE count detected\n");
+       }
+
+       /* report earlier errors */
+       if (cnt_sbe)
+               edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+                                    cnt_sbe, /* error count */
+                                    0, 0, 0, /* pfn, offset, syndrome */
+                                    -1, -1, -1, /* top, mid, low layer */
+                                    mci->ctl_name,
+                                    "details unavailable (multiple errors)");
+       if (cnt_dbe)
+               edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+                                    cnt_dbe, /* error count */
+                                    0, 0, 0, /* pfn, offset, syndrome */
+                                    -1, -1, -1, /* top, mid, low layer */
+                                    mci->ctl_name,
+                                    "details unavailable (multiple errors)");
+
+       /* report details for most recent error */
+       cs_val   = (addr & SDRAM_ERR_ADDR_CS_MASK) >> SDRAM_ERR_ADDR_CS_OFFSET;
+       bank_val = (addr & SDRAM_ERR_ADDR_BANK_MASK) >> SDRAM_ERR_ADDR_BANK_OFFSET;
+       row_val  = (calc_ecc & SDRAM_ERR_CALC_ECC_ROW_MASK) >> SDRAM_ERR_CALC_ECC_ROW_OFFSET;
+       col_val  = (addr & SDRAM_ERR_ADDR_COL_MASK) >> SDRAM_ERR_ADDR_COL_OFFSET;
+       /* low byte of received XOR calculated ECC */
+       syndrome_val = (recv_ecc ^ calc_ecc) & 0xff;
+       addr_val = axp_mc_calc_address(drvdata, cs_val, bank_val, row_val,
+                                      col_val);
+
+       /* bounded by the buffer size, so the text can never overrun msg[] */
+       snprintf(drvdata->msg, sizeof(drvdata->msg),
+                "row=0x%04x bank=0x%x col=0x%04x cs=%d",
+                row_val, bank_val, col_val, cs_val);
+
+       edac_mc_handle_error(type, mci,
+                            1, /* error count */
+                            addr_val >> PAGE_SHIFT,
+                            addr_val & ~PAGE_MASK,
+                            syndrome_val,
+                            cs_val, -1, -1, /* top, mid, low layer */
+                            mci->ctl_name, drvdata->msg);
+}
+
+/*
+ * Read the controller configuration: record the bus width in the driver
+ * data and, for every chip select marked as existing in the rank control
+ * register, record its address mapping mode and fill in the dimm_info.
+ */
+static void axp_mc_read_config(struct mem_ctl_info *mci)
+{
+       /* pages per chip select, indexed by the decoded size field */
+       static const unsigned int pages_by_size[] = {
+               524288,         /* 0: 2GBit */
+               65536,          /* 1: 256MBit */
+               131072,         /* 2: 512MBit */
+               262144,         /* 3: 1GBit */
+               1048576,        /* 4: 4GBit */
+               2097152,        /* 5: 8GBit */
+       };
+       struct axp_mc_drvdata *priv = mci->pvt_info;
+       uint32_t cfg, actl, ranks;
+       unsigned int cs, dev_struct, size_hi, size_lo, size_code;
+       struct dimm_info *dimm;
+
+       cfg = readl(priv->base + SDRAM_CONFIG_REG);
+       /* width in bytes: 8 for a 64 bit bus, 4 for a 32 bit bus */
+       priv->width = (cfg & SDRAM_CONFIG_BUS_WIDTH_MASK) ? 8 : 4;
+
+       actl = readl(priv->base + SDRAM_ADDR_CTRL_REG);
+       ranks = readl(priv->base + SDRAM_RANK_CTRL_REG);
+
+       for (cs = 0; cs < SDRAM_NUM_CS; cs++) {
+               /* leave chip selects without a rank untouched */
+               if (!(ranks & SDRAM_RANK_CTRL_EXIST_MASK(cs)))
+                       continue;
+
+               dimm = mci->dimms[cs];
+
+               priv->cs_addr_sel[cs] =
+                       (actl & SDRAM_ADDR_CTRL_ADDR_SEL_MASK(cs)) != 0;
+
+               dev_struct = (actl & SDRAM_ADDR_CTRL_STRUCT_MASK(cs)) >>
+                            SDRAM_ADDR_CTRL_STRUCT_OFFSET(cs);
+
+               /* the high size bit becomes bit 2 above the two low bits */
+               size_hi = (actl & SDRAM_ADDR_CTRL_SIZE_HIGH_MASK(cs)) >>
+                         (SDRAM_ADDR_CTRL_SIZE_HIGH_OFFSET(cs) - 2);
+               size_lo = (actl & SDRAM_ADDR_CTRL_SIZE_LOW_MASK(cs)) >>
+                         SDRAM_ADDR_CTRL_SIZE_LOW_OFFSET(cs);
+               size_code = size_hi | size_lo;
+
+               /* codes without a table entry leave nr_pages as it was */
+               if (size_code < ARRAY_SIZE(pages_by_size))
+                       dimm->nr_pages = pages_by_size[size_code];
+
+               dimm->grain = 8;
+               if (dev_struct)
+                       dimm->dtype = DEV_X16;
+               else
+                       dimm->dtype = DEV_X8;
+               if (cfg & SDRAM_CONFIG_REGISTERED_MASK)
+                       dimm->mtype = MEM_RDDR3;
+               else
+                       dimm->mtype = MEM_DDR3;
+               dimm->edac_mode = EDAC_SECDED;
+       }
+}
+
+/*
+ * Device tree match table for the SDRAM controller. axp_mc_probe() uses the
+ * matched compatible string as mci->ctl_name.
+ */
+static const struct of_device_id axp_mc_of_match[] = {
+       {.compatible = "marvell,armada-xp-sdram-controller",},
+       {},
+};
+MODULE_DEVICE_TABLE(of, axp_mc_of_match);
+
+/*
+ * Probe the SDRAM controller.
+ *
+ * Maps the register resource, refuses to bind when ECC is not enabled in
+ * the controller configuration, then allocates an EDAC memory controller
+ * with a single chip-select layer, resets the error reporting registers and
+ * registers the controller.
+ *
+ * Returns 0 on success, -ENODEV without a memory resource, the ioremap
+ * error, -EINVAL when ECC is disabled or registration fails, or -ENOMEM
+ * when the controller cannot be allocated.
+ */
+static int axp_mc_probe(struct platform_device *pdev)
+{
+       struct axp_mc_drvdata *drvdata;
+       struct edac_mc_layer layers[1];
+       const struct of_device_id *id;
+       struct mem_ctl_info *mci;
+       struct resource *r;
+       void __iomem *base;
+       uint32_t config;
+
+       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!r) {
+               dev_err(&pdev->dev, "Unable to get mem resource\n");
+               return -ENODEV;
+       }
+
+       base = devm_ioremap_resource(&pdev->dev, r);
+       if (IS_ERR(base)) {
+               dev_err(&pdev->dev, "Unable to map regs\n");
+               return PTR_ERR(base);
+       }
+
+       /* nothing to monitor unless the controller has ECC switched on */
+       config = readl(base + SDRAM_CONFIG_REG);
+       if (!(config & SDRAM_CONFIG_ECC_MASK)) {
+               dev_warn(&pdev->dev, "SDRAM ECC is not enabled\n");
+               return -EINVAL;
+       }
+
+       layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+       layers[0].size = SDRAM_NUM_CS;
+       layers[0].is_virt_csrow = true;
+
+       mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*drvdata));
+       if (!mci)
+               return -ENOMEM;
+
+       drvdata = mci->pvt_info;
+       drvdata->base = base;
+       mci->pdev = &pdev->dev;
+       platform_set_drvdata(pdev, mci);
+
+       id = of_match_device(axp_mc_of_match, &pdev->dev);
+       mci->edac_check = axp_mc_check;
+       mci->mtype_cap = MEM_FLAG_DDR3;
+       mci->edac_cap = EDAC_FLAG_SECDED;
+       mci->mod_name = pdev->dev.driver->name;
+       mci->ctl_name = id ? id->compatible : "unknown";
+       mci->dev_name = dev_name(&pdev->dev);
+       mci->scrub_mode = SCRUB_NONE;
+
+       axp_mc_read_config(mci);
+
+       /* These SoCs have a reduced width bus */
+       if (of_machine_is_compatible("marvell,armada380") ||
+           of_machine_is_compatible("marvell,armadaxp-98dx3236"))
+               drvdata->width /= 2;
+
+       /* configure SBE threshold */
+       /* it seems that SBEs are not captured otherwise */
+       writel(1 << SDRAM_ERR_CTRL_THR_OFFSET,
+              drvdata->base + SDRAM_ERR_CTRL_REG);
+
+       /* clear cause registers */
+       writel(~(SDRAM_ERR_CAUSE_DBE_MASK | SDRAM_ERR_CAUSE_SBE_MASK),
+              drvdata->base + SDRAM_ERR_CAUSE_ERR_REG);
+       writel(~(SDRAM_ERR_CAUSE_DBE_MASK | SDRAM_ERR_CAUSE_SBE_MASK),
+              drvdata->base + SDRAM_ERR_CAUSE_MSG_REG);
+
+       /* clear counter registers */
+       writel(0, drvdata->base + SDRAM_ERR_SBE_COUNT_REG);
+       writel(0, drvdata->base + SDRAM_ERR_DBE_COUNT_REG);
+
+       if (edac_mc_add_mc(mci)) {
+               edac_mc_free(mci);
+               return -EINVAL;
+       }
+       edac_op_state = EDAC_OPSTATE_POLL;
+
+       return 0;
+}
+
+/*
+ * Undo axp_mc_probe(): unregister the memory controller from EDAC, free it
+ * and drop the driver data pointer. Always returns 0.
+ */
+static int axp_mc_remove(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct mem_ctl_info *mci;
+
+       mci = platform_get_drvdata(pdev);
+
+       edac_mc_del_mc(dev);
+       edac_mc_free(mci);
+       platform_set_drvdata(pdev, NULL);
+
+       return 0;
+}
+
+/* Platform driver for the SDRAM controller; matched via axp_mc_of_match. */
+static struct platform_driver axp_mc_driver = {
+       .probe = axp_mc_probe,
+       .remove = axp_mc_remove,
+       .driver = {
+               .name = "armada_xp_mc_edac",
+               .of_match_table = of_match_ptr(axp_mc_of_match),
+       },
+};
+
+/************************ EDAC Device (L2 Cache) ***************************/
+
+/* Private data of the L2 cache EDAC device, reached through dci->pvt_info. */
+struct aurora_l2_drvdata {
+       /* register base for all AURORA_* register accesses */
+       void __iomem *base;
+
+       /* buffer in which aurora_l2_check() formats the error description */
+       char msg[128];
+
+       /* error injection via debugfs */
+       /* aurora_l2_inject() writes these to the injection registers */
+       uint32_t inject_addr;
+       uint32_t inject_mask;
+       uint8_t inject_ctl;
+
+       /* NOTE(review): presumably the debugfs entry for the fields above - confirm */
+       struct dentry *debugfs;
+};
+
+#ifdef CONFIG_EDAC_DEBUG
+/*
+ * Program the L2 error injection registers from the values held in drvdata.
+ * The stored address and control values are first masked down to their
+ * register fields, and the masked values are kept in drvdata.
+ */
+static void aurora_l2_inject(struct aurora_l2_drvdata *drvdata)
+{
+       void __iomem *regs = drvdata->base;
+       uint32_t ctl;
+
+       drvdata->inject_addr &= AURORA_ERR_INJECT_CTL_ADDR_MASK;
+       drvdata->inject_ctl &= AURORA_ERR_INJECT_CTL_EN_MASK;
+       ctl = drvdata->inject_addr | drvdata->inject_ctl;
+
+       /* zero the control register before loading the new mask */
+       writel(0, regs + AURORA_ERR_INJECT_CTL_REG);
+       writel(drvdata->inject_mask, regs + AURORA_ERR_INJECT_MASK_REG);
+       writel(ctl, regs + AURORA_ERR_INJECT_CTL_REG);
+}
+#endif
+
+/*
+ * Read the L2 error counters and capture registers and report what they hold
+ * to the EDAC core.
+ *
+ * The correctable (CE) and uncorrectable (UE) counts are extracted from
+ * AURORA_ERR_CNT_REG, and the counter register is cleared when either count
+ * is non-zero. If the attribute capture register is flagged valid, one error
+ * is reported with a description built from the captured source,
+ * transaction, error type, address, index and way, and the capture is then
+ * cleared. Every remaining counted error is reported without details, as a
+ * UE or a CE according to the counter it came from.
+ */
+static void aurora_l2_check(struct edac_device_ctl_info *dci)
+{
+       struct aurora_l2_drvdata *drvdata = dci->pvt_info;
+       uint32_t cnt, src, txn, err, attr_cap, addr_cap, way_cap;
+       unsigned int cnt_ce, cnt_ue;
+       char *msg = drvdata->msg;
+       size_t size = sizeof(drvdata->msg);
+       size_t len = 0;
+
+       cnt = readl(drvdata->base + AURORA_ERR_CNT_REG);
+       attr_cap = readl(drvdata->base + AURORA_ERR_ATTR_CAP_REG);
+       addr_cap = readl(drvdata->base + AURORA_ERR_ADDR_CAP_REG);
+       way_cap = readl(drvdata->base + AURORA_ERR_WAY_CAP_REG);
+
+       cnt_ce = (cnt & AURORA_ERR_CNT_CE_MASK) >> AURORA_ERR_CNT_CE_OFFSET;
+       cnt_ue = (cnt & AURORA_ERR_CNT_UE_MASK) >> AURORA_ERR_CNT_UE_OFFSET;
+       /* clear error counter registers */
+       if (cnt_ce || cnt_ue)
+               writel(AURORA_ERR_CNT_CLR, drvdata->base + AURORA_ERR_CNT_REG);
+
+       /* nothing captured: only the bare counts can be reported */
+       if (!(attr_cap & AURORA_ERR_ATTR_CAP_VALID))
+               goto clear_remaining;
+
+       src = (attr_cap & AURORA_ERR_ATTR_SRC_MSK) >> AURORA_ERR_ATTR_SRC_OFF;
+       if (src <= 3)
+               len += snprintf(msg+len, size-len, "src=CPU%d ", src);
+       else
+               len += snprintf(msg+len, size-len, "src=IO ");
+
+       /* transaction codes other than 0..4 add nothing to the message */
+       txn =  (attr_cap & AURORA_ERR_ATTR_TXN_MSK) >> AURORA_ERR_ATTR_TXN_OFF;
+       switch (txn) {
+       case 0:
+               len += snprintf(msg+len, size-len, "txn=Data-Read ");
+               break;
+       case 1:
+               len += snprintf(msg+len, size-len, "txn=Isn-Read ");
+               break;
+       case 2:
+               len += snprintf(msg+len, size-len, "txn=Clean-Flush ");
+               break;
+       case 3:
+               len += snprintf(msg+len, size-len, "txn=Eviction ");
+               break;
+       case 4:
+               len += snprintf(msg+len, size-len,
+                               "txn=Read-Modify-Write ");
+               break;
+       }
+
+       /* error codes other than 0..2 add nothing to the message */
+       err = (attr_cap & AURORA_ERR_ATTR_ERR_MSK) >> AURORA_ERR_ATTR_ERR_OFF;
+       switch (err) {
+       case 0:
+               len += snprintf(msg+len, size-len, "err=CorrECC ");
+               break;
+       case 1:
+               len += snprintf(msg+len, size-len, "err=UnCorrECC ");
+               break;
+       case 2:
+               len += snprintf(msg+len, size-len, "err=TagParity ");
+               break;
+       }
+
+       len += snprintf(msg+len, size-len, "addr=0x%x ", addr_cap & AURORA_ERR_ADDR_CAP_ADDR_MASK);
+       len += snprintf(msg+len, size-len, "index=0x%x ", (way_cap & AURORA_ERR_WAY_IDX_MSK) >> AURORA_ERR_WAY_IDX_OFF);
+       len += snprintf(msg+len, size-len, "way=0x%x", (way_cap & AURORA_ERR_WAY_CAP_WAY_MASK) >> AURORA_ERR_WAY_CAP_WAY_OFFSET);
+
+       /* clear error capture registers */
+       writel(AURORA_ERR_ATTR_CAP_VALID, drvdata->base + AURORA_ERR_ATTR_CAP_REG);
+       /*
+        * The captured error is reported with details here, so take it out
+        * of the matching counter to avoid reporting it a second time below.
+        */
+       if (err) {
+               /* UnCorrECC or TagParity */
+               if (cnt_ue)
+                       cnt_ue--;
+               edac_device_handle_ue(dci, 0, 0, drvdata->msg);
+       } else {
+               if (cnt_ce)
+                       cnt_ce--;
+               edac_device_handle_ce(dci, 0, 0, drvdata->msg);
+       }
+
+clear_remaining:
+       /* report remaining errors */
+       while (cnt_ue--)
+               edac_device_handle_ue(dci, 0, 0, "details unavailable (multiple errors)");
+       /* leftover correctable errors must be counted as CE, not UE */
+       while (cnt_ce--)
+               edac_device_handle_ce(dci, 0, 0, "details unavailable (multiple errors)");
+}
+
+/*
+ * EDAC polling callback: check the L2 error registers and, when
+ * CONFIG_EDAC_DEBUG is set, re-program the error-injection registers
+ * afterwards.
+ */
+static void aurora_l2_poll(struct edac_device_ctl_info *dci)
+{
+       aurora_l2_check(dci);
+
+#ifdef CONFIG_EDAC_DEBUG
+       /* pvt_info holds this device's struct aurora_l2_drvdata */
+       aurora_l2_inject(dci->pvt_info);
+#endif
+}
+
+/* Device-tree compatible strings handled by the Aurora L2 EDAC driver. */
+static const struct of_device_id aurora_l2_of_match[] = {
+       {.compatible = "marvell,aurora-system-cache",},
+       {},
+};
+MODULE_DEVICE_TABLE(of, aurora_l2_of_match);
+
+/*
+ * Platform driver probe callback for the Aurora L2 cache.
+ *
+ * Maps the first memory resource of @pdev, warns when tag parity or data ECC
+ * is not enabled in the auxiliary control register, allocates an EDAC device
+ * control structure whose private area is the driver data, clears the error
+ * counter and capture registers and registers the device with the EDAC core.
+ * With CONFIG_EDAC_DEBUG, debugfs entries for the error-injection address,
+ * mask and control values are created as well.
+ *
+ * Returns 0 on success, -ENODEV when the memory resource is missing, the
+ * error from devm_ioremap_resource() when mapping fails, -ENOMEM when the
+ * control structure cannot be allocated and -EINVAL when the EDAC core
+ * rejects the device.
+ */
+static int aurora_l2_probe(struct platform_device *pdev)
+{
+       struct aurora_l2_drvdata *drvdata;
+       struct edac_device_ctl_info *dci;
+       const struct of_device_id *id;
+       uint32_t l2x0_aux_ctrl;
+       void __iomem *base;
+       struct resource *r;
+
+       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!r) {
+               dev_err(&pdev->dev, "Unable to get mem resource\n");
+               return -ENODEV;
+       }
+
+       base = devm_ioremap_resource(&pdev->dev, r);
+       if (IS_ERR(base)) {
+               dev_err(&pdev->dev, "Unable to map regs\n");
+               return PTR_ERR(base);
+       }
+
+       /* missing protection is only warned about; probing continues */
+       l2x0_aux_ctrl = readl(base + L2X0_AUX_CTRL);
+       if (!(l2x0_aux_ctrl & AURORA_ACR_PARITY_EN))
+               dev_warn(&pdev->dev, "tag parity is not enabled\n");
+       if (!(l2x0_aux_ctrl & AURORA_ACR_ECC_EN))
+               dev_warn(&pdev->dev, "data ECC is not enabled\n");
+
+       dci = edac_device_alloc_ctl_info(sizeof(*drvdata),
+                                        "cpu", 1, "L", 1, 2, NULL, 0, 0);
+       if (!dci)
+               return -ENOMEM;
+
+       drvdata = dci->pvt_info;
+       drvdata->base = base;
+       dci->dev = &pdev->dev;
+       platform_set_drvdata(pdev, dci);
+
+       id = of_match_device(aurora_l2_of_match, &pdev->dev);
+       dci->edac_check = aurora_l2_poll;
+       dci->mod_name = pdev->dev.driver->name;
+       dci->ctl_name = id ? id->compatible : "unknown";
+       dci->dev_name = dev_name(&pdev->dev);
+
+       /* clear registers */
+       writel(AURORA_ERR_CNT_CLR, drvdata->base + AURORA_ERR_CNT_REG);
+       writel(AURORA_ERR_ATTR_CAP_VALID, drvdata->base + AURORA_ERR_ATTR_CAP_REG);
+
+       if (edac_device_add_device(dci)) {
+               edac_device_free_ctl_info(dci);
+               return -EINVAL;
+       }
+
+#ifdef CONFIG_EDAC_DEBUG
+       /* expose the injection parameters used by aurora_l2_inject() */
+       drvdata->debugfs = edac_debugfs_create_dir(dev_name(&pdev->dev));
+       if (drvdata->debugfs) {
+               edac_debugfs_create_x32("inject_addr", 0644,
+                                       drvdata->debugfs,
+                                       &drvdata->inject_addr);
+               edac_debugfs_create_x32("inject_mask", 0644,
+                                       drvdata->debugfs,
+                                       &drvdata->inject_mask);
+               edac_debugfs_create_x8("inject_ctl", 0644,
+                                      drvdata->debugfs, &drvdata->inject_ctl);
+       }
+#endif
+
+       return 0;
+}
+
+/*
+ * Platform driver remove callback: undo what aurora_l2_probe() set up.
+ */
+static int aurora_l2_remove(struct platform_device *pdev)
+{
+       struct edac_device_ctl_info *dci = platform_get_drvdata(pdev);
+#ifdef CONFIG_EDAC_DEBUG
+       struct aurora_l2_drvdata *drvdata = dci->pvt_info;
+
+       /* drop the debugfs directory created at probe time */
+       edac_debugfs_remove_recursive(drvdata->debugfs);
+#endif
+       /* unregister from the EDAC core before freeing the control structure */
+       edac_device_del_device(&pdev->dev);
+       edac_device_free_ctl_info(dci);
+       platform_set_drvdata(pdev, NULL);
+
+       return 0;
+}
+
+/* Platform driver binding the Aurora L2 probe/remove callbacks. */
+static struct platform_driver aurora_l2_driver = {
+       .probe = aurora_l2_probe,
+       .remove = aurora_l2_remove,
+       .driver = {
+               .name = "aurora_l2_edac",
+               .of_match_table = of_match_ptr(aurora_l2_of_match),
+       },
+};
+
+/************************ Driver registration ******************************/
+
+/* Platform drivers registered and unregistered together by this module. */
+static struct platform_driver * const drivers[] = {
+       &axp_mc_driver,
+       &aurora_l2_driver,
+};
+
+/*
+ * Module init: select polling mode and register the platform drivers listed
+ * in drivers[].
+ *
+ * Returns 0 on success or the error from platform_register_drivers(), so a
+ * failed registration is not reported as a successful load.
+ */
+static int __init armada_xp_edac_init(void)
+{
+       int res;
+
+       /* only polling is supported */
+       edac_op_state = EDAC_OPSTATE_POLL;
+
+       res = platform_register_drivers(drivers, ARRAY_SIZE(drivers));
+       if (res)
+               pr_warn("Armada XP EDAC drivers fail to register\n");
+
+       return res;
+}
+module_init(armada_xp_edac_init);
+
+/* Module exit: unregister every platform driver listed in drivers[]. */
+static void __exit armada_xp_edac_exit(void)
+{
+       platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
+}
+module_exit(armada_xp_edac_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Pengutronix");
+MODULE_DESCRIPTION("EDAC Drivers for Marvell Armada XP SDRAM and L2 Cache Controller");
index 1f94359..4804332 100644 (file)
@@ -138,3 +138,14 @@ void edac_debugfs_create_x16(const char *name, umode_t mode,
        debugfs_create_x16(name, mode, parent, value);
 }
 EXPORT_SYMBOL_GPL(edac_debugfs_create_x16);
+
+/*
+ * Wrapper for debugfs_create_x32(): creates the entry under @parent, or
+ * under the edac_debugfs directory when @parent is NULL.
+ */
+void edac_debugfs_create_x32(const char *name, umode_t mode,
+                            struct dentry *parent, u32 *value)
+{
+       struct dentry *dir = parent ? parent : edac_debugfs;
+
+       debugfs_create_x32(name, mode, dir, value);
+}
+EXPORT_SYMBOL_GPL(edac_debugfs_create_x32);
index b2f59ee..388427d 100644 (file)
@@ -82,6 +82,8 @@ void edac_debugfs_create_x8(const char *name, umode_t mode,
                            struct dentry *parent, u8 *value);
 void edac_debugfs_create_x16(const char *name, umode_t mode,
                             struct dentry *parent, u16 *value);
+void edac_debugfs_create_x32(const char *name, umode_t mode,
+                            struct dentry *parent, u32 *value);
 #else
 static inline void edac_debugfs_init(void)                                     { }
 static inline void edac_debugfs_exit(void)                                     { }
@@ -96,6 +98,8 @@ static inline void edac_debugfs_create_x8(const char *name, umode_t mode,
                                          struct dentry *parent, u8 *value)     { }
 static inline void edac_debugfs_create_x16(const char *name, umode_t mode,
                                           struct dentry *parent, u16 *value)   { }
+static inline void edac_debugfs_create_x32(const char *name, umode_t mode,
+                      struct dentry *parent, u32 *value)                       { }
 #endif
 
 /*
index 6468082..d03ed8e 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config BCM47XX_NVRAM
        bool "Broadcom NVRAM driver"
-       depends on BCM47XX || ARCH_BCM_5301X
+       depends on BCM47XX || ARCH_BCM_5301X || COMPILE_TEST
        help
          Broadcom home routers contain flash partition called "nvram" with all
          important hardware configuration as well as some minor user setup.
index 77eb746..da04fda 100644 (file)
@@ -96,7 +96,7 @@ found:
                nvram_len = size;
        }
        if (nvram_len >= NVRAM_SPACE) {
-               pr_err("nvram on flash (%i bytes) is bigger than the reserved space in memory, will just copy the first %i bytes\n",
+               pr_err("nvram on flash (%zu bytes) is bigger than the reserved space in memory, will just copy the first %i bytes\n",
                       nvram_len, NVRAM_SPACE - 1);
                nvram_len = NVRAM_SPACE - 1;
        }
@@ -148,8 +148,8 @@ static int nvram_init(void)
            header.len > sizeof(header)) {
                nvram_len = header.len;
                if (nvram_len >= NVRAM_SPACE) {
-                       pr_err("nvram on flash (%i bytes) is bigger than the reserved space in memory, will just copy the first %i bytes\n",
-                               header.len, NVRAM_SPACE);
+                       pr_err("nvram on flash (%zu bytes) is bigger than the reserved space in memory, will just copy the first %i bytes\n",
+                               nvram_len, NVRAM_SPACE);
                        nvram_len = NVRAM_SPACE - 1;
                }
 
index 8f1ab04..8d3e778 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/acpi.h>
 #include <linux/ucs2_string.h>
 #include <linux/memblock.h>
+#include <linux/security.h>
 
 #include <asm/early_ioremap.h>
 
@@ -221,6 +222,11 @@ static void generic_ops_unregister(void)
 static char efivar_ssdt[EFIVAR_SSDT_NAME_MAX] __initdata;
 static int __init efivar_ssdt_setup(char *str)
 {
+       int ret = security_locked_down(LOCKDOWN_ACPI_TABLES);
+
+       if (ret)
+               return ret;
+
        if (strlen(str) < sizeof(efivar_ssdt))
                memcpy(efivar_ssdt, str, strlen(str));
        else
index 869d47f..6c06876 100644 (file)
@@ -694,7 +694,7 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip,
 }
 
 static int mvebu_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                          struct pwm_state *state)
+                          const struct pwm_state *state)
 {
        struct mvebu_pwm *mvpwm = to_mvebu_pwm(chip);
        struct mvebu_gpio_chip *mvchip = mvpwm->mvchip;
index f6e5c02..2e98c01 100644 (file)
@@ -27,7 +27,9 @@ config DRM_AMDGPU_CIK
 config DRM_AMDGPU_USERPTR
        bool "Always enable userptr write support"
        depends on DRM_AMDGPU
-       depends on HMM_MIRROR
+       depends on MMU
+       select HMM_MIRROR
+       select MMU_NOTIFIER
        help
          This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
          isn't already selected to enabled full userptr support.
index 42b936b..6d021ec 100644 (file)
@@ -1103,7 +1103,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                alloc_flags = 0;
                if (!offset || !*offset)
                        return -EINVAL;
-               user_addr = *offset;
+               user_addr = untagged_addr(*offset);
        } else if (flags & (ALLOC_MEM_FLAGS_DOORBELL |
                        ALLOC_MEM_FLAGS_MMIO_REMAP)) {
                domain = AMDGPU_GEM_DOMAIN_GTT;
index 61bd103..5803fcb 100644 (file)
@@ -948,6 +948,7 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
        case AMD_IP_BLOCK_TYPE_UVD:
        case AMD_IP_BLOCK_TYPE_VCN:
        case AMD_IP_BLOCK_TYPE_VCE:
+       case AMD_IP_BLOCK_TYPE_SDMA:
                if (swsmu)
                        ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate);
                else
@@ -956,7 +957,6 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
                break;
        case AMD_IP_BLOCK_TYPE_GMC:
        case AMD_IP_BLOCK_TYPE_ACP:
-       case AMD_IP_BLOCK_TYPE_SDMA:
                ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
                                (adev)->powerplay.pp_handle, block_type, gate));
                break;
index 48a2070..264677a 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/vga_switcheroo.h>
 #include <drm/drm_probe_helper.h>
+#include <linux/mmu_notifier.h>
 
 #include "amdgpu.h"
 #include "amdgpu_irq.h"
@@ -1011,11 +1012,16 @@ static const struct pci_device_id pciidlist[] = {
        {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
        {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
        /* Navi14 */
-       {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+       {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
+       {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
+       {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
 
        /* Renoir */
        {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT},
 
+       /* Navi12 */
+       {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT},
+
        {0, 0, 0}
 };
 
@@ -1469,6 +1475,7 @@ static void __exit amdgpu_exit(void)
        amdgpu_unregister_atpx_handler();
        amdgpu_sync_fini();
        amdgpu_fence_slab_fini();
+       mmu_notifier_synchronize();
 }
 
 module_init(amdgpu_init);
index b174bd5..8ceb449 100644 (file)
@@ -291,6 +291,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
        uint32_t handle;
        int r;
 
+       args->addr = untagged_addr(args->addr);
+
        if (offset_in_page(args->addr | args->size))
                return -EINVAL;
 
index 7850084..6065583 100644 (file)
@@ -143,7 +143,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
        /* ring tests don't use a job */
        if (job) {
                vm = job->vm;
-               fence_ctx = job->base.s_fence->scheduled.context;
+               fence_ctx = job->base.s_fence ?
+                       job->base.s_fence->scheduled.context : 0;
        } else {
                vm = NULL;
                fence_ctx = 0;
index 0e2ec60..f614752 100644 (file)
@@ -677,6 +677,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
                if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK)
                        sh_num = 0xffffffff;
 
+               if (info->read_mmr_reg.count > 128)
+                       return -EINVAL;
+
                regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL);
                if (!regs)
                        return -ENOMEM;
index f1f8cdd..31d4deb 100644 (file)
@@ -195,13 +195,14 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
  * Block for operations on BOs to finish and mark pages as accessed and
  * potentially dirty.
  */
-static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
-                       const struct hmm_update *update)
+static int
+amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
+                             const struct mmu_notifier_range *update)
 {
        struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
        unsigned long start = update->start;
        unsigned long end = update->end;
-       bool blockable = update->blockable;
+       bool blockable = mmu_notifier_range_blockable(update);
        struct interval_tree_node *it;
 
        /* notification is exclusive, but interval is inclusive */
@@ -243,13 +244,14 @@ static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
  * necessitates evicting all user-mode queues of the process. The BOs
  * are restorted in amdgpu_mn_invalidate_range_end_hsa.
  */
-static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
-                       const struct hmm_update *update)
+static int
+amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
+                             const struct mmu_notifier_range *update)
 {
        struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
        unsigned long start = update->start;
        unsigned long end = update->end;
-       bool blockable = update->blockable;
+       bool blockable = mmu_notifier_range_blockable(update);
        struct interval_tree_node *it;
 
        /* notification is exclusive, but interval is inclusive */
@@ -482,6 +484,5 @@ void amdgpu_hmm_init_range(struct hmm_range *range)
                range->flags = hmm_range_flags;
                range->values = hmm_range_values;
                range->pfn_shift = PAGE_SHIFT;
-               INIT_LIST_HEAD(&range->list);
        }
 }
index 13b144c..dff41d0 100644 (file)
@@ -794,7 +794,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
        struct hmm_range *range;
        unsigned long i;
        uint64_t *pfns;
-       int retry = 0;
        int r = 0;
 
        if (!mm) /* Happens during process shutdown */
@@ -835,10 +834,11 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
                                0 : range->flags[HMM_PFN_WRITE];
        range->pfn_flags_mask = 0;
        range->pfns = pfns;
-       hmm_range_register(range, mirror, start,
-                          start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT);
+       range->start = start;
+       range->end = start + ttm->num_pages * PAGE_SIZE;
+
+       hmm_range_register(range, mirror);
 
-retry:
        /*
         * Just wait for range to be valid, safe to ignore return value as we
         * will use the return value of hmm_range_fault() below under the
@@ -847,24 +847,12 @@ retry:
        hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);
 
        down_read(&mm->mmap_sem);
-
-       r = hmm_range_fault(range, true);
-       if (unlikely(r < 0)) {
-               if (likely(r == -EAGAIN)) {
-                       /*
-                        * return -EAGAIN, mmap_sem is dropped
-                        */
-                       if (retry++ < MAX_RETRY_HMM_RANGE_FAULT)
-                               goto retry;
-                       else
-                               pr_err("Retry hmm fault too many times\n");
-               }
-
-               goto out_up_read;
-       }
-
+       r = hmm_range_fault(range, 0);
        up_read(&mm->mmap_sem);
 
+       if (unlikely(r < 0))
+               goto out_free_pfns;
+
        for (i = 0; i < ttm->num_pages; i++) {
                pages[i] = hmm_device_entry_to_page(range, pfns[i]);
                if (unlikely(!pages[i])) {
@@ -880,9 +868,6 @@ retry:
 
        return 0;
 
-out_up_read:
-       if (likely(r != -EAGAIN))
-               up_read(&mm->mmap_sem);
 out_free_pfns:
        hmm_range_unregister(range);
        kvfree(pfns);
index db28823..638c821 100644 (file)
@@ -70,6 +70,11 @@ MODULE_FIRMWARE("amdgpu/navi10_mec.bin");
 MODULE_FIRMWARE("amdgpu/navi10_mec2.bin");
 MODULE_FIRMWARE("amdgpu/navi10_rlc.bin");
 
+MODULE_FIRMWARE("amdgpu/navi14_ce_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_pfp_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_me_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec2_wks.bin");
 MODULE_FIRMWARE("amdgpu/navi14_ce.bin");
 MODULE_FIRMWARE("amdgpu/navi14_pfp.bin");
 MODULE_FIRMWARE("amdgpu/navi14_me.bin");
@@ -594,7 +599,8 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
 static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 {
        const char *chip_name;
-       char fw_name[30];
+       char fw_name[40];
+       char wks[10];
        int err;
        struct amdgpu_firmware_info *info = NULL;
        const struct common_firmware_header *header = NULL;
@@ -607,12 +613,16 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 
        DRM_DEBUG("\n");
 
+       memset(wks, 0, sizeof(wks));
        switch (adev->asic_type) {
        case CHIP_NAVI10:
                chip_name = "navi10";
                break;
        case CHIP_NAVI14:
                chip_name = "navi14";
+               if (!(adev->pdev->device == 0x7340 &&
+                     adev->pdev->revision != 0x00))
+                       snprintf(wks, sizeof(wks), "_wks");
                break;
        case CHIP_NAVI12:
                chip_name = "navi12";
@@ -621,7 +631,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
                BUG();
        }
 
-       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
+       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, wks);
        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
        if (err)
                goto out;
@@ -632,7 +642,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
-       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
+       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, wks);
        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
        if (err)
                goto out;
@@ -643,7 +653,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
-       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
+       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, wks);
        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
        if (err)
                goto out;
@@ -708,7 +718,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
        if (adev->gfx.rlc.is_rlc_v2_1)
                gfx_v10_0_init_rlc_ext_microcode(adev);
 
-       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
+       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks);
        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        if (err)
                goto out;
@@ -719,7 +729,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
-       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
+       snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, wks);
        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
        if (!err) {
                err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
index 83d45f9..dcadc73 100644 (file)
@@ -1650,7 +1650,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
 
        switch (adev->asic_type) {
        case CHIP_RAVEN:
-       case CHIP_RENOIR:
                gfx_v9_0_init_lbpw(adev);
                break;
        case CHIP_VEGA20:
@@ -3026,7 +3025,6 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
 
        switch (adev->asic_type) {
        case CHIP_RAVEN:
-       case CHIP_RENOIR:
                if (amdgpu_lbpw == 0)
                        gfx_v9_0_enable_lbpw(adev, false);
                else
index ff18b3a..78452cf 100644 (file)
@@ -1889,8 +1889,9 @@ static int sdma_v4_0_hw_init(void *handle)
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs &&
-                       adev->powerplay.pp_funcs->set_powergating_by_smu)
+       if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs &&
+                       adev->powerplay.pp_funcs->set_powergating_by_smu) ||
+                       adev->asic_type == CHIP_RENOIR)
                amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
 
        if (!amdgpu_sriov_vf(adev))
@@ -1917,8 +1918,9 @@ static int sdma_v4_0_hw_fini(void *handle)
        sdma_v4_0_ctx_switch_enable(adev, false);
        sdma_v4_0_enable(adev, false);
 
-       if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs
-                       && adev->powerplay.pp_funcs->set_powergating_by_smu)
+       if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs
+                       && adev->powerplay.pp_funcs->set_powergating_by_smu) ||
+                       adev->asic_type == CHIP_RENOIR)
                amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
 
        return 0;
index 4a59510..c44723c 100644 (file)
@@ -493,7 +493,15 @@ static void smu_v11_0_i2c_fini(struct i2c_adapter *control)
        }
 
        /* Restore clock gating */
-       smu_v11_0_i2c_set_clock_gating(control, true);
+
+       /*
+        * TODO Reenabling clock gating seems to break subsequent SMU operation
+        *      on the I2C bus. My guess is that SMU doesn't disable clock gating like
+        *      we do here before working with the bus. So for now just don't restore
+        *      it but later work with SMU to see if they have this issue and can
+        *      update their code appropriately
+        */
+       /* smu_v11_0_i2c_set_clock_gating(control, true); */
 
 }
 
index a8cf82d..901fe35 100644 (file)
@@ -694,10 +694,10 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
        0x003f8000, 0x8f6f896f,
        0x88776f77, 0x8a6eff6e,
        0x023f8000, 0xb9eef807,
-       0xb970f812, 0xb971f813,
-       0x8ff08870, 0xf4051bb8,
+       0xb97af812, 0xb97bf813,
+       0x8ffa887a, 0xf4051bbd,
        0xfa000000, 0xbf8cc07f,
-       0xf4051c38, 0xfa000008,
+       0xf4051ebd, 0xfa000008,
        0xbf8cc07f, 0x87ee6e6e,
        0xbf840001, 0xbe80206e,
        0xb971f803, 0x8771ff71,
index 3598621..cdaa523 100644 (file)
@@ -187,12 +187,12 @@ L_FETCH_2ND_TRAP:
        // Read second-level TBA/TMA from first-level TMA and jump if available.
        // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
        // ttmp12 holds SQ_WAVE_STATUS
-       s_getreg_b32    ttmp4, hwreg(HW_REG_SHADER_TMA_LO)
-       s_getreg_b32    ttmp5, hwreg(HW_REG_SHADER_TMA_HI)
-       s_lshl_b64      [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8
-       s_load_dwordx2  [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1               // second-level TBA
+       s_getreg_b32    ttmp14, hwreg(HW_REG_SHADER_TMA_LO)
+       s_getreg_b32    ttmp15, hwreg(HW_REG_SHADER_TMA_HI)
+       s_lshl_b64      [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+       s_load_dwordx2  [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1             // second-level TBA
        s_waitcnt       lgkmcnt(0)
-       s_load_dwordx2  [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1               // second-level TMA
+       s_load_dwordx2  [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1           // second-level TMA
        s_waitcnt       lgkmcnt(0)
        s_and_b64       [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
        s_cbranch_scc0  L_NO_NEXT_TRAP                                          // second-level trap handler not been set
index 3bb75d1..c893261 100644 (file)
@@ -687,9 +687,6 @@ struct kfd_process {
        /* We want to receive a notification when the mm_struct is destroyed */
        struct mmu_notifier mmu_notifier;
 
-       /* Use for delayed freeing of kfd_process structure */
-       struct rcu_head rcu;
-
        unsigned int pasid;
        unsigned int doorbell_index;
 
index 0c6ac04..40e3fc0 100644 (file)
@@ -62,8 +62,8 @@ static struct workqueue_struct *kfd_restore_wq;
 
 static struct kfd_process *find_process(const struct task_struct *thread);
 static void kfd_process_ref_release(struct kref *ref);
-static struct kfd_process *create_process(const struct task_struct *thread,
-                                       struct file *filep);
+static struct kfd_process *create_process(const struct task_struct *thread);
+static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
 
 static void evict_process_worker(struct work_struct *work);
 static void restore_process_worker(struct work_struct *work);
@@ -289,7 +289,15 @@ struct kfd_process *kfd_create_process(struct file *filep)
        if (process) {
                pr_debug("Process already found\n");
        } else {
-               process = create_process(thread, filep);
+               process = create_process(thread);
+               if (IS_ERR(process))
+                       goto out;
+
+               ret = kfd_process_init_cwsr_apu(process, filep);
+               if (ret) {
+                       process = ERR_PTR(ret);
+                       goto out;
+               }
 
                if (!procfs.kobj)
                        goto out;
@@ -478,11 +486,9 @@ static void kfd_process_ref_release(struct kref *ref)
        queue_work(kfd_process_wq, &p->release_work);
 }
 
-static void kfd_process_destroy_delayed(struct rcu_head *rcu)
+static void kfd_process_free_notifier(struct mmu_notifier *mn)
 {
-       struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);
-
-       kfd_unref_process(p);
+       kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
 }
 
 static void kfd_process_notifier_release(struct mmu_notifier *mn,
@@ -534,12 +540,12 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 
        mutex_unlock(&p->mutex);
 
-       mmu_notifier_unregister_no_release(&p->mmu_notifier, mm);
-       mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
+       mmu_notifier_put(&p->mmu_notifier);
 }
 
 static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
        .release = kfd_process_notifier_release,
+       .free_notifier = kfd_process_free_notifier,
 };
 
 static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
@@ -609,81 +615,69 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
        return 0;
 }
 
-static struct kfd_process *create_process(const struct task_struct *thread,
-                                       struct file *filep)
+/*
+ * On return the kfd_process is fully operational and will be freed when the
+ * mm is released
+ */
+static struct kfd_process *create_process(const struct task_struct *thread)
 {
        struct kfd_process *process;
        int err = -ENOMEM;
 
        process = kzalloc(sizeof(*process), GFP_KERNEL);
-
        if (!process)
                goto err_alloc_process;
 
-       process->pasid = kfd_pasid_alloc();
-       if (process->pasid == 0)
-               goto err_alloc_pasid;
-
-       if (kfd_alloc_process_doorbells(process) < 0)
-               goto err_alloc_doorbells;
-
        kref_init(&process->ref);
-
        mutex_init(&process->mutex);
-
        process->mm = thread->mm;
-
-       /* register notifier */
-       process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
-       err = mmu_notifier_register(&process->mmu_notifier, process->mm);
-       if (err)
-               goto err_mmu_notifier;
-
-       hash_add_rcu(kfd_processes_table, &process->kfd_processes,
-                       (uintptr_t)process->mm);
-
        process->lead_thread = thread->group_leader;
-       get_task_struct(process->lead_thread);
-
        INIT_LIST_HEAD(&process->per_device_data);
-
+       INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
+       INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
+       process->last_restore_timestamp = get_jiffies_64();
        kfd_event_init_process(process);
+       process->is_32bit_user_mode = in_compat_syscall();
+
+       process->pasid = kfd_pasid_alloc();
+       if (process->pasid == 0)
+               goto err_alloc_pasid;
+
+       if (kfd_alloc_process_doorbells(process) < 0)
+               goto err_alloc_doorbells;
 
        err = pqm_init(&process->pqm, process);
        if (err != 0)
                goto err_process_pqm_init;
 
        /* init process apertures*/
-       process->is_32bit_user_mode = in_compat_syscall();
        err = kfd_init_apertures(process);
        if (err != 0)
                goto err_init_apertures;
 
-       INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
-       INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
-       process->last_restore_timestamp = get_jiffies_64();
-
-       err = kfd_process_init_cwsr_apu(process, filep);
+       /* Must be last, have to use release destruction after this */
+       process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
+       err = mmu_notifier_register(&process->mmu_notifier, process->mm);
        if (err)
-               goto err_init_cwsr;
+               goto err_register_notifier;
+
+       get_task_struct(process->lead_thread);
+       hash_add_rcu(kfd_processes_table, &process->kfd_processes,
+                       (uintptr_t)process->mm);
 
        return process;
 
-err_init_cwsr:
+err_register_notifier:
        kfd_process_free_outstanding_kfd_bos(process);
        kfd_process_destroy_pdds(process);
 err_init_apertures:
        pqm_uninit(&process->pqm);
 err_process_pqm_init:
-       hash_del_rcu(&process->kfd_processes);
-       synchronize_rcu();
-       mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
-err_mmu_notifier:
-       mutex_destroy(&process->mutex);
        kfd_free_process_doorbells(process);
 err_alloc_doorbells:
        kfd_pasid_free(process->pasid);
 err_alloc_pasid:
+       mutex_destroy(&process->mutex);
        kfree(process);
 err_alloc_process:
        return ERR_PTR(err);
index e1b09bb..8cab6da 100644 (file)
@@ -2113,6 +2113,7 @@ static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd)
 }
 
 static const struct backlight_ops amdgpu_dm_backlight_ops = {
+       .options = BL_CORE_SUSPENDRESUME,
        .get_brightness = amdgpu_dm_backlight_get_brightness,
        .update_status  = amdgpu_dm_backlight_update_status,
 };
@@ -2384,6 +2385,8 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 
        if (adev->asic_type != CHIP_CARRIZO && adev->asic_type != CHIP_STONEY)
                dm->dc->debug.disable_stutter = amdgpu_pp_feature_mask & PP_STUTTER_MODE ? false : true;
+       if (adev->asic_type == CHIP_RENOIR)
+               dm->dc->debug.disable_stutter = true;
 
        return 0;
 fail:
@@ -5770,8 +5773,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
                 * change FB pitch, DCC state, rotation or mirroing.
                 */
                bundle->flip_addrs[planes_count].flip_immediate =
-                       (crtc->state->pageflip_flags &
-                        DRM_MODE_PAGE_FLIP_ASYNC) != 0 &&
+                       crtc->state->async_flip &&
                        acrtc_state->update_type == UPDATE_TYPE_FAST;
 
                timestamp_ns = ktime_get_ns();
@@ -6348,7 +6350,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
        amdgpu_dm_enable_crtc_interrupts(dev, state, true);
 
        for_each_new_crtc_in_state(state, crtc, new_crtc_state, j)
-               if (new_crtc_state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC)
+               if (new_crtc_state->async_flip)
                        wait_for_vblank = false;
 
        /* update planes when needed per crtc*/
index 383f4f8..9b2cb57 100644 (file)
@@ -708,6 +708,10 @@ static void hack_bounding_box(struct dcn_bw_internal_vars *v,
 
 unsigned int get_highest_allowed_voltage_level(uint32_t hw_internal_rev)
 {
+       /* for dali, the highest voltage level we want is 0 */
+       if (ASICREV_IS_DALI(hw_internal_rev))
+               return 0;
+
        /* we are ok with all levels */
        return 4;
 }
index 5cc3acc..b1e657e 100644 (file)
@@ -98,11 +98,14 @@ uint32_t dce110_get_min_vblank_time_us(const struct dc_state *context)
                struct dc_stream_state *stream = context->streams[j];
                uint32_t vertical_blank_in_pixels = 0;
                uint32_t vertical_blank_time = 0;
+               uint32_t vertical_total_min = stream->timing.v_total;
+               struct dc_crtc_timing_adjust adjust = stream->adjust;
+               if (adjust.v_total_max != adjust.v_total_min)
+                       vertical_total_min = adjust.v_total_min;
 
                vertical_blank_in_pixels = stream->timing.h_total *
-                       (stream->timing.v_total
+                       (vertical_total_min
                         - stream->timing.v_addressable);
-
                vertical_blank_time = vertical_blank_in_pixels
                        * 10000 / stream->timing.pix_clk_100hz;
 
@@ -171,6 +174,10 @@ void dce11_pplib_apply_display_requirements(
        struct dc_state *context)
 {
        struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg;
+       int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ;
+
+       if (dc->bw_vbios && dc->bw_vbios->memory_type == bw_def_hbm)
+               memory_type_multiplier = MEMORY_TYPE_HBM;
 
        pp_display_cfg->all_displays_in_sync =
                context->bw_ctx.bw.dce.all_displays_in_sync;
@@ -183,8 +190,20 @@ void dce11_pplib_apply_display_requirements(
        pp_display_cfg->cpu_pstate_separation_time =
                        context->bw_ctx.bw.dce.blackout_recovery_time_us;
 
-       pp_display_cfg->min_memory_clock_khz = context->bw_ctx.bw.dce.yclk_khz
-               / MEMORY_TYPE_MULTIPLIER_CZ;
+       /*
+        * TODO: determine whether the bandwidth has reached the memory's limit,
+        * then change the minimum memory clock based on the real-time bandwidth
+        * limit.
+        */
+       if (ASICREV_IS_VEGA20_P(dc->ctx->asic_id.hw_internal_rev) && (context->stream_count >= 2)) {
+               pp_display_cfg->min_memory_clock_khz = max(pp_display_cfg->min_memory_clock_khz,
+                                                          (uint32_t) div64_s64(
+                                                                  div64_s64(dc->bw_vbios->high_yclk.value,
+                                                                            memory_type_multiplier), 10000));
+       } else {
+               pp_display_cfg->min_memory_clock_khz = context->bw_ctx.bw.dce.yclk_khz
+                       / memory_type_multiplier;
+       }
 
        pp_display_cfg->min_engine_clock_khz = determine_sclk_from_bounding_box(
                        dc,
index 1488ffd..31b698b 100644 (file)
@@ -148,7 +148,7 @@ static void dce_mi_program_pte_vm(
                        pte->min_pte_before_flip_horiz_scan;
 
        REG_UPDATE(GRPH_PIPE_OUTSTANDING_REQUEST_LIMIT,
-                       GRPH_PIPE_OUTSTANDING_REQUEST_LIMIT, 0xff);
+                       GRPH_PIPE_OUTSTANDING_REQUEST_LIMIT, 0x7f);
 
        REG_UPDATE_3(DVMM_PTE_CONTROL,
                        DVMM_PAGE_WIDTH, page_width,
@@ -157,7 +157,7 @@ static void dce_mi_program_pte_vm(
 
        REG_UPDATE_2(DVMM_PTE_ARB_CONTROL,
                        DVMM_PTE_REQ_PER_CHUNK, pte->pte_req_per_chunk,
-                       DVMM_MAX_PTE_REQ_OUTSTANDING, 0xff);
+                       DVMM_MAX_PTE_REQ_OUTSTANDING, 0x7f);
 }
 
 static void program_urgency_watermark(
index afc6105..1787b9b 100644 (file)
@@ -1091,6 +1091,7 @@ struct resource_pool *dce100_create_resource_pool(
        if (construct(num_virtual_links, dc, pool))
                return &pool->base;
 
+       kfree(pool);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
index c66fe17..318e9c2 100644 (file)
@@ -1462,6 +1462,7 @@ struct resource_pool *dce110_create_resource_pool(
        if (construct(num_virtual_links, dc, pool, asic_id))
                return &pool->base;
 
+       kfree(pool);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
index 3ac4c7e..83e1878 100644 (file)
@@ -987,6 +987,10 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc)
        struct dm_pp_clock_levels_with_latency mem_clks = {0};
        struct dm_pp_wm_sets_with_clock_ranges clk_ranges = {0};
        struct dm_pp_clock_levels clks = {0};
+       int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ;
+
+       if (dc->bw_vbios && dc->bw_vbios->memory_type == bw_def_hbm)
+               memory_type_multiplier = MEMORY_TYPE_HBM;
 
        /*do system clock  TODO PPLIB: after PPLIB implement,
         * then remove old way
@@ -1026,12 +1030,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc)
                                &clks);
 
                dc->bw_vbios->low_yclk = bw_frc_to_fixed(
-                       clks.clocks_in_khz[0] * MEMORY_TYPE_MULTIPLIER_CZ, 1000);
+                       clks.clocks_in_khz[0] * memory_type_multiplier, 1000);
                dc->bw_vbios->mid_yclk = bw_frc_to_fixed(
-                       clks.clocks_in_khz[clks.num_levels>>1] * MEMORY_TYPE_MULTIPLIER_CZ,
+                       clks.clocks_in_khz[clks.num_levels>>1] * memory_type_multiplier,
                        1000);
                dc->bw_vbios->high_yclk = bw_frc_to_fixed(
-                       clks.clocks_in_khz[clks.num_levels-1] * MEMORY_TYPE_MULTIPLIER_CZ,
+                       clks.clocks_in_khz[clks.num_levels-1] * memory_type_multiplier,
                        1000);
 
                return;
@@ -1067,12 +1071,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc)
         * YCLK = UMACLK*m_memoryTypeMultiplier
         */
        dc->bw_vbios->low_yclk = bw_frc_to_fixed(
-               mem_clks.data[0].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, 1000);
+               mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000);
        dc->bw_vbios->mid_yclk = bw_frc_to_fixed(
-               mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ,
+               mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier,
                1000);
        dc->bw_vbios->high_yclk = bw_frc_to_fixed(
-               mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ,
+               mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier,
                1000);
 
        /* Now notify PPLib/SMU about which Watermarks sets they should select
@@ -1338,6 +1342,7 @@ struct resource_pool *dce112_create_resource_pool(
        if (construct(num_virtual_links, dc, pool))
                return &pool->base;
 
+       kfree(pool);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
index 7d08154..8b85e52 100644 (file)
@@ -847,6 +847,8 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc)
        int i;
        unsigned int clk;
        unsigned int latency;
+       /*original logic in dal3*/
+       int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ;
 
        /*do system clock*/
        if (!dm_pp_get_clock_levels_by_type_with_latency(
@@ -905,13 +907,16 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc)
         * ALSO always convert UMA clock (from PPLIB)  to YCLK (HW formula):
         * YCLK = UMACLK*m_memoryTypeMultiplier
         */
+       if (dc->bw_vbios->memory_type == bw_def_hbm)
+               memory_type_multiplier = MEMORY_TYPE_HBM;
+
        dc->bw_vbios->low_yclk = bw_frc_to_fixed(
-               mem_clks.data[0].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, 1000);
+               mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000);
        dc->bw_vbios->mid_yclk = bw_frc_to_fixed(
-               mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ,
+               mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier,
                1000);
        dc->bw_vbios->high_yclk = bw_frc_to_fixed(
-               mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ,
+               mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier,
                1000);
 
        /* Now notify PPLib/SMU about which Watermarks sets they should select
@@ -1203,6 +1208,7 @@ struct resource_pool *dce120_create_resource_pool(
        if (construct(num_virtual_links, dc, pool))
                return &pool->base;
 
+       kfree(pool);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
index 5a89e46..59305e4 100644 (file)
@@ -1570,6 +1570,7 @@ struct resource_pool *dcn10_create_resource_pool(
        if (construct(init_data->num_virtual_links, dc, pool))
                return &pool->base;
 
+       kfree(pool);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
index 34485d9..8572678 100644 (file)
 
 #include "hw_factory_dcn21.h"
 
-
 #include "dcn/dcn_2_1_0_offset.h"
 #include "dcn/dcn_2_1_0_sh_mask.h"
 #include "renoir_ip_offset.h"
 
-
 #include "reg_helper.h"
 #include "../hpd_regs.h"
 /* begin *********************
@@ -136,6 +134,39 @@ static const struct ddc_sh_mask ddc_mask[] = {
        DDC_MASK_SH_LIST_DCN2(_MASK, 6)
 };
 
+#include "../generic_regs.h"
+
+/* set field name */
+#define SF_GENERIC(reg_name, field_name, post_fix)\
+       .field_name = reg_name ## __ ## field_name ## post_fix
+
+#define generic_regs(id) \
+{\
+       GENERIC_REG_LIST(id)\
+}
+
+static const struct generic_registers generic_regs[] = {
+       generic_regs(A),
+};
+
+static const struct generic_sh_mask generic_shift[] = {
+       GENERIC_MASK_SH_LIST(__SHIFT, A),
+};
+
+static const struct generic_sh_mask generic_mask[] = {
+       GENERIC_MASK_SH_LIST(_MASK, A),
+};
+
+static void define_generic_registers(struct hw_gpio_pin *pin, uint32_t en)
+{
+       struct hw_generic *generic = HW_GENERIC_FROM_BASE(pin);
+
+       generic->regs = &generic_regs[en];
+       generic->shifts = &generic_shift[en];
+       generic->masks = &generic_mask[en];
+       generic->base.regs = &generic_regs[en].gpio;
+}
+
 static void define_ddc_registers(
                struct hw_gpio_pin *pin,
                uint32_t en)
@@ -181,7 +212,8 @@ static const struct hw_factory_funcs funcs = {
        .get_hpd_pin = dal_hw_hpd_get_pin,
        .get_generic_pin = dal_hw_generic_get_pin,
        .define_hpd_registers = define_hpd_registers,
-       .define_ddc_registers = define_ddc_registers
+       .define_ddc_registers = define_ddc_registers,
+       .define_generic_registers = define_generic_registers
 };
 /*
  * dal_hw_factory_dcn10_init
index ad7c437..fbb58fb 100644 (file)
@@ -58,7 +58,6 @@
 #define SF_HPD(reg_name, field_name, post_fix)\
        .field_name = reg_name ## __ ## field_name ## post_fix
 
-
 /* macros to expend register list macro defined in HW object header file
  * end *********************/
 
@@ -71,7 +70,7 @@ static bool offset_to_id(
 {
        switch (offset) {
        /* GENERIC */
-       case REG(DC_GENERICA):
+       case REG(DC_GPIO_GENERIC_A):
                *id = GPIO_ID_GENERIC;
                switch (mask) {
                case DC_GPIO_GENERIC_A__DC_GPIO_GENERICA_A_MASK:
index 1cc1c8c..bef224b 100644 (file)
@@ -31,6 +31,8 @@
 #include "dm_pp_smu.h"
 
 #define MEMORY_TYPE_MULTIPLIER_CZ 4
+#define MEMORY_TYPE_HBM 2
+
 
 enum dce_version resource_parse_asic_id(
                struct hw_asic_id asic_id);
index 1f16892..1be6c44 100644 (file)
 #define RAVEN1_F0 0xF0
 #define RAVEN_UNKNOWN 0xFF
 
+#define PICASSO_15D8_REV_E3 0xE3
+#define PICASSO_15D8_REV_E4 0xE4
+
 #define ASICREV_IS_RAVEN(eChipRev) ((eChipRev >= RAVEN_A0) && eChipRev < RAVEN_UNKNOWN)
 #define ASICREV_IS_PICASSO(eChipRev) ((eChipRev >= PICASSO_A0) && (eChipRev < RAVEN2_A0))
-#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < 0xF0))
-
+#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < PICASSO_15D8_REV_E3))
+#define ASICREV_IS_DALI(eChipRev) ((eChipRev >= PICASSO_15D8_REV_E3) && (eChipRev < RAVEN1_F0))
 
 #define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN))
 
index 554714c..094648c 100644 (file)
@@ -155,7 +155,7 @@ static const struct IP_BASE MP0_BASE ={ { { { 0x00016000, 0x0243FC00, 0x00DC0000
                                         { { 0, 0, 0, 0, 0 } },
                                         { { 0, 0, 0, 0, 0 } },
                                         { { 0, 0, 0, 0, 0 } } } };
-static const struct IP_BASE MP1_BASE ={ { { { 0x00016200, 0x02400400, 0x00E80000, 0x00EC0000, 0x00F00000 } },
+static const struct IP_BASE MP1_BASE ={ { { { 0x00016000, 0x02400400, 0x00E80000, 0x00EC0000, 0x00F00000 } },
                                         { { 0, 0, 0, 0, 0 } },
                                         { { 0, 0, 0, 0, 0 } },
                                         { { 0, 0, 0, 0, 0 } },
index fa636cb..fa8ad7d 100644 (file)
@@ -1531,6 +1531,7 @@ static int pp_asic_reset_mode_2(void *handle)
 static int pp_smu_i2c_bus_access(void *handle, bool acquire)
 {
        struct pp_hwmgr *hwmgr = handle;
+       int ret = 0;
 
        if (!hwmgr || !hwmgr->pm_en)
                return -EINVAL;
@@ -1540,7 +1541,11 @@ static int pp_smu_i2c_bus_access(void *handle, bool acquire)
                return -EINVAL;
        }
 
-       return hwmgr->hwmgr_func->smu_i2c_bus_access(hwmgr, acquire);
+       mutex_lock(&hwmgr->smu_lock);
+       ret = hwmgr->hwmgr_func->smu_i2c_bus_access(hwmgr, acquire);
+       mutex_unlock(&hwmgr->smu_lock);
+
+       return ret;
 }
 
 static const struct amd_pm_funcs pp_dpm_funcs = {
index 22f3c60..33960fb 100644 (file)
@@ -354,6 +354,9 @@ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type,
        case AMD_IP_BLOCK_TYPE_GFX:
                ret = smu_gfx_off_control(smu, gate);
                break;
+       case AMD_IP_BLOCK_TYPE_SDMA:
+               ret = smu_powergate_sdma(smu, gate);
+               break;
        default:
                break;
        }
index 2a6da54..e62bfba 100644 (file)
@@ -177,12 +177,82 @@ static int renoir_get_dpm_uclk_limited(struct smu_context *smu, uint32_t *clock,
 
 }
 
+static int renoir_print_clk_levels(struct smu_context *smu,
+                       enum smu_clk_type clk_type, char *buf)
+{
+       int i, size = 0, ret = 0;
+       uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0;
+       DpmClocks_t *clk_table = smu->smu_table.clocks_table;
+       SmuMetrics_t metrics = {0};
+
+       if (!clk_table || clk_type >= SMU_CLK_COUNT)
+               return -EINVAL;
+
+       ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
+                              (void *)&metrics, false);
+       if (ret)
+               return ret;
+
+       switch (clk_type) {
+       case SMU_GFXCLK:
+       case SMU_SCLK:
+               /* retrieved table's returned parameters are in MHz */
+               cur_value = metrics.ClockFrequency[CLOCK_GFXCLK];
+               ret = smu_get_dpm_freq_range(smu, SMU_GFXCLK, &min, &max);
+               if (!ret) {
+                       /* driver only knows min/max gfx_clk; add level 1 for all other gfx clks */
+                       if (cur_value  == max)
+                               i = 2;
+                       else if (cur_value == min)
+                               i = 0;
+                       else
+                               i = 1;
+
+                       size += sprintf(buf + size, "0: %uMhz %s\n", min,
+                                       i == 0 ? "*" : "");
+                       size += sprintf(buf + size, "1: %uMhz %s\n",
+                                       i == 1 ? cur_value : RENOIR_UMD_PSTATE_GFXCLK,
+                                       i == 1 ? "*" : "");
+                       size += sprintf(buf + size, "2: %uMhz %s\n", max,
+                                       i == 2 ? "*" : "");
+               }
+               return size;
+       case SMU_SOCCLK:
+               count = NUM_SOCCLK_DPM_LEVELS;
+               cur_value = metrics.ClockFrequency[CLOCK_SOCCLK];
+               break;
+       case SMU_MCLK:
+               count = NUM_MEMCLK_DPM_LEVELS;
+               cur_value = metrics.ClockFrequency[CLOCK_UMCCLK];
+               break;
+       case SMU_DCEFCLK:
+               count = NUM_DCFCLK_DPM_LEVELS;
+               cur_value = metrics.ClockFrequency[CLOCK_DCFCLK];
+               break;
+       case SMU_FCLK:
+               count = NUM_FCLK_DPM_LEVELS;
+               cur_value = metrics.ClockFrequency[CLOCK_FCLK];
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       for (i = 0; i < count; i++) {
+               GET_DPM_CUR_FREQ(clk_table, clk_type, i, value);
+               size += sprintf(buf + size, "%d: %uMhz %s\n", i, value,
+                               cur_value == value ? "*" : "");
+       }
+
+       return size;
+}
+
 static const struct pptable_funcs renoir_ppt_funcs = {
        .get_smu_msg_index = renoir_get_smu_msg_index,
        .get_smu_table_index = renoir_get_smu_table_index,
        .tables_init = renoir_tables_init,
        .set_power_state = NULL,
        .get_dpm_uclk_limited = renoir_get_dpm_uclk_limited,
+       .print_clk_levels = renoir_print_clk_levels,
 };
 
 void renoir_set_ppt_funcs(struct smu_context *smu)
index e9b7237..2a390dd 100644 (file)
 
 extern void renoir_set_ppt_funcs(struct smu_context *smu);
 
+/* UMD PState Renoir Msg Parameters in MHz */
+#define RENOIR_UMD_PSTATE_GFXCLK       700
+#define RENOIR_UMD_PSTATE_SOCCLK       678
+#define RENOIR_UMD_PSTATE_FCLK         800
+
+#define GET_DPM_CUR_FREQ(table, clk_type, dpm_level, freq)             \
+       do {                                                            \
+               switch (clk_type) {                                     \
+               case SMU_SOCCLK:                                        \
+                       freq = table->SocClocks[dpm_level].Freq;        \
+                       break;                                          \
+               case SMU_MCLK:                                          \
+                       freq = table->MemClocks[dpm_level].Freq;        \
+                       break;                                          \
+               case SMU_DCEFCLK:                                       \
+                       freq = table->DcfClocks[dpm_level].Freq;        \
+                       break;                                          \
+               case SMU_FCLK:                                          \
+                       freq = table->FClocks[dpm_level].Freq;          \
+                       break;                                          \
+               default:                                                \
+                       break;                                          \
+               }                                                       \
+       } while (0)
+
 #endif
index 98bccac..9e13e46 100644 (file)
@@ -874,6 +874,9 @@ static int adv7511_bridge_attach(struct drm_bridge *bridge)
                                 &adv7511_connector_helper_funcs);
        drm_connector_attach_encoder(&adv->connector, bridge->encoder);
 
+       if (adv->type == ADV7533)
+               ret = adv7533_attach_dsi(adv);
+
        if (adv->i2c_main->irq)
                regmap_write(adv->regmap, ADV7511_REG_INT_ENABLE(0),
                             ADV7511_INT0_HPD);
@@ -978,10 +981,10 @@ static int adv7511_init_cec_regmap(struct adv7511 *adv)
 {
        int ret;
 
-       adv->i2c_cec = i2c_new_secondary_device(adv->i2c_main, "cec",
+       adv->i2c_cec = i2c_new_ancillary_device(adv->i2c_main, "cec",
                                                ADV7511_CEC_I2C_ADDR_DEFAULT);
-       if (!adv->i2c_cec)
-               return -EINVAL;
+       if (IS_ERR(adv->i2c_cec))
+               return PTR_ERR(adv->i2c_cec);
        i2c_set_clientdata(adv->i2c_cec, adv);
 
        adv->regmap_cec = devm_regmap_init_i2c(adv->i2c_cec,
@@ -1162,20 +1165,20 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
 
        adv7511_packet_disable(adv7511, 0xffff);
 
-       adv7511->i2c_edid = i2c_new_secondary_device(i2c, "edid",
+       adv7511->i2c_edid = i2c_new_ancillary_device(i2c, "edid",
                                        ADV7511_EDID_I2C_ADDR_DEFAULT);
-       if (!adv7511->i2c_edid) {
-               ret = -EINVAL;
+       if (IS_ERR(adv7511->i2c_edid)) {
+               ret = PTR_ERR(adv7511->i2c_edid);
                goto uninit_regulators;
        }
 
        regmap_write(adv7511->regmap, ADV7511_REG_EDID_I2C_ADDR,
                     adv7511->i2c_edid->addr << 1);
 
-       adv7511->i2c_packet = i2c_new_secondary_device(i2c, "packet",
+       adv7511->i2c_packet = i2c_new_ancillary_device(i2c, "packet",
                                        ADV7511_PACKET_I2C_ADDR_DEFAULT);
-       if (!adv7511->i2c_packet) {
-               ret = -EINVAL;
+       if (IS_ERR(adv7511->i2c_packet)) {
+               ret = PTR_ERR(adv7511->i2c_packet);
                goto err_i2c_unregister_edid;
        }
 
@@ -1219,17 +1222,8 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
        drm_bridge_add(&adv7511->bridge);
 
        adv7511_audio_init(dev, adv7511);
-
-       if (adv7511->type == ADV7533) {
-               ret = adv7533_attach_dsi(adv7511);
-               if (ret)
-                       goto err_remove_bridge;
-       }
-
        return 0;
 
-err_remove_bridge:
-       drm_bridge_remove(&adv7511->bridge);
 err_unregister_cec:
        i2c_unregister_device(adv7511->i2c_cec);
        if (adv7511->cec_clk)
index aa16ea1..3ef2ac5 100644 (file)
@@ -26,6 +26,7 @@
  */
 
 #include <linux/dma-fence.h>
+#include <linux/ktime.h>
 
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
@@ -1580,9 +1581,23 @@ static void commit_tail(struct drm_atomic_state *old_state)
 {
        struct drm_device *dev = old_state->dev;
        const struct drm_mode_config_helper_funcs *funcs;
+       ktime_t start;
+       s64 commit_time_ms;
 
        funcs = dev->mode_config.helper_private;
 
+       /*
+        * We're measuring the _entire_ commit, so the time will vary depending
+        * on how many fences and objects are involved. For the purposes of self
+        * refresh, this is desirable since it'll give us an idea of how
+        * congested things are. This will inform our decision on how often we
+        * should enter self refresh after idle.
+        *
+        * These times will be averaged out in the self refresh helpers to avoid
+        * overreacting over one outlier frame
+        */
+       start = ktime_get();
+
        drm_atomic_helper_wait_for_fences(dev, old_state, false);
 
        drm_atomic_helper_wait_for_dependencies(old_state);
@@ -1592,6 +1607,11 @@ static void commit_tail(struct drm_atomic_state *old_state)
        else
                drm_atomic_helper_commit_tail(old_state);
 
+       commit_time_ms = ktime_ms_delta(ktime_get(), start);
+       if (commit_time_ms > 0)
+               drm_self_refresh_helper_update_avg_times(old_state,
+                                                (unsigned long)commit_time_ms);
+
        drm_atomic_helper_commit_cleanup_done(old_state);
 
        drm_atomic_state_put(old_state);
@@ -3275,7 +3295,7 @@ static int page_flip_common(struct drm_atomic_state *state,
                return PTR_ERR(crtc_state);
 
        crtc_state->event = event;
-       crtc_state->pageflip_flags = flags;
+       crtc_state->async_flip = flags & DRM_MODE_PAGE_FLIP_ASYNC;
 
        plane_state = drm_atomic_get_plane_state(state, plane);
        if (IS_ERR(plane_state))
index 46dc264..d0a937f 100644 (file)
@@ -128,7 +128,7 @@ void __drm_atomic_helper_crtc_duplicate_state(struct drm_crtc *crtc,
        state->zpos_changed = false;
        state->commit = NULL;
        state->event = NULL;
-       state->pageflip_flags = 0;
+       state->async_flip = false;
 
        /* Self refresh should be canceled when a new update is available */
        state->active = drm_atomic_crtc_effectively_active(state);
index 5a5b42d..7a26bfb 100644 (file)
@@ -1305,8 +1305,7 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
        if (arg->reserved)
                return -EINVAL;
 
-       if ((arg->flags & DRM_MODE_PAGE_FLIP_ASYNC) &&
-                       !dev->mode_config.async_page_flip)
+       if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC)
                return -EINVAL;
 
        /* can't test and expect an event at the same time. */
index c456c3d..769feef 100644 (file)
@@ -976,14 +976,14 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags)
        if (ret)
                goto err_minors;
 
+       dev->registered = true;
+
        if (dev->driver->load) {
                ret = dev->driver->load(dev, flags);
                if (ret)
                        goto err_minors;
        }
 
-       dev->registered = true;
-
        if (drm_core_check_feature(dev, DRIVER_MODESET))
                drm_modeset_register_all(dev);
 
index f675a3b..fcd728d 100644 (file)
@@ -336,7 +336,12 @@ drm_setclientcap(struct drm_device *dev, void *data, struct drm_file *file_priv)
        case DRM_CLIENT_CAP_ATOMIC:
                if (!drm_core_check_feature(dev, DRIVER_ATOMIC))
                        return -EOPNOTSUPP;
-               if (req->value > 1)
+               /* The modesetting DDX has a totally broken idea of atomic. */
+               if (current->comm[0] == 'X' && req->value == 1) {
+                       pr_info("broken atomic modeset userspace detected, disabling atomic\n");
+                       return -EOPNOTSUPP;
+               }
+               if (req->value > 2)
                        return -EINVAL;
                file_priv->atomic = req->value;
                file_priv->universal_planes = req->value;
index c355ba8..6a23e36 100644 (file)
@@ -42,7 +42,7 @@ int __drm_mode_object_add(struct drm_device *dev, struct drm_mode_object *obj,
 {
        int ret;
 
-       WARN_ON(dev->registered && !obj_free_cb);
+       WARN_ON(!dev->driver->load && dev->registered && !obj_free_cb);
 
        mutex_lock(&dev->mode_config.idr_mutex);
        ret = idr_alloc(&dev->mode_config.object_idr, register_obj ? obj : NULL,
@@ -104,7 +104,7 @@ void drm_mode_object_register(struct drm_device *dev,
 void drm_mode_object_unregister(struct drm_device *dev,
                                struct drm_mode_object *object)
 {
-       WARN_ON(dev->registered && !object->free_cb);
+       WARN_ON(!dev->driver->load && dev->registered && !object->free_cb);
 
        mutex_lock(&dev->mode_config.idr_mutex);
        if (object->id) {
index 4b9424a..68f4765 100644 (file)
@@ -5,6 +5,7 @@
  * Authors:
  * Sean Paul <seanpaul@chromium.org>
  */
+#include <linux/average.h>
 #include <linux/bitops.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
  * atomic_check when &drm_crtc_state.self_refresh_active is true.
  */
 
+#define SELF_REFRESH_AVG_SEED_MS 200
+
+DECLARE_EWMA(psr_time, 4, 4)
+
 struct drm_self_refresh_data {
        struct drm_crtc *crtc;
        struct delayed_work entry_work;
-       struct drm_atomic_state *save_state;
-       unsigned int entry_delay_ms;
+
+       struct mutex avg_mutex;
+       struct ewma_psr_time entry_avg_ms;
+       struct ewma_psr_time exit_avg_ms;
 };
 
 static void drm_self_refresh_helper_entry_work(struct work_struct *work)
@@ -123,6 +130,44 @@ out_drop_locks:
 }
 
 /**
+ * drm_self_refresh_helper_update_avg_times - Updates a crtc's SR time averages
+ * @state: the state which has just been applied to hardware
+ * @commit_time_ms: the amount of time in ms that this commit took to complete
+ *
+ * Called after &drm_mode_config_funcs.atomic_commit_tail, this function will
+ * update the average entry/exit self refresh times on self refresh transitions.
+ * These averages will be used when calculating how long to delay before
+ * entering self refresh mode after activity.
+ */
+void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state,
+                                             unsigned int commit_time_ms)
+{
+       struct drm_crtc *crtc;
+       struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+       int i;
+
+       for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
+                                     new_crtc_state, i) {
+               struct drm_self_refresh_data *sr_data = crtc->self_refresh_data;
+               struct ewma_psr_time *time;
+
+               if (old_crtc_state->self_refresh_active ==
+                   new_crtc_state->self_refresh_active)
+                       continue;
+
+               if (new_crtc_state->self_refresh_active)
+                       time = &sr_data->entry_avg_ms;
+               else
+                       time = &sr_data->exit_avg_ms;
+
+               mutex_lock(&sr_data->avg_mutex);
+               ewma_psr_time_add(time, commit_time_ms);
+               mutex_unlock(&sr_data->avg_mutex);
+       }
+}
+EXPORT_SYMBOL(drm_self_refresh_helper_update_avg_times);
+
+/**
  * drm_self_refresh_helper_alter_state - Alters the atomic state for SR exit
  * @state: the state currently being checked
  *
@@ -153,6 +198,7 @@ void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state)
 
        for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
                struct drm_self_refresh_data *sr_data;
+               unsigned int delay;
 
                /* Don't trigger the entry timer when we're already in SR */
                if (crtc_state->self_refresh_active)
@@ -162,8 +208,13 @@ void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state)
                if (!sr_data)
                        continue;
 
+               mutex_lock(&sr_data->avg_mutex);
+               delay = (ewma_psr_time_read(&sr_data->entry_avg_ms) +
+                        ewma_psr_time_read(&sr_data->exit_avg_ms)) * 2;
+               mutex_unlock(&sr_data->avg_mutex);
+
                mod_delayed_work(system_wq, &sr_data->entry_work,
-                                msecs_to_jiffies(sr_data->entry_delay_ms));
+                                msecs_to_jiffies(delay));
        }
 }
 EXPORT_SYMBOL(drm_self_refresh_helper_alter_state);
@@ -171,12 +222,10 @@ EXPORT_SYMBOL(drm_self_refresh_helper_alter_state);
 /**
  * drm_self_refresh_helper_init - Initializes self refresh helpers for a crtc
  * @crtc: the crtc which supports self refresh supported displays
- * @entry_delay_ms: amount of inactivity to wait before entering self refresh
  *
  * Returns zero if successful or -errno on failure
  */
-int drm_self_refresh_helper_init(struct drm_crtc *crtc,
-                                unsigned int entry_delay_ms)
+int drm_self_refresh_helper_init(struct drm_crtc *crtc)
 {
        struct drm_self_refresh_data *sr_data = crtc->self_refresh_data;
 
@@ -190,8 +239,18 @@ int drm_self_refresh_helper_init(struct drm_crtc *crtc,
 
        INIT_DELAYED_WORK(&sr_data->entry_work,
                          drm_self_refresh_helper_entry_work);
-       sr_data->entry_delay_ms = entry_delay_ms;
        sr_data->crtc = crtc;
+       mutex_init(&sr_data->avg_mutex);
+       ewma_psr_time_init(&sr_data->entry_avg_ms);
+       ewma_psr_time_init(&sr_data->exit_avg_ms);
+
+       /*
+        * Seed the averages so they're non-zero (and sufficiently large
+        * for even poorly performing panels). As time goes on, this will be
+        * averaged out and the values will trend to their true value.
+        */
+       ewma_psr_time_add(&sr_data->entry_avg_ms, SELF_REFRESH_AVG_SEED_MS);
+       ewma_psr_time_add(&sr_data->exit_avg_ms, SELF_REFRESH_AVG_SEED_MS);
 
        crtc->self_refresh_data = sr_data;
        return 0;
index 96b9814..3558df0 100644 (file)
@@ -86,9 +86,10 @@ config DRM_NOUVEAU_SVM
        bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support"
        depends on DEVICE_PRIVATE
        depends on DRM_NOUVEAU
-       depends on HMM_MIRROR
+       depends on MMU
        depends on STAGING
-       select MIGRATE_VMA_HELPER
+       select HMM_MIRROR
+       select MMU_NOTIFIER
        default n
        help
          Say Y here if you want to enable experimental support for
index 2db0293..5193b62 100644 (file)
@@ -267,7 +267,7 @@ nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw, bool modeset,
                        asyw->image.pitch[0] = fb->base.pitches[0];
                }
 
-               if (!(asyh->state.pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC))
+               if (!asyh->state.async_flip)
                        asyw->image.interval = 1;
                else
                        asyw->image.interval = 0;
@@ -383,7 +383,7 @@ nv50_wndw_atomic_check_lut(struct nv50_wndw *wndw,
        }
 
        /* Can't do an immediate flip while changing the LUT. */
-       asyh->state.pageflip_flags &= ~DRM_MODE_PAGE_FLIP_ASYNC;
+       asyh->state.async_flip = false;
 }
 
 static int
index 1333220..fa14399 100644 (file)
@@ -44,8 +44,6 @@
 #define DMEM_CHUNK_SIZE (2UL << 20)
 #define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT)
 
-struct nouveau_migrate;
-
 enum nouveau_aper {
        NOUVEAU_APER_VIRT,
        NOUVEAU_APER_VRAM,
@@ -86,21 +84,13 @@ static inline struct nouveau_dmem *page_to_dmem(struct page *page)
        return container_of(page->pgmap, struct nouveau_dmem, pagemap);
 }
 
-struct nouveau_dmem_fault {
-       struct nouveau_drm *drm;
-       struct nouveau_fence *fence;
-       dma_addr_t *dma;
-       unsigned long npages;
-};
+static unsigned long nouveau_dmem_page_addr(struct page *page)
+{
+       struct nouveau_dmem_chunk *chunk = page->zone_device_data;
+       unsigned long idx = page_to_pfn(page) - chunk->pfn_first;
 
-struct nouveau_migrate {
-       struct vm_area_struct *vma;
-       struct nouveau_drm *drm;
-       struct nouveau_fence *fence;
-       unsigned long npages;
-       dma_addr_t *dma;
-       unsigned long dma_nr;
-};
+       return (idx << PAGE_SHIFT) + chunk->bo->bo.offset;
+}
 
 static void nouveau_dmem_page_free(struct page *page)
 {
@@ -125,165 +115,90 @@ static void nouveau_dmem_page_free(struct page *page)
        spin_unlock(&chunk->lock);
 }
 
-static void
-nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma,
-                                 const unsigned long *src_pfns,
-                                 unsigned long *dst_pfns,
-                                 unsigned long start,
-                                 unsigned long end,
-                                 void *private)
+static void nouveau_dmem_fence_done(struct nouveau_fence **fence)
 {
-       struct nouveau_dmem_fault *fault = private;
-       struct nouveau_drm *drm = fault->drm;
-       struct device *dev = drm->dev->dev;
-       unsigned long addr, i, npages = 0;
-       nouveau_migrate_copy_t copy;
-       int ret;
-
-
-       /* First allocate new memory */
-       for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
-               struct page *dpage, *spage;
-
-               dst_pfns[i] = 0;
-               spage = migrate_pfn_to_page(src_pfns[i]);
-               if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
-                       continue;
-
-               dpage = alloc_page_vma(GFP_HIGHUSER, vma, addr);
-               if (!dpage) {
-                       dst_pfns[i] = MIGRATE_PFN_ERROR;
-                       continue;
-               }
-               lock_page(dpage);
-
-               dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) |
-                             MIGRATE_PFN_LOCKED;
-               npages++;
-       }
-
-       /* Allocate storage for DMA addresses, so we can unmap later. */
-       fault->dma = kmalloc(sizeof(*fault->dma) * npages, GFP_KERNEL);
-       if (!fault->dma)
-               goto error;
-
-       /* Copy things over */
-       copy = drm->dmem->migrate.copy_func;
-       for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
-               struct nouveau_dmem_chunk *chunk;
-               struct page *spage, *dpage;
-               u64 src_addr, dst_addr;
-
-               dpage = migrate_pfn_to_page(dst_pfns[i]);
-               if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR)
-                       continue;
-
-               spage = migrate_pfn_to_page(src_pfns[i]);
-               if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) {
-                       dst_pfns[i] = MIGRATE_PFN_ERROR;
-                       __free_page(dpage);
-                       continue;
-               }
-
-               fault->dma[fault->npages] =
-                       dma_map_page_attrs(dev, dpage, 0, PAGE_SIZE,
-                                          PCI_DMA_BIDIRECTIONAL,
-                                          DMA_ATTR_SKIP_CPU_SYNC);
-               if (dma_mapping_error(dev, fault->dma[fault->npages])) {
-                       dst_pfns[i] = MIGRATE_PFN_ERROR;
-                       __free_page(dpage);
-                       continue;
-               }
-
-               dst_addr = fault->dma[fault->npages++];
-
-               chunk = spage->zone_device_data;
-               src_addr = page_to_pfn(spage) - chunk->pfn_first;
-               src_addr = (src_addr << PAGE_SHIFT) + chunk->bo->bo.offset;
-
-               ret = copy(drm, 1, NOUVEAU_APER_HOST, dst_addr,
-                                  NOUVEAU_APER_VRAM, src_addr);
-               if (ret) {
-                       dst_pfns[i] = MIGRATE_PFN_ERROR;
-                       __free_page(dpage);
-                       continue;
-               }
+       if (*fence) {
+               nouveau_fence_wait(*fence, true, false);
+               nouveau_fence_unref(fence);
+       } else {
+               /*
+                * No fence to wait on. FIXME: wait for the channel to be
+                * IDLE before finalizing the hmem object.
+                */
        }
+}
 
-       nouveau_fence_new(drm->dmem->migrate.chan, false, &fault->fence);
-
-       return;
-
-error:
-       for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) {
-               struct page *page;
+static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm,
+               struct vm_fault *vmf, struct migrate_vma *args,
+               dma_addr_t *dma_addr)
+{
+       struct device *dev = drm->dev->dev;
+       struct page *dpage, *spage;
 
-               if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR)
-                       continue;
+       spage = migrate_pfn_to_page(args->src[0]);
+       if (!spage || !(args->src[0] & MIGRATE_PFN_MIGRATE))
+               return 0;
 
-               page = migrate_pfn_to_page(dst_pfns[i]);
-               dst_pfns[i] = MIGRATE_PFN_ERROR;
-               if (page == NULL)
-                       continue;
+       dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
+       if (!dpage)
+               return VM_FAULT_SIGBUS;
+       lock_page(dpage);
 
-               __free_page(page);
-       }
-}
+       *dma_addr = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+       if (dma_mapping_error(dev, *dma_addr))
+               goto error_free_page;
 
-void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma,
-                                        const unsigned long *src_pfns,
-                                        const unsigned long *dst_pfns,
-                                        unsigned long start,
-                                        unsigned long end,
-                                        void *private)
-{
-       struct nouveau_dmem_fault *fault = private;
-       struct nouveau_drm *drm = fault->drm;
+       if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_HOST, *dma_addr,
+                       NOUVEAU_APER_VRAM, nouveau_dmem_page_addr(spage)))
+               goto error_dma_unmap;
 
-       if (fault->fence) {
-               nouveau_fence_wait(fault->fence, true, false);
-               nouveau_fence_unref(&fault->fence);
-       } else {
-               /*
-                * FIXME wait for channel to be IDLE before calling finalizing
-                * the hmem object below (nouveau_migrate_hmem_fini()).
-                */
-       }
+       args->dst[0] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+       return 0;
 
-       while (fault->npages--) {
-               dma_unmap_page(drm->dev->dev, fault->dma[fault->npages],
-                              PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-       }
-       kfree(fault->dma);
+error_dma_unmap:
+       dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+error_free_page:
+       __free_page(dpage);
+       return VM_FAULT_SIGBUS;
 }
 
-static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = {
-       .alloc_and_copy         = nouveau_dmem_fault_alloc_and_copy,
-       .finalize_and_map       = nouveau_dmem_fault_finalize_and_map,
-};
-
 static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
 {
        struct nouveau_dmem *dmem = page_to_dmem(vmf->page);
-       unsigned long src[1] = {0}, dst[1] = {0};
-       struct nouveau_dmem_fault fault = { .drm = dmem->drm };
-       int ret;
+       struct nouveau_drm *drm = dmem->drm;
+       struct nouveau_fence *fence;
+       unsigned long src = 0, dst = 0;
+       dma_addr_t dma_addr = 0;
+       vm_fault_t ret;
+       struct migrate_vma args = {
+               .vma            = vmf->vma,
+               .start          = vmf->address,
+               .end            = vmf->address + PAGE_SIZE,
+               .src            = &src,
+               .dst            = &dst,
+       };
 
        /*
         * FIXME what we really want is to find some heuristic to migrate more
         * than just one page on CPU fault. When such fault happens it is very
         * likely that more surrounding page will CPU fault too.
         */
-       ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vmf->vma,
-                       vmf->address, vmf->address + PAGE_SIZE,
-                       src, dst, &fault);
-       if (ret)
+       if (migrate_vma_setup(&args) < 0)
                return VM_FAULT_SIGBUS;
+       if (!args.cpages)
+               return 0;
 
-       if (dst[0] == MIGRATE_PFN_ERROR)
-               return VM_FAULT_SIGBUS;
+       ret = nouveau_dmem_fault_copy_one(drm, vmf, &args, &dma_addr);
+       if (ret || dst == 0)
+               goto done;
 
-       return 0;
+       nouveau_fence_new(dmem->migrate.chan, false, &fence);
+       migrate_vma_pages(&args);
+       nouveau_dmem_fence_done(&fence);
+       dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+done:
+       migrate_vma_finalize(&args);
+       return ret;
 }
 
 static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = {
@@ -642,188 +557,115 @@ out_free:
        drm->dmem = NULL;
 }
 
-static void
-nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma,
-                                   const unsigned long *src_pfns,
-                                   unsigned long *dst_pfns,
-                                   unsigned long start,
-                                   unsigned long end,
-                                   void *private)
+static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
+               unsigned long src, dma_addr_t *dma_addr)
 {
-       struct nouveau_migrate *migrate = private;
-       struct nouveau_drm *drm = migrate->drm;
        struct device *dev = drm->dev->dev;
-       unsigned long addr, i, npages = 0;
-       nouveau_migrate_copy_t copy;
-       int ret;
-
-       /* First allocate new memory */
-       for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
-               struct page *dpage, *spage;
-
-               dst_pfns[i] = 0;
-               spage = migrate_pfn_to_page(src_pfns[i]);
-               if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
-                       continue;
-
-               dpage = nouveau_dmem_page_alloc_locked(drm);
-               if (!dpage)
-                       continue;
-
-               dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) |
-                             MIGRATE_PFN_LOCKED |
-                             MIGRATE_PFN_DEVICE;
-               npages++;
-       }
-
-       if (!npages)
-               return;
-
-       /* Allocate storage for DMA addresses, so we can unmap later. */
-       migrate->dma = kmalloc(sizeof(*migrate->dma) * npages, GFP_KERNEL);
-       if (!migrate->dma)
-               goto error;
-
-       /* Copy things over */
-       copy = drm->dmem->migrate.copy_func;
-       for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
-               struct nouveau_dmem_chunk *chunk;
-               struct page *spage, *dpage;
-               u64 src_addr, dst_addr;
-
-               dpage = migrate_pfn_to_page(dst_pfns[i]);
-               if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR)
-                       continue;
-
-               chunk = dpage->zone_device_data;
-               dst_addr = page_to_pfn(dpage) - chunk->pfn_first;
-               dst_addr = (dst_addr << PAGE_SHIFT) + chunk->bo->bo.offset;
-
-               spage = migrate_pfn_to_page(src_pfns[i]);
-               if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) {
-                       nouveau_dmem_page_free_locked(drm, dpage);
-                       dst_pfns[i] = 0;
-                       continue;
-               }
-
-               migrate->dma[migrate->dma_nr] =
-                       dma_map_page_attrs(dev, spage, 0, PAGE_SIZE,
-                                          PCI_DMA_BIDIRECTIONAL,
-                                          DMA_ATTR_SKIP_CPU_SYNC);
-               if (dma_mapping_error(dev, migrate->dma[migrate->dma_nr])) {
-                       nouveau_dmem_page_free_locked(drm, dpage);
-                       dst_pfns[i] = 0;
-                       continue;
-               }
-
-               src_addr = migrate->dma[migrate->dma_nr++];
+       struct page *dpage, *spage;
 
-               ret = copy(drm, 1, NOUVEAU_APER_VRAM, dst_addr,
-                                  NOUVEAU_APER_HOST, src_addr);
-               if (ret) {
-                       nouveau_dmem_page_free_locked(drm, dpage);
-                       dst_pfns[i] = 0;
-                       continue;
-               }
-       }
-
-       nouveau_fence_new(drm->dmem->migrate.chan, false, &migrate->fence);
+       spage = migrate_pfn_to_page(src);
+       if (!spage || !(src & MIGRATE_PFN_MIGRATE))
+               goto out;
 
-       return;
+       dpage = nouveau_dmem_page_alloc_locked(drm);
+       if (!dpage)
+               return 0;
 
-error:
-       for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) {
-               struct page *page;
+       *dma_addr = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+       if (dma_mapping_error(dev, *dma_addr))
+               goto out_free_page;
 
-               if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR)
-                       continue;
+       if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_VRAM,
+                       nouveau_dmem_page_addr(dpage), NOUVEAU_APER_HOST,
+                       *dma_addr))
+               goto out_dma_unmap;
 
-               page = migrate_pfn_to_page(dst_pfns[i]);
-               dst_pfns[i] = MIGRATE_PFN_ERROR;
-               if (page == NULL)
-                       continue;
+       return migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
 
-               __free_page(page);
-       }
+out_dma_unmap:
+       dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+out_free_page:
+       nouveau_dmem_page_free_locked(drm, dpage);
+out:
+       return 0;
 }
 
-void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma,
-                                          const unsigned long *src_pfns,
-                                          const unsigned long *dst_pfns,
-                                          unsigned long start,
-                                          unsigned long end,
-                                          void *private)
+static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm,
+               struct migrate_vma *args, dma_addr_t *dma_addrs)
 {
-       struct nouveau_migrate *migrate = private;
-       struct nouveau_drm *drm = migrate->drm;
-
-       if (migrate->fence) {
-               nouveau_fence_wait(migrate->fence, true, false);
-               nouveau_fence_unref(&migrate->fence);
-       } else {
-               /*
-                * FIXME wait for channel to be IDLE before finalizing
-                * the hmem object below (nouveau_migrate_hmem_fini()) ?
-                */
+       struct nouveau_fence *fence = NULL; /* never left uninitialized */
+       unsigned long addr = args->start, nr_dma = 0, i;
+
+       for (i = 0; addr < args->end; i++) {
+               args->dst[i] = nouveau_dmem_migrate_copy_one(drm, args->src[i],
+                               dma_addrs + nr_dma);
+               if (args->dst[i])
+                       nr_dma++;
+               addr += PAGE_SIZE;
        }
 
-       while (migrate->dma_nr--) {
-               dma_unmap_page(drm->dev->dev, migrate->dma[migrate->dma_nr],
-                              PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-       }
-       kfree(migrate->dma);
+       nouveau_fence_new(drm->dmem->migrate.chan, false, &fence);
+       migrate_vma_pages(args);
+       nouveau_dmem_fence_done(&fence);
 
+       while (nr_dma--) {
+               dma_unmap_page(drm->dev->dev, dma_addrs[nr_dma], PAGE_SIZE,
+                               DMA_BIDIRECTIONAL);
+       }
        /*
-        * FIXME optimization: update GPU page table to point to newly
-        * migrated memory.
+        * FIXME optimization: update GPU page table to point to newly migrated
+        * memory.
         */
+       migrate_vma_finalize(args);
 }
 
-static const struct migrate_vma_ops nouveau_dmem_migrate_ops = {
-       .alloc_and_copy         = nouveau_dmem_migrate_alloc_and_copy,
-       .finalize_and_map       = nouveau_dmem_migrate_finalize_and_map,
-};
-
 int
 nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
                         struct vm_area_struct *vma,
                         unsigned long start,
                         unsigned long end)
 {
-       unsigned long *src_pfns, *dst_pfns, npages;
-       struct nouveau_migrate migrate = {0};
-       unsigned long i, c, max;
-       int ret = 0;
-
-       npages = (end - start) >> PAGE_SHIFT;
-       max = min(SG_MAX_SINGLE_ALLOC, npages);
-       src_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL);
-       if (src_pfns == NULL)
-               return -ENOMEM;
-       dst_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL);
-       if (dst_pfns == NULL) {
-               kfree(src_pfns);
-               return -ENOMEM;
-       }
+       unsigned long npages = (end - start) >> PAGE_SHIFT;
+       unsigned long max = min(SG_MAX_SINGLE_ALLOC, npages);
+       dma_addr_t *dma_addrs;
+       struct migrate_vma args = {
+               .vma            = vma,
+               .start          = start,
+       };
+       unsigned long c, i;
+       int ret = -ENOMEM;
+
+       args.src = kcalloc(max, sizeof(args.src), GFP_KERNEL);
+       if (!args.src)
+               goto out;
+       args.dst = kcalloc(max, sizeof(args.dst), GFP_KERNEL);
+       if (!args.dst)
+               goto out_free_src;
 
-       migrate.drm = drm;
-       migrate.vma = vma;
-       migrate.npages = npages;
-       for (i = 0; i < npages; i += c) {
-               unsigned long next;
+       dma_addrs = kmalloc_array(max, sizeof(*dma_addrs), GFP_KERNEL);
+       if (!dma_addrs)
+               goto out_free_dst;
 
+       for (i = 0; i < npages; i += c) {
                c = min(SG_MAX_SINGLE_ALLOC, npages);
-               next = start + (c << PAGE_SHIFT);
-               ret = migrate_vma(&nouveau_dmem_migrate_ops, vma, start,
-                                 next, src_pfns, dst_pfns, &migrate);
+               args.end = start + (c << PAGE_SHIFT);
+               ret = migrate_vma_setup(&args);
                if (ret)
-                       goto out;
-               start = next;
+                       goto out_free_dma;
+
+               if (args.cpages)
+                       nouveau_dmem_migrate_chunk(drm, &args, dma_addrs);
+               args.start = args.end;
        }
 
+       ret = 0;
+out_free_dma:
+       kfree(dma_addrs);
+out_free_dst:
+       kfree(args.dst);
+out_free_src:
+       kfree(args.src);
 out:
-       kfree(dst_pfns);
-       kfree(src_pfns);
        return ret;
 }
 
@@ -841,11 +683,10 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
 
        npages = (range->end - range->start) >> PAGE_SHIFT;
        for (i = 0; i < npages; ++i) {
-               struct nouveau_dmem_chunk *chunk;
                struct page *page;
                uint64_t addr;
 
-               page = hmm_pfn_to_page(range, range->pfns[i]);
+               page = hmm_device_entry_to_page(range, range->pfns[i]);
                if (page == NULL)
                        continue;
 
@@ -859,10 +700,7 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
                        continue;
                }
 
-               chunk = page->zone_device_data;
-               addr = page_to_pfn(page) - chunk->pfn_first;
-               addr = (addr + chunk->bo->bo.mem.start) << PAGE_SHIFT;
-
+               addr = nouveau_dmem_page_addr(page);
                range->pfns[i] &= ((1UL << range->pfn_shift) - 1);
                range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift;
        }
index 9d97d75..92394be 100644 (file)
@@ -45,16 +45,5 @@ static inline void nouveau_dmem_init(struct nouveau_drm *drm) {}
 static inline void nouveau_dmem_fini(struct nouveau_drm *drm) {}
 static inline void nouveau_dmem_suspend(struct nouveau_drm *drm) {}
 static inline void nouveau_dmem_resume(struct nouveau_drm *drm) {}
-
-static inline int nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
-                                          struct vm_area_struct *vma,
-                                          unsigned long start,
-                                          unsigned long end)
-{
-       return 0;
-}
-
-static inline void nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
-                                           struct hmm_range *range) {}
 #endif /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */
 #endif
index bdc9483..2cd8384 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/pci.h>
 #include <linux/pm_runtime.h>
 #include <linux/vga_switcheroo.h>
+#include <linux/mmu_notifier.h>
 
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_ioctl.h>
@@ -1290,6 +1291,8 @@ nouveau_drm_exit(void)
 #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER
        platform_driver_unregister(&nouveau_platform_driver);
 #endif
+       if (IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM))
+               mmu_notifier_synchronize();
 }
 
 module_init(nouveau_drm_init);
index a835ceb..668d4bd 100644 (file)
@@ -252,13 +252,13 @@ nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit)
 
 static int
 nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
-                                       const struct hmm_update *update)
+                                       const struct mmu_notifier_range *update)
 {
        struct nouveau_svmm *svmm = container_of(mirror, typeof(*svmm), mirror);
        unsigned long start = update->start;
        unsigned long limit = update->end;
 
-       if (!update->blockable)
+       if (!mmu_notifier_range_blockable(update))
                return -EAGAIN;
 
        SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit);
@@ -485,31 +485,29 @@ nouveau_range_done(struct hmm_range *range)
 }
 
 static int
-nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range)
+nouveau_range_fault(struct nouveau_svmm *svmm, struct hmm_range *range)
 {
        long ret;
 
        range->default_flags = 0;
        range->pfn_flags_mask = -1UL;
 
-       ret = hmm_range_register(range, mirror,
-                                range->start, range->end,
-                                PAGE_SHIFT);
+       ret = hmm_range_register(range, &svmm->mirror);
        if (ret) {
-               up_read(&range->vma->vm_mm->mmap_sem);
+               up_read(&svmm->mm->mmap_sem);
                return (int)ret;
        }
 
        if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) {
-               up_read(&range->vma->vm_mm->mmap_sem);
-               return -EAGAIN;
+               up_read(&svmm->mm->mmap_sem);
+               return -EBUSY;
        }
 
-       ret = hmm_range_fault(range, true);
+       ret = hmm_range_fault(range, 0);
        if (ret <= 0) {
                if (ret == 0)
                        ret = -EBUSY;
-               up_read(&range->vma->vm_mm->mmap_sem);
+               up_read(&svmm->mm->mmap_sem);
                hmm_range_unregister(range);
                return ret;
        }
@@ -682,7 +680,6 @@ nouveau_svm_fault(struct nvif_notify *notify)
                         args.i.p.addr + args.i.p.size, fn - fi);
 
                /* Have HMM fault pages within the fault window to the GPU. */
-               range.vma = vma;
                range.start = args.i.p.addr;
                range.end = args.i.p.addr + args.i.p.size;
                range.pfns = args.phys;
@@ -690,7 +687,7 @@ nouveau_svm_fault(struct nvif_notify *notify)
                range.values = nouveau_svm_pfn_values;
                range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT;
 again:
-               ret = nouveau_range_fault(&svmm->mirror, &range);
+               ret = nouveau_range_fault(svmm, &range);
                if (ret == 0) {
                        mutex_lock(&svmm->mutex);
                        if (!nouveau_range_done(&range)) {
index a1f5fa6..12ff77d 100644 (file)
@@ -39,7 +39,7 @@ static int panfrost_devfreq_target(struct device *dev, unsigned long *freq,
         * If frequency scaling from low to high, adjust voltage first.
         * If frequency scaling from high to low, adjust frequency first.
         */
-       if (old_clk_rate < target_rate && pfdev->regulator) {
+       if (old_clk_rate < target_rate) {
                err = regulator_set_voltage(pfdev->regulator, target_volt,
                                            target_volt);
                if (err) {
@@ -53,14 +53,12 @@ static int panfrost_devfreq_target(struct device *dev, unsigned long *freq,
        if (err) {
                dev_err(dev, "Cannot set frequency %lu (%d)\n", target_rate,
                        err);
-               if (pfdev->regulator)
-                       regulator_set_voltage(pfdev->regulator,
-                                             pfdev->devfreq.cur_volt,
-                                             pfdev->devfreq.cur_volt);
+               regulator_set_voltage(pfdev->regulator, pfdev->devfreq.cur_volt,
+                                     pfdev->devfreq.cur_volt);
                return err;
        }
 
-       if (old_clk_rate > target_rate && pfdev->regulator) {
+       if (old_clk_rate > target_rate) {
                err = regulator_set_voltage(pfdev->regulator, target_volt,
                                            target_volt);
                if (err)
index 46b0b02..238fb6d 100644 (file)
@@ -89,12 +89,9 @@ static int panfrost_regulator_init(struct panfrost_device *pfdev)
 {
        int ret;
 
-       pfdev->regulator = devm_regulator_get_optional(pfdev->dev, "mali");
+       pfdev->regulator = devm_regulator_get(pfdev->dev, "mali");
        if (IS_ERR(pfdev->regulator)) {
                ret = PTR_ERR(pfdev->regulator);
-               pfdev->regulator = NULL;
-               if (ret == -ENODEV)
-                       return 0;
                dev_err(pfdev->dev, "failed to get regulator: %d\n", ret);
                return ret;
        }
@@ -110,8 +107,7 @@ static int panfrost_regulator_init(struct panfrost_device *pfdev)
 
 static void panfrost_regulator_fini(struct panfrost_device *pfdev)
 {
-       if (pfdev->regulator)
-               regulator_disable(pfdev->regulator);
+       regulator_disable(pfdev->regulator);
 }
 
 int panfrost_device_init(struct panfrost_device *pfdev)
index 6010f9e..bdd9905 100644 (file)
@@ -394,28 +394,40 @@ void panfrost_mmu_pgtable_free(struct panfrost_file_priv *priv)
        free_io_pgtable_ops(mmu->pgtbl_ops);
 }
 
-static struct drm_mm_node *addr_to_drm_mm_node(struct panfrost_device *pfdev, int as, u64 addr)
+static struct panfrost_gem_object *
+addr_to_drm_mm_node(struct panfrost_device *pfdev, int as, u64 addr)
 {
-       struct drm_mm_node *node = NULL;
+       struct panfrost_gem_object *bo = NULL;
+       struct panfrost_file_priv *priv;
+       struct drm_mm_node *node;
        u64 offset = addr >> PAGE_SHIFT;
        struct panfrost_mmu *mmu;
 
        spin_lock(&pfdev->as_lock);
        list_for_each_entry(mmu, &pfdev->as_lru_list, list) {
-               struct panfrost_file_priv *priv;
-               if (as != mmu->as)
-                       continue;
+               if (as == mmu->as)
+                       break;
+       }
+       if (as != mmu->as)
+               goto out;
+
+       priv = container_of(mmu, struct panfrost_file_priv, mmu);
 
-               priv = container_of(mmu, struct panfrost_file_priv, mmu);
-               drm_mm_for_each_node(node, &priv->mm) {
-                       if (offset >= node->start && offset < (node->start + node->size))
-                               goto out;
+       spin_lock(&priv->mm_lock);
+
+       drm_mm_for_each_node(node, &priv->mm) {
+               if (offset >= node->start &&
+                   offset < (node->start + node->size)) {
+                       bo = drm_mm_node_to_panfrost_bo(node);
+                       drm_gem_object_get(&bo->base.base);
+                       break;
                }
        }
 
+       spin_unlock(&priv->mm_lock);
 out:
        spin_unlock(&pfdev->as_lock);
-       return node;
+       return bo;
 }
 
 #define NUM_FAULT_PAGES (SZ_2M / PAGE_SIZE)
@@ -423,29 +435,28 @@ out:
 int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr)
 {
        int ret, i;
-       struct drm_mm_node *node;
        struct panfrost_gem_object *bo;
        struct address_space *mapping;
        pgoff_t page_offset;
        struct sg_table *sgt;
        struct page **pages;
 
-       node = addr_to_drm_mm_node(pfdev, as, addr);
-       if (!node)
+       bo = addr_to_drm_mm_node(pfdev, as, addr);
+       if (!bo)
                return -ENOENT;
 
-       bo = drm_mm_node_to_panfrost_bo(node);
        if (!bo->is_heap) {
                dev_WARN(pfdev->dev, "matching BO is not heap type (GPU VA = %llx)",
-                        node->start << PAGE_SHIFT);
-               return -EINVAL;
+                        bo->node.start << PAGE_SHIFT);
+               ret = -EINVAL;
+               goto err_bo;
        }
        WARN_ON(bo->mmu->as != as);
 
        /* Assume 2MB alignment and size multiple */
        addr &= ~((u64)SZ_2M - 1);
        page_offset = addr >> PAGE_SHIFT;
-       page_offset -= node->start;
+       page_offset -= bo->node.start;
 
        mutex_lock(&bo->base.pages_lock);
 
@@ -454,7 +465,8 @@ int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr)
                                     sizeof(struct sg_table), GFP_KERNEL | __GFP_ZERO);
                if (!bo->sgts) {
                        mutex_unlock(&bo->base.pages_lock);
-                       return -ENOMEM;
+                       ret = -ENOMEM;
+                       goto err_bo;
                }
 
                pages = kvmalloc_array(bo->base.base.size >> PAGE_SHIFT,
@@ -463,7 +475,8 @@ int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr)
                        kfree(bo->sgts);
                        bo->sgts = NULL;
                        mutex_unlock(&bo->base.pages_lock);
-                       return -ENOMEM;
+                       ret = -ENOMEM;
+                       goto err_bo;
                }
                bo->base.pages = pages;
                bo->base.pages_use_count = 1;
@@ -501,12 +514,16 @@ int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr)
 
        dev_dbg(pfdev->dev, "mapped page fault @ AS%d %llx", as, addr);
 
+       drm_gem_object_put_unlocked(&bo->base.base);
+
        return 0;
 
 err_map:
        sg_free_table(sgt);
 err_pages:
        drm_gem_shmem_put_pages(&bo->base);
+err_bo:
+       drm_gem_object_put_unlocked(&bo->base.base);
        return ret;
 }
 
index 05b8849..d59b004 100644 (file)
@@ -2449,9 +2449,6 @@ struct radeon_device {
        /* tracking pinned memory */
        u64 vram_pin_size;
        u64 gart_pin_size;
-
-       struct mutex    mn_lock;
-       DECLARE_HASHTABLE(mn_hash, 7);
 };
 
 bool radeon_is_px(struct drm_device *dev);
index 88eb7cb..5d017f0 100644 (file)
@@ -1325,8 +1325,6 @@ int radeon_device_init(struct radeon_device *rdev,
        init_rwsem(&rdev->pm.mclk_lock);
        init_rwsem(&rdev->exclusive_lock);
        init_waitqueue_head(&rdev->irq.vblank_queue);
-       mutex_init(&rdev->mn_lock);
-       hash_init(rdev->mn_hash);
        r = radeon_gem_init(rdev);
        if (r)
                return r;
index 5838162..d0bc91e 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include <linux/pm_runtime.h>
 #include <linux/vga_switcheroo.h>
+#include <linux/mmu_notifier.h>
 
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_drv.h>
@@ -323,8 +324,39 @@ bool radeon_device_is_virtual(void);
 static int radeon_pci_probe(struct pci_dev *pdev,
                            const struct pci_device_id *ent)
 {
+       unsigned long flags = 0;
        int ret;
 
+       if (!ent)
+               return -ENODEV; /* Avoid NULL-ptr deref in drm_get_pci_dev */
+
+       flags = ent->driver_data;
+
+       if (!radeon_si_support) {
+               switch (flags & RADEON_FAMILY_MASK) {
+               case CHIP_TAHITI:
+               case CHIP_PITCAIRN:
+               case CHIP_VERDE:
+               case CHIP_OLAND:
+               case CHIP_HAINAN:
+                       dev_info(&pdev->dev,
+                                "SI support disabled by module param\n");
+                       return -ENODEV;
+               }
+       }
+       if (!radeon_cik_support) {
+               switch (flags & RADEON_FAMILY_MASK) {
+               case CHIP_KAVERI:
+               case CHIP_BONAIRE:
+               case CHIP_HAWAII:
+               case CHIP_KABINI:
+               case CHIP_MULLINS:
+                       dev_info(&pdev->dev,
+                                "CIK support disabled by module param\n");
+                       return -ENODEV;
+               }
+       }
+
        if (vga_switcheroo_client_probe_defer(pdev))
                return -EPROBE_DEFER;
 
@@ -623,6 +655,7 @@ static void __exit radeon_exit(void)
 {
        pci_unregister_driver(pdriver);
        radeon_unregister_atpx_handler();
+       mmu_notifier_synchronize();
 }
 
 module_init(radeon_init);
index 4cf58db..b2b0766 100644 (file)
@@ -296,6 +296,8 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
        uint32_t handle;
        int r;
 
+       args->addr = untagged_addr(args->addr);
+
        if (offset_in_page(args->addr | args->size))
                return -EINVAL;
 
index 07f7ace..e85c554 100644 (file)
@@ -100,31 +100,6 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags)
        struct radeon_device *rdev;
        int r, acpi_status;
 
-       if (!radeon_si_support) {
-               switch (flags & RADEON_FAMILY_MASK) {
-               case CHIP_TAHITI:
-               case CHIP_PITCAIRN:
-               case CHIP_VERDE:
-               case CHIP_OLAND:
-               case CHIP_HAINAN:
-                       dev_info(dev->dev,
-                                "SI support disabled by module param\n");
-                       return -ENODEV;
-               }
-       }
-       if (!radeon_cik_support) {
-               switch (flags & RADEON_FAMILY_MASK) {
-               case CHIP_KAVERI:
-               case CHIP_BONAIRE:
-               case CHIP_HAWAII:
-               case CHIP_KABINI:
-               case CHIP_MULLINS:
-                       dev_info(dev->dev,
-                                "CIK support disabled by module param\n");
-                       return -ENODEV;
-               }
-       }
-
        rdev = kzalloc(sizeof(struct radeon_device), GFP_KERNEL);
        if (rdev == NULL) {
                return -ENOMEM;
index 6902f99..dbab9a3 100644 (file)
 #include "radeon.h"
 
 struct radeon_mn {
-       /* constant after initialisation */
-       struct radeon_device    *rdev;
-       struct mm_struct        *mm;
        struct mmu_notifier     mn;
 
-       /* only used on destruction */
-       struct work_struct      work;
-
-       /* protected by rdev->mn_lock */
-       struct hlist_node       node;
-
        /* objects protected by lock */
        struct mutex            lock;
        struct rb_root_cached   objects;
@@ -59,55 +50,6 @@ struct radeon_mn_node {
 };
 
 /**
- * radeon_mn_destroy - destroy the rmn
- *
- * @work: previously sheduled work item
- *
- * Lazy destroys the notifier from a work item
- */
-static void radeon_mn_destroy(struct work_struct *work)
-{
-       struct radeon_mn *rmn = container_of(work, struct radeon_mn, work);
-       struct radeon_device *rdev = rmn->rdev;
-       struct radeon_mn_node *node, *next_node;
-       struct radeon_bo *bo, *next_bo;
-
-       mutex_lock(&rdev->mn_lock);
-       mutex_lock(&rmn->lock);
-       hash_del(&rmn->node);
-       rbtree_postorder_for_each_entry_safe(node, next_node,
-                                            &rmn->objects.rb_root, it.rb) {
-
-               interval_tree_remove(&node->it, &rmn->objects);
-               list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
-                       bo->mn = NULL;
-                       list_del_init(&bo->mn_list);
-               }
-               kfree(node);
-       }
-       mutex_unlock(&rmn->lock);
-       mutex_unlock(&rdev->mn_lock);
-       mmu_notifier_unregister(&rmn->mn, rmn->mm);
-       kfree(rmn);
-}
-
-/**
- * radeon_mn_release - callback to notify about mm destruction
- *
- * @mn: our notifier
- * @mn: the mm this callback is about
- *
- * Shedule a work item to lazy destroy our notifier.
- */
-static void radeon_mn_release(struct mmu_notifier *mn,
-                             struct mm_struct *mm)
-{
-       struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn);
-       INIT_WORK(&rmn->work, radeon_mn_destroy);
-       schedule_work(&rmn->work);
-}
-
-/**
  * radeon_mn_invalidate_range_start - callback to notify about mm change
  *
  * @mn: our notifier
@@ -183,65 +125,44 @@ out_unlock:
        return ret;
 }
 
-static const struct mmu_notifier_ops radeon_mn_ops = {
-       .release = radeon_mn_release,
-       .invalidate_range_start = radeon_mn_invalidate_range_start,
-};
+static void radeon_mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+       struct mmu_notifier_range range = {
+               .mm = mm,
+               .start = 0,
+               .end = ULONG_MAX,
+               .flags = 0,
+               .event = MMU_NOTIFY_UNMAP,
+       };
+
+       radeon_mn_invalidate_range_start(mn, &range);
+}
 
-/**
- * radeon_mn_get - create notifier context
- *
- * @rdev: radeon device pointer
- *
- * Creates a notifier context for current->mm.
- */
-static struct radeon_mn *radeon_mn_get(struct radeon_device *rdev)
+static struct mmu_notifier *radeon_mn_alloc_notifier(struct mm_struct *mm)
 {
-       struct mm_struct *mm = current->mm;
        struct radeon_mn *rmn;
-       int r;
-
-       if (down_write_killable(&mm->mmap_sem))
-               return ERR_PTR(-EINTR);
-
-       mutex_lock(&rdev->mn_lock);
-
-       hash_for_each_possible(rdev->mn_hash, rmn, node, (unsigned long)mm)
-               if (rmn->mm == mm)
-                       goto release_locks;
 
        rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
-       if (!rmn) {
-               rmn = ERR_PTR(-ENOMEM);
-               goto release_locks;
-       }
+       if (!rmn)
+               return ERR_PTR(-ENOMEM);
 
-       rmn->rdev = rdev;
-       rmn->mm = mm;
-       rmn->mn.ops = &radeon_mn_ops;
        mutex_init(&rmn->lock);
        rmn->objects = RB_ROOT_CACHED;
-       
-       r = __mmu_notifier_register(&rmn->mn, mm);
-       if (r)
-               goto free_rmn;
-
-       hash_add(rdev->mn_hash, &rmn->node, (unsigned long)mm);
-
-release_locks:
-       mutex_unlock(&rdev->mn_lock);
-       up_write(&mm->mmap_sem);
-
-       return rmn;
-
-free_rmn:
-       mutex_unlock(&rdev->mn_lock);
-       up_write(&mm->mmap_sem);
-       kfree(rmn);
+       return &rmn->mn;
+}
 
-       return ERR_PTR(r);
+static void radeon_mn_free_notifier(struct mmu_notifier *mn)
+{
+       kfree(container_of(mn, struct radeon_mn, mn));
 }
 
+static const struct mmu_notifier_ops radeon_mn_ops = {
+       .release = radeon_mn_release,
+       .invalidate_range_start = radeon_mn_invalidate_range_start,
+       .alloc_notifier = radeon_mn_alloc_notifier,
+       .free_notifier = radeon_mn_free_notifier,
+};
+
 /**
  * radeon_mn_register - register a BO for notifier updates
  *
@@ -254,15 +175,16 @@ free_rmn:
 int radeon_mn_register(struct radeon_bo *bo, unsigned long addr)
 {
        unsigned long end = addr + radeon_bo_size(bo) - 1;
-       struct radeon_device *rdev = bo->rdev;
+       struct mmu_notifier *mn;
        struct radeon_mn *rmn;
        struct radeon_mn_node *node = NULL;
        struct list_head bos;
        struct interval_tree_node *it;
 
-       rmn = radeon_mn_get(rdev);
-       if (IS_ERR(rmn))
-               return PTR_ERR(rmn);
+       mn = mmu_notifier_get(&radeon_mn_ops, current->mm);
+       if (IS_ERR(mn))
+               return PTR_ERR(mn);
+       rmn = container_of(mn, struct radeon_mn, mn);
 
        INIT_LIST_HEAD(&bos);
 
@@ -309,22 +231,16 @@ int radeon_mn_register(struct radeon_bo *bo, unsigned long addr)
  */
 void radeon_mn_unregister(struct radeon_bo *bo)
 {
-       struct radeon_device *rdev = bo->rdev;
-       struct radeon_mn *rmn;
+       struct radeon_mn *rmn = bo->mn;
        struct list_head *head;
 
-       mutex_lock(&rdev->mn_lock);
-       rmn = bo->mn;
-       if (rmn == NULL) {
-               mutex_unlock(&rdev->mn_lock);
+       if (!rmn)
                return;
-       }
 
        mutex_lock(&rmn->lock);
        /* save the next list entry for later */
        head = bo->mn_list.next;
 
-       bo->mn = NULL;
        list_del(&bo->mn_list);
 
        if (list_empty(head)) {
@@ -335,5 +251,7 @@ void radeon_mn_unregister(struct radeon_bo *bo)
        }
 
        mutex_unlock(&rmn->lock);
-       mutex_unlock(&rdev->mn_lock);
+
+       mmu_notifier_put(&rmn->mn);
+       bo->mn = NULL;
 }
index 2f821c5..613404f 100644 (file)
@@ -39,8 +39,6 @@
 #include "rockchip_drm_vop.h"
 #include "rockchip_rgb.h"
 
-#define VOP_SELF_REFRESH_ENTRY_DELAY_MS 100
-
 #define VOP_WIN_SET(vop, win, name, v) \
                vop_reg_set(vop, &win->phy->name, win->base, ~0, v, #name)
 #define VOP_SCL_SET(vop, win, name, v) \
@@ -1563,8 +1561,7 @@ static int vop_create_crtc(struct vop *vop)
        init_completion(&vop->line_flag_completion);
        crtc->port = port;
 
-       ret = drm_self_refresh_helper_init(crtc,
-                                          VOP_SELF_REFRESH_ENTRY_DELAY_MS);
+       ret = drm_self_refresh_helper_init(crtc);
        if (ret)
                DRM_DEV_DEBUG_KMS(vop->dev,
                        "Failed to init %s with SR helpers %d, ignoring\n",
index feaa538..3db000a 100644 (file)
@@ -174,7 +174,6 @@ via_map_blit_for_device(struct pci_dev *pdev,
 static void
 via_free_sg_info(struct pci_dev *pdev, drm_via_sg_info_t *vsg)
 {
-       struct page *page;
        int i;
 
        switch (vsg->state) {
@@ -189,13 +188,8 @@ via_free_sg_info(struct pci_dev *pdev, drm_via_sg_info_t *vsg)
                kfree(vsg->desc_pages);
                /* fall through */
        case dr_via_pages_locked:
-               for (i = 0; i < vsg->num_pages; ++i) {
-                       if (NULL != (page = vsg->pages[i])) {
-                               if (!PageReserved(page) && (DMA_FROM_DEVICE == vsg->direction))
-                                       SetPageDirty(page);
-                               put_page(page);
-                       }
-               }
+               put_user_pages_dirty_lock(vsg->pages, vsg->num_pages,
+                                         (vsg->direction == DMA_FROM_DEVICE));
                /* fall through */
        case dr_via_pages_alloc:
                vfree(vsg->pages);
index a958b96..1ecb512 100644 (file)
@@ -273,6 +273,15 @@ config HID_CP2112
        and gpiochip to expose these functions of the CP2112. The
        customizable USB descriptor fields are exposed as sysfs attributes.
 
+config HID_CREATIVE_SB0540
+       tristate "Creative SB0540 infrared receiver"
+       depends on USB_HID
+       help
+       Support for Creative infrared SB0540-compatible remote controls, such
+       as the RM-1500 and RM-1800 remotes.
+
+       Say Y here if you want support for Creative SB0540 infrared receiver.
+
 config HID_CYPRESS
        tristate "Cypress mouse and barcode readers"
        depends on HID
index cc5d827..0c03308 100644 (file)
@@ -27,6 +27,7 @@ obj-$(CONFIG_HID_ALPS)                += hid-alps.o
 obj-$(CONFIG_HID_ACRUX)                += hid-axff.o
 obj-$(CONFIG_HID_APPLE)                += hid-apple.o
 obj-$(CONFIG_HID_APPLEIR)      += hid-appleir.o
+obj-$(CONFIG_HID_CREATIVE_SB0540)      += hid-creative-sb0540.o
 obj-$(CONFIG_HID_ASUS)         += hid-asus.o
 obj-$(CONFIG_HID_AUREAL)       += hid-aureal.o
 obj-$(CONFIG_HID_BELKIN)       += hid-belkin.o
index 81df62f..6ac8bec 100644 (file)
@@ -54,7 +54,6 @@ MODULE_PARM_DESC(swap_opt_cmd, "Swap the Option (\"Alt\") and Command (\"Flag\")
 struct apple_sc {
        unsigned long quirks;
        unsigned int fn_on;
-       DECLARE_BITMAP(pressed_fn, KEY_CNT);
        DECLARE_BITMAP(pressed_numlock, KEY_CNT);
 };
 
@@ -181,6 +180,8 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
 {
        struct apple_sc *asc = hid_get_drvdata(hid);
        const struct apple_key_translation *trans, *table;
+       bool do_translate;
+       u16 code = 0;
 
        if (usage->code == KEY_FN) {
                asc->fn_on = !!value;
@@ -189,8 +190,6 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
        }
 
        if (fnmode) {
-               int do_translate;
-
                if (hid->product >= USB_DEVICE_ID_APPLE_WELLSPRING4_ANSI &&
                                hid->product <= USB_DEVICE_ID_APPLE_WELLSPRING4A_JIS)
                        table = macbookair_fn_keys;
@@ -202,25 +201,33 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
                trans = apple_find_translation (table, usage->code);
 
                if (trans) {
-                       if (test_bit(usage->code, asc->pressed_fn))
-                               do_translate = 1;
-                       else if (trans->flags & APPLE_FLAG_FKEY)
-                               do_translate = (fnmode == 2 && asc->fn_on) ||
-                                       (fnmode == 1 && !asc->fn_on);
-                       else
-                               do_translate = asc->fn_on;
-
-                       if (do_translate) {
-                               if (value)
-                                       set_bit(usage->code, asc->pressed_fn);
-                               else
-                                       clear_bit(usage->code, asc->pressed_fn);
-
-                               input_event(input, usage->type, trans->to,
-                                               value);
-
-                               return 1;
+                       if (test_bit(trans->from, input->key))
+                               code = trans->from;
+                       else if (test_bit(trans->to, input->key))
+                               code = trans->to;
+
+                       if (!code) {
+                               if (trans->flags & APPLE_FLAG_FKEY) {
+                                       switch (fnmode) {
+                                       case 1:
+                                               do_translate = !asc->fn_on;
+                                               break;
+                                       case 2:
+                                               do_translate = asc->fn_on;
+                                               break;
+                                       default:
+                                               /* should never happen */
+                                               do_translate = false;
+                                       }
+                               } else {
+                                       do_translate = asc->fn_on;
+                               }
+
+                               code = do_translate ? trans->to : trans->from;
                        }
+
+                       input_event(input, usage->type, code, value);
+                       return 1;
                }
 
                if (asc->quirks & APPLE_NUMLOCK_EMULATION &&
index 210b81a..3eaee2c 100644 (file)
@@ -1311,8 +1311,8 @@ u32 hid_field_extract(const struct hid_device *hid, u8 *report,
                        unsigned offset, unsigned n)
 {
        if (n > 32) {
-               hid_warn(hid, "hid_field_extract() called with n (%d) > 32! (%s)\n",
-                        n, current->comm);
+               hid_warn_once(hid, "%s() called with n (%d) > 32! (%s)\n",
+                             __func__, n, current->comm);
                n = 32;
        }
 
index e0bb7b3..4ff3bc1 100644 (file)
@@ -207,7 +207,7 @@ static int cougar_probe(struct hid_device *hdev,
        error = hid_parse(hdev);
        if (error) {
                hid_err(hdev, "parse failed\n");
-               goto fail;
+               return error;
        }
 
        if (hdev->collection->usage == COUGAR_VENDOR_USAGE) {
@@ -219,7 +219,7 @@ static int cougar_probe(struct hid_device *hdev,
        error = hid_hw_start(hdev, connect_mask);
        if (error) {
                hid_err(hdev, "hw start failed\n");
-               goto fail;
+               return error;
        }
 
        error = cougar_bind_shared_data(hdev, cougar);
@@ -249,8 +249,6 @@ static int cougar_probe(struct hid_device *hdev,
 
 fail_stop_and_cleanup:
        hid_hw_stop(hdev);
-fail:
-       hid_set_drvdata(hdev, NULL);
        return error;
 }
 
diff --git a/drivers/hid/hid-creative-sb0540.c b/drivers/hid/hid-creative-sb0540.c
new file mode 100644 (file)
index 0000000..b4c8e7a
--- /dev/null
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * HID driver for the Creative SB0540 receiver
+ *
+ * Copyright (C) 2019 Red Hat Inc. All Rights Reserved
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/hid.h>
+#include <linux/module.h>
+#include "hid-ids.h"
+
+MODULE_AUTHOR("Bastien Nocera <hadess@hadess.net>");
+MODULE_DESCRIPTION("HID Creative SB0540 receiver");
+MODULE_LICENSE("GPL");
+
+static const unsigned short creative_sb0540_key_table[] = {
+       KEY_POWER,
+       KEY_RESERVED,           /* text: 24bit */
+       KEY_RESERVED,           /* 24bit wheel up */
+       KEY_RESERVED,           /* 24bit wheel down */
+       KEY_RESERVED,           /* text: CMSS */
+       KEY_RESERVED,           /* CMSS wheel Up */
+       KEY_RESERVED,           /* CMSS wheel Down */
+       KEY_RESERVED,           /* text: EAX */
+       KEY_RESERVED,           /* EAX wheel up */
+       KEY_RESERVED,           /* EAX wheel down */
+       KEY_RESERVED,           /* text: 3D Midi */
+       KEY_RESERVED,           /* 3D Midi wheel up */
+       KEY_RESERVED,           /* 3D Midi wheel down */
+       KEY_MUTE,
+       KEY_VOLUMEUP,
+       KEY_VOLUMEDOWN,
+       KEY_UP,
+       KEY_LEFT,
+       KEY_RIGHT,
+       KEY_REWIND,
+       KEY_OK,
+       KEY_FASTFORWARD,
+       KEY_DOWN,
+       KEY_AGAIN,              /* text: Return, symbol: Jump to */
+       KEY_PLAY,               /* text: Start */
+       KEY_ESC,                /* text: Cancel */
+       KEY_RECORD,
+       KEY_OPTION,
+       KEY_MENU,               /* text: Display */
+       KEY_PREVIOUS,
+       KEY_PLAYPAUSE,
+       KEY_NEXT,
+       KEY_SLOW,
+       KEY_STOP,
+       KEY_NUMERIC_1,
+       KEY_NUMERIC_2,
+       KEY_NUMERIC_3,
+       KEY_NUMERIC_4,
+       KEY_NUMERIC_5,
+       KEY_NUMERIC_6,
+       KEY_NUMERIC_7,
+       KEY_NUMERIC_8,
+       KEY_NUMERIC_9,
+       KEY_NUMERIC_0
+};
+
+/*
+ * Codes and keys from lirc's
+ * remotes/creative/lircd.conf.alsa_usb
+ * order and size must match creative_sb0540_key_table[] above
+ */
+static const unsigned short creative_sb0540_codes[] = {
+       0x619E,
+       0x916E,
+       0x926D,
+       0x936C,
+       0x718E,
+       0x946B,
+       0x956A,
+       0x8C73,
+       0x9669,
+       0x9768,
+       0x9867,
+       0x9966,
+       0x9A65,
+       0x6E91,
+       0x629D,
+       0x639C,
+       0x7B84,
+       0x6B94,
+       0x728D,
+       0x8778,
+       0x817E,
+       0x758A,
+       0x8D72,
+       0x8E71,
+       0x8877,
+       0x7C83,
+       0x738C,
+       0x827D,
+       0x7689,
+       0x7F80,
+       0x7986,
+       0x7A85,
+       0x7D82,
+       0x857A,
+       0x8B74,
+       0x8F70,
+       0x906F,
+       0x8A75,
+       0x847B,
+       0x7887,
+       0x8976,
+       0x837C,
+       0x7788,
+       0x807F
+};
+
+struct creative_sb0540 {
+       struct input_dev *input_dev;
+       struct hid_device *hid;
+       unsigned short keymap[ARRAY_SIZE(creative_sb0540_key_table)];
+};
+
+static inline u64 reverse(u64 data, int bits)
+{
+       int i;
+       u64 c;
+
+       c = 0;
+       for (i = 0; i < bits; i++) {
+               c |= (u64) (((data & (((u64) 1) << i)) ? 1 : 0))
+                       << (bits - 1 - i);
+       }
+       return (c);
+}
+
+static int get_key(struct creative_sb0540 *creative_sb0540, u64 keycode)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(creative_sb0540_codes); i++) {
+               if (creative_sb0540_codes[i] == keycode)
+                       return creative_sb0540->keymap[i];
+       }
+
+       return 0;
+
+}
+
+static int creative_sb0540_raw_event(struct hid_device *hid,
+       struct hid_report *report, u8 *data, int len)
+{
+       struct creative_sb0540 *creative_sb0540 = hid_get_drvdata(hid);
+       u64 code, main_code;
+       int key;
+
+       if (len != 6)
+               return 0;
+
+       /* From daemons/hw_hiddev.c sb0540_rec() in lirc */
+       code = reverse(data[5], 8);
+       main_code = (code << 8) + ((~code) & 0xff);
+
+       /*
+        * Flip to get values in the same format as
+        * remotes/creative/lircd.conf.alsa_usb in lirc
+        */
+       main_code = ((main_code & 0xff) << 8) +
+               ((main_code & 0xff00) >> 8);
+
+       key = get_key(creative_sb0540, main_code);
+       if (key == 0 || key == KEY_RESERVED) {
+               hid_err(hid, "Could not get a key for main_code %llX\n",
+                       main_code);
+               return 0;
+       }
+
+       input_report_key(creative_sb0540->input_dev, key, 1);
+       input_report_key(creative_sb0540->input_dev, key, 0);
+       input_sync(creative_sb0540->input_dev);
+
+       /* let hidraw and hiddev handle the report */
+       return 0;
+}
+
+static int creative_sb0540_input_configured(struct hid_device *hid,
+               struct hid_input *hidinput)
+{
+       struct input_dev *input_dev = hidinput->input;
+       struct creative_sb0540 *creative_sb0540 = hid_get_drvdata(hid);
+       int i;
+
+       creative_sb0540->input_dev = input_dev;
+
+       input_dev->keycode = creative_sb0540->keymap;
+       input_dev->keycodesize = sizeof(unsigned short);
+       input_dev->keycodemax = ARRAY_SIZE(creative_sb0540->keymap);
+
+       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+
+       memcpy(creative_sb0540->keymap, creative_sb0540_key_table,
+               sizeof(creative_sb0540->keymap));
+       for (i = 0; i < ARRAY_SIZE(creative_sb0540_key_table); i++)
+               set_bit(creative_sb0540->keymap[i], input_dev->keybit);
+       clear_bit(KEY_RESERVED, input_dev->keybit);
+
+       return 0;
+}
+
+static int creative_sb0540_input_mapping(struct hid_device *hid,
+               struct hid_input *hi, struct hid_field *field,
+               struct hid_usage *usage, unsigned long **bit, int *max)
+{
+       /*
+        * We are remapping the keys ourselves, so ignore the hid-input
+        * keymap processing.
+        */
+       return -1;
+}
+
+static int creative_sb0540_probe(struct hid_device *hid,
+               const struct hid_device_id *id)
+{
+       int ret;
+       struct creative_sb0540 *creative_sb0540;
+
+       creative_sb0540 = devm_kzalloc(&hid->dev,
+               sizeof(struct creative_sb0540), GFP_KERNEL);
+
+       if (!creative_sb0540)
+               return -ENOMEM;
+
+       creative_sb0540->hid = hid;
+
+       /* force input as some remotes bypass the input registration */
+       hid->quirks |= HID_QUIRK_HIDINPUT_FORCE;
+
+       hid_set_drvdata(hid, creative_sb0540);
+
+       ret = hid_parse(hid);
+       if (ret) {
+               hid_err(hid, "parse failed\n");
+               return ret;
+       }
+
+       ret = hid_hw_start(hid, HID_CONNECT_DEFAULT);
+       if (ret) {
+               hid_err(hid, "hw start failed\n");
+               return ret;
+       }
+
+       return ret;
+}
+
+static const struct hid_device_id creative_sb0540_devices[] = {
+       { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB0540) },
+       { }
+};
+MODULE_DEVICE_TABLE(hid, creative_sb0540_devices);
+
+static struct hid_driver creative_sb0540_driver = {
+       .name = "creative-sb0540",
+       .id_table = creative_sb0540_devices,
+       .raw_event = creative_sb0540_raw_event,
+       .input_configured = creative_sb0540_input_configured,
+       .probe = creative_sb0540_probe,
+       .input_mapping = creative_sb0540_input_mapping,
+};
+module_hid_driver(creative_sb0540_driver);
index 86c3173..699186f 100644 (file)
@@ -123,12 +123,6 @@ done:
        return ret;
 }
 
-static void gfrm_remove(struct hid_device *hdev)
-{
-       hid_hw_stop(hdev);
-       hid_set_drvdata(hdev, NULL);
-}
-
 static const struct hid_device_id gfrm_devices[] = {
        { HID_BLUETOOTH_DEVICE(0x58, 0x2000),
                .driver_data = GFRM100 },
@@ -142,7 +136,6 @@ static struct hid_driver gfrm_driver = {
        .name = "gfrm",
        .id_table = gfrm_devices,
        .probe = gfrm_probe,
-       .remove = gfrm_remove,
        .input_mapping = gfrm_input_mapping,
        .raw_event = gfrm_raw_event,
        .input_configured = gfrm_input_configured,
index 7795831..cc5b09b 100644 (file)
@@ -104,8 +104,8 @@ struct synthhid_input_report {
 
 #pragma pack(pop)
 
-#define INPUTVSC_SEND_RING_BUFFER_SIZE         (10*PAGE_SIZE)
-#define INPUTVSC_RECV_RING_BUFFER_SIZE         (10*PAGE_SIZE)
+#define INPUTVSC_SEND_RING_BUFFER_SIZE         (40 * 1024)
+#define INPUTVSC_RECV_RING_BUFFER_SIZE         (40 * 1024)
 
 
 enum pipe_prot_msg_type {
index 0a00be1..76969a2 100644 (file)
 #define USB_VENDOR_ID_CREATIVELABS     0x041e
 #define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51     0x322c
 #define USB_DEVICE_ID_PRODIKEYS_PCMIDI 0x2801
+#define USB_DEVICE_ID_CREATIVE_SB0540  0x3100
 
 #define USB_VENDOR_ID_CVTOUCH          0x1ff7
 #define USB_DEVICE_ID_CVTOUCH_SCREEN   0x0013
 #define USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A  0x0b4a
 #define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE         0x134a
 #define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_094A    0x094a
+#define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_0941    0x0941
 #define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_0641    0x0641
 
 #define USB_VENDOR_ID_HUION            0x256c
 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER           0xc52f
 #define USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER_2     0xc532
 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_2         0xc534
-#define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED        0xc539
+#define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1      0xc539
+#define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_1    0xc53f
 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_POWERPLAY 0xc53a
 #define USB_DEVICE_ID_SPACETRAVELLER   0xc623
 #define USB_DEVICE_ID_SPACENAVIGATOR   0xc626
index 364bc7f..96fa2a2 100644 (file)
@@ -866,8 +866,6 @@ static void lenovo_remove_tpkbd(struct hid_device *hdev)
 
        led_classdev_unregister(&data_pointer->led_micmute);
        led_classdev_unregister(&data_pointer->led_mute);
-
-       hid_set_drvdata(hdev, NULL);
 }
 
 static void lenovo_remove_cptkbd(struct hid_device *hdev)
index 5008a3d..0dc7cdf 100644 (file)
@@ -818,7 +818,7 @@ static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
                if (!buf) {
                        ret = -ENOMEM;
-                       goto err_free;
+                       goto err_stop;
                }
 
                ret = hid_hw_raw_request(hdev, buf[0], buf, sizeof(cbuf),
@@ -850,9 +850,12 @@ static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id)
                ret = lg4ff_init(hdev);
 
        if (ret)
-               goto err_free;
+               goto err_stop;
 
        return 0;
+
+err_stop:
+       hid_hw_stop(hdev);
 err_free:
        kfree(drv_data);
        return ret;
@@ -863,8 +866,7 @@ static void lg_remove(struct hid_device *hdev)
        struct lg_drv_data *drv_data = hid_get_drvdata(hdev);
        if (drv_data->quirks & LG_FF4)
                lg4ff_deinit(hdev);
-       else
-               hid_hw_stop(hdev);
+       hid_hw_stop(hdev);
        kfree(drv_data);
 }
 
index cefba03..03f0220 100644 (file)
@@ -1477,7 +1477,6 @@ int lg4ff_deinit(struct hid_device *hid)
                }
        }
 #endif
-       hid_hw_stop(hid);
        drv_data->device_props = NULL;
 
        kfree(entry);
index cc47f94..bb50d6e 100644 (file)
@@ -380,9 +380,9 @@ static const char consumer_descriptor[] = {
        0x75, 0x10,             /* REPORT_SIZE (16)                    */
        0x95, 0x02,             /* REPORT_COUNT (2)                    */
        0x15, 0x01,             /* LOGICAL_MIN (1)                     */
-       0x26, 0x8C, 0x02,       /* LOGICAL_MAX (652)                   */
+       0x26, 0xFF, 0x02,       /* LOGICAL_MAX (767)                   */
        0x19, 0x01,             /* USAGE_MIN (1)                       */
-       0x2A, 0x8C, 0x02,       /* USAGE_MAX (652)                     */
+       0x2A, 0xFF, 0x02,       /* USAGE_MAX (767)                     */
        0x81, 0x00,             /* INPUT (Data Ary Abs)                */
        0xC0,                   /* END_COLLECTION                      */
 };                             /*                                     */
@@ -959,6 +959,7 @@ static void logi_hidpp_recv_queue_notif(struct hid_device *hdev,
                break;
        case 0x07:
                device_type = "eQUAD step 4 Gaming";
+               logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem);
                break;
        case 0x08:
                device_type = "eQUAD step 4 for gamepads";
@@ -968,7 +969,12 @@ static void logi_hidpp_recv_queue_notif(struct hid_device *hdev,
                logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem);
                break;
        case 0x0c:
-               device_type = "eQUAD Lightspeed";
+               device_type = "eQUAD Lightspeed 1";
+               logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem);
+               workitem.reports_supported |= STD_KEYBOARD;
+               break;
+       case 0x0d:
+               device_type = "eQUAD Lightspeed 1_1";
                logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem);
                workitem.reports_supported |= STD_KEYBOARD;
                break;
@@ -1734,14 +1740,14 @@ static int logi_dj_probe(struct hid_device *hdev,
                if (retval < 0) {
                        hid_err(hdev, "%s: logi_dj_recv_query_paired_devices error:%d\n",
                                __func__, retval);
-                       goto logi_dj_recv_query_paired_devices_failed;
+                       /*
+                        * This can happen with a KVM, let the probe succeed,
+                        * logi_dj_recv_queue_unknown_work will retry later.
+                        */
                }
        }
 
-       return retval;
-
-logi_dj_recv_query_paired_devices_failed:
-       hid_hw_close(hdev);
+       return 0;
 
 llopen_failed:
 switch_to_dj_mode_fail:
@@ -1832,9 +1838,17 @@ static const struct hid_device_id logi_dj_receivers[] = {
          HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
                         USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_2),
         .driver_data = recvr_type_hidpp},
+       { /* Logitech G700(s) receiver (0xc531) */
+         HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
+               0xc531),
+        .driver_data = recvr_type_gaming_hidpp},
        { /* Logitech lightspeed receiver (0xc539) */
          HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
-               USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED),
+               USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1),
+        .driver_data = recvr_type_gaming_hidpp},
+       { /* Logitech lightspeed receiver (0xc53f) */
+         HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
+               USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_1),
         .driver_data = recvr_type_gaming_hidpp},
        { /* Logitech 27 MHz HID++ 1.0 receiver (0xc513) */
          HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX3000_RECEIVER),
index b603c14..3cfeb16 100644 (file)
@@ -68,6 +68,7 @@ MODULE_LICENSE("GPL");
 #define MT_QUIRK_STICKY_FINGERS                BIT(16)
 #define MT_QUIRK_ASUS_CUSTOM_UP                BIT(17)
 #define MT_QUIRK_WIN8_PTP_BUTTONS      BIT(18)
+#define MT_QUIRK_SEPARATE_APP_REPORT   BIT(19)
 
 #define MT_INPUTMODE_TOUCHSCREEN       0x02
 #define MT_INPUTMODE_TOUCHPAD          0x03
@@ -103,6 +104,7 @@ struct mt_usages {
 struct mt_application {
        struct list_head list;
        unsigned int application;
+       unsigned int report_id;
        struct list_head mt_usages;     /* mt usages list */
 
        __s32 quirks;
@@ -203,6 +205,7 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app);
 #define MT_CLS_VTL                             0x0110
 #define MT_CLS_GOOGLE                          0x0111
 #define MT_CLS_RAZER_BLADE_STEALTH             0x0112
+#define MT_CLS_SMART_TECH                      0x0113
 
 #define MT_DEFAULT_MAXCONTACT  10
 #define MT_MAX_MAXCONTACT      250
@@ -263,7 +266,8 @@ static const struct mt_class mt_classes[] = {
                        MT_QUIRK_HOVERING |
                        MT_QUIRK_CONTACT_CNT_ACCURATE |
                        MT_QUIRK_STICKY_FINGERS |
-                       MT_QUIRK_WIN8_PTP_BUTTONS },
+                       MT_QUIRK_WIN8_PTP_BUTTONS,
+               .export_all_inputs = true },
        { .name = MT_CLS_EXPORT_ALL_INPUTS,
                .quirks = MT_QUIRK_ALWAYS_VALID |
                        MT_QUIRK_CONTACT_CNT_ACCURATE,
@@ -353,6 +357,12 @@ static const struct mt_class mt_classes[] = {
                        MT_QUIRK_CONTACT_CNT_ACCURATE |
                        MT_QUIRK_WIN8_PTP_BUTTONS,
        },
+       { .name = MT_CLS_SMART_TECH,
+               .quirks = MT_QUIRK_ALWAYS_VALID |
+                       MT_QUIRK_IGNORE_DUPLICATES |
+                       MT_QUIRK_CONTACT_CNT_ACCURATE |
+                       MT_QUIRK_SEPARATE_APP_REPORT,
+       },
        { }
 };
 
@@ -509,8 +519,9 @@ static struct mt_usages *mt_allocate_usage(struct hid_device *hdev,
 }
 
 static struct mt_application *mt_allocate_application(struct mt_device *td,
-                                                     unsigned int application)
+                                                     struct hid_report *report)
 {
+       unsigned int application = report->application;
        struct mt_application *mt_application;
 
        mt_application = devm_kzalloc(&td->hdev->dev, sizeof(*mt_application),
@@ -535,6 +546,7 @@ static struct mt_application *mt_allocate_application(struct mt_device *td,
        mt_application->scantime = DEFAULT_ZERO;
        mt_application->raw_cc = DEFAULT_ZERO;
        mt_application->quirks = td->mtclass.quirks;
+       mt_application->report_id = report->id;
 
        list_add_tail(&mt_application->list, &td->applications);
 
@@ -542,19 +554,23 @@ static struct mt_application *mt_allocate_application(struct mt_device *td,
 }
 
 static struct mt_application *mt_find_application(struct mt_device *td,
-                                                 unsigned int application)
+                                                 struct hid_report *report)
 {
+       unsigned int application = report->application;
        struct mt_application *tmp, *mt_application = NULL;
 
        list_for_each_entry(tmp, &td->applications, list) {
                if (application == tmp->application) {
-                       mt_application = tmp;
-                       break;
+                       if (!(td->mtclass.quirks & MT_QUIRK_SEPARATE_APP_REPORT) ||
+                           tmp->report_id == report->id) {
+                               mt_application = tmp;
+                               break;
+                       }
                }
        }
 
        if (!mt_application)
-               mt_application = mt_allocate_application(td, application);
+               mt_application = mt_allocate_application(td, report);
 
        return mt_application;
 }
@@ -571,7 +587,7 @@ static struct mt_report_data *mt_allocate_report_data(struct mt_device *td,
                return NULL;
 
        rdata->report = report;
-       rdata->application = mt_find_application(td, report->application);
+       rdata->application = mt_find_application(td, report);
 
        if (!rdata->application) {
                devm_kfree(&td->hdev->dev, rdata);
@@ -1561,6 +1577,9 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi)
        case HID_VD_ASUS_CUSTOM_MEDIA_KEYS:
                suffix = "Custom Media Keys";
                break;
+       case HID_DG_PEN:
+               suffix = "Stylus";
+               break;
        default:
                suffix = "UNKNOWN";
                break;
@@ -2022,6 +2041,10 @@ static const struct hid_device_id mt_devices[] = {
                HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
                        USB_VENDOR_ID_SYNAPTICS, 0x8323) },
 
+       /* Smart Tech panels */
+       { .driver_data = MT_CLS_SMART_TECH,
+               MT_USB_DEVICE(0x0b8c, 0x0092)},
+
        /* Stantum panels */
        { .driver_data = MT_CLS_CONFIDENCE,
                MT_USB_DEVICE(USB_VENDOR_ID_STANTUM_STM,
index 5f7a39a..1b5c632 100644 (file)
@@ -534,8 +534,7 @@ static int picolcd_probe(struct hid_device *hdev,
        data = kzalloc(sizeof(struct picolcd_data), GFP_KERNEL);
        if (data == NULL) {
                hid_err(hdev, "can't allocate space for Minibox PicoLCD device data\n");
-               error = -ENOMEM;
-               goto err_no_cleanup;
+               return -ENOMEM;
        }
 
        spin_lock_init(&data->lock);
@@ -597,9 +596,6 @@ err_cleanup_hid_hw:
        hid_hw_stop(hdev);
 err_cleanup_data:
        kfree(data);
-err_no_cleanup:
-       hid_set_drvdata(hdev, NULL);
-
        return error;
 }
 
@@ -635,7 +631,6 @@ static void picolcd_remove(struct hid_device *hdev)
        picolcd_exit_cir(data);
        picolcd_exit_keys(data);
 
-       hid_set_drvdata(hdev, NULL);
        mutex_destroy(&data->mutex);
        /* Finally, clean up the picolcd data itself */
        kfree(data);
index 21544eb..5a3b3d9 100644 (file)
@@ -551,10 +551,14 @@ static void pcmidi_setup_extra_keys(
 
 static int pcmidi_set_operational(struct pcmidi_snd *pm)
 {
+       int rc;
+
        if (pm->ifnum != 1)
                return 0; /* only set up ONCE for interace 1 */
 
-       pcmidi_get_output_report(pm);
+       rc = pcmidi_get_output_report(pm);
+       if (rc < 0)
+               return rc;
        pcmidi_submit_output_report(pm, 0xc1);
        return 0;
 }
@@ -683,7 +687,11 @@ static int pcmidi_snd_initialise(struct pcmidi_snd *pm)
        spin_lock_init(&pm->rawmidi_in_lock);
 
        init_sustain_timers(pm);
-       pcmidi_set_operational(pm);
+       err = pcmidi_set_operational(pm);
+       if (err < 0) {
+               pk_error("failed to find output report\n");
+               goto fail_register;
+       }
 
        /* register it */
        err = snd_card_register(card);
index 166f41f..c50bcd9 100644 (file)
@@ -92,6 +92,7 @@ static const struct hid_device_id hid_quirks[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A), HID_QUIRK_ALWAYS_POLL },
        { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL },
        { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_094A), HID_QUIRK_ALWAYS_POLL },
+       { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_0941), HID_QUIRK_ALWAYS_POLL },
        { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_0641), HID_QUIRK_ALWAYS_POLL },
        { HID_USB_DEVICE(USB_VENDOR_ID_IDEACOM, USB_DEVICE_ID_IDEACOM_IDC6680), HID_QUIRK_MULTI_INPUT },
        { HID_USB_DEVICE(USB_VENDOR_ID_INNOMEDIA, USB_DEVICE_ID_INNEX_GENESIS_ATARI), HID_QUIRK_MULTI_INPUT },
index be92a6f..94c7398 100644 (file)
@@ -742,7 +742,6 @@ static void sensor_hub_remove(struct hid_device *hdev)
        }
        spin_unlock_irqrestore(&data->lock, flags);
        mfd_remove_devices(&hdev->dev);
-       hid_set_drvdata(hdev, NULL);
        mutex_destroy(&data->mutex);
 }
 
index 49dd2d9..73c0f7a 100644 (file)
@@ -2811,7 +2811,6 @@ err_stop:
        sony_cancel_work_sync(sc);
        sony_remove_dev_list(sc);
        sony_release_device_id(sc);
-       hid_hw_stop(hdev);
        return ret;
 }
 
@@ -2876,6 +2875,7 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id)
         */
        if (!(hdev->claimed & HID_CLAIMED_INPUT)) {
                hid_err(hdev, "failed to claim input\n");
+               hid_hw_stop(hdev);
                return -ENODEV;
        }
 
index 006bd6f..bbc6ec1 100644 (file)
@@ -252,7 +252,7 @@ static __poll_t hidraw_poll(struct file *file, poll_table *wait)
 
        poll_wait(file, &list->hidraw->wait, wait);
        if (list->head != list->tail)
-               return EPOLLIN | EPOLLRDNORM;
+               return EPOLLIN | EPOLLRDNORM | EPOLLOUT;
        if (!list->hidraw->exist)
                return EPOLLERR | EPOLLHUP;
        return 0;
@@ -370,7 +370,7 @@ static long hidraw_ioctl(struct file *file, unsigned int cmd,
 
        mutex_lock(&minors_lock);
        dev = hidraw_table[minor];
-       if (!dev) {
+       if (!dev || !dev->exist) {
                ret = -ENODEV;
                goto out;
        }
index 90164fe..2a7c6e3 100644 (file)
@@ -169,9 +169,7 @@ static const struct i2c_hid_quirks {
        __u16 idProduct;
        __u32 quirks;
 } i2c_hid_quirks[] = {
-       { USB_VENDOR_ID_WEIDA, USB_DEVICE_ID_WEIDA_8752,
-               I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV },
-       { USB_VENDOR_ID_WEIDA, USB_DEVICE_ID_WEIDA_8755,
+       { USB_VENDOR_ID_WEIDA, HID_ANY_ID,
                I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV },
        { I2C_VENDOR_ID_HANTICK, I2C_PRODUCT_ID_HANTICK_5288,
                I2C_HID_QUIRK_NO_IRQ_AFTER_RESET |
index 5792a10..6c1e611 100644 (file)
@@ -78,5 +78,6 @@ irqreturn_t ish_irq_handler(int irq, void *dev_id);
 struct ishtp_device *ish_dev_init(struct pci_dev *pdev);
 int ish_hw_start(struct ishtp_device *dev);
 void ish_device_disable(struct ishtp_device *dev);
+int ish_disable_dma(struct ishtp_device *dev);
 
 #endif /* _ISHTP_HW_ISH_H_ */
index 18fe8af..8f8dfdf 100644 (file)
@@ -672,7 +672,7 @@ eoi:
  *
  * Return: 0 for success else error code.
  */
-static int ish_disable_dma(struct ishtp_device *dev)
+int ish_disable_dma(struct ishtp_device *dev)
 {
        unsigned int    dma_delay;
 
index 279567b..784dcc8 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/sched.h>
+#include <linux/suspend.h>
 #include <linux/interrupt.h>
 #include <linux/workqueue.h>
 #define CREATE_TRACE_POINTS
@@ -98,6 +99,11 @@ static const struct pci_device_id ish_invalid_pci_ids[] = {
        {}
 };
 
+static inline bool ish_should_enter_d0i3(struct pci_dev *pdev)
+{
+       return !pm_suspend_via_firmware() || pdev->device == CHV_DEVICE_ID;
+}
+
 /**
  * ish_probe() - PCI driver probe callback
  * @pdev:      pci device
@@ -148,7 +154,6 @@ static int ish_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        /* mapping IO device memory */
        hw->mem_addr = pcim_iomap_table(pdev)[0];
        ishtp->pdev = pdev;
-       pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3;
 
        /* request and enable interrupt */
        ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
@@ -185,7 +190,6 @@ static void ish_remove(struct pci_dev *pdev)
        struct ishtp_device *ishtp_dev = pci_get_drvdata(pdev);
 
        ishtp_bus_remove_all_clients(ishtp_dev, false);
-       pdev->dev_flags &= ~PCI_DEV_FLAGS_NO_D3;
        ish_device_disable(ishtp_dev);
 }
 
@@ -207,17 +211,13 @@ static void __maybe_unused ish_resume_handler(struct work_struct *work)
 {
        struct pci_dev *pdev = to_pci_dev(ish_resume_device);
        struct ishtp_device *dev = pci_get_drvdata(pdev);
-       uint32_t fwsts;
        int ret;
 
-       /* Get ISH FW status */
-       fwsts = IPC_GET_ISH_FWSTS(dev->ops->get_fw_status(dev));
+       /* Check the NO_D3 flag to distinguish the resume paths */
+       if (pdev->dev_flags & PCI_DEV_FLAGS_NO_D3) {
+               pdev->dev_flags &= ~PCI_DEV_FLAGS_NO_D3;
+               disable_irq_wake(pdev->irq);
 
-       /*
-        * If currently, in ISH FW, sensor app is loaded or beyond that,
-        * it means ISH isn't powered off, in this case, send a resume message.
-        */
-       if (fwsts >= FWSTS_SENSOR_APP_LOADED) {
                ishtp_send_resume(dev);
 
                /* Waiting to get resume response */
@@ -225,16 +225,20 @@ static void __maybe_unused ish_resume_handler(struct work_struct *work)
                        ret = wait_event_interruptible_timeout(dev->resume_wait,
                                !dev->resume_flag,
                                msecs_to_jiffies(WAIT_FOR_RESUME_ACK_MS));
-       }
 
-       /*
-        * If in ISH FW, sensor app isn't loaded yet, or no resume response.
-        * That means this platform is not S0ix compatible, or something is
-        * wrong with ISH FW. So on resume, full reboot of ISH processor will
-        * happen, so need to go through init sequence again.
-        */
-       if (dev->resume_flag)
+               /*
+                * If the flag is not cleared, something is wrong with ISH FW.
+                * So on resume, need to go through init sequence again.
+                */
+               if (dev->resume_flag)
+                       ish_init(dev);
+       } else {
+               /*
+                * Resume from the D3, full reboot of ISH processor will happen,
+                * so need to go through init sequence again.
+                */
                ish_init(dev);
+       }
 }
 
 /**
@@ -250,23 +254,43 @@ static int __maybe_unused ish_suspend(struct device *device)
        struct pci_dev *pdev = to_pci_dev(device);
        struct ishtp_device *dev = pci_get_drvdata(pdev);
 
-       enable_irq_wake(pdev->irq);
-       /*
-        * If previous suspend hasn't been asnwered then ISH is likely dead,
-        * don't attempt nested notification
-        */
-       if (dev->suspend_flag)
-               return  0;
-
-       dev->resume_flag = 0;
-       dev->suspend_flag = 1;
-       ishtp_send_suspend(dev);
-
-       /* 25 ms should be enough for live ISH to flush all IPC buf */
-       if (dev->suspend_flag)
-               wait_event_interruptible_timeout(dev->suspend_wait,
-                                                !dev->suspend_flag,
-                                                 msecs_to_jiffies(25));
+       if (ish_should_enter_d0i3(pdev)) {
+               /*
+                * If previous suspend hasn't been answered then ISH is likely
+                * dead, don't attempt nested notification
+                */
+               if (dev->suspend_flag)
+                       return  0;
+
+               dev->resume_flag = 0;
+               dev->suspend_flag = 1;
+               ishtp_send_suspend(dev);
+
+               /* 25 ms should be enough for live ISH to flush all IPC buf */
+               if (dev->suspend_flag)
+                       wait_event_interruptible_timeout(dev->suspend_wait,
+                                       !dev->suspend_flag,
+                                       msecs_to_jiffies(25));
+
+               if (dev->suspend_flag) {
+                       /*
+                        * It looks like FW halt, clear the DMA bit, and put
+                        * ISH into D3, and FW would reset on resume.
+                        */
+                       ish_disable_dma(dev);
+               } else {
+                       /* Set the NO_D3 flag, the ISH would enter D0i3 */
+                       pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3;
+
+                       enable_irq_wake(pdev->irq);
+               }
+       } else {
+               /*
+                * Clear the DMA bit before putting ISH into D3,
+                * or ISH FW would reset automatically.
+                */
+               ish_disable_dma(dev);
+       }
 
        return 0;
 }
@@ -288,7 +312,6 @@ static int __maybe_unused ish_resume(struct device *device)
        ish_resume_device = device;
        dev->resume_flag = 1;
 
-       disable_irq_wake(pdev->irq);
        schedule_work(&resume_work);
 
        return 0;
index 4e11cc6..1f9bc44 100644 (file)
@@ -428,7 +428,7 @@ static __poll_t hiddev_poll(struct file *file, poll_table *wait)
 
        poll_wait(file, &list->hiddev->wait, wait);
        if (list->head != list->tail)
-               return EPOLLIN | EPOLLRDNORM;
+               return EPOLLIN | EPOLLRDNORM | EPOLLOUT;
        if (!list->hiddev->exist)
                return EPOLLERR | EPOLLHUP;
        return 0;
index 53bddb5..5ded94b 100644 (file)
@@ -88,7 +88,7 @@ static void wacom_wac_queue_flush(struct hid_device *hdev,
 }
 
 static int wacom_wac_pen_serial_enforce(struct hid_device *hdev,
-               struct hid_report *report, u8 *raw_data, int size)
+               struct hid_report *report, u8 *raw_data, int report_size)
 {
        struct wacom *wacom = hid_get_drvdata(hdev);
        struct wacom_wac *wacom_wac = &wacom->wacom_wac;
@@ -149,7 +149,8 @@ static int wacom_wac_pen_serial_enforce(struct hid_device *hdev,
        if (flush)
                wacom_wac_queue_flush(hdev, &wacom_wac->pen_fifo);
        else if (insert)
-               wacom_wac_queue_insert(hdev, &wacom_wac->pen_fifo, raw_data, size);
+               wacom_wac_queue_insert(hdev, &wacom_wac->pen_fifo,
+                                      raw_data, report_size);
 
        return insert && !flush;
 }
@@ -2176,7 +2177,7 @@ static void wacom_update_name(struct wacom *wacom, const char *suffix)
 {
        struct wacom_wac *wacom_wac = &wacom->wacom_wac;
        struct wacom_features *features = &wacom_wac->features;
-       char name[WACOM_NAME_MAX];
+       char name[WACOM_NAME_MAX - 20]; /* Leave some room for suffixes */
 
        /* Generic devices name unspecified */
        if ((features->type == HID_GENERIC) && !strcmp("Wacom HID", features->name)) {
@@ -2718,14 +2719,12 @@ static int wacom_probe(struct hid_device *hdev,
        wacom_wac->features = *((struct wacom_features *)id->driver_data);
        features = &wacom_wac->features;
 
-       if (features->check_for_hid_type && features->hid_type != hdev->type) {
-               error = -ENODEV;
-               goto fail;
-       }
+       if (features->check_for_hid_type && features->hid_type != hdev->type)
+               return -ENODEV;
 
        error = kfifo_alloc(&wacom_wac->pen_fifo, WACOM_PKGLEN_MAX, GFP_KERNEL);
        if (error)
-               goto fail;
+               return error;
 
        wacom_wac->hid_data.inputmode = -1;
        wacom_wac->mode_report = -1;
@@ -2743,12 +2742,12 @@ static int wacom_probe(struct hid_device *hdev,
        error = hid_parse(hdev);
        if (error) {
                hid_err(hdev, "parse failed\n");
-               goto fail;
+               return error;
        }
 
        error = wacom_parse_and_register(wacom, false);
        if (error)
-               goto fail;
+               return error;
 
        if (hdev->bus == BUS_BLUETOOTH) {
                error = device_create_file(&hdev->dev, &dev_attr_speed);
@@ -2759,10 +2758,6 @@ static int wacom_probe(struct hid_device *hdev,
        }
 
        return 0;
-
-fail:
-       hid_set_drvdata(hdev, NULL);
-       return error;
 }
 
 static void wacom_remove(struct hid_device *hdev)
@@ -2791,8 +2786,6 @@ static void wacom_remove(struct hid_device *hdev)
                wacom_release_resources(wacom);
 
        kfifo_free(&wacom_wac->pen_fifo);
-
-       hid_set_drvdata(hdev, NULL);
 }
 
 #ifdef CONFIG_PM
index 1713235..2b0a5b8 100644 (file)
@@ -251,7 +251,7 @@ static int wacom_dtu_irq(struct wacom_wac *wacom)
 
 static int wacom_dtus_irq(struct wacom_wac *wacom)
 {
-       char *data = wacom->data;
+       unsigned char *data = wacom->data;
        struct input_dev *input = wacom->pen_input;
        unsigned short prox, pressure = 0;
 
@@ -483,6 +483,8 @@ static int wacom_intuos_pad(struct wacom_wac *wacom)
        int ring1 = 0, ring2 = 0;
        int strip1 = 0, strip2 = 0;
        bool prox = false;
+       bool wrench = false, keyboard = false, mute_touch = false, menu = false,
+            info = false;
 
        /* pad packets. Works as a second tool and is always in prox */
        if (!(data[0] == WACOM_REPORT_INTUOSPAD || data[0] == WACOM_REPORT_INTUOS5PAD ||
@@ -512,10 +514,32 @@ static int wacom_intuos_pad(struct wacom_wac *wacom)
                keys = ((data[3] & 0x1C) ? 1<<2 : 0) |
                       ((data[4] & 0xE0) ? 1<<1 : 0) |
                       ((data[4] & 0x07) ? 1<<0 : 0);
+               keyboard = !!(data[4] & 0xE0);
+               info = !!(data[3] & 0x1C);
+
+               if (features->oPid) {
+                       mute_touch = !!(data[4] & 0x07);
+                       if (mute_touch)
+                               wacom->shared->is_touch_on =
+                                       !wacom->shared->is_touch_on;
+               } else {
+                       wrench = !!(data[4] & 0x07);
+               }
        } else if (features->type == WACOM_27QHD) {
                nkeys = 3;
                keys = data[2] & 0x07;
 
+               wrench = !!(data[2] & 0x01);
+               keyboard = !!(data[2] & 0x02);
+
+               if (features->oPid) {
+                       mute_touch = !!(data[2] & 0x04);
+                       if (mute_touch)
+                               wacom->shared->is_touch_on =
+                                       !wacom->shared->is_touch_on;
+               } else {
+                       menu = !!(data[2] & 0x04);
+               }
                input_report_abs(input, ABS_X, be16_to_cpup((__be16 *)&data[4]));
                input_report_abs(input, ABS_Y, be16_to_cpup((__be16 *)&data[6]));
                input_report_abs(input, ABS_Z, be16_to_cpup((__be16 *)&data[8]));
@@ -561,6 +585,9 @@ static int wacom_intuos_pad(struct wacom_wac *wacom)
                        if (features->type == WACOM_22HD) {
                                nkeys = 3;
                                keys = data[9] & 0x07;
+
+                               info = !!(data[9] & 0x01);
+                               wrench = !!(data[9] & 0x02);
                        }
                } else {
                        buttons = ((data[6] & 0x10) << 5)  |
@@ -572,7 +599,7 @@ static int wacom_intuos_pad(struct wacom_wac *wacom)
                strip2 = ((data[3] & 0x1f) << 8) | data[4];
        }
 
-       prox = (buttons & ~(~0 << nbuttons)) | (keys & ~(~0 << nkeys)) |
+       prox = (buttons & ~(~0U << nbuttons)) | (keys & ~(~0U << nkeys)) |
               (ring1 & 0x80) | (ring2 & 0x80) | strip1 | strip2;
 
        wacom_report_numbered_buttons(input, nbuttons, buttons);
@@ -580,6 +607,18 @@ static int wacom_intuos_pad(struct wacom_wac *wacom)
        for (i = 0; i < nkeys; i++)
                input_report_key(input, KEY_PROG1 + i, keys & (1 << i));
 
+       input_report_key(input, KEY_BUTTONCONFIG, wrench);
+       input_report_key(input, KEY_ONSCREEN_KEYBOARD, keyboard);
+       input_report_key(input, KEY_CONTROLPANEL, menu);
+       input_report_key(input, KEY_INFO, info);
+
+       if (wacom->shared && wacom->shared->touch_input) {
+               input_report_switch(wacom->shared->touch_input,
+                                   SW_MUTE_DEVICE,
+                                   !wacom->shared->is_touch_on);
+               input_sync(wacom->shared->touch_input);
+       }
+
        input_report_abs(input, ABS_RX, strip1);
        input_report_abs(input, ABS_RY, strip2);
 
@@ -1483,6 +1522,12 @@ static int wacom_24hdt_irq(struct wacom_wac *wacom)
        int byte_per_packet = WACOM_BYTES_PER_24HDT_PACKET;
        int y_offset = 2;
 
+       if (wacom->shared->has_mute_touch_switch &&
+           !wacom->shared->is_touch_on) {
+               if (!wacom->shared->touch_down)
+                       return 0;
+       }
+
        if (wacom->features.type == WACOM_27QHDT) {
                current_num_contacts = data[63];
                num_contacts_left = 10;
@@ -2051,14 +2096,14 @@ static void wacom_wac_pad_event(struct hid_device *hdev, struct hid_field *field
                    (hdev->product == 0x34d || hdev->product == 0x34e ||  /* MobileStudio Pro */
                     hdev->product == 0x357 || hdev->product == 0x358 ||  /* Intuos Pro 2 */
                     hdev->product == 0x392 ||                            /* Intuos Pro 2 */
-                    hdev->product == 0x399)) {                           /* MobileStudio Pro */
+                    hdev->product == 0x398 || hdev->product == 0x399)) { /* MobileStudio Pro */
                        value = (field->logical_maximum - value);
 
                        if (hdev->product == 0x357 || hdev->product == 0x358 ||
                            hdev->product == 0x392)
                                value = wacom_offset_rotation(input, usage, value, 3, 16);
                        else if (hdev->product == 0x34d || hdev->product == 0x34e ||
-                                hdev->product == 0x399)
+                                hdev->product == 0x398 || hdev->product == 0x399)
                                value = wacom_offset_rotation(input, usage, value, 1, 2);
                }
                else {
@@ -3815,6 +3860,14 @@ int wacom_setup_touch_input_capabilities(struct input_dev *input_dev,
                /* fall through */
 
        case WACOM_27QHDT:
+               if (wacom_wac->shared->touch->product == 0x32C ||
+                   wacom_wac->shared->touch->product == 0xF6) {
+                       input_dev->evbit[0] |= BIT_MASK(EV_SW);
+                       __set_bit(SW_MUTE_DEVICE, input_dev->swbit);
+                       wacom_wac->shared->has_mute_touch_switch = true;
+               }
+               /* fall through */
+
        case MTSCREEN:
        case MTTPC:
        case MTTPC_B:
@@ -4050,6 +4103,12 @@ int wacom_setup_pad_input_capabilities(struct input_dev *input_dev,
                __set_bit(KEY_PROG2, input_dev->keybit);
                __set_bit(KEY_PROG3, input_dev->keybit);
 
+               __set_bit(KEY_ONSCREEN_KEYBOARD, input_dev->keybit);
+               __set_bit(KEY_INFO, input_dev->keybit);
+
+               if (!features->oPid)
+                       __set_bit(KEY_BUTTONCONFIG, input_dev->keybit);
+
                input_set_abs_params(input_dev, ABS_WHEEL, 0, 71, 0, 0);
                input_set_abs_params(input_dev, ABS_THROTTLE, 0, 71, 0, 0);
                break;
@@ -4058,6 +4117,12 @@ int wacom_setup_pad_input_capabilities(struct input_dev *input_dev,
                __set_bit(KEY_PROG1, input_dev->keybit);
                __set_bit(KEY_PROG2, input_dev->keybit);
                __set_bit(KEY_PROG3, input_dev->keybit);
+
+               __set_bit(KEY_ONSCREEN_KEYBOARD, input_dev->keybit);
+               __set_bit(KEY_BUTTONCONFIG, input_dev->keybit);
+
+               if (!features->oPid)
+                       __set_bit(KEY_CONTROLPANEL, input_dev->keybit);
                input_set_abs_params(input_dev, ABS_X, -2048, 2048, 0, 0);
                input_abs_set_res(input_dev, ABS_X, 1024); /* points/g */
                input_set_abs_params(input_dev, ABS_Y, -2048, 2048, 0, 0);
@@ -4071,6 +4136,9 @@ int wacom_setup_pad_input_capabilities(struct input_dev *input_dev,
                __set_bit(KEY_PROG1, input_dev->keybit);
                __set_bit(KEY_PROG2, input_dev->keybit);
                __set_bit(KEY_PROG3, input_dev->keybit);
+
+               __set_bit(KEY_BUTTONCONFIG, input_dev->keybit);
+               __set_bit(KEY_INFO, input_dev->keybit);
                /* fall through */
 
        case WACOM_21UX2:
index 0253e76..365b5d5 100644 (file)
@@ -291,7 +291,7 @@ static void ssip_set_rxstate(struct ssi_protocol *ssi, unsigned int state)
                /* CMT speech workaround */
                if (atomic_read(&ssi->tx_usecnt))
                        break;
-               /* Otherwise fall through */
+               /* Else, fall through */
        case RECEIVING:
                mod_timer(&ssi->keep_alive, jiffies +
                                                msecs_to_jiffies(SSIP_KATOUT));
@@ -466,9 +466,10 @@ static void ssip_keep_alive(struct timer_list *t)
                case SEND_READY:
                        if (atomic_read(&ssi->tx_usecnt) == 0)
                                break;
+                       /* Fall through */
                        /*
-                        * Fall through. Workaround for cmt-speech
-                        * in that case we relay on audio timers.
+                        * Workaround for cmt-speech in that case
+                        * we relay on audio timers.
                         */
                case SEND_IDLE:
                        spin_unlock(&ssi->lock);
index 0cba567..4bc4a20 100644 (file)
@@ -370,10 +370,8 @@ static int ssi_add_controller(struct hsi_controller *ssi,
        if (err < 0)
                goto out_err;
        err = platform_get_irq_byname(pd, "gdd_mpu");
-       if (err < 0) {
-               dev_err(&pd->dev, "GDD IRQ resource missing\n");
+       if (err < 0)
                goto out_err;
-       }
        omap_ssi->gdd_irq = err;
        tasklet_init(&omap_ssi->gdd_tasklet, ssi_gdd_tasklet,
                                                        (unsigned long)ssi);
index 2cd9311..a0cb5be 100644 (file)
@@ -1038,10 +1038,8 @@ static int ssi_port_irq(struct hsi_port *port, struct platform_device *pd)
        int err;
 
        err = platform_get_irq(pd, 0);
-       if (err < 0) {
-               dev_err(&port->device, "Port IRQ resource missing\n");
+       if (err < 0)
                return err;
-       }
        omap_port->irq = err;
        err = devm_request_threaded_irq(&port->device, omap_port->irq, NULL,
                                ssi_pio_thread, IRQF_ONESHOT, "SSI PORT", port);
index addcef5..8eb1675 100644 (file)
@@ -407,7 +407,15 @@ void hv_process_channel_removal(struct vmbus_channel *channel)
                cpumask_clear_cpu(channel->target_cpu,
                                  &primary_channel->alloced_cpus_in_node);
 
-       vmbus_release_relid(channel->offermsg.child_relid);
+       /*
+        * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
+        * the relid is invalidated; after hibernation, when the user-space app
+        * destroys the channel, the relid is INVALID_RELID, and in this case
+        * it's unnecessary and unsafe to release the old relid, since the same
+        * relid can refer to a completely different channel now.
+        */
+       if (channel->offermsg.child_relid != INVALID_RELID)
+               vmbus_release_relid(channel->offermsg.child_relid);
 
        free_channel(channel);
 }
@@ -545,6 +553,10 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 
        mutex_lock(&vmbus_connection.channel_mutex);
 
+       /* Remember the channels that should be cleaned up upon suspend. */
+       if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
+               atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
+
        /*
         * Now that we have acquired the channel_mutex,
         * we can release the potentially racing rescind thread.
@@ -847,6 +859,67 @@ void vmbus_initiate_unload(bool crash)
                vmbus_wait_for_unload();
 }
 
+static void check_ready_for_resume_event(void)
+{
+       /*
+        * If all the old primary channels have been fixed up, then it's safe
+        * to resume.
+        */
+       if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
+               complete(&vmbus_connection.ready_for_resume_event);
+}
+
+static void vmbus_setup_channel_state(struct vmbus_channel *channel,
+                                     struct vmbus_channel_offer_channel *offer)
+{
+       /*
+        * Setup state for signalling the host.
+        */
+       channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
+
+       if (vmbus_proto_version != VERSION_WS2008) {
+               channel->is_dedicated_interrupt =
+                               (offer->is_dedicated_interrupt != 0);
+               channel->sig_event = offer->connection_id;
+       }
+
+       memcpy(&channel->offermsg, offer,
+              sizeof(struct vmbus_channel_offer_channel));
+       channel->monitor_grp = (u8)offer->monitorid / 32;
+       channel->monitor_bit = (u8)offer->monitorid % 32;
+}
+
+/*
+ * find_primary_channel_by_offer - Get the channel object given the new offer.
+ * This is only used in the resume path of hibernation.
+ */
+static struct vmbus_channel *
+find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
+{
+       struct vmbus_channel *channel = NULL, *iter;
+       const guid_t *inst1, *inst2;
+
+       /* Ignore sub-channel offers. */
+       if (offer->offer.sub_channel_index != 0)
+               return NULL;
+
+       mutex_lock(&vmbus_connection.channel_mutex);
+
+       list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
+               inst1 = &iter->offermsg.offer.if_instance;
+               inst2 = &offer->offer.if_instance;
+
+               if (guid_equal(inst1, inst2)) {
+                       channel = iter;
+                       break;
+               }
+       }
+
+       mutex_unlock(&vmbus_connection.channel_mutex);
+
+       return channel;
+}
+
 /*
  * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
  *
@@ -854,12 +927,58 @@ void vmbus_initiate_unload(bool crash)
 static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 {
        struct vmbus_channel_offer_channel *offer;
-       struct vmbus_channel *newchannel;
+       struct vmbus_channel *oldchannel, *newchannel;
+       size_t offer_sz;
 
        offer = (struct vmbus_channel_offer_channel *)hdr;
 
        trace_vmbus_onoffer(offer);
 
+       oldchannel = find_primary_channel_by_offer(offer);
+
+       if (oldchannel != NULL) {
+               atomic_dec(&vmbus_connection.offer_in_progress);
+
+               /*
+                * We're resuming from hibernation: all the sub-channel and
+                * hv_sock channels we had before the hibernation should have
+                * been cleaned up, and now we must be seeing a re-offered
+                * primary channel that we had before the hibernation.
+                */
+
+               WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
+               /* Fix up the relid. */
+               oldchannel->offermsg.child_relid = offer->child_relid;
+
+               offer_sz = sizeof(*offer);
+               if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0) {
+                       check_ready_for_resume_event();
+                       return;
+               }
+
+               /*
+                * This is not an error, since the host can also change the
+                * other field(s) of the offer, e.g. on WS RS5 (Build 17763),
+                * the offer->connection_id of the Mellanox VF vmbus device
+                * can change when the host reoffers the device upon resume.
+                */
+               pr_debug("vmbus offer changed: relid=%d\n",
+                        offer->child_relid);
+
+               print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET,
+                                    16, 4, &oldchannel->offermsg, offer_sz,
+                                    false);
+               print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET,
+                                    16, 4, offer, offer_sz, false);
+
+               /* Fix up the old channel. */
+               vmbus_setup_channel_state(oldchannel, offer);
+
+               check_ready_for_resume_event();
+
+               return;
+       }
+
        /* Allocate the channel object and save this offer. */
        newchannel = alloc_channel();
        if (!newchannel) {
@@ -869,25 +988,21 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
                return;
        }
 
-       /*
-        * Setup state for signalling the host.
-        */
-       newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID;
-
-       if (vmbus_proto_version != VERSION_WS2008) {
-               newchannel->is_dedicated_interrupt =
-                               (offer->is_dedicated_interrupt != 0);
-               newchannel->sig_event = offer->connection_id;
-       }
-
-       memcpy(&newchannel->offermsg, offer,
-              sizeof(struct vmbus_channel_offer_channel));
-       newchannel->monitor_grp = (u8)offer->monitorid / 32;
-       newchannel->monitor_bit = (u8)offer->monitorid % 32;
+       vmbus_setup_channel_state(newchannel, offer);
 
        vmbus_process_offer(newchannel);
 }
 
+static void check_ready_for_suspend_event(void)
+{
+       /*
+        * If all the sub-channels or hv_sock channels have been cleaned up,
+        * then it's safe to suspend.
+        */
+       if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
+               complete(&vmbus_connection.ready_for_suspend_event);
+}
+
 /*
  * vmbus_onoffer_rescind - Rescind offer handler.
  *
@@ -898,6 +1013,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
        struct vmbus_channel_rescind_offer *rescind;
        struct vmbus_channel *channel;
        struct device *dev;
+       bool clean_up_chan_for_suspend;
 
        rescind = (struct vmbus_channel_rescind_offer *)hdr;
 
@@ -937,6 +1053,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
                return;
        }
 
+       clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
+                                   is_sub_channel(channel);
        /*
         * Before setting channel->rescind in vmbus_rescind_cleanup(), we
         * should make sure the channel callback is not running any more.
@@ -962,6 +1080,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
        if (channel->device_obj) {
                if (channel->chn_rescind_callback) {
                        channel->chn_rescind_callback(channel);
+
+                       if (clean_up_chan_for_suspend)
+                               check_ready_for_suspend_event();
+
                        return;
                }
                /*
@@ -994,6 +1116,11 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
                }
                mutex_unlock(&vmbus_connection.channel_mutex);
        }
+
+       /* The "channel" may have been freed. Do not access it any longer. */
+
+       if (clean_up_chan_for_suspend)
+               check_ready_for_suspend_event();
 }
 
 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
index 09829e1..6e4c015 100644 (file)
 struct vmbus_connection vmbus_connection = {
        .conn_state             = DISCONNECTED,
        .next_gpadl_handle      = ATOMIC_INIT(0xE1E10),
+
+       .ready_for_suspend_event= COMPLETION_INITIALIZER(
+                                 vmbus_connection.ready_for_suspend_event),
+       .ready_for_resume_event = COMPLETION_INITIALIZER(
+                                 vmbus_connection.ready_for_resume_event),
 };
 EXPORT_SYMBOL_GPL(vmbus_connection);
 
@@ -59,8 +64,7 @@ static __u32 vmbus_get_next_version(__u32 current_version)
        }
 }
 
-static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
-                                       __u32 version)
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
 {
        int ret = 0;
        unsigned int cur_cpu;
index 6188fb7..fcc5279 100644 (file)
@@ -154,7 +154,7 @@ void hv_synic_free(void)
  * retrieve the initialized message and event pages.  Otherwise, we create and
  * initialize the message and event pages.
  */
-int hv_synic_init(unsigned int cpu)
+void hv_synic_enable_regs(unsigned int cpu)
 {
        struct hv_per_cpu_context *hv_cpu
                = per_cpu_ptr(hv_context.cpu_context, cpu);
@@ -196,6 +196,11 @@ int hv_synic_init(unsigned int cpu)
        sctrl.enable = 1;
 
        hv_set_synic_state(sctrl.as_uint64);
+}
+
+int hv_synic_init(unsigned int cpu)
+{
+       hv_synic_enable_regs(cpu);
 
        hv_stimer_init(cpu);
 
@@ -205,20 +210,45 @@ int hv_synic_init(unsigned int cpu)
 /*
  * hv_synic_cleanup - Cleanup routine for hv_synic_init().
  */
-int hv_synic_cleanup(unsigned int cpu)
+void hv_synic_disable_regs(unsigned int cpu)
 {
        union hv_synic_sint shared_sint;
        union hv_synic_simp simp;
        union hv_synic_siefp siefp;
        union hv_synic_scontrol sctrl;
+
+       hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+       shared_sint.masked = 1;
+
+       /* Need to correctly cleanup in the case of SMP!!! */
+       /* Disable the interrupt */
+       hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+       hv_get_simp(simp.as_uint64);
+       simp.simp_enabled = 0;
+       simp.base_simp_gpa = 0;
+
+       hv_set_simp(simp.as_uint64);
+
+       hv_get_siefp(siefp.as_uint64);
+       siefp.siefp_enabled = 0;
+       siefp.base_siefp_gpa = 0;
+
+       hv_set_siefp(siefp.as_uint64);
+
+       /* Disable the global synic bit */
+       hv_get_synic_state(sctrl.as_uint64);
+       sctrl.enable = 0;
+       hv_set_synic_state(sctrl.as_uint64);
+}
+
+int hv_synic_cleanup(unsigned int cpu)
+{
        struct vmbus_channel *channel, *sc;
        bool channel_found = false;
        unsigned long flags;
 
-       hv_get_synic_state(sctrl.as_uint64);
-       if (sctrl.enable != 1)
-               return -EFAULT;
-
        /*
         * Search for channels which are bound to the CPU we're about to
         * cleanup. In case we find one and vmbus is still connected we need to
@@ -249,29 +279,7 @@ int hv_synic_cleanup(unsigned int cpu)
 
        hv_stimer_cleanup(cpu);
 
-       hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
-       shared_sint.masked = 1;
-
-       /* Need to correctly cleanup in the case of SMP!!! */
-       /* Disable the interrupt */
-       hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
-       hv_get_simp(simp.as_uint64);
-       simp.simp_enabled = 0;
-       simp.base_simp_gpa = 0;
-
-       hv_set_simp(simp.as_uint64);
-
-       hv_get_siefp(siefp.as_uint64);
-       siefp.siefp_enabled = 0;
-       siefp.base_siefp_gpa = 0;
-
-       hv_set_siefp(siefp.as_uint64);
-
-       /* Disable the global synic bit */
-       sctrl.enable = 0;
-       hv_set_synic_state(sctrl.as_uint64);
+       hv_synic_disable_regs(cpu);
 
        return 0;
 }
index 6fb4ea5..34bd735 100644 (file)
@@ -494,7 +494,7 @@ enum hv_dm_state {
 
 
 static __u8 recv_buffer[PAGE_SIZE];
-static __u8 *send_buffer;
+static __u8 balloon_up_send_buffer[PAGE_SIZE];
 #define PAGES_IN_2M    512
 #define HA_CHUNK (32 * 1024)
 
@@ -1292,8 +1292,8 @@ static void balloon_up(struct work_struct *dummy)
        }
 
        while (!done) {
-               bl_resp = (struct dm_balloon_response *)send_buffer;
-               memset(send_buffer, 0, PAGE_SIZE);
+               memset(balloon_up_send_buffer, 0, PAGE_SIZE);
+               bl_resp = (struct dm_balloon_response *)balloon_up_send_buffer;
                bl_resp->hdr.type = DM_BALLOON_RESPONSE;
                bl_resp->hdr.size = sizeof(struct dm_balloon_response);
                bl_resp->more_pages = 1;
@@ -1564,58 +1564,18 @@ static void balloon_onchannelcallback(void *context)
 
 }
 
-static int balloon_probe(struct hv_device *dev,
-                       const struct hv_vmbus_device_id *dev_id)
+static int balloon_connect_vsp(struct hv_device *dev)
 {
-       int ret;
-       unsigned long t;
        struct dm_version_request version_req;
        struct dm_capabilities cap_msg;
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-       do_hot_add = hot_add;
-#else
-       do_hot_add = false;
-#endif
-
-       /*
-        * First allocate a send buffer.
-        */
-
-       send_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
-       if (!send_buffer)
-               return -ENOMEM;
+       unsigned long t;
+       int ret;
 
        ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0,
-                       balloon_onchannelcallback, dev);
-
+                        balloon_onchannelcallback, dev);
        if (ret)
-               goto probe_error0;
+               return ret;
 
-       dm_device.dev = dev;
-       dm_device.state = DM_INITIALIZING;
-       dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8;
-       init_completion(&dm_device.host_event);
-       init_completion(&dm_device.config_event);
-       INIT_LIST_HEAD(&dm_device.ha_region_list);
-       spin_lock_init(&dm_device.ha_lock);
-       INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
-       INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
-       dm_device.host_specified_ha_region = false;
-
-       dm_device.thread =
-                kthread_run(dm_thread_func, &dm_device, "hv_balloon");
-       if (IS_ERR(dm_device.thread)) {
-               ret = PTR_ERR(dm_device.thread);
-               goto probe_error1;
-       }
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-       set_online_page_callback(&hv_online_page);
-       register_memory_notifier(&hv_memory_nb);
-#endif
-
-       hv_set_drvdata(dev, &dm_device);
        /*
         * Initiate the hand shake with the host and negotiate
         * a version that the host can support. We start with the
@@ -1631,16 +1591,15 @@ static int balloon_probe(struct hv_device *dev,
        dm_device.version = version_req.version.version;
 
        ret = vmbus_sendpacket(dev->channel, &version_req,
-                               sizeof(struct dm_version_request),
-                               (unsigned long)NULL,
-                               VM_PKT_DATA_INBAND, 0);
+                              sizeof(struct dm_version_request),
+                              (unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
        if (ret)
-               goto probe_error2;
+               goto out;
 
        t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
        if (t == 0) {
                ret = -ETIMEDOUT;
-               goto probe_error2;
+               goto out;
        }
 
        /*
@@ -1648,8 +1607,8 @@ static int balloon_probe(struct hv_device *dev,
         * fail the probe function.
         */
        if (dm_device.state == DM_INIT_ERROR) {
-               ret = -ETIMEDOUT;
-               goto probe_error2;
+               ret = -EPROTO;
+               goto out;
        }
 
        pr_info("Using Dynamic Memory protocol version %u.%u\n",
@@ -1682,16 +1641,15 @@ static int balloon_probe(struct hv_device *dev,
        cap_msg.max_page_number = -1;
 
        ret = vmbus_sendpacket(dev->channel, &cap_msg,
-                               sizeof(struct dm_capabilities),
-                               (unsigned long)NULL,
-                               VM_PKT_DATA_INBAND, 0);
+                              sizeof(struct dm_capabilities),
+                              (unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
        if (ret)
-               goto probe_error2;
+               goto out;
 
        t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
        if (t == 0) {
                ret = -ETIMEDOUT;
-               goto probe_error2;
+               goto out;
        }
 
        /*
@@ -1699,25 +1657,65 @@ static int balloon_probe(struct hv_device *dev,
         * fail the probe function.
         */
        if (dm_device.state == DM_INIT_ERROR) {
-               ret = -ETIMEDOUT;
-               goto probe_error2;
+               ret = -EPROTO;
+               goto out;
        }
 
+       return 0;
+out:
+       vmbus_close(dev->channel);
+       return ret;
+}
+
+static int balloon_probe(struct hv_device *dev,
+                        const struct hv_vmbus_device_id *dev_id)
+{
+       int ret;
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+       do_hot_add = hot_add;
+#else
+       do_hot_add = false;
+#endif
+       dm_device.dev = dev;
+       dm_device.state = DM_INITIALIZING;
+       dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8;
+       init_completion(&dm_device.host_event);
+       init_completion(&dm_device.config_event);
+       INIT_LIST_HEAD(&dm_device.ha_region_list);
+       spin_lock_init(&dm_device.ha_lock);
+       INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
+       INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
+       dm_device.host_specified_ha_region = false;
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+       set_online_page_callback(&hv_online_page);
+       register_memory_notifier(&hv_memory_nb);
+#endif
+
+       hv_set_drvdata(dev, &dm_device);
+
+       ret = balloon_connect_vsp(dev);
+       if (ret != 0)
+               return ret;
+
        dm_device.state = DM_INITIALIZED;
-       last_post_time = jiffies;
+
+       dm_device.thread =
+                kthread_run(dm_thread_func, &dm_device, "hv_balloon");
+       if (IS_ERR(dm_device.thread)) {
+               ret = PTR_ERR(dm_device.thread);
+               goto probe_error;
+       }
 
        return 0;
 
-probe_error2:
+probe_error:
+       vmbus_close(dev->channel);
 #ifdef CONFIG_MEMORY_HOTPLUG
+       unregister_memory_notifier(&hv_memory_nb);
        restore_online_page_callback(&hv_online_page);
 #endif
-       kthread_stop(dm_device.thread);
-
-probe_error1:
-       vmbus_close(dev->channel);
-probe_error0:
-       kfree(send_buffer);
        return ret;
 }
 
@@ -1734,12 +1732,11 @@ static int balloon_remove(struct hv_device *dev)
        cancel_work_sync(&dm->balloon_wrk.wrk);
        cancel_work_sync(&dm->ha_wrk.wrk);
 
-       vmbus_close(dev->channel);
        kthread_stop(dm->thread);
-       kfree(send_buffer);
+       vmbus_close(dev->channel);
 #ifdef CONFIG_MEMORY_HOTPLUG
-       restore_online_page_callback(&hv_online_page);
        unregister_memory_notifier(&hv_memory_nb);
+       restore_online_page_callback(&hv_online_page);
 #endif
        spin_lock_irqsave(&dm_device.ha_lock, flags);
        list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) {
index 50eaa1f..af9379a 100644 (file)
@@ -169,8 +169,10 @@ extern int hv_synic_alloc(void);
 
 extern void hv_synic_free(void);
 
+extern void hv_synic_enable_regs(unsigned int cpu);
 extern int hv_synic_init(unsigned int cpu);
 
+extern void hv_synic_disable_regs(unsigned int cpu);
 extern int hv_synic_cleanup(unsigned int cpu);
 
 /* Interface */
@@ -256,6 +258,32 @@ struct vmbus_connection {
        struct workqueue_struct *work_queue;
        struct workqueue_struct *handle_primary_chan_wq;
        struct workqueue_struct *handle_sub_chan_wq;
+
+       /*
+        * The number of sub-channels and hv_sock channels that should be
+        * cleaned up upon suspend: sub-channels will be re-created upon
+        * resume, and hv_sock channels should not survive suspend.
+        */
+       atomic_t nr_chan_close_on_suspend;
+       /*
+        * vmbus_bus_suspend() waits for "nr_chan_close_on_suspend" to
+        * drop to zero.
+        */
+       struct completion ready_for_suspend_event;
+
+       /*
+        * The number of primary channels that should be "fixed up"
+        * upon resume: these channels are re-offered upon resume, and some
+        * fields of the channel offers (e.g. child_relid and connection_id)
+        * can change, so the old offermsg must be fixed up, before the resume
+        * callbacks of the VSC drivers start to further touch the channels.
+        */
+       atomic_t nr_chan_fixup_on_resume;
+       /*
+        * vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to
+        * drop to zero.
+        */
+       struct completion ready_for_resume_event;
 };
 
 
@@ -270,6 +298,8 @@ struct vmbus_msginfo {
 
 extern struct vmbus_connection vmbus_connection;
 
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version);
+
 static inline void vmbus_send_interrupt(u32 relid)
 {
        sync_set_bit(relid, vmbus_connection.send_int_page);
index ebd35fc..391f0b2 100644 (file)
 #include <linux/sched/task_stack.h>
 
 #include <asm/mshyperv.h>
+#include <linux/delay.h>
 #include <linux/notifier.h>
 #include <linux/ptrace.h>
 #include <linux/screen_info.h>
 #include <linux/kdebug.h>
 #include <linux/efi.h>
 #include <linux/random.h>
+#include <linux/syscore_ops.h>
 #include <clocksource/hyperv_timer.h>
 #include "hyperv_vmbus.h"
 
@@ -910,6 +912,43 @@ static void vmbus_shutdown(struct device *child_device)
                drv->shutdown(dev);
 }
 
+/*
+ * vmbus_suspend - Suspend a vmbus device
+ */
+static int vmbus_suspend(struct device *child_device)
+{
+       struct hv_driver *drv;
+       struct hv_device *dev = device_to_hv_device(child_device);
+
+       /* The device may not be attached yet */
+       if (!child_device->driver)
+               return 0;
+
+       drv = drv_to_hv_drv(child_device->driver);
+       if (!drv->suspend)
+               return -EOPNOTSUPP;
+
+       return drv->suspend(dev);
+}
+
+/*
+ * vmbus_resume - Resume a vmbus device
+ */
+static int vmbus_resume(struct device *child_device)
+{
+       struct hv_driver *drv;
+       struct hv_device *dev = device_to_hv_device(child_device);
+
+       /* The device may not be attached yet */
+       if (!child_device->driver)
+               return 0;
+
+       drv = drv_to_hv_drv(child_device->driver);
+       if (!drv->resume)
+               return -EOPNOTSUPP;
+
+       return drv->resume(dev);
+}
 
 /*
  * vmbus_device_release - Final callback release of the vmbus child device
@@ -925,6 +964,14 @@ static void vmbus_device_release(struct device *device)
        kfree(hv_dev);
 }
 
+/*
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS: see the comment before vmbus_bus_pm.
+ */
+static const struct dev_pm_ops vmbus_pm = {
+       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_suspend, vmbus_resume)
+};
+
 /* The one and only one */
 static struct bus_type  hv_bus = {
        .name =         "vmbus",
@@ -935,6 +982,7 @@ static struct bus_type  hv_bus = {
        .uevent =               vmbus_uevent,
        .dev_groups =           vmbus_dev_groups,
        .drv_groups =           vmbus_drv_groups,
+       .pm =                   &vmbus_pm,
 };
 
 struct onmessage_work_context {
@@ -1022,6 +1070,41 @@ msg_handled:
        vmbus_signal_eom(msg, message_type);
 }
 
+/*
+ * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
+ * hibernation, because hv_sock connections can not persist across hibernation.
+ */
+static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
+{
+       struct onmessage_work_context *ctx;
+       struct vmbus_channel_rescind_offer *rescind;
+
+       WARN_ON(!is_hvsock_channel(channel));
+
+       /*
+        * sizeof(*ctx) is small and the allocation should really not fail,
+        * otherwise the state of the hv_sock connections ends up in limbo.
+        */
+       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL);
+
+       /*
+        * So far, these are not really used by Linux. Just set them to the
+        * reasonable values conforming to the definitions of the fields.
+        */
+       ctx->msg.header.message_type = 1;
+       ctx->msg.header.payload_size = sizeof(*rescind);
+
+       /* These values are actually used by Linux. */
+       rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.u.payload;
+       rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
+       rescind->child_relid = channel->offermsg.child_relid;
+
+       INIT_WORK(&ctx->work, vmbus_onmessage_work);
+
+       queue_work_on(vmbus_connection.connect_cpu,
+                     vmbus_connection.work_queue,
+                     &ctx->work);
+}
 
 /*
  * Direct callback for channels using other deferred processing
@@ -2042,6 +2125,129 @@ acpi_walk_err:
        return ret_val;
 }
 
+static int vmbus_bus_suspend(struct device *dev)
+{
+       struct vmbus_channel *channel, *sc;
+       unsigned long flags;
+
+       while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
+               /*
+                * We wait here until the completion of any channel
+                * offers that are currently in progress. The counter is
+                * polled once per msleep(1); the loop has no timeout.
+                */
+               msleep(1);
+       }
+
+       mutex_lock(&vmbus_connection.channel_mutex);
+       list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+               if (!is_hvsock_channel(channel))
+                       continue;
+
+               vmbus_force_channel_rescinded(channel); /* queues work only */
+       }
+       mutex_unlock(&vmbus_connection.channel_mutex);
+
+       /*
+        * Wait until all the sub-channels and hv_sock channels have been
+        * cleaned up. Sub-channels should be destroyed upon suspend, otherwise
+        * they would conflict with the new sub-channels that will be created
+        * in the resume path. hv_sock channels should also be destroyed, but
+        * a hv_sock channel of an established hv_sock connection can not be
+        * really destroyed since it may still be referenced by the userspace
+        * application, so we just force the hv_sock channel to be rescinded
+        * by vmbus_force_channel_rescinded(), and the userspace application
+        * will thoroughly destroy the channel after hibernation.
+        *
+        * Note: the counter nr_chan_close_on_suspend may never go above 0 if
+        * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM.
+        * That is why the wait below is conditional. The wait itself has no
+        * timeout, and ready_for_suspend_event is completed outside this
+        * function.
+        */
+       if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
+               wait_for_completion(&vmbus_connection.ready_for_suspend_event);
+
+       WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0);
+
+       mutex_lock(&vmbus_connection.channel_mutex);
+
+       list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+               /*
+                * Invalidate the field. Upon resume, vmbus_onoffer() will fix
+                * up the field, and the other fields (if necessary).
+                *
+                * This is done for every channel on chn_list, hv_sock ones
+                * included. hv_sock channels are then only checked for the
+                * rescind flag and skipped; every other channel has its
+                * sc_list checked for leftovers (each one is reported) and
+                * is counted in nr_chan_fixup_on_resume.
+                */
+               channel->offermsg.child_relid = INVALID_RELID;
+
+               if (is_hvsock_channel(channel)) {
+                       if (!channel->rescind) {
+                               pr_err("hv_sock channel not rescinded!\n");
+                               WARN_ON_ONCE(1);
+                       }
+                       continue;
+               }
+
+               spin_lock_irqsave(&channel->lock, flags);
+               list_for_each_entry(sc, &channel->sc_list, sc_list) {
+                       pr_err("Sub-channel not deleted!\n");
+                       WARN_ON_ONCE(1);
+               }
+               spin_unlock_irqrestore(&channel->lock, flags);
+
+               atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
+       }
+
+       mutex_unlock(&vmbus_connection.channel_mutex);
+
+       vmbus_initiate_unload(false);
+
+       vmbus_connection.conn_state = DISCONNECTED;
+
+       /*
+        * Reset the event for the next resume. Nothing above can make this
+        * function fail: it always returns 0.
+        */
+       reinit_completion(&vmbus_connection.ready_for_resume_event);
+
+       return 0;
+}
+
+static int vmbus_bus_resume(struct device *dev)
+{
+       struct vmbus_channel_msginfo *msginfo;
+       size_t msgsize;
+       int ret;
+
+       /*
+        * We only use the 'vmbus_proto_version', which was in use before
+        * hibernation, to re-negotiate with the host.
+        *
+        * If that version is VERSION_INVAL or 0, the resume fails with
+        * -EINVAL. Otherwise a zeroed scratch buffer (msginfo header plus a
+        * vmbus_channel_initiate_contact message) is allocated for
+        * vmbus_negotiate_version() and freed right after the call. An
+        * allocation failure returns -ENOMEM; a negotiation failure returns
+        * the error from vmbus_negotiate_version() unchanged.
+        */
+       if (vmbus_proto_version == VERSION_INVAL ||
+           vmbus_proto_version == 0) {
+               pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version);
+               return -EINVAL;
+       }
+
+       msgsize = sizeof(*msginfo) +
+                 sizeof(struct vmbus_channel_initiate_contact);
+
+       msginfo = kzalloc(msgsize, GFP_KERNEL);
+
+       if (msginfo == NULL)
+               return -ENOMEM;
+
+       ret = vmbus_negotiate_version(msginfo, vmbus_proto_version);
+
+       kfree(msginfo);
+
+       if (ret != 0)
+               return ret;
+
+       WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);
+
+       vmbus_request_offers();
+
+       wait_for_completion(&vmbus_connection.ready_for_resume_event);
+
+       /*
+        * Reset the event for the next suspend. (The wait above has no
+        * timeout; ready_for_resume_event is completed outside this
+        * function.)
+        */
+       reinit_completion(&vmbus_connection.ready_for_suspend_event);
+
+       return 0;
+}
+
 static const struct acpi_device_id vmbus_acpi_device_ids[] = {
        {"VMBUS", 0},
        {"VMBus", 0},
@@ -2049,6 +2255,19 @@ static const struct acpi_device_id vmbus_acpi_device_ids[] = {
 };
 MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
 
+/*
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS, otherwise NIC SR-IOV can not work, because the
+ * "pci_dev_pm_ops" uses the "noirq" callbacks: in the resume path, the
+ * pci "noirq" restore callback runs before "non-noirq" callbacks (see
+ * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() ->
+ * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's
+ * resume callback must also run via the "noirq" callbacks.
+ *
+ * These ops are hooked up through the .drv.pm field of vmbus_acpi_driver.
+ */
+static const struct dev_pm_ops vmbus_bus_pm = {
+       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_bus_suspend, vmbus_bus_resume)
+};
+
 static struct acpi_driver vmbus_acpi_driver = {
        .name = "vmbus",
        .ids = vmbus_acpi_device_ids,
@@ -2056,6 +2275,7 @@ static struct acpi_driver vmbus_acpi_driver = {
                .add = vmbus_acpi_add,
                .remove = vmbus_acpi_remove,
        },
+       .drv.pm = &vmbus_bus_pm,
 };
 
 static void hv_kexec_handler(void)
@@ -2086,6 +2306,47 @@ static void hv_crash_handler(struct pt_regs *regs)
        hyperv_cleanup();
 };
 
+static int hv_synic_suspend(void)
+{
+       /*
+        * When we reach here, all the non-boot CPUs have been offlined, and
+        * the stimers on them have been unbound in hv_synic_cleanup() ->
+        * hv_stimer_cleanup() -> clockevents_unbind_device().
+        *
+        * hv_synic_suspend() only runs on CPU0 with interrupts disabled. Here
+        * we do not unbind the stimer on CPU0 because: 1) it's unnecessary
+        * because the interrupts remain disabled between syscore_suspend()
+        * and syscore_resume(): see create_image() and resume_target_kernel();
+        * 2) the stimer on CPU0 is automatically disabled later by
+        * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
+        * -> clockevents_shutdown() -> ... -> hv_ce_shutdown(); 3) a warning
+        * would be triggered if we call clockevents_unbind_device(), which
+        * may sleep, in an interrupts-disabled context. So, we intentionally
+        * don't call hv_stimer_cleanup(0) here.
+        *
+        * The only action taken is hv_synic_disable_regs(0) (the argument is
+        * presumably the CPU number - TODO confirm); the return value is
+        * always 0.
+        */
+
+       hv_synic_disable_regs(0);
+
+       return 0;
+}
+
+static void hv_synic_resume(void)
+{
+       hv_synic_enable_regs(0);
+
+       /*
+        * Note: we don't need to call hv_stimer_init(0), because the timer
+        * on CPU0 is not unbound in hv_synic_suspend(), and the timer is
+        * automatically re-enabled in timekeeping_resume().
+        *
+        * hv_synic_enable_regs(0) above is therefore the only step, pairing
+        * with the hv_synic_disable_regs(0) call in hv_synic_suspend().
+        */
+}
+
+/*
+ * The callbacks run only on CPU0, with irqs_disabled.
+ *
+ * Registered with register_syscore_ops() in hv_acpi_init() and removed with
+ * unregister_syscore_ops() in vmbus_exit().
+ */
+static struct syscore_ops hv_synic_syscore_ops = {
+       .suspend = hv_synic_suspend,
+       .resume = hv_synic_resume,
+};
+
 static int __init hv_acpi_init(void)
 {
        int ret, t;
@@ -2116,6 +2377,8 @@ static int __init hv_acpi_init(void)
        hv_setup_kexec_handler(hv_kexec_handler);
        hv_setup_crash_handler(hv_crash_handler);
 
+       register_syscore_ops(&hv_synic_syscore_ops);
+
        return 0;
 
 cleanup:
@@ -2128,6 +2391,8 @@ static void __exit vmbus_exit(void)
 {
        int cpu;
 
+       unregister_syscore_ops(&hv_synic_syscore_ops);
+
        hv_remove_kexec_handler();
        hv_remove_crash_handler();
        vmbus_connection.conn_state = DISCONNECTED;
index 2ca5668..13a6b4a 100644 (file)
@@ -670,16 +670,6 @@ config SENSORS_IT87
          This driver can also be built as a module. If so, the module
          will be called it87.
 
-config SENSORS_JZ4740
-       tristate "Ingenic JZ4740 SoC ADC driver"
-       depends on MACH_JZ4740 && MFD_JZ4740_ADC
-       help
-         If you say yes here you get support for reading adc values from the ADCIN
-         pin on Ingenic JZ4740 SoC based boards.
-
-         This driver can also be built as a module. If so, the module will be
-         called jz4740-hwmon.
-
 config SENSORS_JC42
        tristate "JEDEC JC42.4 compliant memory module temperature sensors"
        depends on I2C
index c86ce4d..40c036e 100644 (file)
@@ -85,7 +85,6 @@ obj-$(CONFIG_SENSORS_INA2XX)  += ina2xx.o
 obj-$(CONFIG_SENSORS_INA3221)  += ina3221.o
 obj-$(CONFIG_SENSORS_IT87)     += it87.o
 obj-$(CONFIG_SENSORS_JC42)     += jc42.o
-obj-$(CONFIG_SENSORS_JZ4740)   += jz4740-hwmon.o
 obj-$(CONFIG_SENSORS_K8TEMP)   += k8temp.o
 obj-$(CONFIG_SENSORS_K10TEMP)  += k10temp.o
 obj-$(CONFIG_SENSORS_LINEAGE)  += lineage-pem.o
diff --git a/drivers/hwmon/jz4740-hwmon.c b/drivers/hwmon/jz4740-hwmon.c
deleted file mode 100644 (file)
index bec5bef..0000000
+++ /dev/null
@@ -1,135 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
- * JZ4740 SoC HWMON driver
- */
-
-#include <linux/err.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/io.h>
-
-#include <linux/completion.h>
-#include <linux/mfd/core.h>
-
-#include <linux/hwmon.h>
-
-struct jz4740_hwmon {
-       void __iomem *base;
-       int irq;
-       const struct mfd_cell *cell;
-       struct platform_device *pdev;
-       struct completion read_completion;
-       struct mutex lock;
-};
-
-static irqreturn_t jz4740_hwmon_irq(int irq, void *data)
-{
-       struct jz4740_hwmon *hwmon = data;
-
-       complete(&hwmon->read_completion);
-       return IRQ_HANDLED;
-}
-
-static ssize_t in0_input_show(struct device *dev,
-                             struct device_attribute *dev_attr, char *buf)
-{
-       struct jz4740_hwmon *hwmon = dev_get_drvdata(dev);
-       struct platform_device *pdev = hwmon->pdev;
-       struct completion *completion = &hwmon->read_completion;
-       long t;
-       unsigned long val;
-       int ret;
-
-       mutex_lock(&hwmon->lock);
-
-       reinit_completion(completion);
-
-       enable_irq(hwmon->irq);
-       hwmon->cell->enable(pdev);
-
-       t = wait_for_completion_interruptible_timeout(completion, HZ);
-
-       if (t > 0) {
-               val = readw(hwmon->base) & 0xfff;
-               val = (val * 3300) >> 12;
-               ret = sprintf(buf, "%lu\n", val);
-       } else {
-               ret = t ? t : -ETIMEDOUT;
-       }
-
-       hwmon->cell->disable(pdev);
-       disable_irq(hwmon->irq);
-
-       mutex_unlock(&hwmon->lock);
-
-       return ret;
-}
-
-static DEVICE_ATTR_RO(in0_input);
-
-static struct attribute *jz4740_attrs[] = {
-       &dev_attr_in0_input.attr,
-       NULL
-};
-
-ATTRIBUTE_GROUPS(jz4740);
-
-static int jz4740_hwmon_probe(struct platform_device *pdev)
-{
-       int ret;
-       struct device *dev = &pdev->dev;
-       struct jz4740_hwmon *hwmon;
-       struct device *hwmon_dev;
-
-       hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
-       if (!hwmon)
-               return -ENOMEM;
-
-       hwmon->cell = mfd_get_cell(pdev);
-
-       hwmon->irq = platform_get_irq(pdev, 0);
-       if (hwmon->irq < 0) {
-               dev_err(&pdev->dev, "Failed to get platform irq: %d\n",
-                       hwmon->irq);
-               return hwmon->irq;
-       }
-
-       hwmon->base = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(hwmon->base))
-               return PTR_ERR(hwmon->base);
-
-       hwmon->pdev = pdev;
-       init_completion(&hwmon->read_completion);
-       mutex_init(&hwmon->lock);
-
-       ret = devm_request_irq(dev, hwmon->irq, jz4740_hwmon_irq, 0,
-                              pdev->name, hwmon);
-       if (ret) {
-               dev_err(&pdev->dev, "Failed to request irq: %d\n", ret);
-               return ret;
-       }
-       disable_irq(hwmon->irq);
-
-       hwmon_dev = devm_hwmon_device_register_with_groups(dev, "jz4740", hwmon,
-                                                          jz4740_groups);
-       return PTR_ERR_OR_ZERO(hwmon_dev);
-}
-
-static struct platform_driver jz4740_hwmon_driver = {
-       .probe  = jz4740_hwmon_probe,
-       .driver = {
-               .name = "jz4740-hwmon",
-       },
-};
-
-module_platform_driver(jz4740_hwmon_driver);
-
-MODULE_DESCRIPTION("JZ4740 SoC HWMON driver");
-MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:jz4740-hwmon");
index 5587215..146ce40 100644 (file)
@@ -429,6 +429,7 @@ config I2C_AXXIA
        tristate "Axxia I2C controller"
        depends on ARCH_AXXIA || COMPILE_TEST
        default ARCH_AXXIA
+       select I2C_SLAVE
        help
          Say yes if you want to support the I2C bus on Axxia platforms.
 
@@ -977,7 +978,7 @@ config I2C_SIRF
          will be called i2c-sirf.
 
 config I2C_SPRD
-       bool "Spreadtrum I2C interface"
+       tristate "Spreadtrum I2C interface"
        depends on I2C=y && ARCH_SPRD
        help
          If you say yes to this option, support will be included for the
@@ -1309,6 +1310,20 @@ config I2C_ELEKTOR
          This support is also available as a module.  If so, the module
          will be called i2c-elektor.
 
+config I2C_ICY
+       tristate "ICY Zorro card"
+       depends on ZORRO
+       select I2C_ALGOPCF
+       help
+         This supports the PCF8584 Zorro bus I2C adapter, known as ICY.
+         Say Y if you own such an adapter.
+
+         This support is also available as a module.  If so, the module
+         will be called i2c-icy.
+
+         If you have a 2019 edition board with an LTC2990 sensor at address
+         0x4c, loading the module 'ltc2990' is sufficient to enable it.
+
 config I2C_MLXCPLD
        tristate "Mellanox I2C driver"
        depends on X86_64
index 80c2389..3ab8aeb 100644 (file)
@@ -140,6 +140,7 @@ obj-$(CONFIG_I2C_BCM_KONA)  += i2c-bcm-kona.o
 obj-$(CONFIG_I2C_BRCMSTB)      += i2c-brcmstb.o
 obj-$(CONFIG_I2C_CROS_EC_TUNNEL)       += i2c-cros-ec-tunnel.o
 obj-$(CONFIG_I2C_ELEKTOR)      += i2c-elektor.o
+obj-$(CONFIG_I2C_ICY)          += i2c-icy.o
 obj-$(CONFIG_I2C_MLXCPLD)      += i2c-mlxcpld.o
 obj-$(CONFIG_I2C_OPAL)         += i2c-opal.o
 obj-$(CONFIG_I2C_PCA_ISA)      += i2c-pca-isa.o
index ff3142b..0214daa 100644 (file)
                                 MST_STATUS_IP)
 #define MST_TX_BYTES_XFRD      0x50
 #define MST_RX_BYTES_XFRD      0x54
+#define SLV_ADDR_DEC_CTL       0x58
+#define   SLV_ADDR_DEC_GCE     BIT(0)  /* ACK to General Call Address from own master (loopback) */
+#define   SLV_ADDR_DEC_OGCE    BIT(1)  /* ACK to General Call Address from external masters */
+#define   SLV_ADDR_DEC_SA1E    BIT(2)  /* ACK to addr_1 enabled */
+#define   SLV_ADDR_DEC_SA1M    BIT(3)  /* 10-bit addressing for addr_1 enabled */
+#define   SLV_ADDR_DEC_SA2E    BIT(4)  /* ACK to addr_2 enabled */
+#define   SLV_ADDR_DEC_SA2M    BIT(5)  /* 10-bit addressing for addr_2 enabled */
+#define SLV_ADDR_1             0x5c
+#define SLV_ADDR_2             0x60
+#define SLV_RX_CTL             0x64
+#define   SLV_RX_ACSA1         BIT(0)  /* Generate ACK for writes to addr_1 */
+#define   SLV_RX_ACSA2         BIT(1)  /* Generate ACK for writes to addr_2 */
+#define   SLV_RX_ACGCA         BIT(2)  /* ACK data phase transfers to General Call Address */
+#define SLV_DATA               0x68
+#define SLV_RX_FIFO            0x6c
+#define   SLV_FIFO_DV1         BIT(0)  /* Data Valid for addr_1 */
+#define   SLV_FIFO_DV2         BIT(1)  /* Data Valid for addr_2 */
+#define   SLV_FIFO_AS          BIT(2)  /* (N)ACK Sent */
+#define   SLV_FIFO_TNAK                BIT(3)  /* Timeout NACK */
+#define   SLV_FIFO_STRC                BIT(4)  /* First byte after start condition received */
+#define   SLV_FIFO_RSC         BIT(5)  /* Repeated Start Condition */
+#define   SLV_FIFO_STPC                BIT(6)  /* Stop Condition */
+#define   SLV_FIFO_DV          (SLV_FIFO_DV1 | SLV_FIFO_DV2)
+#define SLV_INT_ENABLE         0x70
+#define SLV_INT_STATUS         0x74
+#define   SLV_STATUS_RFH       BIT(0)  /* FIFO service */
+#define   SLV_STATUS_WTC       BIT(1)  /* Write transfer complete */
+#define   SLV_STATUS_SRS1      BIT(2)  /* Slave read from addr 1 */
+#define   SLV_STATUS_SRRS1     BIT(3)  /* Repeated start from addr 1 */
+#define   SLV_STATUS_SRND1     BIT(4)  /* Read request not following start condition */
+#define   SLV_STATUS_SRC1      BIT(5)  /* Read canceled */
+#define   SLV_STATUS_SRAT1     BIT(6)  /* Slave Read timed out */
+#define   SLV_STATUS_SRDRE1    BIT(7)  /* Data written after timed out */
+#define SLV_READ_DUMMY         0x78
 #define SCL_HIGH_PERIOD                0x80
 #define SCL_LOW_PERIOD         0x84
 #define SPIKE_FLTR_LEN         0x88
@@ -111,6 +145,8 @@ struct axxia_i2c_dev {
        struct clk *i2c_clk;
        u32 bus_clk_rate;
        bool last;
+       struct i2c_client *slave;
+       int irq;
 };
 
 static void i2c_int_disable(struct axxia_i2c_dev *idev, u32 mask)
@@ -276,13 +312,65 @@ static int axxia_i2c_fill_tx_fifo(struct axxia_i2c_dev *idev)
        return ret;
 }
 
+static void axxia_i2c_slv_fifo_event(struct axxia_i2c_dev *idev)
+{
+       u32 fifo_status = readl(idev->base + SLV_RX_FIFO);
+       u8 val; /* may be uninitialised for WRITE_REQUESTED and STOP */
+
+       dev_dbg(idev->dev, "slave irq fifo_status=0x%x\n", fifo_status);
+
+       if (fifo_status & SLV_FIFO_DV1) { /* data valid for addr_1 */
+               if (fifo_status & SLV_FIFO_STRC) /* first byte after start */
+                       i2c_slave_event(idev->slave,
+                                       I2C_SLAVE_WRITE_REQUESTED, &val);
+
+               val = readl(idev->base + SLV_DATA); /* the received byte */
+               i2c_slave_event(idev->slave, I2C_SLAVE_WRITE_RECEIVED, &val);
+       }
+       if (fifo_status & SLV_FIFO_STPC) { /* stop condition */
+               readl(idev->base + SLV_DATA); /* dummy read, value discarded */
+               i2c_slave_event(idev->slave, I2C_SLAVE_STOP, &val);
+       }
+       if (fifo_status & SLV_FIFO_RSC) /* repeated start condition */
+               readl(idev->base + SLV_DATA); /* dummy read, value discarded */
+}
+
+static irqreturn_t axxia_i2c_slv_isr(struct axxia_i2c_dev *idev)
+{
+       u32 status = readl(idev->base + SLV_INT_STATUS);
+       u8 val; /* presumably filled in by the slave callback on READ_* */
+
+       dev_dbg(idev->dev, "slave irq status=0x%x\n", status);
+
+       if (status & SLV_STATUS_RFH) /* FIFO service */
+               axxia_i2c_slv_fifo_event(idev);
+       if (status & SLV_STATUS_SRS1) { /* slave read from addr 1 */
+               i2c_slave_event(idev->slave, I2C_SLAVE_READ_REQUESTED, &val);
+               writel(val, idev->base + SLV_DATA);
+       }
+       if (status & SLV_STATUS_SRND1) { /* read not following a start */
+               i2c_slave_event(idev->slave, I2C_SLAVE_READ_PROCESSED, &val);
+               writel(val, idev->base + SLV_DATA);
+       }
+       if (status & SLV_STATUS_SRC1) /* read canceled */
+               i2c_slave_event(idev->slave, I2C_SLAVE_STOP, &val);
+
+       writel(INT_SLV, idev->base + INTERRUPT_STATUS); /* presumably an ack */
+       return IRQ_HANDLED; /* unconditional, whichever bits were set */
+}
+
 static irqreturn_t axxia_i2c_isr(int irq, void *_dev)
 {
        struct axxia_i2c_dev *idev = _dev;
+       irqreturn_t ret = IRQ_NONE;
        u32 status;
 
-       if (!(readl(idev->base + INTERRUPT_STATUS) & INT_MST))
-               return IRQ_NONE;
+       status = readl(idev->base + INTERRUPT_STATUS);
+
+       if (status & INT_SLV)
+               ret = axxia_i2c_slv_isr(idev);
+       if (!(status & INT_MST))
+               return ret;
 
        /* Read interrupt status bits */
        status = readl(idev->base + MST_INT_STATUS);
@@ -583,9 +671,58 @@ static u32 axxia_i2c_func(struct i2c_adapter *adap)
        return caps;
 }
 
+static int axxia_i2c_reg_slave(struct i2c_client *slave)
+{
+       struct axxia_i2c_dev *idev = i2c_get_adapdata(slave->adapter);
+       u32 slv_int_mask = SLV_STATUS_RFH;
+       u32 dec_ctl;
+
+       if (idev->slave)
+               return -EBUSY; /* a slave client is already registered */
+
+       idev->slave = slave;
+
+       /*
+        * Enable slave mode as well: the master stays enabled, and both the
+        * master and the slave top-level interrupts are turned on.
+        */
+       writel(GLOBAL_MST_EN | GLOBAL_SLV_EN, idev->base + GLOBAL_CONTROL);
+       writel(INT_MST | INT_SLV, idev->base + INTERRUPT_ENABLE);
+
+       /*
+        * Set slave address. Only address slot 1 is used: SA1E enables the
+        * ACK to addr_1, and SA1M additionally selects 10-bit addressing
+        * when the client has I2C_CLIENT_TEN set. SLV_RX_ACSA1 enables ACKs
+        * for writes to addr_1.
+        */
+       dec_ctl = SLV_ADDR_DEC_SA1E;
+       if (slave->flags & I2C_CLIENT_TEN)
+               dec_ctl |= SLV_ADDR_DEC_SA1M;
+
+       writel(SLV_RX_ACSA1, idev->base + SLV_RX_CTL);
+       writel(dec_ctl, idev->base + SLV_ADDR_DEC_CTL);
+       writel(slave->addr, idev->base + SLV_ADDR_1);
+
+       /*
+        * Enable interrupts: FIFO service (set in the initialiser above),
+        * slave read, repeated start, read request not following a start,
+        * and read canceled. Note that SLV_STATUS_SRRS1 is enabled here
+        * although axxia_i2c_slv_isr() has no dedicated branch for it.
+        */
+       slv_int_mask |= SLV_STATUS_SRS1 | SLV_STATUS_SRRS1 | SLV_STATUS_SRND1;
+       slv_int_mask |= SLV_STATUS_SRC1;
+       writel(slv_int_mask, idev->base + SLV_INT_ENABLE);
+
+       return 0;
+}
+
+static int axxia_i2c_unreg_slave(struct i2c_client *slave)
+{
+       struct axxia_i2c_dev *idev = i2c_get_adapdata(slave->adapter);
+
+       /*
+        * Disable slave mode: go back to master-only operation and the
+        * master-only top-level interrupt. SLV_INT_ENABLE, SLV_RX_CTL,
+        * SLV_ADDR_DEC_CTL and SLV_ADDR_1 are left as programmed by
+        * axxia_i2c_reg_slave().
+        */
+       writel(GLOBAL_MST_EN, idev->base + GLOBAL_CONTROL);
+       writel(INT_MST, idev->base + INTERRUPT_ENABLE);
+
+       synchronize_irq(idev->irq); /* wait out a running ISR first */
+
+       idev->slave = NULL;
+
+       return 0; /* cannot fail */
+}
+
 static const struct i2c_algorithm axxia_i2c_algo = {
        .master_xfer = axxia_i2c_xfer,
        .functionality = axxia_i2c_func,
+       .reg_slave = axxia_i2c_reg_slave,
+       .unreg_slave = axxia_i2c_unreg_slave,
 };
 
 static const struct i2c_adapter_quirks axxia_i2c_quirks = {
@@ -599,7 +736,6 @@ static int axxia_i2c_probe(struct platform_device *pdev)
        struct axxia_i2c_dev *idev = NULL;
        struct resource *res;
        void __iomem *base;
-       int irq;
        int ret = 0;
 
        idev = devm_kzalloc(&pdev->dev, sizeof(*idev), GFP_KERNEL);
@@ -611,10 +747,10 @@ static int axxia_i2c_probe(struct platform_device *pdev)
        if (IS_ERR(base))
                return PTR_ERR(base);
 
-       irq = platform_get_irq(pdev, 0);
-       if (irq < 0) {
+       idev->irq = platform_get_irq(pdev, 0);
+       if (idev->irq < 0) {
                dev_err(&pdev->dev, "missing interrupt resource\n");
-               return irq;
+               return idev->irq;
        }
 
        idev->i2c_clk = devm_clk_get(&pdev->dev, "i2c");
@@ -643,10 +779,10 @@ static int axxia_i2c_probe(struct platform_device *pdev)
                goto error_disable_clk;
        }
 
-       ret = devm_request_irq(&pdev->dev, irq, axxia_i2c_isr, 0,
+       ret = devm_request_irq(&pdev->dev, idev->irq, axxia_i2c_isr, 0,
                               pdev->name, idev);
        if (ret) {
-               dev_err(&pdev->dev, "failed to claim IRQ%d\n", irq);
+               dev_err(&pdev->dev, "failed to claim IRQ%d\n", idev->irq);
                goto error_disable_clk;
        }
 
index 19ef2b0..9ffdffa 100644 (file)
@@ -808,7 +808,7 @@ static struct i2c_algorithm bcm_iproc_algo = {
        .unreg_slave = bcm_iproc_i2c_unreg_slave,
 };
 
-static struct i2c_adapter_quirks bcm_iproc_i2c_quirks = {
+static const struct i2c_adapter_quirks bcm_iproc_i2c_quirks = {
        .max_read_len = M_RX_MAX_READ_LEN,
 };
 
@@ -922,7 +922,9 @@ static int bcm_iproc_i2c_probe(struct platform_device *pdev)
 
        adap = &iproc_i2c->adapter;
        i2c_set_adapdata(adap, iproc_i2c);
-       strlcpy(adap->name, "Broadcom iProc I2C adapter", sizeof(adap->name));
+       snprintf(adap->name, sizeof(adap->name),
+               "Broadcom iProc (%s)",
+               of_node_full_name(iproc_i2c->device->of_node));
        adap->algo = &bcm_iproc_algo;
        adap->quirks = &bcm_iproc_i2c_quirks;
        adap->dev.parent = &pdev->dev;
index 67752f7..e01b2b5 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
@@ -389,7 +390,7 @@ static const struct i2c_algorithm bcm2835_i2c_algo = {
 };
 
 /*
- * This HW was reported to have problems with clock stretching:
+ * The BCM2835 was reported to have problems with clock stretching:
  * http://www.advamation.com/knowhow/raspberrypi/rpi-i2c-bug.html
  * https://www.raspberrypi.org/forums/viewtopic.php?p=146272
  */
@@ -471,11 +472,12 @@ static int bcm2835_i2c_probe(struct platform_device *pdev)
        i2c_set_adapdata(adap, i2c_dev);
        adap->owner = THIS_MODULE;
        adap->class = I2C_CLASS_DEPRECATED;
-       strlcpy(adap->name, "bcm2835 I2C adapter", sizeof(adap->name));
+       snprintf(adap->name, sizeof(adap->name), "bcm2835 (%s)",
+                of_node_full_name(pdev->dev.of_node));
        adap->algo = &bcm2835_i2c_algo;
        adap->dev.parent = &pdev->dev;
        adap->dev.of_node = pdev->dev.of_node;
-       adap->quirks = &bcm2835_i2c_quirks;
+       adap->quirks = of_device_get_match_data(&pdev->dev);
 
        bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_C, 0);
 
@@ -501,7 +503,8 @@ static int bcm2835_i2c_remove(struct platform_device *pdev)
 }
 
 static const struct of_device_id bcm2835_i2c_of_match[] = {
-       { .compatible = "brcm,bcm2835-i2c" },
+       { .compatible = "brcm,bcm2711-i2c" },
+       { .compatible = "brcm,bcm2835-i2c", .data = &bcm2835_i2c_quirks },
        {},
 };
 MODULE_DEVICE_TABLE(of, bcm2835_i2c_of_match);
index 66af44b..b8fde61 100644 (file)
@@ -178,6 +178,51 @@ static const struct i2c_algorithm cht_wc_i2c_adap_algo = {
        .smbus_xfer = cht_wc_i2c_adap_smbus_xfer,
 };
 
+/*
+ * We are an i2c-adapter which itself is part of an i2c-client. This means that
+ * transfers done through us take adapter->bus_lock twice, once for our parent
+ * i2c-adapter and once to take our own bus_lock. Lockdep does not like this
+ * nested locking. To make lockdep happy in the case of busses with muxes, the
+ * i2c-core's i2c_adapter_lock_bus function calls:
+ * rt_mutex_lock_nested(&adapter->bus_lock, i2c_adapter_depth(adapter));
+ *
+ * But i2c_adapter_depth only works when the direct parent of the adapter is
+ * another adapter, as it is only meant for muxes. In our case there is an
+ * i2c-client and MFD instantiated platform_device in the parent->child chain
+ * between the 2 devices.
+ *
+ * So we override the default i2c_lock_operations and pass a hardcoded
+ * depth of 1 to rt_mutex_lock_nested, to make lockdep happy.
+ *
+ * Note that if there were to be a mux attached to our adapter, this would
+ * break things again since the i2c-mux code expects the root-adapter to have
+ * a locking depth of 0. But we always have only 1 client directly attached
+ * in the form of the Charger IC paired with the CHT Whiskey Cove PMIC.
+ *
+ * The @flags argument is ignored by this callback.
+ */
+static void cht_wc_i2c_adap_lock_bus(struct i2c_adapter *adapter,
+                                unsigned int flags)
+{
+       rt_mutex_lock_nested(&adapter->bus_lock, 1);
+}
+
+static int cht_wc_i2c_adap_trylock_bus(struct i2c_adapter *adapter,
+                                  unsigned int flags)
+{
+       return rt_mutex_trylock(&adapter->bus_lock); /* @flags is ignored */
+}
+
+static void cht_wc_i2c_adap_unlock_bus(struct i2c_adapter *adapter,
+                                  unsigned int flags)
+{
+       rt_mutex_unlock(&adapter->bus_lock); /* @flags is ignored */
+}
+
+static const struct i2c_lock_operations cht_wc_i2c_adap_lock_ops = {
+       .lock_bus =    cht_wc_i2c_adap_lock_bus,
+       .trylock_bus = cht_wc_i2c_adap_trylock_bus,
+       .unlock_bus =  cht_wc_i2c_adap_unlock_bus,
+}; /* installed as adapter.lock_ops in cht_wc_i2c_adap_i2c_probe() */
+
 /**** irqchip for the client connected to the extchgr i2c adapter ****/
 static void cht_wc_i2c_irq_lock(struct irq_data *data)
 {
@@ -286,6 +331,7 @@ static int cht_wc_i2c_adap_i2c_probe(struct platform_device *pdev)
        adap->adapter.owner = THIS_MODULE;
        adap->adapter.class = I2C_CLASS_HWMON;
        adap->adapter.algo = &cht_wc_i2c_adap_algo;
+       adap->adapter.lock_ops = &cht_wc_i2c_adap_lock_ops;
        strlcpy(adap->adapter.name, "PMIC I2C Adapter",
                sizeof(adap->adapter.name));
        adap->adapter.dev.parent = &pdev->dev;
@@ -363,8 +409,7 @@ static int cht_wc_i2c_adap_i2c_remove(struct platform_device *pdev)
 {
        struct cht_wc_i2c_adap *adap = platform_get_drvdata(pdev);
 
-       if (adap->client)
-               i2c_unregister_device(adap->client);
+       i2c_unregister_device(adap->client);
        i2c_del_adapter(&adap->adapter);
        irq_domain_remove(adap->irq_domain);
 
index d464799..e8b3282 100644 (file)
@@ -655,15 +655,11 @@ static int i2c_dw_init_recovery_info(struct dw_i2c_dev *dev)
        struct i2c_bus_recovery_info *rinfo = &dev->rinfo;
        struct i2c_adapter *adap = &dev->adapter;
        struct gpio_desc *gpio;
-       int r;
-
-       gpio = devm_gpiod_get(dev->dev, "scl", GPIOD_OUT_HIGH);
-       if (IS_ERR(gpio)) {
-               r = PTR_ERR(gpio);
-               if (r == -ENOENT || r == -ENOSYS)
-                       return 0;
-               return r;
-       }
+
+       gpio = devm_gpiod_get_optional(dev->dev, "scl", GPIOD_OUT_HIGH);
+       if (IS_ERR_OR_NULL(gpio))
+               return PTR_ERR_OR_ZERO(gpio);
+
        rinfo->scl_gpiod = gpio;
 
        gpio = devm_gpiod_get_optional(dev->dev, "sda", GPIOD_IN);
index 76810de..050adda 100644 (file)
@@ -33,6 +33,7 @@ enum dw_pci_ctl_id_t {
        baytrail,
        cherrytrail,
        haswell,
+       elkhartlake,
 };
 
 struct dw_scl_sda_cfg {
@@ -168,13 +169,20 @@ static struct dw_pci_controller dw_pci_controllers[] = {
                .flags = MODEL_CHERRYTRAIL,
                .scl_sda_cfg = &byt_config,
        },
+       [elkhartlake] = {
+               .bus_num = -1,
+               .bus_cfg = INTEL_MID_STD_CFG | DW_IC_CON_SPEED_FAST,
+               .tx_fifo_depth = 32,
+               .rx_fifo_depth = 32,
+               .functionality = I2C_FUNC_10BIT_ADDR,
+               .clk_khz = 100000,
+       },
 };
 
 #ifdef CONFIG_PM
 static int i2c_dw_pci_suspend(struct device *dev)
 {
-       struct pci_dev *pdev = to_pci_dev(dev);
-       struct dw_i2c_dev *i_dev = pci_get_drvdata(pdev);
+       struct dw_i2c_dev *i_dev = dev_get_drvdata(dev);
 
        i_dev->suspended = true;
        i_dev->disable(i_dev);
@@ -184,8 +192,7 @@ static int i2c_dw_pci_suspend(struct device *dev)
 
 static int i2c_dw_pci_resume(struct device *dev)
 {
-       struct pci_dev *pdev = to_pci_dev(dev);
-       struct dw_i2c_dev *i_dev = pci_get_drvdata(pdev);
+       struct dw_i2c_dev *i_dev = dev_get_drvdata(dev);
        int ret;
 
        ret = i_dev->init(i_dev);
@@ -227,6 +234,8 @@ static int i2c_dw_pci_probe(struct pci_dev *pdev,
                return r;
        }
 
+       pci_set_master(pdev);
+
        r = pcim_iomap_regions(pdev, 1 << 0, pci_name(pdev));
        if (r) {
                dev_err(&pdev->dev, "I/O memory remapping failed\n");
@@ -237,18 +246,24 @@ static int i2c_dw_pci_probe(struct pci_dev *pdev,
        if (!dev)
                return -ENOMEM;
 
+       r = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+       if (r < 0)
+               return r;
+
        dev->clk = NULL;
        dev->controller = controller;
        dev->get_clk_rate_khz = i2c_dw_get_clk_rate_khz;
        dev->base = pcim_iomap_table(pdev)[0];
        dev->dev = &pdev->dev;
-       dev->irq = pdev->irq;
+       dev->irq = pci_irq_vector(pdev, 0);
        dev->flags |= controller->flags;
 
        if (controller->setup) {
                r = controller->setup(pdev, controller);
-               if (r)
+               if (r) {
+                       pci_free_irq_vectors(pdev);
                        return r;
+               }
        }
 
        dev->functionality = controller->functionality |
@@ -276,8 +291,10 @@ static int i2c_dw_pci_probe(struct pci_dev *pdev,
        adap->nr = controller->bus_num;
 
        r = i2c_dw_probe(dev);
-       if (r)
+       if (r) {
+               pci_free_irq_vectors(pdev);
                return r;
+       }
 
        pm_runtime_set_autosuspend_delay(&pdev->dev, 1000);
        pm_runtime_use_autosuspend(&pdev->dev);
@@ -296,6 +313,7 @@ static void i2c_dw_pci_remove(struct pci_dev *pdev)
        pm_runtime_get_noresume(&pdev->dev);
 
        i2c_del_adapter(&dev->adapter);
+       pci_free_irq_vectors(pdev);
 }
 
 /* work with hotplug and coldplug */
@@ -331,6 +349,15 @@ static const struct pci_device_id i2_designware_pci_ids[] = {
        { PCI_VDEVICE(INTEL, 0x22C5), cherrytrail },
        { PCI_VDEVICE(INTEL, 0x22C6), cherrytrail },
        { PCI_VDEVICE(INTEL, 0x22C7), cherrytrail },
+       /* Elkhart Lake (PSE I2C) */
+       { PCI_VDEVICE(INTEL, 0x4bb9), elkhartlake },
+       { PCI_VDEVICE(INTEL, 0x4bba), elkhartlake },
+       { PCI_VDEVICE(INTEL, 0x4bbb), elkhartlake },
+       { PCI_VDEVICE(INTEL, 0x4bbc), elkhartlake },
+       { PCI_VDEVICE(INTEL, 0x4bbd), elkhartlake },
+       { PCI_VDEVICE(INTEL, 0x4bbe), elkhartlake },
+       { PCI_VDEVICE(INTEL, 0x4bbf), elkhartlake },
+       { PCI_VDEVICE(INTEL, 0x4bc0), elkhartlake },
        { 0,}
 };
 MODULE_DEVICE_TABLE(pci, i2_designware_pci_ids);
index ddfb818..16dd338 100644 (file)
@@ -279,12 +279,10 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, dev);
 
        dev->rst = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL);
-       if (IS_ERR(dev->rst)) {
-               if (PTR_ERR(dev->rst) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
-       } else {
-               reset_control_deassert(dev->rst);
-       }
+       if (IS_ERR(dev->rst))
+               return PTR_ERR(dev->rst);
+
+       reset_control_deassert(dev->rst);
 
        t = &dev->timings;
        if (pdata)
@@ -346,8 +344,10 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 
        /* Optional interface clock */
        dev->pclk = devm_clk_get_optional(&pdev->dev, "pclk");
-       if (IS_ERR(dev->pclk))
-               return PTR_ERR(dev->pclk);
+       if (IS_ERR(dev->pclk)) {
+               ret = PTR_ERR(dev->pclk);
+               goto exit_reset;
+       }
 
        dev->clk = devm_clk_get(&pdev->dev, NULL);
        if (!i2c_dw_prepare_clk(dev, true)) {
@@ -400,8 +400,7 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 exit_probe:
        dw_i2c_plat_pm_cleanup(dev);
 exit_reset:
-       if (!IS_ERR_OR_NULL(dev->rst))
-               reset_control_assert(dev->rst);
+       reset_control_assert(dev->rst);
        return ret;
 }
 
@@ -419,8 +418,7 @@ static int dw_i2c_plat_remove(struct platform_device *pdev)
        pm_runtime_put_sync(&pdev->dev);
        dw_i2c_plat_pm_cleanup(dev);
 
-       if (!IS_ERR_OR_NULL(dev->rst))
-               reset_control_assert(dev->rst);
+       reset_control_assert(dev->rst);
 
        return 0;
 }
index e4e7932..e7514c1 100644 (file)
@@ -791,9 +791,7 @@ static int exynos5_i2c_probe(struct platform_device *pdev)
        }
 
        ret = devm_request_irq(&pdev->dev, i2c->irq, exynos5_i2c_irq,
-                               IRQF_NO_SUSPEND | IRQF_ONESHOT,
-                               dev_name(&pdev->dev), i2c);
-
+                              IRQF_NO_SUSPEND, dev_name(&pdev->dev), i2c);
        if (ret != 0) {
                dev_err(&pdev->dev, "cannot request HS-I2C IRQ %d\n", i2c->irq);
                goto err_clk;
index da5eb39..e0c2569 100644 (file)
@@ -707,8 +707,10 @@ static int fsi_i2c_probe(struct device *dev)
                        continue;
 
                port = kzalloc(sizeof(*port), GFP_KERNEL);
-               if (!port)
+               if (!port) {
+                       of_node_put(np);
                        break;
+               }
 
                port->master = i2c;
                port->port = port_no;
index 4df1434..8497c7a 100644 (file)
@@ -445,8 +445,7 @@ static int hix5hd2_i2c_probe(struct platform_device *pdev)
        hix5hd2_i2c_init(priv);
 
        ret = devm_request_irq(&pdev->dev, irq, hix5hd2_i2c_irq,
-                              IRQF_NO_SUSPEND | IRQF_ONESHOT,
-                              dev_name(&pdev->dev), priv);
+                              IRQF_NO_SUSPEND, dev_name(&pdev->dev), priv);
        if (ret != 0) {
                dev_err(&pdev->dev, "cannot request HS-I2C IRQ %d\n", irq);
                goto err_clk;
index 36e9559..c09791f 100644 (file)
@@ -292,7 +292,8 @@ struct i801_priv {
 #define FEATURE_HOST_NOTIFY    BIT(5)
 /* Not really a feature, but it's convenient to handle it as such */
 #define FEATURE_IDF            BIT(15)
-#define FEATURE_TCO            BIT(16)
+#define FEATURE_TCO_SPT                BIT(16)
+#define FEATURE_TCO_CNL                BIT(17)
 
 static const char *i801_feature_names[] = {
        "SMBus PEC",
@@ -1500,57 +1501,23 @@ static inline unsigned int i801_get_adapter_class(struct i801_priv *priv)
 }
 #endif
 
-static const struct itco_wdt_platform_data tco_platform_data = {
+static const struct itco_wdt_platform_data spt_tco_platform_data = {
        .name = "Intel PCH",
        .version = 4,
 };
 
 static DEFINE_SPINLOCK(p2sb_spinlock);
 
-static void i801_add_tco(struct i801_priv *priv)
+static struct platform_device *
+i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev,
+                struct resource *tco_res)
 {
-       struct pci_dev *pci_dev = priv->pci_dev;
-       struct resource tco_res[3], *res;
-       struct platform_device *pdev;
+       struct resource *res;
        unsigned int devfn;
-       u32 tco_base, tco_ctl;
-       u32 base_addr, ctrl_val;
        u64 base64_addr;
+       u32 base_addr;
        u8 hidden;
 
-       if (!(priv->features & FEATURE_TCO))
-               return;
-
-       pci_read_config_dword(pci_dev, TCOBASE, &tco_base);
-       pci_read_config_dword(pci_dev, TCOCTL, &tco_ctl);
-       if (!(tco_ctl & TCOCTL_EN))
-               return;
-
-       memset(tco_res, 0, sizeof(tco_res));
-
-       res = &tco_res[ICH_RES_IO_TCO];
-       res->start = tco_base & ~1;
-       res->end = res->start + 32 - 1;
-       res->flags = IORESOURCE_IO;
-
-       /*
-        * Power Management registers.
-        */
-       devfn = PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 2);
-       pci_bus_read_config_dword(pci_dev->bus, devfn, ACPIBASE, &base_addr);
-
-       res = &tco_res[ICH_RES_IO_SMI];
-       res->start = (base_addr & ~1) + ACPIBASE_SMI_OFF;
-       res->end = res->start + 3;
-       res->flags = IORESOURCE_IO;
-
-       /*
-        * Enable the ACPI I/O space.
-        */
-       pci_bus_read_config_dword(pci_dev->bus, devfn, ACPICTRL, &ctrl_val);
-       ctrl_val |= ACPICTRL_EN;
-       pci_bus_write_config_dword(pci_dev->bus, devfn, ACPICTRL, ctrl_val);
-
        /*
         * We must access the NO_REBOOT bit over the Primary to Sideband
         * bridge (P2SB). The BIOS prevents the P2SB device from being
@@ -1586,15 +1553,76 @@ static void i801_add_tco(struct i801_priv *priv)
        res->end = res->start + 3;
        res->flags = IORESOURCE_MEM;
 
-       pdev = platform_device_register_resndata(&pci_dev->dev, "iTCO_wdt", -1,
-                                                tco_res, 3, &tco_platform_data,
-                                                sizeof(tco_platform_data));
-       if (IS_ERR(pdev)) {
-               dev_warn(&pci_dev->dev, "failed to create iTCO device\n");
+       return platform_device_register_resndata(&pci_dev->dev, "iTCO_wdt", -1,
+                                       tco_res, 3, &spt_tco_platform_data,
+                                       sizeof(spt_tco_platform_data));
+}
+
+static const struct itco_wdt_platform_data cnl_tco_platform_data = {
+       .name = "Intel PCH",
+       .version = 6,
+};
+
+static struct platform_device *
+i801_add_tco_cnl(struct i801_priv *priv, struct pci_dev *pci_dev,
+                struct resource *tco_res)
+{
+       return platform_device_register_resndata(&pci_dev->dev, "iTCO_wdt", -1,
+                                       tco_res, 2, &cnl_tco_platform_data,
+                                       sizeof(cnl_tco_platform_data));
+}
+
+static void i801_add_tco(struct i801_priv *priv)
+{
+       u32 base_addr, tco_base, tco_ctl, ctrl_val;
+       struct pci_dev *pci_dev = priv->pci_dev;
+       struct resource tco_res[3], *res;
+       unsigned int devfn;
+
+       /* If we have ACPI based watchdog use that instead */
+       if (acpi_has_watchdog())
                return;
-       }
 
-       priv->tco_pdev = pdev;
+       if (!(priv->features & (FEATURE_TCO_SPT | FEATURE_TCO_CNL)))
+               return;
+
+       pci_read_config_dword(pci_dev, TCOBASE, &tco_base);
+       pci_read_config_dword(pci_dev, TCOCTL, &tco_ctl);
+       if (!(tco_ctl & TCOCTL_EN))
+               return;
+
+       memset(tco_res, 0, sizeof(tco_res));
+
+       res = &tco_res[ICH_RES_IO_TCO];
+       res->start = tco_base & ~1;
+       res->end = res->start + 32 - 1;
+       res->flags = IORESOURCE_IO;
+
+       /*
+        * Power Management registers.
+        */
+       devfn = PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 2);
+       pci_bus_read_config_dword(pci_dev->bus, devfn, ACPIBASE, &base_addr);
+
+       res = &tco_res[ICH_RES_IO_SMI];
+       res->start = (base_addr & ~1) + ACPIBASE_SMI_OFF;
+       res->end = res->start + 3;
+       res->flags = IORESOURCE_IO;
+
+       /*
+        * Enable the ACPI I/O space.
+        */
+       pci_bus_read_config_dword(pci_dev->bus, devfn, ACPICTRL, &ctrl_val);
+       ctrl_val |= ACPICTRL_EN;
+       pci_bus_write_config_dword(pci_dev->bus, devfn, ACPICTRL, ctrl_val);
+
+       if (priv->features & FEATURE_TCO_CNL)
+               priv->tco_pdev = i801_add_tco_cnl(priv, pci_dev, tco_res);
+       else
+               priv->tco_pdev = i801_add_tco_spt(priv, pci_dev, tco_res);
+
+       if (IS_ERR(priv->tco_pdev))
+               dev_warn(&pci_dev->dev, "failed to create iTCO device\n");
 }
 
 #ifdef CONFIG_ACPI
@@ -1704,13 +1732,21 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
        switch (dev->device) {
        case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_SMBUS:
        case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS:
-       case PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS:
-       case PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS:
        case PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS:
        case PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS:
-       case PCI_DEVICE_ID_INTEL_CDF_SMBUS:
        case PCI_DEVICE_ID_INTEL_DNV_SMBUS:
        case PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS:
+               priv->features |= FEATURE_I2C_BLOCK_READ;
+               priv->features |= FEATURE_IRQ;
+               priv->features |= FEATURE_SMBUS_PEC;
+               priv->features |= FEATURE_BLOCK_BUFFER;
+               priv->features |= FEATURE_TCO_SPT;
+               priv->features |= FEATURE_HOST_NOTIFY;
+               break;
+
+       case PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS:
+       case PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS:
+       case PCI_DEVICE_ID_INTEL_CDF_SMBUS:
        case PCI_DEVICE_ID_INTEL_ICELAKE_LP_SMBUS:
        case PCI_DEVICE_ID_INTEL_COMETLAKE_SMBUS:
        case PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS:
@@ -1720,9 +1756,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
                priv->features |= FEATURE_IRQ;
                priv->features |= FEATURE_SMBUS_PEC;
                priv->features |= FEATURE_BLOCK_BUFFER;
-               /* If we have ACPI based watchdog use that instead */
-               if (!acpi_has_watchdog())
-                       priv->features |= FEATURE_TCO;
+               priv->features |= FEATURE_TCO_CNL;
                priv->features |= FEATURE_HOST_NOTIFY;
                break;
 
@@ -1921,8 +1955,7 @@ static int i801_suspend(struct device *dev)
 
 static int i801_resume(struct device *dev)
 {
-       struct pci_dev *pci_dev = to_pci_dev(dev);
-       struct i801_priv *priv = pci_get_drvdata(pci_dev);
+       struct i801_priv *priv = dev_get_drvdata(dev);
 
        i801_enable_host_notify(&priv->adapter);
 
diff --git a/drivers/i2c/busses/i2c-icy.c b/drivers/i2c/busses/i2c-icy.c
new file mode 100644 (file)
index 0000000..8382eb6
--- /dev/null
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * I2C driver for stand-alone PCF8584 style adapters on Zorro cards
+ *
+ * Original ICY documentation can be found on Aminet:
+ * https://aminet.net/package/docs/hard/icy
+ *
+ * There has been a modern community re-print of this design in 2019:
+ * https://www.a1k.org/forum/index.php?threads/70106/
+ *
+ * The card is basically a Philips PCF8584 connected straight to the
+ * beginning of the AutoConfig'd address space (register S1 on base+2),
+ * with /INT on /INT2 on the Zorro bus.
+ *
+ * Copyright (c) 2019 Max Staudt <max@enpas.org>
+ *
+ * This started as a fork of i2c-elektor.c and has evolved since.
+ * Thanks go to its authors for providing a base to grow on.
+ *
+ *
+ * IRQ support is currently not implemented.
+ *
+ * As it turns out, i2c-algo-pcf is really written with i2c-elektor's
+ * edge-triggered ISA interrupts in mind, while the Amiga's Zorro bus has
+ * level-triggered interrupts. This means that once an interrupt occurs, we
+ * have to tell the PCF8584 to shut up immediately, or it will keep the
+ * interrupt line busy and cause an IRQ storm.
+ *
+ * However, because of the PCF8584's host-side protocol, there is no good
+ * way to just quieten it without side effects. Rather, we have to perform
+ * the next read/write operation straight away, which will reset the /INT
+ * pin. This entails re-designing the core of i2c-algo-pcf in the future.
+ * For now, we never request an IRQ from the PCF8584, and poll it instead.
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <linux/i2c.h>
+#include <linux/i2c-algo-pcf.h>
+
+#include <asm/amigaints.h>
+#include <linux/zorro.h>
+
+#include "../algos/i2c-algo-pcf.h"
+
+struct icy_i2c {
+       struct i2c_adapter adapter;
+
+       void __iomem *reg_s0;
+       void __iomem *reg_s1;
+       struct fwnode_handle *ltc2990_fwnode;
+       struct i2c_client *ltc2990_client;
+};
+
+/*
+ * Functions called by i2c-algo-pcf
+ */
+static void icy_pcf_setpcf(void *data, int ctl, int val)
+{
+       struct icy_i2c *i2c = (struct icy_i2c *)data;
+
+       u8 __iomem *address = ctl ? i2c->reg_s1 : i2c->reg_s0;
+
+       z_writeb(val, address);
+}
+
+static int icy_pcf_getpcf(void *data, int ctl)
+{
+       struct icy_i2c *i2c = (struct icy_i2c *)data;
+
+       u8 __iomem *address = ctl ? i2c->reg_s1 : i2c->reg_s0;
+
+       return z_readb(address);
+}
+
+static int icy_pcf_getown(void *data)
+{
+       return 0x55;
+}
+
+static int icy_pcf_getclock(void *data)
+{
+       return 0x1c;
+}
+
+static void icy_pcf_waitforpin(void *data)
+{
+       usleep_range(50, 150);
+}
+
+/*
+ * Main i2c-icy part
+ */
+static unsigned short const icy_ltc2990_addresses[] = {
+       0x4c, 0x4d, 0x4e, 0x4f, I2C_CLIENT_END
+};
+
+/*
+ * Additional sensors exposed once this property is applied:
+ *
+ * in1 will be the voltage of the 5V rail, divided by 2.
+ * in2 will be the voltage of the 12V rail, divided by 4.
+ * temp3 will be measured using a PCB loop next to the chip.
+ */
+static const u32 icy_ltc2990_meas_mode[] = {0, 3};
+
+static const struct property_entry icy_ltc2990_props[] = {
+       PROPERTY_ENTRY_U32_ARRAY("lltc,meas-mode", icy_ltc2990_meas_mode),
+       { }
+};
+
+static int icy_probe(struct zorro_dev *z,
+                    const struct zorro_device_id *ent)
+{
+       struct icy_i2c *i2c;
+       struct i2c_algo_pcf_data *algo_data;
+       struct fwnode_handle *new_fwnode;
+       struct i2c_board_info ltc2990_info = {
+               .type           = "ltc2990",
+               .addr           = 0x4c,
+       };
+
+       i2c = devm_kzalloc(&z->dev, sizeof(*i2c), GFP_KERNEL);
+       if (!i2c)
+               return -ENOMEM;
+
+       algo_data = devm_kzalloc(&z->dev, sizeof(*algo_data), GFP_KERNEL);
+       if (!algo_data)
+               return -ENOMEM;
+
+       dev_set_drvdata(&z->dev, i2c);
+       i2c->adapter.dev.parent = &z->dev;
+       i2c->adapter.owner = THIS_MODULE;
+       /* i2c->adapter.algo assigned by i2c_pcf_add_bus() */
+       i2c->adapter.algo_data = algo_data;
+       strlcpy(i2c->adapter.name, "ICY I2C Zorro adapter",
+               sizeof(i2c->adapter.name));
+
+       if (!devm_request_mem_region(&z->dev,
+                                    z->resource.start,
+                                    4, i2c->adapter.name))
+               return -ENXIO;
+
+       /* Driver private data */
+       i2c->reg_s0 = ZTWO_VADDR(z->resource.start);
+       i2c->reg_s1 = ZTWO_VADDR(z->resource.start + 2);
+
+       algo_data->data = i2c;
+       algo_data->setpcf     = icy_pcf_setpcf;
+       algo_data->getpcf     = icy_pcf_getpcf;
+       algo_data->getown     = icy_pcf_getown;
+       algo_data->getclock   = icy_pcf_getclock;
+       algo_data->waitforpin = icy_pcf_waitforpin;
+
+       if (i2c_pcf_add_bus(&i2c->adapter)) {
+               dev_err(&z->dev, "i2c_pcf_add_bus() failed\n");
+               return -ENXIO;
+       }
+
+       dev_info(&z->dev, "ICY I2C controller at %pa, IRQ not implemented\n",
+                &z->resource.start);
+
+       /*
+        * The 2019 a1k.org PCBs have an LTC2990 at 0x4c, so start
+        * it automatically once ltc2990 is modprobed.
+        *
+        * in0 is the voltage of the internal 5V power supply.
+        * temp1 is the temperature inside the chip.
+        *
+        * See property_entry above for in1, in2, temp3.
+        */
+       new_fwnode = fwnode_create_software_node(icy_ltc2990_props, NULL);
+       if (IS_ERR(new_fwnode)) {
+               dev_info(&z->dev, "Failed to create fwnode for LTC2990, error: %ld\n",
+                        PTR_ERR(new_fwnode));
+       } else {
+               /*
+                * Store the fwnode so we can destroy it on .remove().
+                * Only store it on success, as fwnode_remove_software_node()
+                * is NULL safe, but not PTR_ERR safe.
+                */
+               i2c->ltc2990_fwnode = new_fwnode;
+               ltc2990_info.fwnode = new_fwnode;
+
+               i2c->ltc2990_client =
+                       i2c_new_probed_device(&i2c->adapter,
+                                             &ltc2990_info,
+                                             icy_ltc2990_addresses,
+                                             NULL);
+       }
+
+       return 0;
+}
+
+static void icy_remove(struct zorro_dev *z)
+{
+       struct icy_i2c *i2c = dev_get_drvdata(&z->dev);
+
+       i2c_unregister_device(i2c->ltc2990_client);
+       fwnode_remove_software_node(i2c->ltc2990_fwnode);
+
+       i2c_del_adapter(&i2c->adapter);
+}
+
+static const struct zorro_device_id icy_zorro_tbl[] = {
+       { ZORRO_ID(VMC, 15, 0), },
+       { 0 }
+};
+
+MODULE_DEVICE_TABLE(zorro, icy_zorro_tbl);
+
+static struct zorro_driver icy_driver = {
+       .name           = "i2c-icy",
+       .id_table       = icy_zorro_tbl,
+       .probe          = icy_probe,
+       .remove         = icy_remove,
+};
+
+module_driver(icy_driver,
+             zorro_register_driver,
+             zorro_unregister_driver);
+
+MODULE_AUTHOR("Max Staudt <max@enpas.org>");
+MODULE_DESCRIPTION("I2C bus via PCF8584 on ICY Zorro card");
+MODULE_LICENSE("GPL v2");
index dc00fab..c92b564 100644 (file)
@@ -545,7 +545,6 @@ MODULE_DEVICE_TABLE(of, lpi2c_imx_of_match);
 static int lpi2c_imx_probe(struct platform_device *pdev)
 {
        struct lpi2c_imx_struct *lpi2c_imx;
-       struct resource *res;
        unsigned int temp;
        int irq, ret;
 
@@ -553,8 +552,7 @@ static int lpi2c_imx_probe(struct platform_device *pdev)
        if (!lpi2c_imx)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       lpi2c_imx->base = devm_ioremap_resource(&pdev->dev, res);
+       lpi2c_imx->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(lpi2c_imx->base))
                return PTR_ERR(lpi2c_imx->base);
 
index 15f6cde..a3b6133 100644 (file)
@@ -20,6 +20,7 @@
  *
  */
 
+#include <linux/acpi.h>
 #include <linux/clk.h>
 #include <linux/completion.h>
 #include <linux/delay.h>
@@ -255,6 +256,12 @@ static const struct of_device_id i2c_imx_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, i2c_imx_dt_ids);
 
+static const struct acpi_device_id i2c_imx_acpi_ids[] = {
+       {"NXP0001", .driver_data = (kernel_ulong_t)&vf610_i2c_hwdata},
+       { }
+};
+MODULE_DEVICE_TABLE(acpi, i2c_imx_acpi_ids);
+
 static inline int is_imx1_i2c(struct imx_i2c_struct *i2c_imx)
 {
        return i2c_imx->hwdata->devtype == IMX1_I2C;
@@ -1048,14 +1055,13 @@ static const struct i2c_algorithm i2c_imx_algo = {
 
 static int i2c_imx_probe(struct platform_device *pdev)
 {
-       const struct of_device_id *of_id = of_match_device(i2c_imx_dt_ids,
-                                                          &pdev->dev);
        struct imx_i2c_struct *i2c_imx;
        struct resource *res;
        struct imxi2c_platform_data *pdata = dev_get_platdata(&pdev->dev);
        void __iomem *base;
        int irq, ret;
        dma_addr_t phy_addr;
+       const struct imx_i2c_hwdata *match;
 
        dev_dbg(&pdev->dev, "<%s>\n", __func__);
 
@@ -1075,8 +1081,9 @@ static int i2c_imx_probe(struct platform_device *pdev)
        if (!i2c_imx)
                return -ENOMEM;
 
-       if (of_id)
-               i2c_imx->hwdata = of_id->data;
+       match = device_get_match_data(&pdev->dev);
+       if (match)
+               i2c_imx->hwdata = match;
        else
                i2c_imx->hwdata = (struct imx_i2c_hwdata *)
                                platform_get_device_id(pdev)->driver_data;
@@ -1089,6 +1096,7 @@ static int i2c_imx_probe(struct platform_device *pdev)
        i2c_imx->adapter.nr             = pdev->id;
        i2c_imx->adapter.dev.of_node    = pdev->dev.of_node;
        i2c_imx->base                   = base;
+       ACPI_COMPANION_SET(&i2c_imx->adapter.dev, ACPI_COMPANION(&pdev->dev));
 
        /* Get I2C clock */
        i2c_imx->clk = devm_clk_get(&pdev->dev, NULL);
@@ -1247,6 +1255,7 @@ static struct platform_driver i2c_imx_driver = {
                .name = DRIVER_NAME,
                .pm = &i2c_imx_pm_ops,
                .of_match_table = i2c_imx_dt_ids,
+               .acpi_match_table = i2c_imx_acpi_ids,
        },
        .id_table = imx_i2c_devtype,
 };
index 02d23ed..2f95e25 100644 (file)
@@ -781,8 +781,6 @@ static int ismt_dev_init(struct ismt_priv *priv)
        if (!priv->hw)
                return -ENOMEM;
 
-       memset(priv->hw, 0, (ISMT_DESC_ENTRIES * sizeof(struct ismt_desc)));
-
        priv->head = 0;
        init_completion(&priv->cmp);
 
index 7d79317..8922491 100644 (file)
@@ -802,7 +802,6 @@ static int mxs_i2c_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct mxs_i2c_dev *i2c;
        struct i2c_adapter *adap;
-       struct resource *res;
        int err, irq;
 
        i2c = devm_kzalloc(dev, sizeof(*i2c), GFP_KERNEL);
@@ -814,8 +813,7 @@ static int mxs_i2c_probe(struct platform_device *pdev)
                i2c->dev_type = device_id->driver_data;
        }
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       i2c->regs = devm_ioremap_resource(&pdev->dev, res);
+       i2c->regs = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(i2c->regs))
                return PTR_ERR(i2c->regs);
 
index 4117f1a..ca8b3ec 100644 (file)
@@ -703,8 +703,9 @@ static int ocores_i2c_probe(struct platform_device *pdev)
        }
 
        if (ocores_algorithm.master_xfer != ocores_xfer_polling) {
-               ret = devm_request_irq(&pdev->dev, irq, ocores_isr, 0,
-                                      pdev->name, i2c);
+               ret = devm_request_any_context_irq(&pdev->dev, irq,
+                                                  ocores_isr, 0,
+                                                  pdev->name, i2c);
                if (ret) {
                        dev_err(&pdev->dev, "Cannot claim IRQ\n");
                        goto err_clk;
index cba325e..30ded64 100644 (file)
@@ -72,7 +72,8 @@
 #define PIIX4_BLOCK_DATA       0x14
 
 /* Multi-port constants */
-#define PIIX4_MAX_ADAPTERS 4
+#define PIIX4_MAX_ADAPTERS     4
+#define HUDSON2_MAIN_PORTS     2 /* HUDSON2, KERNCZ reserves ports 3, 4 */
 
 /* SB800 constants */
 #define SB800_PIIX4_SMB_IDX            0xcd6
@@ -806,10 +807,12 @@ MODULE_DEVICE_TABLE (pci, piix4_ids);
 
 static struct i2c_adapter *piix4_main_adapters[PIIX4_MAX_ADAPTERS];
 static struct i2c_adapter *piix4_aux_adapter;
+static int piix4_adapter_count;
 
 static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
                             bool sb800_main, u8 port, bool notify_imc,
-                            const char *name, struct i2c_adapter **padap)
+                            u8 hw_port_nr, const char *name,
+                            struct i2c_adapter **padap)
 {
        struct i2c_adapter *adap;
        struct i2c_piix4_adapdata *adapdata;
@@ -841,6 +844,12 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
        /* set up the sysfs linkage to our parent device */
        adap->dev.parent = &dev->dev;
 
+       if (has_acpi_companion(&dev->dev)) {
+               acpi_preset_companion(&adap->dev,
+                                     ACPI_COMPANION(&dev->dev),
+                                     hw_port_nr);
+       }
+
        snprintf(adap->name, sizeof(adap->name),
                "SMBus PIIX4 adapter%s at %04x", name, smba);
 
@@ -865,8 +874,19 @@ static int piix4_add_adapters_sb800(struct pci_dev *dev, unsigned short smba,
        int port;
        int retval;
 
-       for (port = 0; port < PIIX4_MAX_ADAPTERS; port++) {
+       if (dev->device == PCI_DEVICE_ID_AMD_KERNCZ_SMBUS ||
+           (dev->device == PCI_DEVICE_ID_AMD_HUDSON2_SMBUS &&
+            dev->revision >= 0x1F)) {
+               piix4_adapter_count = HUDSON2_MAIN_PORTS;
+       } else {
+               piix4_adapter_count = PIIX4_MAX_ADAPTERS;
+       }
+
+       for (port = 0; port < piix4_adapter_count; port++) {
+               u8 hw_port_nr = port == 0 ? 0 : port + 1;
+
                retval = piix4_add_adapter(dev, smba, true, port, notify_imc,
+                                          hw_port_nr,
                                           piix4_main_port_names_sb800[port],
                                           &piix4_main_adapters[port]);
                if (retval < 0)
@@ -937,8 +957,8 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
                        return retval;
 
                /* Try to register main SMBus adapter, give up if we can't */
-               retval = piix4_add_adapter(dev, retval, false, 0, false, "",
-                                          &piix4_main_adapters[0]);
+               retval = piix4_add_adapter(dev, retval, false, 0, false, 0,
+                                          "", &piix4_main_adapters[0]);
                if (retval < 0)
                        return retval;
        }
@@ -964,7 +984,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
        if (retval > 0) {
                /* Try to add the aux adapter if it exists,
                 * piix4_add_adapter will clean up if this fails */
-               piix4_add_adapter(dev, retval, false, 0, false,
+               piix4_add_adapter(dev, retval, false, 0, false, 1,
                                  is_sb800 ? piix4_aux_port_name_sb800 : "",
                                  &piix4_aux_adapter);
        }
@@ -987,7 +1007,7 @@ static void piix4_adap_remove(struct i2c_adapter *adap)
 
 static void piix4_remove(struct pci_dev *dev)
 {
-       int port = PIIX4_MAX_ADAPTERS;
+       int port = piix4_adapter_count;
 
        while (--port >= 0) {
                if (piix4_main_adapters[port]) {
index 9611235..b432e75 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
@@ -465,9 +466,9 @@ static int sprd_i2c_clk_init(struct sprd_i2c *i2c_dev)
 
        i2c_dev->clk = devm_clk_get(i2c_dev->dev, "enable");
        if (IS_ERR(i2c_dev->clk)) {
-               dev_warn(i2c_dev->dev, "i2c%d can't get the enable clock\n",
-                        i2c_dev->adap.nr);
-               i2c_dev->clk = NULL;
+               dev_err(i2c_dev->dev, "i2c%d can't get the enable clock\n",
+                       i2c_dev->adap.nr);
+               return PTR_ERR(i2c_dev->clk);
        }
 
        return 0;
@@ -477,7 +478,6 @@ static int sprd_i2c_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct sprd_i2c *i2c_dev;
-       struct resource *res;
        u32 prop;
        int ret;
 
@@ -487,8 +487,7 @@ static int sprd_i2c_probe(struct platform_device *pdev)
        if (!i2c_dev)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       i2c_dev->base = devm_ioremap_resource(dev, res);
+       i2c_dev->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(i2c_dev->base))
                return PTR_ERR(i2c_dev->base);
 
@@ -520,7 +519,10 @@ static int sprd_i2c_probe(struct platform_device *pdev)
        if (i2c_dev->bus_freq != 100000 && i2c_dev->bus_freq != 400000)
                return -EINVAL;
 
-       sprd_i2c_clk_init(i2c_dev);
+       ret = sprd_i2c_clk_init(i2c_dev);
+       if (ret)
+               return ret;
+
        platform_set_drvdata(pdev, i2c_dev);
 
        ret = clk_prepare_enable(i2c_dev->clk);
@@ -644,8 +646,7 @@ static struct platform_driver sprd_i2c_driver = {
        },
 };
 
-static int sprd_i2c_init(void)
-{
-       return platform_driver_register(&sprd_i2c_driver);
-}
-arch_initcall_sync(sprd_i2c_init);
+module_platform_driver(sprd_i2c_driver);
+
+MODULE_DESCRIPTION("Spreadtrum I2C master controller driver");
+MODULE_LICENSE("GPL v2");
index 266d1c2..d36cf08 100644 (file)
@@ -1809,7 +1809,7 @@ static u32 stm32f7_i2c_func(struct i2c_adapter *adap)
                I2C_FUNC_SMBUS_I2C_BLOCK;
 }
 
-static struct i2c_algorithm stm32f7_i2c_algo = {
+static const struct i2c_algorithm stm32f7_i2c_algo = {
        .master_xfer = stm32f7_i2c_xfer,
        .smbus_xfer = stm32f7_i2c_smbus_xfer,
        .functionality = stm32f7_i2c_func,
index f724c8e..39762f0 100644 (file)
@@ -526,7 +526,7 @@ static const struct i2c_algorithm synquacer_i2c_algo = {
        .functionality  = synquacer_i2c_functionality,
 };
 
-static struct i2c_adapter synquacer_i2c_ops = {
+static const struct i2c_adapter synquacer_i2c_ops = {
        .owner          = THIS_MODULE,
        .name           = "synquacer_i2c-adapter",
        .algo           = &synquacer_i2c_algo,
index 37347c9..0bff3f3 100644 (file)
@@ -39,7 +39,7 @@ struct taos_data {
 };
 
 /* TAOS TSL2550 EVM */
-static struct i2c_board_info tsl2550_info = {
+static const struct i2c_board_info tsl2550_info = {
        I2C_BOARD_INFO("tsl2550", 0x39),
 };
 
index 9fcb13b..c1683f9 100644 (file)
@@ -636,7 +636,7 @@ static void tegra_dvc_init(struct tegra_i2c_dev *i2c_dev)
        dvc_writel(i2c_dev, val, DVC_CTRL_REG1);
 }
 
-static int tegra_i2c_runtime_resume(struct device *dev)
+static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev)
 {
        struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev);
        int ret;
@@ -665,7 +665,7 @@ static int tegra_i2c_runtime_resume(struct device *dev)
        return 0;
 }
 
-static int tegra_i2c_runtime_suspend(struct device *dev)
+static int __maybe_unused tegra_i2c_runtime_suspend(struct device *dev)
 {
        struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev);
 
@@ -713,12 +713,6 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit)
        u32 tsu_thd;
        u8 tlow, thigh;
 
-       err = pm_runtime_get_sync(i2c_dev->dev);
-       if (err < 0) {
-               dev_err(i2c_dev->dev, "runtime resume failed %d\n", err);
-               return err;
-       }
-
        reset_control_assert(i2c_dev->rst);
        udelay(2);
        reset_control_deassert(i2c_dev->rst);
@@ -772,7 +766,7 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit)
                if (err) {
                        dev_err(i2c_dev->dev,
                                "failed changing clock rate: %d\n", err);
-                       goto err;
+                       return err;
                }
        }
 
@@ -787,23 +781,21 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit)
 
        err = tegra_i2c_flush_fifos(i2c_dev);
        if (err)
-               goto err;
+               return err;
 
        if (i2c_dev->is_multimaster_mode && i2c_dev->hw->has_slcg_override_reg)
                i2c_writel(i2c_dev, I2C_MST_CORE_CLKEN_OVR, I2C_CLKEN_OVERRIDE);
 
        err = tegra_i2c_wait_for_config_load(i2c_dev);
        if (err)
-               goto err;
+               return err;
 
        if (i2c_dev->irq_disabled) {
                i2c_dev->irq_disabled = false;
                enable_irq(i2c_dev->irq);
        }
 
-err:
-       pm_runtime_put(i2c_dev->dev);
-       return err;
+       return 0;
 }
 
 static int tegra_i2c_disable_packet_mode(struct tegra_i2c_dev *i2c_dev)
@@ -1616,12 +1608,14 @@ static int tegra_i2c_probe(struct platform_device *pdev)
        }
 
        pm_runtime_enable(&pdev->dev);
-       if (!pm_runtime_enabled(&pdev->dev)) {
+       if (!pm_runtime_enabled(&pdev->dev))
                ret = tegra_i2c_runtime_resume(&pdev->dev);
-               if (ret < 0) {
-                       dev_err(&pdev->dev, "runtime resume failed\n");
-                       goto unprepare_div_clk;
-               }
+       else
+               ret = pm_runtime_get_sync(i2c_dev->dev);
+
+       if (ret < 0) {
+               dev_err(&pdev->dev, "runtime resume failed\n");
+               goto unprepare_div_clk;
        }
 
        if (i2c_dev->is_multimaster_mode) {
@@ -1666,6 +1660,8 @@ static int tegra_i2c_probe(struct platform_device *pdev)
        if (ret)
                goto release_dma;
 
+       pm_runtime_put(&pdev->dev);
+
        return 0;
 
 release_dma:
@@ -1711,8 +1707,7 @@ static int tegra_i2c_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int tegra_i2c_suspend(struct device *dev)
+static int __maybe_unused tegra_i2c_suspend(struct device *dev)
 {
        struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev);
 
@@ -1721,38 +1716,41 @@ static int tegra_i2c_suspend(struct device *dev)
        return 0;
 }
 
-static int tegra_i2c_resume(struct device *dev)
+static int __maybe_unused tegra_i2c_resume(struct device *dev)
 {
        struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev);
        int err;
 
+       err = tegra_i2c_runtime_resume(dev);
+       if (err)
+               return err;
+
        err = tegra_i2c_init(i2c_dev, false);
        if (err)
                return err;
 
+       err = tegra_i2c_runtime_suspend(dev);
+       if (err)
+               return err;
+
        i2c_mark_adapter_resumed(&i2c_dev->adapter);
 
        return 0;
 }
 
 static const struct dev_pm_ops tegra_i2c_pm = {
-       SET_SYSTEM_SLEEP_PM_OPS(tegra_i2c_suspend, tegra_i2c_resume)
+       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(tegra_i2c_suspend, tegra_i2c_resume)
        SET_RUNTIME_PM_OPS(tegra_i2c_runtime_suspend, tegra_i2c_runtime_resume,
                           NULL)
 };
 
-#define TEGRA_I2C_PM   (&tegra_i2c_pm)
-#else
-#define TEGRA_I2C_PM   NULL
-#endif
-
 static struct platform_driver tegra_i2c_driver = {
        .probe   = tegra_i2c_probe,
        .remove  = tegra_i2c_remove,
        .driver  = {
                .name  = "tegra-i2c",
                .of_match_table = tegra_i2c_of_match,
-               .pm    = TEGRA_I2C_PM,
+               .pm    = &tegra_i2c_pm,
        },
 };
 
index 7acca25..4241aac 100644 (file)
@@ -108,7 +108,6 @@ static void uniphier_fi2c_fill_txfifo(struct uniphier_fi2c_priv *priv,
                if (fifo_space-- <= 0)
                        break;
 
-               dev_dbg(&priv->adap.dev, "write data: %02x\n", *priv->buf);
                writel(*priv->buf++, priv->membase + UNIPHIER_FI2C_DTTX);
                priv->len--;
        }
@@ -124,7 +123,6 @@ static void uniphier_fi2c_drain_rxfifo(struct uniphier_fi2c_priv *priv)
                        break;
 
                *priv->buf++ = readl(priv->membase + UNIPHIER_FI2C_DTRX);
-               dev_dbg(&priv->adap.dev, "read data: %02x\n", priv->buf[-1]);
                priv->len--;
        }
 }
@@ -142,8 +140,6 @@ static void uniphier_fi2c_clear_irqs(struct uniphier_fi2c_priv *priv,
 
 static void uniphier_fi2c_stop(struct uniphier_fi2c_priv *priv)
 {
-       dev_dbg(&priv->adap.dev, "stop condition\n");
-
        priv->enabled_irqs |= UNIPHIER_FI2C_INT_STOP;
        uniphier_fi2c_set_irqs(priv);
        writel(UNIPHIER_FI2C_CR_MST | UNIPHIER_FI2C_CR_STO,
@@ -160,21 +156,15 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id)
        irq_status = readl(priv->membase + UNIPHIER_FI2C_INT);
        irq_status &= priv->enabled_irqs;
 
-       dev_dbg(&priv->adap.dev,
-               "interrupt: enabled_irqs=%04x, irq_status=%04x\n",
-               priv->enabled_irqs, irq_status);
-
        if (irq_status & UNIPHIER_FI2C_INT_STOP)
                goto complete;
 
        if (unlikely(irq_status & UNIPHIER_FI2C_INT_AL)) {
-               dev_dbg(&priv->adap.dev, "arbitration lost\n");
                priv->error = -EAGAIN;
                goto complete;
        }
 
        if (unlikely(irq_status & UNIPHIER_FI2C_INT_NA)) {
-               dev_dbg(&priv->adap.dev, "could not get ACK\n");
                priv->error = -ENXIO;
                if (priv->flags & UNIPHIER_FI2C_RD) {
                        /*
@@ -215,18 +205,14 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id)
                if (unlikely(priv->flags & UNIPHIER_FI2C_MANUAL_NACK)) {
                        if (priv->len <= UNIPHIER_FI2C_FIFO_SIZE &&
                            !(priv->flags & UNIPHIER_FI2C_BYTE_WISE)) {
-                               dev_dbg(&priv->adap.dev,
-                                       "enable read byte count IRQ\n");
                                priv->enabled_irqs |= UNIPHIER_FI2C_INT_RB;
                                uniphier_fi2c_set_irqs(priv);
                                priv->flags |= UNIPHIER_FI2C_BYTE_WISE;
                        }
-                       if (priv->len <= 1) {
-                               dev_dbg(&priv->adap.dev, "set NACK\n");
+                       if (priv->len <= 1)
                                writel(UNIPHIER_FI2C_CR_MST |
                                       UNIPHIER_FI2C_CR_NACK,
                                       priv->membase + UNIPHIER_FI2C_CR);
-                       }
                }
 
                goto handled;
@@ -334,10 +320,6 @@ static int uniphier_fi2c_master_xfer_one(struct i2c_adapter *adap,
        bool is_read = msg->flags & I2C_M_RD;
        unsigned long time_left, flags;
 
-       dev_dbg(&adap->dev, "%s: addr=0x%02x, len=%d, repeat=%d, stop=%d\n",
-               is_read ? "receive" : "transmit", msg->addr, msg->len,
-               repeat, stop);
-
        priv->len = msg->len;
        priv->buf = msg->buf;
        priv->enabled_irqs = UNIPHIER_FI2C_INT_FAULTS;
@@ -359,7 +341,6 @@ static int uniphier_fi2c_master_xfer_one(struct i2c_adapter *adap,
        else
                uniphier_fi2c_tx_init(priv, msg->addr, repeat);
 
-       dev_dbg(&adap->dev, "start condition\n");
        /*
         * For a repeated START condition, writing a slave address to the FIFO
         * kicks the controller. So, the UNIPHIER_FI2C_CR register should be
@@ -383,7 +364,6 @@ static int uniphier_fi2c_master_xfer_one(struct i2c_adapter *adap,
                uniphier_fi2c_recover(priv);
                return -ETIMEDOUT;
        }
-       dev_dbg(&adap->dev, "complete\n");
 
        if (unlikely(priv->flags & UNIPHIER_FI2C_DEFER_STOP_COMP)) {
                u32 status;
@@ -538,7 +518,6 @@ static int uniphier_fi2c_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct uniphier_fi2c_priv *priv;
-       struct resource *regs;
        u32 bus_speed;
        unsigned long clk_rate;
        int irq, ret;
@@ -547,8 +526,7 @@ static int uniphier_fi2c_probe(struct platform_device *pdev)
        if (!priv)
                return -ENOMEM;
 
-       regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       priv->membase = devm_ioremap_resource(dev, regs);
+       priv->membase = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(priv->membase))
                return PTR_ERR(priv->membase);
 
index 0173840..0270090 100644 (file)
@@ -71,7 +71,6 @@ static int uniphier_i2c_xfer_byte(struct i2c_adapter *adap, u32 txdata,
        reinit_completion(&priv->comp);
 
        txdata |= UNIPHIER_I2C_DTRM_IRQEN;
-       dev_dbg(&adap->dev, "write data: 0x%04x\n", txdata);
        writel(txdata, priv->membase + UNIPHIER_I2C_DTRM);
 
        time_left = wait_for_completion_timeout(&priv->comp, adap->timeout);
@@ -81,8 +80,6 @@ static int uniphier_i2c_xfer_byte(struct i2c_adapter *adap, u32 txdata,
        }
 
        rxdata = readl(priv->membase + UNIPHIER_I2C_DREC);
-       dev_dbg(&adap->dev, "read data: 0x%04x\n", rxdata);
-
        if (rxdatap)
                *rxdatap = rxdata;
 
@@ -98,14 +95,11 @@ static int uniphier_i2c_send_byte(struct i2c_adapter *adap, u32 txdata)
        if (ret)
                return ret;
 
-       if (unlikely(rxdata & UNIPHIER_I2C_DREC_LAB)) {
-               dev_dbg(&adap->dev, "arbitration lost\n");
+       if (unlikely(rxdata & UNIPHIER_I2C_DREC_LAB))
                return -EAGAIN;
-       }
-       if (unlikely(rxdata & UNIPHIER_I2C_DREC_LRB)) {
-               dev_dbg(&adap->dev, "could not get ACK\n");
+
+       if (unlikely(rxdata & UNIPHIER_I2C_DREC_LRB))
                return -ENXIO;
-       }
 
        return 0;
 }
@@ -115,7 +109,6 @@ static int uniphier_i2c_tx(struct i2c_adapter *adap, u16 addr, u16 len,
 {
        int ret;
 
-       dev_dbg(&adap->dev, "start condition\n");
        ret = uniphier_i2c_send_byte(adap, addr << 1 |
                                     UNIPHIER_I2C_DTRM_STA |
                                     UNIPHIER_I2C_DTRM_NACK);
@@ -137,7 +130,6 @@ static int uniphier_i2c_rx(struct i2c_adapter *adap, u16 addr, u16 len,
 {
        int ret;
 
-       dev_dbg(&adap->dev, "start condition\n");
        ret = uniphier_i2c_send_byte(adap, addr << 1 |
                                     UNIPHIER_I2C_DTRM_STA |
                                     UNIPHIER_I2C_DTRM_NACK |
@@ -161,7 +153,6 @@ static int uniphier_i2c_rx(struct i2c_adapter *adap, u16 addr, u16 len,
 
 static int uniphier_i2c_stop(struct i2c_adapter *adap)
 {
-       dev_dbg(&adap->dev, "stop condition\n");
        return uniphier_i2c_send_byte(adap, UNIPHIER_I2C_DTRM_STO |
                                      UNIPHIER_I2C_DTRM_NACK);
 }
@@ -173,9 +164,6 @@ static int uniphier_i2c_master_xfer_one(struct i2c_adapter *adap,
        bool recovery = false;
        int ret;
 
-       dev_dbg(&adap->dev, "%s: addr=0x%02x, len=%d, stop=%d\n",
-               is_read ? "receive" : "transmit", msg->addr, msg->len, stop);
-
        if (is_read)
                ret = uniphier_i2c_rx(adap, msg->addr, msg->len, msg->buf);
        else
@@ -326,7 +314,6 @@ static int uniphier_i2c_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct uniphier_i2c_priv *priv;
-       struct resource *regs;
        u32 bus_speed;
        unsigned long clk_rate;
        int irq, ret;
@@ -335,8 +322,7 @@ static int uniphier_i2c_probe(struct platform_device *pdev)
        if (!priv)
                return -ENOMEM;
 
-       regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       priv->membase = devm_ioremap_resource(dev, regs);
+       priv->membase = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(priv->membase))
                return PTR_ERR(priv->membase);
 
index 72b3001..5f6a498 100644 (file)
@@ -350,13 +350,11 @@ static int i2c_device_probe(struct device *dev)
                return -ENODEV;
 
        if (client->flags & I2C_CLIENT_WAKE) {
-               int wakeirq = -ENOENT;
+               int wakeirq;
 
-               if (dev->of_node) {
-                       wakeirq = of_irq_get_byname(dev->of_node, "wakeup");
-                       if (wakeirq == -EPROBE_DEFER)
-                               return wakeirq;
-               }
+               wakeirq = of_irq_get_byname(dev->of_node, "wakeup");
+               if (wakeirq == -EPROBE_DEFER)
+                       return wakeirq;
 
                device_init_wakeup(&client->dev, true);
 
@@ -966,7 +964,7 @@ struct i2c_client *devm_i2c_new_dummy_device(struct device *dev,
 EXPORT_SYMBOL_GPL(devm_i2c_new_dummy_device);
 
 /**
- * i2c_new_secondary_device - Helper to get the instantiated secondary address
+ * i2c_new_ancillary_device - Helper to get the instantiated secondary address
  * and create the associated device
  * @client: Handle to the primary client
  * @name: Handle to specify which secondary address to get
@@ -985,9 +983,9 @@ EXPORT_SYMBOL_GPL(devm_i2c_new_dummy_device);
  * cell whose "reg-names" value matches the slave name.
  *
  * This returns the new i2c client, which should be saved for later use with
- * i2c_unregister_device(); or NULL to indicate an error.
+ * i2c_unregister_device(); or an ERR_PTR to describe the error.
  */
-struct i2c_client *i2c_new_secondary_device(struct i2c_client *client,
+struct i2c_client *i2c_new_ancillary_device(struct i2c_client *client,
                                                const char *name,
                                                u16 default_addr)
 {
@@ -1002,9 +1000,9 @@ struct i2c_client *i2c_new_secondary_device(struct i2c_client *client,
        }
 
        dev_dbg(&client->adapter->dev, "Address for %s : 0x%x\n", name, addr);
-       return i2c_new_dummy(client->adapter, addr);
+       return i2c_new_dummy_device(client->adapter, addr);
 }
-EXPORT_SYMBOL_GPL(i2c_new_secondary_device);
+EXPORT_SYMBOL_GPL(i2c_new_ancillary_device);
 
 /* ------------------------------------------------------------------------- */
 
index be65d38..92ff999 100644 (file)
  * pointer, yet implementation is deferred until the need actually arises.
  */
 
+/*
+ * FIXME: What to do if only 8 bits of a 16 bit address are sent?
+ * The ST-M24C64 sends only 0xff then. Needs verification with other
+ * EEPROMs, though. We currently use the 8 bit as a valid address.
+ */
+
+#include <linux/bitfield.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/module.h>
 
 struct eeprom_data {
        struct bin_attribute bin;
-       bool first_write;
        spinlock_t buffer_lock;
-       u8 buffer_idx;
+       u16 buffer_idx;
+       u16 address_mask;
+       u8 num_address_bytes;
+       u8 idx_write_cnt;
        u8 buffer[];
 };
 
+#define I2C_SLAVE_BYTELEN GENMASK(15, 0)
+#define I2C_SLAVE_FLAG_ADDR16 BIT(16)
+#define I2C_SLAVE_DEVICE_MAGIC(_len, _flags) ((_flags) | (_len))
+
 static int i2c_slave_eeprom_slave_cb(struct i2c_client *client,
                                     enum i2c_slave_event event, u8 *val)
 {
@@ -34,12 +47,14 @@ static int i2c_slave_eeprom_slave_cb(struct i2c_client *client,
 
        switch (event) {
        case I2C_SLAVE_WRITE_RECEIVED:
-               if (eeprom->first_write) {
-                       eeprom->buffer_idx = *val;
-                       eeprom->first_write = false;
+               if (eeprom->idx_write_cnt < eeprom->num_address_bytes) {
+                       if (eeprom->idx_write_cnt == 0)
+                               eeprom->buffer_idx = 0;
+                       eeprom->buffer_idx = *val | (eeprom->buffer_idx << 8);
+                       eeprom->idx_write_cnt++;
                } else {
                        spin_lock(&eeprom->buffer_lock);
-                       eeprom->buffer[eeprom->buffer_idx++] = *val;
+                       eeprom->buffer[eeprom->buffer_idx++ & eeprom->address_mask] = *val;
                        spin_unlock(&eeprom->buffer_lock);
                }
                break;
@@ -50,7 +65,7 @@ static int i2c_slave_eeprom_slave_cb(struct i2c_client *client,
                /* fallthrough */
        case I2C_SLAVE_READ_REQUESTED:
                spin_lock(&eeprom->buffer_lock);
-               *val = eeprom->buffer[eeprom->buffer_idx];
+               *val = eeprom->buffer[eeprom->buffer_idx & eeprom->address_mask];
                spin_unlock(&eeprom->buffer_lock);
                /*
                 * Do not increment buffer_idx here, because we don't know if
@@ -61,7 +76,7 @@ static int i2c_slave_eeprom_slave_cb(struct i2c_client *client,
 
        case I2C_SLAVE_STOP:
        case I2C_SLAVE_WRITE_REQUESTED:
-               eeprom->first_write = true;
+               eeprom->idx_write_cnt = 0;
                break;
 
        default:
@@ -105,13 +120,16 @@ static int i2c_slave_eeprom_probe(struct i2c_client *client, const struct i2c_de
 {
        struct eeprom_data *eeprom;
        int ret;
-       unsigned size = id->driver_data;
+       unsigned int size = FIELD_GET(I2C_SLAVE_BYTELEN, id->driver_data);
+       unsigned int flag_addr16 = FIELD_GET(I2C_SLAVE_FLAG_ADDR16, id->driver_data);
 
        eeprom = devm_kzalloc(&client->dev, sizeof(struct eeprom_data) + size, GFP_KERNEL);
        if (!eeprom)
                return -ENOMEM;
 
-       eeprom->first_write = true;
+       eeprom->idx_write_cnt = 0;
+       eeprom->num_address_bytes = flag_addr16 ? 2 : 1;
+       eeprom->address_mask = size - 1;
        spin_lock_init(&eeprom->buffer_lock);
        i2c_set_clientdata(client, eeprom);
 
@@ -146,7 +164,9 @@ static int i2c_slave_eeprom_remove(struct i2c_client *client)
 }
 
 static const struct i2c_device_id i2c_slave_eeprom_id[] = {
-       { "slave-24c02", 2048 / 8 },
+       { "slave-24c02", I2C_SLAVE_DEVICE_MAGIC(2048 / 8,  0) },
+       { "slave-24c32", I2C_SLAVE_DEVICE_MAGIC(32768 / 8, I2C_SLAVE_FLAG_ADDR16) },
+       { "slave-24c64", I2C_SLAVE_DEVICE_MAGIC(65536 / 8, I2C_SLAVE_FLAG_ADDR16) },
        { }
 };
 MODULE_DEVICE_TABLE(i2c, i2c_slave_eeprom_id);
index 85e103b..b44b1c3 100644 (file)
@@ -55,6 +55,7 @@ config INFINIBAND_ON_DEMAND_PAGING
        bool "InfiniBand on-demand paging support"
        depends on INFINIBAND_USER_MEM
        select MMU_NOTIFIER
+       select INTERVAL_TREE
        default y
        ---help---
          On demand paging support for the InfiniBand subsystem.
index 9b76a8f..1dd467b 100644 (file)
@@ -183,7 +183,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
 
        /* Repair the nlmsg header length */
        nlmsg_end(skb, nlh);
-       rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, GFP_KERNEL);
+       rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, GFP_KERNEL);
 
        /* Make the request retry, so when we get the response from userspace
         * we will have something.
index 18e476b..00fb3ea 100644 (file)
@@ -810,6 +810,7 @@ static void release_gid_table(struct ib_device *device,
        if (leak)
                return;
 
+       mutex_destroy(&table->lock);
        kfree(table->data_vec);
        kfree(table);
 }
index a68d0cc..0e3cf34 100644 (file)
@@ -3046,7 +3046,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
                if (status)
                        pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
                                             status);
-       } else {
+       } else if (status) {
                pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status);
        }
 
index 3ec2c41..8b0b5ae 100644 (file)
@@ -342,12 +342,18 @@ static struct configfs_subsystem cma_subsys = {
 
 int __init cma_configfs_init(void)
 {
+       int ret;
+
        config_group_init(&cma_subsys.su_group);
        mutex_init(&cma_subsys.su_mutex);
-       return configfs_register_subsystem(&cma_subsys);
+       ret = configfs_register_subsystem(&cma_subsys);
+       if (ret)
+               mutex_destroy(&cma_subsys.su_mutex);
+       return ret;
 }
 
 void __exit cma_configfs_exit(void)
 {
        configfs_unregister_subsystem(&cma_subsys);
+       mutex_destroy(&cma_subsys.su_mutex);
 }
index beee7b7..3a8b091 100644 (file)
@@ -36,6 +36,8 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/cgroup_rdma.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/opa_addr.h>
@@ -54,8 +56,26 @@ struct pkey_index_qp_list {
        struct list_head    qp_list;
 };
 
+/**
+ * struct rdma_dev_net - rdma net namespace metadata for a net
+ * @nl_sock:   Pointer to netlink socket
+ * @net:       Pointer to owner net namespace
+ * @id:                xarray id to identify the net namespace.
+ */
+struct rdma_dev_net {
+       struct sock *nl_sock;
+       possible_net_t net;
+       u32 id;
+};
+
 extern const struct attribute_group ib_dev_attr_group;
 extern bool ib_devices_shared_netns;
+extern unsigned int rdma_dev_net_id;
+
+static inline struct rdma_dev_net *rdma_net_to_dev_net(struct net *net)
+{
+       return net_generic(net, rdma_dev_net_id);
+}
 
 int ib_device_register_sysfs(struct ib_device *device);
 void ib_device_unregister_sysfs(struct ib_device *device);
@@ -179,7 +199,6 @@ void ib_mad_cleanup(void);
 int ib_sa_init(void);
 void ib_sa_cleanup(void);
 
-int rdma_nl_init(void);
 void rdma_nl_exit(void);
 
 int ib_nl_handle_resolve_resp(struct sk_buff *skb,
@@ -365,4 +384,7 @@ void ib_port_unregister_module_stat(struct kobject *kobj);
 
 int ib_device_set_netns_put(struct sk_buff *skb,
                            struct ib_device *dev, u32 ns_fd);
+
+int rdma_nl_net_init(struct rdma_dev_net *rnet);
+void rdma_nl_net_exit(struct rdma_dev_net *rnet);
 #endif /* _CORE_PRIV_H */
index af8c85d..680ad27 100644 (file)
@@ -599,7 +599,7 @@ int rdma_counter_get_mode(struct ib_device *dev, u8 port,
 void rdma_counter_init(struct ib_device *dev)
 {
        struct rdma_port_counter *port_counter;
-       u32 port;
+       u32 port, i;
 
        if (!dev->port_data)
                return;
@@ -620,13 +620,12 @@ void rdma_counter_init(struct ib_device *dev)
        return;
 
 fail:
-       rdma_for_each_port(dev, port) {
+       for (i = port; i >= rdma_start_port(dev); i--) {
                port_counter = &dev->port_data[port].port_counter;
                kfree(port_counter->hstats);
                port_counter->hstats = NULL;
+               mutex_destroy(&port_counter->lock);
        }
-
-       return;
 }
 
 void rdma_counter_release(struct ib_device *dev)
@@ -637,5 +636,6 @@ void rdma_counter_release(struct ib_device *dev)
        rdma_for_each_port(dev, port) {
                port_counter = &dev->port_data[port].port_counter;
                kfree(port_counter->hstats);
+               mutex_destroy(&port_counter->lock);
        }
 }
index 7c59987..bbfded6 100644 (file)
@@ -253,6 +253,34 @@ out_free_cq:
 EXPORT_SYMBOL(__ib_alloc_cq_user);
 
 /**
+ * __ib_alloc_cq_any - allocate a completion queue
+ * @dev:               device to allocate the CQ for
+ * @private:           driver private data, accessible from cq->cq_context
+ * @nr_cqe:            number of CQEs to allocate
+ * @poll_ctx:          context to poll the CQ from
+ * @caller:            module owner name
+ *
+ * Attempt to spread ULP Completion Queues over each device's interrupt
+ * vectors. A simple best-effort mechanism is used.
+ */
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+                               int nr_cqe, enum ib_poll_context poll_ctx,
+                               const char *caller)
+{
+       static atomic_t counter;
+       int comp_vector = 0;
+
+       /* Unsigned modulo: the counter may wrap negative after 2^31 calls. */
+       if (dev->num_comp_vectors > 1)
+               comp_vector = (unsigned int)atomic_inc_return(&counter) %
+                       min_t(int, dev->num_comp_vectors, num_online_cpus());
+
+       return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
+                                 caller, NULL);
+}
+EXPORT_SYMBOL(__ib_alloc_cq_any);
+
+/**
  * ib_free_cq_user - free a completion queue
  * @cq:                completion queue to free.
  * @udata:     User data or NULL for kernel object
index ea8661a..99c4a55 100644 (file)
@@ -39,7 +39,6 @@
 #include <linux/init.h>
 #include <linux/netdevice.h>
 #include <net/net_namespace.h>
-#include <net/netns/generic.h>
 #include <linux/security.h>
 #include <linux/notifier.h>
 #include <linux/hashtable.h>
@@ -111,17 +110,7 @@ static void ib_client_put(struct ib_client *client)
  */
 #define CLIENT_DATA_REGISTERED XA_MARK_1
 
-/**
- * struct rdma_dev_net - rdma net namespace metadata for a net
- * @net:       Pointer to owner net namespace
- * @id:                xarray id to identify the net namespace.
- */
-struct rdma_dev_net {
-       possible_net_t net;
-       u32 id;
-};
-
-static unsigned int rdma_dev_net_id;
+unsigned int rdma_dev_net_id;
 
 /*
  * A list of net namespaces is maintained in an xarray. This is necessary
@@ -514,6 +503,9 @@ static void ib_device_release(struct device *device)
                          rcu_head);
        }
 
+       mutex_destroy(&dev->unregistration_lock);
+       mutex_destroy(&dev->compat_devs_mutex);
+
        xa_destroy(&dev->compat_devs);
        xa_destroy(&dev->client_data);
        kfree_rcu(dev, rcu_head);
@@ -1060,7 +1052,7 @@ int rdma_compatdev_set(u8 enable)
 
 static void rdma_dev_exit_net(struct net *net)
 {
-       struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id);
+       struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
        struct ib_device *dev;
        unsigned long index;
        int ret;
@@ -1094,25 +1086,32 @@ static void rdma_dev_exit_net(struct net *net)
        }
        up_read(&devices_rwsem);
 
+       rdma_nl_net_exit(rnet);
        xa_erase(&rdma_nets, rnet->id);
 }
 
 static __net_init int rdma_dev_init_net(struct net *net)
 {
-       struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id);
+       struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
        unsigned long index;
        struct ib_device *dev;
        int ret;
 
+       write_pnet(&rnet->net, net);
+
+       ret = rdma_nl_net_init(rnet);
+       if (ret)
+               return ret;
+
        /* No need to create any compat devices in default init_net. */
        if (net_eq(net, &init_net))
                return 0;
 
-       write_pnet(&rnet->net, net);
-
        ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL);
-       if (ret)
+       if (ret) {
+               rdma_nl_net_exit(rnet);
                return ret;
+       }
 
        down_read(&devices_rwsem);
        xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
@@ -1974,31 +1973,64 @@ void ib_dispatch_event(struct ib_event *event)
 }
 EXPORT_SYMBOL(ib_dispatch_event);
 
-/**
- * ib_query_port - Query IB port attributes
- * @device:Device to query
- * @port_num:Port number to query
- * @port_attr:Port attributes
- *
- * ib_query_port() returns the attributes of a port through the
- * @port_attr pointer.
- */
-int ib_query_port(struct ib_device *device,
-                 u8 port_num,
-                 struct ib_port_attr *port_attr)
+/* Pre-fill iWARP port attributes from the netdev, then query the driver. */
+static int iw_query_port(struct ib_device *device,
+                          u8 port_num,
+                          struct ib_port_attr *port_attr)
 {
-       union ib_gid gid;
+       struct in_device *inetdev;
+       struct net_device *netdev;
        int err;
 
-       if (!rdma_is_port_valid(device, port_num))
-               return -EINVAL;
+       memset(port_attr, 0, sizeof(*port_attr));
+
+       netdev = ib_device_get_netdev(device, port_num);
+       if (!netdev)
+               return -ENODEV;
+
+       port_attr->max_mtu = IB_MTU_4096;
+       port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
+
+       if (!netif_carrier_ok(netdev)) {
+               port_attr->state = IB_PORT_DOWN;
+               port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+       } else {
+               inetdev = in_dev_get(netdev);
+               if (inetdev && inetdev->ifa_list) {
+                       port_attr->state = IB_PORT_ACTIVE;
+                       port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+               } else {
+                       port_attr->state = IB_PORT_INIT;
+                       port_attr->phys_state =
+                               IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING;
+               }
+               /* Drop the in_dev_get() reference on both branches. */
+               if (inetdev)
+                       in_dev_put(inetdev);
+       }
+
+       /* Release netdev only after its last dereference above. */
+       dev_put(netdev);
+
+       err = device->ops.query_port(device, port_num, port_attr);
+       return err;
+}
+
+static int __ib_query_port(struct ib_device *device,
+                          u8 port_num,
+                          struct ib_port_attr *port_attr)
+{
+       union ib_gid gid = {};
+       int err;
 
        memset(port_attr, 0, sizeof(*port_attr));
+
        err = device->ops.query_port(device, port_num, port_attr);
        if (err || port_attr->subnet_prefix)
                return err;
 
-       if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
+       if (rdma_port_get_link_layer(device, port_num) !=
+           IB_LINK_LAYER_INFINIBAND)
                return 0;
 
        err = device->ops.query_gid(device, port_num, 0, &gid);
@@ -2008,6 +2040,28 @@ int ib_query_port(struct ib_device *device,
        port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
        return 0;
 }
+
+/**
+ * ib_query_port - Query IB port attributes
+ * @device:Device to query
+ * @port_num:Port number to query
+ * @port_attr:Port attributes
+ *
+ * ib_query_port() returns the attributes of a port through the
+ * @port_attr pointer.
+ */
+int ib_query_port(struct ib_device *device,
+                 u8 port_num,
+                 struct ib_port_attr *port_attr)
+{
+       if (!rdma_is_port_valid(device, port_num))
+               return -EINVAL;
+
+       if (rdma_protocol_iwarp(device, port_num))
+               return iw_query_port(device, port_num, port_attr);
+       else
+               return __ib_query_port(device, port_num, port_attr);
+}
 EXPORT_SYMBOL(ib_query_port);
 
 static void add_ndev_hash(struct ib_port_data *pdata)
@@ -2562,6 +2616,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
        SET_DEVICE_OP(dev_ops, get_vf_config);
        SET_DEVICE_OP(dev_ops, get_vf_stats);
        SET_DEVICE_OP(dev_ops, init_port);
+       SET_DEVICE_OP(dev_ops, invalidate_range);
        SET_DEVICE_OP(dev_ops, iw_accept);
        SET_DEVICE_OP(dev_ops, iw_add_ref);
        SET_DEVICE_OP(dev_ops, iw_connect);
@@ -2660,12 +2715,6 @@ static int __init ib_core_init(void)
                goto err_comp_unbound;
        }
 
-       ret = rdma_nl_init();
-       if (ret) {
-               pr_warn("Couldn't init IB netlink interface: err %d\n", ret);
-               goto err_sysfs;
-       }
-
        ret = addr_init();
        if (ret) {
                pr_warn("Could't init IB address resolution\n");
@@ -2711,8 +2760,6 @@ err_mad:
 err_addr:
        addr_cleanup();
 err_ibnl:
-       rdma_nl_exit();
-err_sysfs:
        class_unregister(&ib_class);
 err_comp_unbound:
        destroy_workqueue(ib_comp_unbound_wq);
index 7d841b6..e08aec4 100644 (file)
@@ -148,13 +148,6 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
                hlist_del_init(&fmr->cache_node);
                fmr->remap_count = 0;
                list_add_tail(&fmr->fmr->list, &fmr_list);
-
-#ifdef DEBUG
-               if (fmr->ref_count !=0) {
-                       pr_warn(PFX "Unmapping FMR 0x%08x with ref count %d\n",
-                               fmr, fmr->ref_count);
-               }
-#endif
        }
 
        list_splice_init(&pool->dirty_list, &unmap_list);
@@ -496,12 +489,6 @@ void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
                }
        }
 
-#ifdef DEBUG
-       if (fmr->ref_count < 0)
-               pr_warn(PFX "FMR %p has ref count %d < 0\n",
-                       fmr, fmr->ref_count);
-#endif
-
        spin_unlock_irqrestore(&pool->pool_lock, flags);
 }
 EXPORT_SYMBOL(ib_fmr_pool_unmap);
index 2452b0d..4668699 100644 (file)
@@ -112,7 +112,7 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
        pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n",
                __func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name);
 
-       ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
+       ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
        if (ret) {
                skb = NULL; /* skb is freed in the netlink send-op handling */
                iwpm_user_pid = IWPM_PID_UNAVAILABLE;
@@ -124,8 +124,7 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
        return ret;
 pid_query_error:
        pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
-       if (skb)
-               dev_kfree_skb(skb);
+       dev_kfree_skb(skb);
        if (nlmsg_request)
                iwpm_free_nlmsg_request(&nlmsg_request->kref);
        return ret;
@@ -202,7 +201,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
        nlmsg_end(skb, nlh);
        nlmsg_request->req_buffer = pm_msg;
 
-       ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
+       ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid);
        if (ret) {
                skb = NULL; /* skb is freed in the netlink send-op handling */
                iwpm_user_pid = IWPM_PID_UNDEFINED;
@@ -214,8 +213,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
 add_mapping_error:
        pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
 add_mapping_error_nowarn:
-       if (skb)
-               dev_kfree_skb(skb);
+       dev_kfree_skb(skb);
        if (nlmsg_request)
                iwpm_free_nlmsg_request(&nlmsg_request->kref);
        return ret;
@@ -297,7 +295,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
        nlmsg_end(skb, nlh);
        nlmsg_request->req_buffer = pm_msg;
 
-       ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
+       ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid);
        if (ret) {
                skb = NULL; /* skb is freed in the netlink send-op handling */
                err_str = "Unable to send a nlmsg";
@@ -308,8 +306,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
 query_mapping_error:
        pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
 query_mapping_error_nowarn:
-       if (skb)
-               dev_kfree_skb(skb);
+       dev_kfree_skb(skb);
        if (nlmsg_request)
                iwpm_free_nlmsg_request(&nlmsg_request->kref);
        return ret;
@@ -364,7 +361,7 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
 
        nlmsg_end(skb, nlh);
 
-       ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
+       ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid);
        if (ret) {
                skb = NULL; /* skb is freed in the netlink send-op handling */
                iwpm_user_pid = IWPM_PID_UNDEFINED;
index 41929bb..13495b4 100644 (file)
@@ -645,7 +645,7 @@ static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
 
        nlmsg_end(skb, nlh);
 
-       ret = rdma_nl_unicast(skb, iwpm_pid);
+       ret = rdma_nl_unicast(&init_net, skb, iwpm_pid);
        if (ret) {
                skb = NULL;
                err_str = "Unable to send a nlmsg";
@@ -655,8 +655,7 @@ static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
        return 0;
 mapinfo_num_error:
        pr_info("%s: %s\n", __func__, err_str);
-       if (skb)
-               dev_kfree_skb(skb);
+       dev_kfree_skb(skb);
        return ret;
 }
 
@@ -674,7 +673,7 @@ static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid)
                return -ENOMEM;
        }
        nlh->nlmsg_type = NLMSG_DONE;
-       ret = rdma_nl_unicast(skb, iwpm_pid);
+       ret = rdma_nl_unicast(&init_net, skb, iwpm_pid);
        if (ret)
                pr_warn("%s Unable to send a nlmsg\n", __func__);
        return ret;
@@ -778,8 +777,7 @@ send_mapping_info_unlock:
 send_mapping_info_exit:
        if (ret) {
                pr_warn("%s: %s (ret = %d)\n", __func__, err_str, ret);
-               if (skb)
-                       dev_kfree_skb(skb);
+               dev_kfree_skb(skb);
                return ret;
        }
        send_nlmsg_done(skb, nl_client, iwpm_pid);
@@ -824,7 +822,7 @@ int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version)
                goto hello_num_error;
        nlmsg_end(skb, nlh);
 
-       ret = rdma_nl_unicast(skb, iwpm_pid);
+       ret = rdma_nl_unicast(&init_net, skb, iwpm_pid);
        if (ret) {
                skb = NULL;
                err_str = "Unable to send a nlmsg";
@@ -834,7 +832,6 @@ int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version)
        return 0;
 hello_num_error:
        pr_info("%s: %s\n", __func__, err_str);
-       if (skb)
-               dev_kfree_skb(skb);
+       dev_kfree_skb(skb);
        return ret;
 }
index eecfc0b..81dbd5f 100644 (file)
 #include <linux/export.h>
 #include <net/netlink.h>
 #include <net/net_namespace.h>
+#include <net/netns/generic.h>
 #include <net/sock.h>
 #include <rdma/rdma_netlink.h>
 #include <linux/module.h>
 #include "core_priv.h"
 
 static DEFINE_MUTEX(rdma_nl_mutex);
-static struct sock *nls;
 static struct {
        const struct rdma_nl_cbs   *cb_table;
 } rdma_nl_types[RDMA_NL_NUM_CLIENTS];
 
 bool rdma_nl_chk_listeners(unsigned int group)
 {
-       return netlink_has_listeners(nls, group);
+       struct rdma_dev_net *rnet = rdma_net_to_dev_net(&init_net);
+
+       return netlink_has_listeners(rnet->nl_sock, group);
 }
 EXPORT_SYMBOL(rdma_nl_chk_listeners);
 
@@ -73,13 +75,21 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op)
        return (op < max_num_ops[type]) ? true : false;
 }
 
-static bool is_nl_valid(unsigned int type, unsigned int op)
+static bool
+is_nl_valid(const struct sk_buff *skb, unsigned int type, unsigned int op)
 {
        const struct rdma_nl_cbs *cb_table;
 
        if (!is_nl_msg_valid(type, op))
                return false;
 
+       /*
+        * Currently only NLDEV client is supporting netlink commands in
+        * non init_net net namespace.
+        */
+       if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV)
+               return false;
+
        if (!rdma_nl_types[type].cb_table) {
                mutex_unlock(&rdma_nl_mutex);
                request_module("rdma-netlink-subsys-%d", type);
@@ -161,7 +171,7 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
        unsigned int op = RDMA_NL_GET_OP(type);
        const struct rdma_nl_cbs *cb_table;
 
-       if (!is_nl_valid(index, op))
+       if (!is_nl_valid(skb, index, op))
                return -EINVAL;
 
        cb_table = rdma_nl_types[index].cb_table;
@@ -185,7 +195,7 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
                        .dump = cb_table[op].dump,
                };
                if (c.dump)
-                       return netlink_dump_start(nls, skb, nlh, &c);
+                       return netlink_dump_start(skb->sk, skb, nlh, &c);
                return -EINVAL;
        }
 
@@ -258,52 +268,65 @@ static void rdma_nl_rcv(struct sk_buff *skb)
        mutex_unlock(&rdma_nl_mutex);
 }
 
-int rdma_nl_unicast(struct sk_buff *skb, u32 pid)
+int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid)
 {
+       struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
        int err;
 
-       err = netlink_unicast(nls, skb, pid, MSG_DONTWAIT);
+       err = netlink_unicast(rnet->nl_sock, skb, pid, MSG_DONTWAIT);
        return (err < 0) ? err : 0;
 }
 EXPORT_SYMBOL(rdma_nl_unicast);
 
-int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid)
+int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid)
 {
+       struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
        int err;
 
-       err = netlink_unicast(nls, skb, pid, 0);
+       err = netlink_unicast(rnet->nl_sock, skb, pid, 0);
        return (err < 0) ? err : 0;
 }
 EXPORT_SYMBOL(rdma_nl_unicast_wait);
 
-int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags)
+int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
+                     unsigned int group, gfp_t flags)
 {
-       return nlmsg_multicast(nls, skb, 0, group, flags);
+       struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
+
+       return nlmsg_multicast(rnet->nl_sock, skb, 0, group, flags);
 }
 EXPORT_SYMBOL(rdma_nl_multicast);
 
-int __init rdma_nl_init(void)
+/*
+ * rdma_nl_exit - report netlink clients that are still registered
+ *
+ * Walks every slot of rdma_nl_types[] and emits a WARN for each one whose
+ * callback table pointer is still set. Nothing is unregistered or freed
+ * here; the function only reports. Presumably called on module unload, as
+ * the warning text suggests.
+ */
+void rdma_nl_exit(void)
+{
+       int client;
+
+       for (client = 0; client < RDMA_NL_NUM_CLIENTS; ++client) {
+               const struct rdma_nl_cbs *cbs = rdma_nl_types[client].cb_table;
+
+               WARN(cbs,
+                    "Netlink client %d wasn't released prior to unloading %s\n",
+                    client, KBUILD_MODNAME);
+       }
+}
+
+int rdma_nl_net_init(struct rdma_dev_net *rnet)
 {
+       struct net *net = read_pnet(&rnet->net);
        struct netlink_kernel_cfg cfg = {
                .input  = rdma_nl_rcv,
        };
+       struct sock *nls;
 
-       nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg);
+       nls = netlink_kernel_create(net, NETLINK_RDMA, &cfg);
        if (!nls)
                return -ENOMEM;
 
        nls->sk_sndtimeo = 10 * HZ;
+       rnet->nl_sock = nls;
        return 0;
 }
 
-void rdma_nl_exit(void)
+void rdma_nl_net_exit(struct rdma_dev_net *rnet)
 {
-       int idx;
-
-       for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
-               rdma_nl_unregister(idx);
-
-       netlink_kernel_release(nls);
+       netlink_kernel_release(rnet->nl_sock);
 }
 
 MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_RDMA);
index 020c269..7a74740 100644 (file)
@@ -831,7 +831,7 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
        nlmsg_end(msg, nlh);
 
        ib_device_put(device);
-       return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+       return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_free:
        nlmsg_free(msg);
@@ -971,7 +971,7 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
        nlmsg_end(msg, nlh);
        ib_device_put(device);
 
-       return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+       return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_free:
        nlmsg_free(msg);
@@ -1073,7 +1073,7 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 
        nlmsg_end(msg, nlh);
        ib_device_put(device);
-       return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+       return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_free:
        nlmsg_free(msg);
@@ -1250,7 +1250,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 
        nlmsg_end(msg, nlh);
        ib_device_put(device);
-       return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+       return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_free:
        nlmsg_free(msg);
@@ -1595,7 +1595,7 @@ static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
        put_device(data.cdev);
        if (ibdev)
                ib_device_put(ibdev);
-       return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+       return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 out_data:
        put_device(data.cdev);
@@ -1635,7 +1635,7 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                return err;
        }
        nlmsg_end(msg, nlh);
-       return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+       return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 }
 
 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -1733,7 +1733,7 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 
        nlmsg_end(msg, nlh);
        ib_device_put(device);
-       return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+       return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_fill:
        rdma_counter_unbind_qpn(device, port, qpn, cntn);
@@ -1801,7 +1801,7 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 
        nlmsg_end(msg, nlh);
        ib_device_put(device);
-       return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+       return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_fill:
        rdma_counter_bind_qpn(device, port, qpn, cntn);
@@ -1892,7 +1892,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb,
        mutex_unlock(&stats->lock);
        nlmsg_end(msg, nlh);
        ib_device_put(device);
-       return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+       return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_table:
        nla_nest_cancel(msg, table_attr);
@@ -1964,7 +1964,7 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
 
        nlmsg_end(msg, nlh);
        ib_device_put(device);
-       return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+       return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_msg:
        nlmsg_free(msg);
index dce0610..5337393 100644 (file)
@@ -583,8 +583,10 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
                break;
        }
 
-       /* P2PDMA contexts do not need to be unmapped */
-       if (!is_pci_p2pdma_page(sg_page(sg)))
+       if (is_pci_p2pdma_page(sg_page(sg)))
+               pci_p2pdma_unmap_sg(qp->pd->device->dma_device, sg,
+                                   sg_cnt, dir);
+       else
                ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
 }
 EXPORT_SYMBOL(rdma_rw_ctx_destroy);
index 7d8071c..17fc293 100644 (file)
@@ -860,7 +860,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
        /* Repair the nlmsg header length */
        nlmsg_end(skb, nlh);
 
-       return rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask);
+       return rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_mask);
 }
 
 static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
index b477295..7a50ced 100644 (file)
@@ -289,6 +289,24 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
                       ib_width_enum_to_int(attr.active_width), speed);
 }
 
+/*
+ * phys_state_to_str - human-readable name of a port physical state
+ * @phys_state: physical state value, used directly as the table index
+ *
+ * Returns the matching entry of the table below, or "<unknown>" for any
+ * value past the end of the table. Index 0 also maps to "<unknown>".
+ * Used by phys_state_show() to build its "<number>: <name>" output.
+ */
+static const char *phys_state_to_str(enum ib_port_phys_state phys_state)
+{
+       /* Neither the strings nor the pointer table are ever modified. */
+       static const char * const phys_state_str[] = {
+               "<unknown>",
+               "Sleep",
+               "Polling",
+               "Disabled",
+               "PortConfigurationTraining",
+               "LinkUp",
+               "LinkErrorRecovery",
+               "Phy Test",
+       };
+
+       if (phys_state < ARRAY_SIZE(phys_state_str))
+               return phys_state_str[phys_state];
+       return "<unknown>";
+}
+
 static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused,
                               char *buf)
 {
@@ -300,16 +318,8 @@ static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused,
        if (ret)
                return ret;
 
-       switch (attr.phys_state) {
-       case 1:  return sprintf(buf, "1: Sleep\n");
-       case 2:  return sprintf(buf, "2: Polling\n");
-       case 3:  return sprintf(buf, "3: Disabled\n");
-       case 4:  return sprintf(buf, "4: PortConfigurationTraining\n");
-       case 5:  return sprintf(buf, "5: LinkUp\n");
-       case 6:  return sprintf(buf, "6: LinkErrorRecovery\n");
-       case 7:  return sprintf(buf, "7: Phy Test\n");
-       default: return sprintf(buf, "%d: <unknown>\n", attr.phys_state);
-       }
+       return sprintf(buf, "%d: %s\n", attr.phys_state,
+                      phys_state_to_str(attr.phys_state));
 }
 
 static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused,
index 5655366..24244a2 100644 (file)
@@ -54,10 +54,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 
        for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
                page = sg_page_iter_page(&sg_iter);
-               if (umem->writable && dirty)
-                       put_user_pages_dirty_lock(&page, 1);
-               else
-                       put_user_page(page);
+               put_user_pages_dirty_lock(&page, 1, umem->writable && dirty);
        }
 
        sg_free_table(&umem->sg_head);
@@ -184,9 +181,6 @@ EXPORT_SYMBOL(ib_umem_find_best_pgsz);
 /**
  * ib_umem_get - Pin and DMA map userspace memory.
  *
- * If access flags indicate ODP memory, avoid pinning. Instead, stores
- * the mm for future page fault handling in conjunction with MMU notifiers.
- *
  * @udata: userspace context to pin memory for
  * @addr: userspace virtual address to start at
  * @size: length of region to pin
@@ -231,36 +225,19 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
        if (!can_do_mlock())
                return ERR_PTR(-EPERM);
 
-       if (access & IB_ACCESS_ON_DEMAND) {
-               umem = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
-               if (!umem)
-                       return ERR_PTR(-ENOMEM);
-               umem->is_odp = 1;
-       } else {
-               umem = kzalloc(sizeof(*umem), GFP_KERNEL);
-               if (!umem)
-                       return ERR_PTR(-ENOMEM);
-       }
+       if (access & IB_ACCESS_ON_DEMAND)
+               return ERR_PTR(-EOPNOTSUPP);
 
-       umem->context    = context;
+       umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+       if (!umem)
+               return ERR_PTR(-ENOMEM);
+       umem->ibdev = context->device;
        umem->length     = size;
        umem->address    = addr;
        umem->writable   = ib_access_writable(access);
        umem->owning_mm = mm = current->mm;
        mmgrab(mm);
 
-       if (access & IB_ACCESS_ON_DEMAND) {
-               if (WARN_ON_ONCE(!context->invalidate_range)) {
-                       ret = -EINVAL;
-                       goto umem_kfree;
-               }
-
-               ret = ib_umem_odp_get(to_ib_umem_odp(umem), access);
-               if (ret)
-                       goto umem_kfree;
-               return umem;
-       }
-
        page_list = (struct page **) __get_free_page(GFP_KERNEL);
        if (!page_list) {
                ret = -ENOMEM;
@@ -346,15 +323,6 @@ umem_kfree:
 }
 EXPORT_SYMBOL(ib_umem_get);
 
-static void __ib_umem_release_tail(struct ib_umem *umem)
-{
-       mmdrop(umem->owning_mm);
-       if (umem->is_odp)
-               kfree(to_ib_umem_odp(umem));
-       else
-               kfree(umem);
-}
-
 /**
  * ib_umem_release - release memory pinned with ib_umem_get
  * @umem: umem struct to release
@@ -363,17 +331,14 @@ void ib_umem_release(struct ib_umem *umem)
 {
        if (!umem)
                return;
+       if (umem->is_odp)
+               return ib_umem_odp_release(to_ib_umem_odp(umem));
 
-       if (umem->is_odp) {
-               ib_umem_odp_release(to_ib_umem_odp(umem));
-               __ib_umem_release_tail(umem);
-               return;
-       }
-
-       __ib_umem_release(umem->context->device, umem, 1);
+       __ib_umem_release(umem->ibdev, umem, 1);
 
        atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
-       __ib_umem_release_tail(umem);
+       mmdrop(umem->owning_mm);
+       kfree(umem);
 }
 EXPORT_SYMBOL(ib_umem_release);
 
index c0e15db..f67a30f 100644 (file)
 #include <linux/export.h>
 #include <linux/vmalloc.h>
 #include <linux/hugetlb.h>
-#include <linux/interval_tree_generic.h>
+#include <linux/interval_tree.h>
 #include <linux/pagemap.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_umem.h>
 #include <rdma/ib_umem_odp.h>
 
-/*
- * The ib_umem list keeps track of memory regions for which the HW
- * device request to receive notification when the related memory
- * mapping is changed.
- *
- * ib_umem_lock protects the list.
- */
-
-static u64 node_start(struct umem_odp_node *n)
-{
-       struct ib_umem_odp *umem_odp =
-                       container_of(n, struct ib_umem_odp, interval_tree);
-
-       return ib_umem_start(umem_odp);
-}
-
-/* Note that the representation of the intervals in the interval tree
- * considers the ending point as contained in the interval, while the
- * function ib_umem_end returns the first address which is not contained
- * in the umem.
- */
-static u64 node_last(struct umem_odp_node *n)
-{
-       struct ib_umem_odp *umem_odp =
-                       container_of(n, struct ib_umem_odp, interval_tree);
-
-       return ib_umem_end(umem_odp) - 1;
-}
-
-INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
-                    node_start, node_last, static, rbt_ib_umem)
+#include "uverbs.h"
 
 static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp)
 {
@@ -104,31 +74,34 @@ static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp)
        mutex_unlock(&umem_odp->umem_mutex);
 }
 
-static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
-                                              u64 start, u64 end, void *cookie)
-{
-       /*
-        * Increase the number of notifiers running, to
-        * prevent any further fault handling on this MR.
-        */
-       ib_umem_notifier_start_account(umem_odp);
-       complete_all(&umem_odp->notifier_completion);
-       umem_odp->umem.context->invalidate_range(
-               umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp));
-       return 0;
-}
-
 static void ib_umem_notifier_release(struct mmu_notifier *mn,
                                     struct mm_struct *mm)
 {
        struct ib_ucontext_per_mm *per_mm =
                container_of(mn, struct ib_ucontext_per_mm, mn);
+       struct rb_node *node;
 
        down_read(&per_mm->umem_rwsem);
-       if (per_mm->active)
-               rbt_ib_umem_for_each_in_range(
-                       &per_mm->umem_tree, 0, ULLONG_MAX,
-                       ib_umem_notifier_release_trampoline, true, NULL);
+       if (!per_mm->mn.users)
+               goto out;
+
+       for (node = rb_first_cached(&per_mm->umem_tree); node;
+            node = rb_next(node)) {
+               struct ib_umem_odp *umem_odp =
+                       rb_entry(node, struct ib_umem_odp, interval_tree.rb);
+
+               /*
+                * Increase the number of notifiers running, to prevent any
+                * further fault handling on this MR.
+                */
+               ib_umem_notifier_start_account(umem_odp);
+               complete_all(&umem_odp->notifier_completion);
+               umem_odp->umem.ibdev->ops.invalidate_range(
+                       umem_odp, ib_umem_start(umem_odp),
+                       ib_umem_end(umem_odp));
+       }
+
+out:
        up_read(&per_mm->umem_rwsem);
 }
 
@@ -136,7 +109,7 @@ static int invalidate_range_start_trampoline(struct ib_umem_odp *item,
                                             u64 start, u64 end, void *cookie)
 {
        ib_umem_notifier_start_account(item);
-       item->umem.context->invalidate_range(item, start, end);
+       item->umem.ibdev->ops.invalidate_range(item, start, end);
        return 0;
 }
 
@@ -152,10 +125,10 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
        else if (!down_read_trylock(&per_mm->umem_rwsem))
                return -EAGAIN;
 
-       if (!per_mm->active) {
+       if (!per_mm->mn.users) {
                up_read(&per_mm->umem_rwsem);
                /*
-                * At this point active is permanently set and visible to this
+                * At this point users is permanently zero and visible to this
                 * CPU without a lock, that fact is relied on to skip the unlock
                 * in range_end.
                 */
@@ -185,7 +158,7 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
        struct ib_ucontext_per_mm *per_mm =
                container_of(mn, struct ib_ucontext_per_mm, mn);
 
-       if (unlikely(!per_mm->active))
+       if (unlikely(!per_mm->mn.users))
                return;
 
        rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start,
@@ -194,212 +167,250 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
        up_read(&per_mm->umem_rwsem);
 }
 
-static const struct mmu_notifier_ops ib_umem_notifiers = {
-       .release                    = ib_umem_notifier_release,
-       .invalidate_range_start     = ib_umem_notifier_invalidate_range_start,
-       .invalidate_range_end       = ib_umem_notifier_invalidate_range_end,
-};
-
-static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
-{
-       struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
-
-       down_write(&per_mm->umem_rwsem);
-       if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
-               rbt_ib_umem_insert(&umem_odp->interval_tree,
-                                  &per_mm->umem_tree);
-       up_write(&per_mm->umem_rwsem);
-}
-
-static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
-{
-       struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
-
-       down_write(&per_mm->umem_rwsem);
-       if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
-               rbt_ib_umem_remove(&umem_odp->interval_tree,
-                                  &per_mm->umem_tree);
-       complete_all(&umem_odp->notifier_completion);
-
-       up_write(&per_mm->umem_rwsem);
-}
-
-static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx,
-                                              struct mm_struct *mm)
+static struct mmu_notifier *ib_umem_alloc_notifier(struct mm_struct *mm)
 {
        struct ib_ucontext_per_mm *per_mm;
-       int ret;
 
        per_mm = kzalloc(sizeof(*per_mm), GFP_KERNEL);
        if (!per_mm)
                return ERR_PTR(-ENOMEM);
 
-       per_mm->context = ctx;
-       per_mm->mm = mm;
        per_mm->umem_tree = RB_ROOT_CACHED;
        init_rwsem(&per_mm->umem_rwsem);
-       per_mm->active = true;
 
+       WARN_ON(mm != current->mm);
        rcu_read_lock();
        per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
        rcu_read_unlock();
+       return &per_mm->mn;
+}
 
-       WARN_ON(mm != current->mm);
-
-       per_mm->mn.ops = &ib_umem_notifiers;
-       ret = mmu_notifier_register(&per_mm->mn, per_mm->mm);
-       if (ret) {
-               dev_err(&ctx->device->dev,
-                       "Failed to register mmu_notifier %d\n", ret);
-               goto out_pid;
-       }
+static void ib_umem_free_notifier(struct mmu_notifier *mn)
+{
+       struct ib_ucontext_per_mm *per_mm =
+               container_of(mn, struct ib_ucontext_per_mm, mn);
 
-       list_add(&per_mm->ucontext_list, &ctx->per_mm_list);
-       return per_mm;
+       WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root));
 
-out_pid:
        put_pid(per_mm->tgid);
        kfree(per_mm);
-       return ERR_PTR(ret);
 }
 
-static int get_per_mm(struct ib_umem_odp *umem_odp)
+static const struct mmu_notifier_ops ib_umem_notifiers = {
+       .release                    = ib_umem_notifier_release,
+       .invalidate_range_start     = ib_umem_notifier_invalidate_range_start,
+       .invalidate_range_end       = ib_umem_notifier_invalidate_range_end,
+       .alloc_notifier             = ib_umem_alloc_notifier,
+       .free_notifier              = ib_umem_free_notifier,
+};
+
+static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp)
 {
-       struct ib_ucontext *ctx = umem_odp->umem.context;
        struct ib_ucontext_per_mm *per_mm;
+       struct mmu_notifier *mn;
+       int ret;
 
-       /*
-        * Generally speaking we expect only one or two per_mm in this list,
-        * so no reason to optimize this search today.
-        */
-       mutex_lock(&ctx->per_mm_list_lock);
-       list_for_each_entry(per_mm, &ctx->per_mm_list, ucontext_list) {
-               if (per_mm->mm == umem_odp->umem.owning_mm)
-                       goto found;
+       umem_odp->umem.is_odp = 1;
+       if (!umem_odp->is_implicit_odp) {
+               size_t page_size = 1UL << umem_odp->page_shift;
+               size_t pages;
+
+               umem_odp->interval_tree.start =
+                       ALIGN_DOWN(umem_odp->umem.address, page_size);
+               if (check_add_overflow(umem_odp->umem.address,
+                                      (unsigned long)umem_odp->umem.length,
+                                      &umem_odp->interval_tree.last))
+                       return -EOVERFLOW;
+               umem_odp->interval_tree.last =
+                       ALIGN(umem_odp->interval_tree.last, page_size);
+               if (unlikely(umem_odp->interval_tree.last < page_size))
+                       return -EOVERFLOW;
+
+               pages = (umem_odp->interval_tree.last -
+                        umem_odp->interval_tree.start) >>
+                       umem_odp->page_shift;
+               if (!pages)
+                       return -EINVAL;
+
+               /*
+                * Note that the representation of the intervals in the
+                * interval tree considers the ending point as contained in
+                * the interval.
+                */
+               umem_odp->interval_tree.last--;
+
+               umem_odp->page_list = kvcalloc(
+                       pages, sizeof(*umem_odp->page_list), GFP_KERNEL);
+               if (!umem_odp->page_list)
+                       return -ENOMEM;
+
+               umem_odp->dma_list = kvcalloc(
+                       pages, sizeof(*umem_odp->dma_list), GFP_KERNEL);
+               if (!umem_odp->dma_list) {
+                       ret = -ENOMEM;
+                       goto out_page_list;
+               }
        }
 
-       per_mm = alloc_per_mm(ctx, umem_odp->umem.owning_mm);
-       if (IS_ERR(per_mm)) {
-               mutex_unlock(&ctx->per_mm_list_lock);
-               return PTR_ERR(per_mm);
+       mn = mmu_notifier_get(&ib_umem_notifiers, umem_odp->umem.owning_mm);
+       if (IS_ERR(mn)) {
+               ret = PTR_ERR(mn);
+               goto out_dma_list;
        }
+       umem_odp->per_mm = per_mm =
+               container_of(mn, struct ib_ucontext_per_mm, mn);
 
-found:
-       umem_odp->per_mm = per_mm;
-       per_mm->odp_mrs_count++;
-       mutex_unlock(&ctx->per_mm_list_lock);
+       mutex_init(&umem_odp->umem_mutex);
+       init_completion(&umem_odp->notifier_completion);
+
+       if (!umem_odp->is_implicit_odp) {
+               down_write(&per_mm->umem_rwsem);
+               interval_tree_insert(&umem_odp->interval_tree,
+                                    &per_mm->umem_tree);
+               up_write(&per_mm->umem_rwsem);
+       }
+       mmgrab(umem_odp->umem.owning_mm);
 
        return 0;
-}
 
-static void free_per_mm(struct rcu_head *rcu)
-{
-       kfree(container_of(rcu, struct ib_ucontext_per_mm, rcu));
+out_dma_list:
+       kvfree(umem_odp->dma_list);
+out_page_list:
+       kvfree(umem_odp->page_list);
+       return ret;
 }
 
-static void put_per_mm(struct ib_umem_odp *umem_odp)
+/**
+ * ib_umem_odp_alloc_implicit - Allocate a parent implicit ODP umem
+ *
+ * Implicit ODP umems do not have a VA range and do not have any page lists.
+ * They exist only to hold the per_mm reference to help the driver create
+ * children umems.
+ *
+ * @udata: udata from the syscall being used to create the umem
+ * @access: ib_reg_mr access flags
+ */
+struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
+                                              int access)
 {
-       struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
-       struct ib_ucontext *ctx = umem_odp->umem.context;
-       bool need_free;
-
-       mutex_lock(&ctx->per_mm_list_lock);
-       umem_odp->per_mm = NULL;
-       per_mm->odp_mrs_count--;
-       need_free = per_mm->odp_mrs_count == 0;
-       if (need_free)
-               list_del(&per_mm->ucontext_list);
-       mutex_unlock(&ctx->per_mm_list_lock);
-
-       if (!need_free)
-               return;
+       struct ib_ucontext *context =
+               container_of(udata, struct uverbs_attr_bundle, driver_udata)
+                       ->context;
+       struct ib_umem *umem;
+       struct ib_umem_odp *umem_odp;
+       int ret;
 
-       /*
-        * NOTE! mmu_notifier_unregister() can happen between a start/end
-        * callback, resulting in an start/end, and thus an unbalanced
-        * lock. This doesn't really matter to us since we are about to kfree
-        * the memory that holds the lock, however LOCKDEP doesn't like this.
-        */
-       down_write(&per_mm->umem_rwsem);
-       per_mm->active = false;
-       up_write(&per_mm->umem_rwsem);
+       if (access & IB_ACCESS_HUGETLB)
+               return ERR_PTR(-EINVAL);
 
-       WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root));
-       mmu_notifier_unregister_no_release(&per_mm->mn, per_mm->mm);
-       put_pid(per_mm->tgid);
-       mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm);
+       if (!context)
+               return ERR_PTR(-EIO);
+       if (WARN_ON_ONCE(!context->device->ops.invalidate_range))
+               return ERR_PTR(-EINVAL);
+
+       umem_odp = kzalloc(sizeof(*umem_odp), GFP_KERNEL);
+       if (!umem_odp)
+               return ERR_PTR(-ENOMEM);
+       umem = &umem_odp->umem;
+       umem->ibdev = context->device;
+       umem->writable = ib_access_writable(access);
+       umem->owning_mm = current->mm;
+       umem_odp->is_implicit_odp = 1;
+       umem_odp->page_shift = PAGE_SHIFT;
+
+       ret = ib_init_umem_odp(umem_odp);
+       if (ret) {
+               kfree(umem_odp);
+               return ERR_PTR(ret);
+       }
+       return umem_odp;
 }
+EXPORT_SYMBOL(ib_umem_odp_alloc_implicit);
 
-struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root,
-                                     unsigned long addr, size_t size)
+/**
+ * ib_umem_odp_alloc_child - Allocate a child ODP umem under an implicit
+ *                           parent ODP umem
+ *
+ * @root: The parent umem enclosing the child. This must be allocated using
+ *        ib_umem_odp_alloc_implicit()
+ * @addr: The starting userspace VA
+ * @size: The length of the userspace VA
+ */
+struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root,
+                                           unsigned long addr, size_t size)
 {
-       struct ib_ucontext_per_mm *per_mm = root->per_mm;
-       struct ib_ucontext *ctx = per_mm->context;
+       /*
+        * Caller must ensure that root cannot be freed during the call to
+        * ib_umem_odp_alloc_child.
+        */
        struct ib_umem_odp *odp_data;
        struct ib_umem *umem;
-       int pages = size >> PAGE_SHIFT;
        int ret;
 
+       if (WARN_ON(!root->is_implicit_odp))
+               return ERR_PTR(-EINVAL);
+
        odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
        if (!odp_data)
                return ERR_PTR(-ENOMEM);
        umem = &odp_data->umem;
-       umem->context    = ctx;
+       umem->ibdev = root->umem.ibdev;
        umem->length     = size;
        umem->address    = addr;
-       odp_data->page_shift = PAGE_SHIFT;
        umem->writable   = root->umem.writable;
-       umem->is_odp = 1;
-       odp_data->per_mm = per_mm;
-       umem->owning_mm  = per_mm->mm;
-       mmgrab(umem->owning_mm);
-
-       mutex_init(&odp_data->umem_mutex);
-       init_completion(&odp_data->notifier_completion);
-
-       odp_data->page_list =
-               vzalloc(array_size(pages, sizeof(*odp_data->page_list)));
-       if (!odp_data->page_list) {
-               ret = -ENOMEM;
-               goto out_odp_data;
-       }
+       umem->owning_mm  = root->umem.owning_mm;
+       odp_data->page_shift = PAGE_SHIFT;
 
-       odp_data->dma_list =
-               vzalloc(array_size(pages, sizeof(*odp_data->dma_list)));
-       if (!odp_data->dma_list) {
-               ret = -ENOMEM;
-               goto out_page_list;
+       ret = ib_init_umem_odp(odp_data);
+       if (ret) {
+               kfree(odp_data);
+               return ERR_PTR(ret);
        }
-
-       /*
-        * Caller must ensure that the umem_odp that the per_mm came from
-        * cannot be freed during the call to ib_alloc_odp_umem.
-        */
-       mutex_lock(&ctx->per_mm_list_lock);
-       per_mm->odp_mrs_count++;
-       mutex_unlock(&ctx->per_mm_list_lock);
-       add_umem_to_per_mm(odp_data);
-
        return odp_data;
-
-out_page_list:
-       vfree(odp_data->page_list);
-out_odp_data:
-       mmdrop(umem->owning_mm);
-       kfree(odp_data);
-       return ERR_PTR(ret);
 }
-EXPORT_SYMBOL(ib_alloc_odp_umem);
+EXPORT_SYMBOL(ib_umem_odp_alloc_child);
 
-int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
+/**
+ * ib_umem_odp_get - Create a umem_odp for a userspace va
+ *
+ * @udata: userspace context to pin memory for
+ * @addr: userspace virtual address to start at
+ * @size: length of region to pin
+ * @access: IB_ACCESS_xxx flags for memory being pinned
+ *
+ * The driver should use this when the access flags indicate ODP memory. It
+ * avoids pinning and instead stores the mm for future page fault handling in
+ * conjunction with MMU notifiers.
+ */
+struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
+                                   size_t size, int access)
 {
-       struct ib_umem *umem = &umem_odp->umem;
-       /*
-        * NOTE: This must called in a process context where umem->owning_mm
-        * == current->mm
-        */
-       struct mm_struct *mm = umem->owning_mm;
-       int ret_val;
+       struct ib_umem_odp *umem_odp;
+       struct ib_ucontext *context;
+       struct mm_struct *mm;
+       int ret;
+
+       if (!udata)
+               return ERR_PTR(-EIO);
+
+       context = container_of(udata, struct uverbs_attr_bundle, driver_udata)
+                         ->context;
+       if (!context)
+               return ERR_PTR(-EIO);
+
+       if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND)) ||
+           WARN_ON_ONCE(!context->device->ops.invalidate_range))
+               return ERR_PTR(-EINVAL);
+
+       umem_odp = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
+       if (!umem_odp)
+               return ERR_PTR(-ENOMEM);
+
+       umem_odp->umem.ibdev = context->device;
+       umem_odp->umem.length = size;
+       umem_odp->umem.address = addr;
+       umem_odp->umem.writable = ib_access_writable(access);
+       umem_odp->umem.owning_mm = mm = current->mm;
 
        umem_odp->page_shift = PAGE_SHIFT;
        if (access & IB_ACCESS_HUGETLB) {
@@ -410,63 +421,63 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
                vma = find_vma(mm, ib_umem_start(umem_odp));
                if (!vma || !is_vm_hugetlb_page(vma)) {
                        up_read(&mm->mmap_sem);
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto err_free;
                }
                h = hstate_vma(vma);
                umem_odp->page_shift = huge_page_shift(h);
                up_read(&mm->mmap_sem);
        }
 
-       mutex_init(&umem_odp->umem_mutex);
-
-       init_completion(&umem_odp->notifier_completion);
-
-       if (ib_umem_odp_num_pages(umem_odp)) {
-               umem_odp->page_list =
-                       vzalloc(array_size(sizeof(*umem_odp->page_list),
-                                          ib_umem_odp_num_pages(umem_odp)));
-               if (!umem_odp->page_list)
-                       return -ENOMEM;
-
-               umem_odp->dma_list =
-                       vzalloc(array_size(sizeof(*umem_odp->dma_list),
-                                          ib_umem_odp_num_pages(umem_odp)));
-               if (!umem_odp->dma_list) {
-                       ret_val = -ENOMEM;
-                       goto out_page_list;
-               }
-       }
-
-       ret_val = get_per_mm(umem_odp);
-       if (ret_val)
-               goto out_dma_list;
-       add_umem_to_per_mm(umem_odp);
-
-       return 0;
+       ret = ib_init_umem_odp(umem_odp);
+       if (ret)
+               goto err_free;
+       return umem_odp;
 
-out_dma_list:
-       vfree(umem_odp->dma_list);
-out_page_list:
-       vfree(umem_odp->page_list);
-       return ret_val;
+err_free:
+       kfree(umem_odp);
+       return ERR_PTR(ret);
 }
+EXPORT_SYMBOL(ib_umem_odp_get);
 
 void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
 {
+       struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
+
        /*
         * Ensure that no more pages are mapped in the umem.
         *
         * It is the driver's responsibility to ensure, before calling us,
         * that the hardware will not attempt to access the MR any more.
         */
-       ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
-                                   ib_umem_end(umem_odp));
+       if (!umem_odp->is_implicit_odp) {
+               ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+                                           ib_umem_end(umem_odp));
+               kvfree(umem_odp->dma_list);
+               kvfree(umem_odp->page_list);
+       }
 
-       remove_umem_from_per_mm(umem_odp);
-       put_per_mm(umem_odp);
-       vfree(umem_odp->dma_list);
-       vfree(umem_odp->page_list);
+       down_write(&per_mm->umem_rwsem);
+       if (!umem_odp->is_implicit_odp) {
+               interval_tree_remove(&umem_odp->interval_tree,
+                                    &per_mm->umem_tree);
+               complete_all(&umem_odp->notifier_completion);
+       }
+       /*
+        * NOTE! mmu_notifier_unregister() can happen between a start/end
+        * callback, resulting in a missing end, and thus an unbalanced
+        * lock. This doesn't really matter to us since we are about to kfree
+        * the memory that holds the lock, however LOCKDEP doesn't like this.
+        * Thus we call the mmu_notifier_put under the rwsem and test the
+        * internal users count to reliably see if we are past this point.
+        */
+       mmu_notifier_put(&per_mm->mn);
+       up_write(&per_mm->umem_rwsem);
+
+       mmdrop(umem_odp->umem.owning_mm);
+       kfree(umem_odp);
 }
+EXPORT_SYMBOL(ib_umem_odp_release);
 
 /*
  * Map for DMA and insert a single page into the on-demand paging page tables.
@@ -493,8 +504,7 @@ static int ib_umem_odp_map_dma_single_page(
                u64 access_mask,
                unsigned long current_seq)
 {
-       struct ib_ucontext *context = umem_odp->umem.context;
-       struct ib_device *dev = context->device;
+       struct ib_device *dev = umem_odp->umem.ibdev;
        dma_addr_t dma_addr;
        int remove_existing_mapping = 0;
        int ret = 0;
@@ -534,7 +544,7 @@ out:
 
        if (remove_existing_mapping) {
                ib_umem_notifier_start_account(umem_odp);
-               context->invalidate_range(
+               dev->ops.invalidate_range(
                        umem_odp,
                        ib_umem_start(umem_odp) +
                                (page_index << umem_odp->page_shift),
@@ -707,7 +717,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
 {
        int idx;
        u64 addr;
-       struct ib_device *dev = umem_odp->umem.context->device;
+       struct ib_device *dev = umem_odp->umem.ibdev;
 
        virt = max_t(u64, virt, ib_umem_start(umem_odp));
        bound = min_t(u64, bound, ib_umem_end(umem_odp));
@@ -761,35 +771,21 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
                                  void *cookie)
 {
        int ret_val = 0;
-       struct umem_odp_node *node, *next;
+       struct interval_tree_node *node, *next;
        struct ib_umem_odp *umem;
 
        if (unlikely(start == last))
                return ret_val;
 
-       for (node = rbt_ib_umem_iter_first(root, start, last - 1);
+       for (node = interval_tree_iter_first(root, start, last - 1);
                        node; node = next) {
                /* TODO move the blockable decision up to the callback */
                if (!blockable)
                        return -EAGAIN;
-               next = rbt_ib_umem_iter_next(node, start, last - 1);
+               next = interval_tree_iter_next(node, start, last - 1);
                umem = container_of(node, struct ib_umem_odp, interval_tree);
                ret_val = cb(umem, start, last, cookie) || ret_val;
        }
 
        return ret_val;
 }
-EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
-
-struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
-                                      u64 addr, u64 length)
-{
-       struct umem_odp_node *node;
-
-       node = rbt_ib_umem_iter_first(root, addr, addr + length - 1);
-       if (node)
-               return container_of(node, struct ib_umem_odp, interval_tree);
-       return NULL;
-
-}
-EXPORT_SYMBOL(rbt_ib_umem_lookup);
index ffdeaf6..d1407fa 100644 (file)
@@ -1042,7 +1042,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
                                ib_unregister_mad_agent(file->agent[i]);
 
        mutex_unlock(&file->port->file_mutex);
-
+       mutex_destroy(&file->mutex);
        kfree(file);
        return 0;
 }
index 7ddd0e5..14a80fd 100644 (file)
@@ -252,9 +252,6 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
        ucontext->closing = false;
        ucontext->cleanup_retryable = false;
 
-       mutex_init(&ucontext->per_mm_list_lock);
-       INIT_LIST_HEAD(&ucontext->per_mm_list);
-
        ret = get_unused_fd_flags(O_CLOEXEC);
        if (ret < 0)
                goto err_free;
@@ -275,8 +272,6 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
        ret = ib_dev->ops.alloc_ucontext(ucontext, &attrs->driver_udata);
        if (ret)
                goto err_file;
-       if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
-               ucontext->invalidate_range = NULL;
 
        rdma_restrack_uadd(&ucontext->res);
 
@@ -3484,7 +3479,8 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
 
 err_copy:
        ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs));
-
+       /* It was released in ib_destroy_srq_user */
+       srq = NULL;
 err_free:
        kfree(srq);
 err_put:
index 11c13c1..db98111 100644 (file)
@@ -120,6 +120,8 @@ static void ib_uverbs_release_dev(struct device *device)
 
        uverbs_destroy_api(dev->uapi);
        cleanup_srcu_struct(&dev->disassociate_srcu);
+       mutex_destroy(&dev->lists_mutex);
+       mutex_destroy(&dev->xrcd_tree_mutex);
        kfree(dev);
 }
 
@@ -212,6 +214,8 @@ void ib_uverbs_release_file(struct kref *ref)
 
        if (file->disassociate_page)
                __free_pages(file->disassociate_page, 0);
+       mutex_destroy(&file->umap_lock);
+       mutex_destroy(&file->ucontext_lock);
        kfree(file);
 }
 
@@ -1487,6 +1491,7 @@ static void __exit ib_uverbs_cleanup(void)
                                 IB_UVERBS_NUM_FIXED_MINOR);
        unregister_chrdev_region(dynamic_uverbs_dev,
                                 IB_UVERBS_NUM_DYNAMIC_MINOR);
+       mmu_notifier_synchronize();
 }
 
 module_init(ib_uverbs_init);
index 92349bf..f974b68 100644 (file)
@@ -2259,6 +2259,7 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
                if (ret)
                        return ret;
        }
+       mutex_destroy(&xrcd->tgt_qp_mutex);
 
        return xrcd->device->ops.dealloc_xrcd(xrcd, udata);
 }
index 604b718..3421a0b 100644 (file)
@@ -74,7 +74,7 @@ static const char * const bnxt_re_stat_name[] = {
        [BNXT_RE_SEQ_ERR_NAKS_RCVD]     = "seq_err_naks_rcvd",
        [BNXT_RE_MAX_RETRY_EXCEEDED]    = "max_retry_exceeded",
        [BNXT_RE_RNR_NAKS_RCVD]         = "rnr_naks_rcvd",
-       [BNXT_RE_MISSING_RESP]          = "missin_resp",
+       [BNXT_RE_MISSING_RESP]          = "missing_resp",
        [BNXT_RE_UNRECOVERABLE_ERR]     = "unrecoverable_err",
        [BNXT_RE_BAD_RESP_ERR]          = "bad_resp_err",
        [BNXT_RE_LOCAL_QP_OP_ERR]       = "local_qp_op_err",
index 098ab88..b4149dc 100644 (file)
@@ -220,10 +220,10 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
 
        if (netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev)) {
                port_attr->state = IB_PORT_ACTIVE;
-               port_attr->phys_state = 5;
+               port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
        } else {
                port_attr->state = IB_PORT_DOWN;
-               port_attr->phys_state = 3;
+               port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
        }
        port_attr->max_mtu = IB_MTU_4096;
        port_attr->active_mtu = iboe_get_mtu(rdev->netdev->mtu);
@@ -1398,7 +1398,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
                        dev_err(rdev_to_dev(rdev), "SRQ copy to udata failed!");
                        bnxt_qplib_destroy_srq(&rdev->qplib_res,
                                               &srq->qplib_srq);
-                       goto exit;
+                       goto fail;
                }
        }
        if (nq)
index 029babe..30a54f8 100644 (file)
@@ -1473,7 +1473,6 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
                         &rdev->active_width);
        set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
        bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
-       bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);
 
        return 0;
 free_sctx:
index e775c1a..dcf02ec 100644 (file)
@@ -991,33 +991,8 @@ static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
 static int iwch_query_port(struct ib_device *ibdev,
                           u8 port, struct ib_port_attr *props)
 {
-       struct iwch_dev *dev;
-       struct net_device *netdev;
-       struct in_device *inetdev;
-
        pr_debug("%s ibdev %p\n", __func__, ibdev);
 
-       dev = to_iwch_dev(ibdev);
-       netdev = dev->rdev.port_info.lldevs[port-1];
-
-       /* props being zeroed by the caller, avoid zeroing it here */
-       props->max_mtu = IB_MTU_4096;
-       props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
-
-       if (!netif_carrier_ok(netdev))
-               props->state = IB_PORT_DOWN;
-       else {
-               inetdev = in_dev_get(netdev);
-               if (inetdev) {
-                       if (inetdev->ifa_list)
-                               props->state = IB_PORT_ACTIVE;
-                       else
-                               props->state = IB_PORT_INIT;
-                       in_dev_put(inetdev);
-               } else
-                       props->state = IB_PORT_INIT;
-       }
-
        props->port_cap_flags =
            IB_PORT_CM_SUP |
            IB_PORT_SNMP_TUNNEL_SUP |
@@ -1273,8 +1248,24 @@ static const struct ib_device_ops iwch_dev_ops = {
        INIT_RDMA_OBJ_SIZE(ib_ucontext, iwch_ucontext, ibucontext),
 };
 
+static int set_netdevs(struct ib_device *ib_dev, struct cxio_rdev *rdev)
+{
+       int ret;
+       int i;
+
+       for (i = 0; i < rdev->port_info.nports; i++) {
+               ret = ib_device_set_netdev(ib_dev, rdev->port_info.lldevs[i],
+                                          i + 1);
+               if (ret)
+                       return ret;
+       }
+       return 0;
+}
+
 int iwch_register_device(struct iwch_dev *dev)
 {
+       int err;
+
        pr_debug("%s iwch_dev %p\n", __func__, dev);
        memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
        memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
@@ -1315,6 +1306,10 @@ int iwch_register_device(struct iwch_dev *dev)
 
        rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
        ib_set_device_ops(&dev->ibdev, &iwch_dev_ops);
+       err = set_netdevs(&dev->ibdev, &dev->rdev);
+       if (err)
+               return err;
+
        return ib_register_device(&dev->ibdev, "cxgb3_%d");
 }
 
index 5e59c57..d373ac0 100644 (file)
@@ -305,32 +305,8 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
 static int c4iw_query_port(struct ib_device *ibdev, u8 port,
                           struct ib_port_attr *props)
 {
-       struct c4iw_dev *dev;
-       struct net_device *netdev;
-       struct in_device *inetdev;
-
        pr_debug("ibdev %p\n", ibdev);
 
-       dev = to_c4iw_dev(ibdev);
-       netdev = dev->rdev.lldi.ports[port-1];
-       /* props being zeroed by the caller, avoid zeroing it here */
-       props->max_mtu = IB_MTU_4096;
-       props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
-
-       if (!netif_carrier_ok(netdev))
-               props->state = IB_PORT_DOWN;
-       else {
-               inetdev = in_dev_get(netdev);
-               if (inetdev) {
-                       if (inetdev->ifa_list)
-                               props->state = IB_PORT_ACTIVE;
-                       else
-                               props->state = IB_PORT_INIT;
-                       in_dev_put(inetdev);
-               } else
-                       props->state = IB_PORT_INIT;
-       }
-
        props->port_cap_flags =
            IB_PORT_CM_SUP |
            IB_PORT_SNMP_TUNNEL_SUP |
index 119f8ef..2283e43 100644 (file)
@@ -156,5 +156,8 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
                  int qp_attr_mask, struct ib_udata *udata);
 enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
                                         u8 port_num);
+struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num);
+int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+                    u8 port_num, int index);
 
 #endif /* _EFA_H_ */
index 2cb4248..3c412bc 100644 (file)
@@ -109,17 +109,19 @@ static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset)
        } while (time_is_after_jiffies(exp_time));
 
        if (read_resp->req_id != mmio_read->seq_num) {
-               ibdev_err(edev->efa_dev,
-                         "Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n",
-                         mmio_read->seq_num, offset, read_resp->req_id,
-                         read_resp->reg_off);
+               ibdev_err_ratelimited(
+                       edev->efa_dev,
+                       "Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n",
+                       mmio_read->seq_num, offset, read_resp->req_id,
+                       read_resp->reg_off);
                err = EFA_MMIO_READ_INVALID;
                goto out;
        }
 
        if (read_resp->reg_off != offset) {
-               ibdev_err(edev->efa_dev,
-                         "Reading register failed: wrong offset provided\n");
+               ibdev_err_ratelimited(
+                       edev->efa_dev,
+                       "Reading register failed: wrong offset provided\n");
                err = EFA_MMIO_READ_INVALID;
                goto out;
        }
@@ -293,9 +295,10 @@ static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq,
        u16 ctx_id = cmd_id & (aq->depth - 1);
 
        if (aq->comp_ctx[ctx_id].occupied && capture) {
-               ibdev_err(aq->efa_dev,
-                         "Completion context for command_id %#x is occupied\n",
-                         cmd_id);
+               ibdev_err_ratelimited(
+                       aq->efa_dev,
+                       "Completion context for command_id %#x is occupied\n",
+                       cmd_id);
                return NULL;
        }
 
@@ -401,7 +404,7 @@ static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue
 
        spin_lock(&aq->sq.lock);
        if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) {
-               ibdev_err(aq->efa_dev, "Admin queue is closed\n");
+               ibdev_err_ratelimited(aq->efa_dev, "Admin queue is closed\n");
                spin_unlock(&aq->sq.lock);
                return ERR_PTR(-ENODEV);
        }
@@ -519,8 +522,9 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c
                        break;
 
                if (time_is_before_jiffies(timeout)) {
-                       ibdev_err(aq->efa_dev,
-                                 "Wait for completion (polling) timeout\n");
+                       ibdev_err_ratelimited(
+                               aq->efa_dev,
+                               "Wait for completion (polling) timeout\n");
                        /* EFA didn't have any completion */
                        atomic64_inc(&aq->stats.no_completion);
 
@@ -561,17 +565,19 @@ static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *com
                atomic64_inc(&aq->stats.no_completion);
 
                if (comp_ctx->status == EFA_CMD_COMPLETED)
-                       ibdev_err(aq->efa_dev,
-                                 "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
-                                 efa_com_cmd_str(comp_ctx->cmd_opcode),
-                                 comp_ctx->cmd_opcode, comp_ctx->status,
-                                 comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
+                       ibdev_err_ratelimited(
+                               aq->efa_dev,
+                               "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
+                               efa_com_cmd_str(comp_ctx->cmd_opcode),
+                               comp_ctx->cmd_opcode, comp_ctx->status,
+                               comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
                else
-                       ibdev_err(aq->efa_dev,
-                                 "The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
-                                 efa_com_cmd_str(comp_ctx->cmd_opcode),
-                                 comp_ctx->cmd_opcode, comp_ctx->status,
-                                 comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
+                       ibdev_err_ratelimited(
+                               aq->efa_dev,
+                               "The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
+                               efa_com_cmd_str(comp_ctx->cmd_opcode),
+                               comp_ctx->cmd_opcode, comp_ctx->status,
+                               comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
 
                clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
                err = -ETIME;
@@ -633,10 +639,11 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
                  cmd->aq_common_descriptor.opcode);
        comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size);
        if (IS_ERR(comp_ctx)) {
-               ibdev_err(aq->efa_dev,
-                         "Failed to submit command %s (opcode %u) err %ld\n",
-                         efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
-                         cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx));
+               ibdev_err_ratelimited(
+                       aq->efa_dev,
+                       "Failed to submit command %s (opcode %u) err %ld\n",
+                       efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+                       cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx));
 
                up(&aq->avail_cmds);
                return PTR_ERR(comp_ctx);
@@ -644,11 +651,12 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
 
        err = efa_com_wait_and_process_admin_cq(comp_ctx, aq);
        if (err)
-               ibdev_err(aq->efa_dev,
-                         "Failed to process command %s (opcode %u) comp_status %d err %d\n",
-                         efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
-                         cmd->aq_common_descriptor.opcode,
-                         comp_ctx->comp_status, err);
+               ibdev_err_ratelimited(
+                       aq->efa_dev,
+                       "Failed to process command %s (opcode %u) comp_status %d err %d\n",
+                       efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+                       cmd->aq_common_descriptor.opcode, comp_ctx->comp_status,
+                       err);
 
        up(&aq->avail_cmds);
 
index 62345d8..c079f13 100644 (file)
@@ -44,7 +44,8 @@ int efa_com_create_qp(struct efa_com_dev *edev,
                               (struct efa_admin_acq_entry *)&cmd_completion,
                               sizeof(cmd_completion));
        if (err) {
-               ibdev_err(edev->efa_dev, "Failed to create qp [%d]\n", err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to create qp [%d]\n", err);
                return err;
        }
 
@@ -82,9 +83,10 @@ int efa_com_modify_qp(struct efa_com_dev *edev,
                               (struct efa_admin_acq_entry *)&resp,
                               sizeof(resp));
        if (err) {
-               ibdev_err(edev->efa_dev,
-                         "Failed to modify qp-%u modify_mask[%#x] [%d]\n",
-                         cmd.qp_handle, cmd.modify_mask, err);
+               ibdev_err_ratelimited(
+                       edev->efa_dev,
+                       "Failed to modify qp-%u modify_mask[%#x] [%d]\n",
+                       cmd.qp_handle, cmd.modify_mask, err);
                return err;
        }
 
@@ -109,8 +111,9 @@ int efa_com_query_qp(struct efa_com_dev *edev,
                               (struct efa_admin_acq_entry *)&resp,
                               sizeof(resp));
        if (err) {
-               ibdev_err(edev->efa_dev, "Failed to query qp-%u [%d]\n",
-                         cmd.qp_handle, err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to query qp-%u [%d]\n",
+                                     cmd.qp_handle, err);
                return err;
        }
 
@@ -139,8 +142,9 @@ int efa_com_destroy_qp(struct efa_com_dev *edev,
                               (struct efa_admin_acq_entry *)&cmd_completion,
                               sizeof(cmd_completion));
        if (err) {
-               ibdev_err(edev->efa_dev, "Failed to destroy qp-%u [%d]\n",
-                         qp_cmd.qp_handle, err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to destroy qp-%u [%d]\n",
+                                     qp_cmd.qp_handle, err);
                return err;
        }
 
@@ -173,7 +177,8 @@ int efa_com_create_cq(struct efa_com_dev *edev,
                               (struct efa_admin_acq_entry *)&cmd_completion,
                               sizeof(cmd_completion));
        if (err) {
-               ibdev_err(edev->efa_dev, "Failed to create cq[%d]\n", err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to create cq[%d]\n", err);
                return err;
        }
 
@@ -201,8 +206,9 @@ int efa_com_destroy_cq(struct efa_com_dev *edev,
                               sizeof(destroy_resp));
 
        if (err) {
-               ibdev_err(edev->efa_dev, "Failed to destroy CQ-%u [%d]\n",
-                         params->cq_idx, err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to destroy CQ-%u [%d]\n",
+                                     params->cq_idx, err);
                return err;
        }
 
@@ -250,7 +256,8 @@ int efa_com_register_mr(struct efa_com_dev *edev,
                               (struct efa_admin_acq_entry *)&cmd_completion,
                               sizeof(cmd_completion));
        if (err) {
-               ibdev_err(edev->efa_dev, "Failed to register mr [%d]\n", err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to register mr [%d]\n", err);
                return err;
        }
 
@@ -277,9 +284,9 @@ int efa_com_dereg_mr(struct efa_com_dev *edev,
                               (struct efa_admin_acq_entry *)&cmd_completion,
                               sizeof(cmd_completion));
        if (err) {
-               ibdev_err(edev->efa_dev,
-                         "Failed to de-register mr(lkey-%u) [%d]\n",
-                         mr_cmd.l_key, err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to de-register mr(lkey-%u) [%d]\n",
+                                     mr_cmd.l_key, err);
                return err;
        }
 
@@ -306,8 +313,9 @@ int efa_com_create_ah(struct efa_com_dev *edev,
                               (struct efa_admin_acq_entry *)&cmd_completion,
                               sizeof(cmd_completion));
        if (err) {
-               ibdev_err(edev->efa_dev, "Failed to create ah for %pI6 [%d]\n",
-                         ah_cmd.dest_addr, err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to create ah for %pI6 [%d]\n",
+                                     ah_cmd.dest_addr, err);
                return err;
        }
 
@@ -334,8 +342,9 @@ int efa_com_destroy_ah(struct efa_com_dev *edev,
                               (struct efa_admin_acq_entry *)&cmd_completion,
                               sizeof(cmd_completion));
        if (err) {
-               ibdev_err(edev->efa_dev, "Failed to destroy ah-%d pd-%d [%d]\n",
-                         ah_cmd.ah, ah_cmd.pd, err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to destroy ah-%d pd-%d [%d]\n",
+                                     ah_cmd.ah, ah_cmd.pd, err);
                return err;
        }
 
@@ -367,8 +376,9 @@ static int efa_com_get_feature_ex(struct efa_com_dev *edev,
        int err;
 
        if (!efa_com_check_supported_feature_id(edev, feature_id)) {
-               ibdev_err(edev->efa_dev, "Feature %d isn't supported\n",
-                         feature_id);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Feature %d isn't supported\n",
+                                     feature_id);
                return -EOPNOTSUPP;
        }
 
@@ -396,9 +406,10 @@ static int efa_com_get_feature_ex(struct efa_com_dev *edev,
                               sizeof(*get_resp));
 
        if (err) {
-               ibdev_err(edev->efa_dev,
-                         "Failed to submit get_feature command %d [%d]\n",
-                         feature_id, err);
+               ibdev_err_ratelimited(
+                       edev->efa_dev,
+                       "Failed to submit get_feature command %d [%d]\n",
+                       feature_id, err);
                return err;
        }
 
@@ -421,8 +432,9 @@ int efa_com_get_network_attr(struct efa_com_dev *edev,
        err = efa_com_get_feature(edev, &resp,
                                  EFA_ADMIN_NETWORK_ATTR);
        if (err) {
-               ibdev_err(edev->efa_dev,
-                         "Failed to get network attributes %d\n", err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to get network attributes %d\n",
+                                     err);
                return err;
        }
 
@@ -441,8 +453,9 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
 
        err = efa_com_get_feature(edev, &resp, EFA_ADMIN_DEVICE_ATTR);
        if (err) {
-               ibdev_err(edev->efa_dev, "Failed to get device attributes %d\n",
-                         err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to get device attributes %d\n",
+                                     err);
                return err;
        }
 
@@ -456,9 +469,10 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
        result->db_bar = resp.u.device_attr.db_bar;
 
        if (result->admin_api_version < 1) {
-               ibdev_err(edev->efa_dev,
-                         "Failed to get device attr api version [%u < 1]\n",
-                         result->admin_api_version);
+               ibdev_err_ratelimited(
+                       edev->efa_dev,
+                       "Failed to get device attr api version [%u < 1]\n",
+                       result->admin_api_version);
                return -EINVAL;
        }
 
@@ -466,8 +480,9 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
        err = efa_com_get_feature(edev, &resp,
                                  EFA_ADMIN_QUEUE_ATTR);
        if (err) {
-               ibdev_err(edev->efa_dev,
-                         "Failed to get network attributes %d\n", err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to get queue attributes %d\n",
+                                     err);
                return err;
        }
 
@@ -497,7 +512,8 @@ int efa_com_get_hw_hints(struct efa_com_dev *edev,
 
        err = efa_com_get_feature(edev, &resp, EFA_ADMIN_HW_HINTS);
        if (err) {
-               ibdev_err(edev->efa_dev, "Failed to get hw hints %d\n", err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to get hw hints %d\n", err);
                return err;
        }
 
@@ -520,8 +536,9 @@ static int efa_com_set_feature_ex(struct efa_com_dev *edev,
        int err;
 
        if (!efa_com_check_supported_feature_id(edev, feature_id)) {
-               ibdev_err(edev->efa_dev, "Feature %d isn't supported\n",
-                         feature_id);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Feature %d isn't supported\n",
+                                     feature_id);
                return -EOPNOTSUPP;
        }
 
@@ -545,9 +562,10 @@ static int efa_com_set_feature_ex(struct efa_com_dev *edev,
                               sizeof(*set_resp));
 
        if (err) {
-               ibdev_err(edev->efa_dev,
-                         "Failed to submit set_feature command %d error: %d\n",
-                         feature_id, err);
+               ibdev_err_ratelimited(
+                       edev->efa_dev,
+                       "Failed to submit set_feature command %d error: %d\n",
+                       feature_id, err);
                return err;
        }
 
@@ -574,8 +592,9 @@ int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups)
 
        err = efa_com_get_feature(edev, &get_resp, EFA_ADMIN_AENQ_CONFIG);
        if (err) {
-               ibdev_err(edev->efa_dev, "Failed to get aenq attributes: %d\n",
-                         err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to get aenq attributes: %d\n",
+                                     err);
                return err;
        }
 
@@ -585,9 +604,10 @@ int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups)
                  get_resp.u.aenq.enabled_groups);
 
        if ((get_resp.u.aenq.supported_groups & groups) != groups) {
-               ibdev_err(edev->efa_dev,
-                         "Trying to set unsupported aenq groups[%#x] supported[%#x]\n",
-                         groups, get_resp.u.aenq.supported_groups);
+               ibdev_err_ratelimited(
+                       edev->efa_dev,
+                       "Trying to set unsupported aenq groups[%#x] supported[%#x]\n",
+                       groups, get_resp.u.aenq.supported_groups);
                return -EOPNOTSUPP;
        }
 
@@ -595,8 +615,9 @@ int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups)
        err = efa_com_set_feature(edev, &set_resp, &cmd,
                                  EFA_ADMIN_AENQ_CONFIG);
        if (err) {
-               ibdev_err(edev->efa_dev, "Failed to set aenq attributes: %d\n",
-                         err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to set aenq attributes: %d\n",
+                                     err);
                return err;
        }
 
@@ -619,7 +640,8 @@ int efa_com_alloc_pd(struct efa_com_dev *edev,
                               (struct efa_admin_acq_entry *)&resp,
                               sizeof(resp));
        if (err) {
-               ibdev_err(edev->efa_dev, "Failed to allocate pd[%d]\n", err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to allocate pd[%d]\n", err);
                return err;
        }
 
@@ -645,8 +667,9 @@ int efa_com_dealloc_pd(struct efa_com_dev *edev,
                               (struct efa_admin_acq_entry *)&resp,
                               sizeof(resp));
        if (err) {
-               ibdev_err(edev->efa_dev, "Failed to deallocate pd-%u [%d]\n",
-                         cmd.pd, err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to deallocate pd-%u [%d]\n",
+                                     cmd.pd, err);
                return err;
        }
 
@@ -669,7 +692,8 @@ int efa_com_alloc_uar(struct efa_com_dev *edev,
                               (struct efa_admin_acq_entry *)&resp,
                               sizeof(resp));
        if (err) {
-               ibdev_err(edev->efa_dev, "Failed to allocate uar[%d]\n", err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to allocate uar[%d]\n", err);
                return err;
        }
 
@@ -695,10 +719,47 @@ int efa_com_dealloc_uar(struct efa_com_dev *edev,
                               (struct efa_admin_acq_entry *)&resp,
                               sizeof(resp));
        if (err) {
-               ibdev_err(edev->efa_dev, "Failed to deallocate uar-%u [%d]\n",
-                         cmd.uar, err);
+               ibdev_err_ratelimited(edev->efa_dev,
+                                     "Failed to deallocate uar-%u [%d]\n",
+                                     cmd.uar, err);
                return err;
        }
 
        return 0;
 }
+
+int efa_com_get_stats(struct efa_com_dev *edev,
+                     struct efa_com_get_stats_params *params,
+                     union efa_com_get_stats_result *result)
+{ /* Issue an EFA_ADMIN_GET_STATS admin command on edev->aq; returns 0 after filling *result, or the efa_com_cmd_exec() error. */
+       struct efa_com_admin_queue *aq = &edev->aq;
+       struct efa_admin_aq_get_stats_cmd cmd = {}; /* zero-initialized; only the fields assigned below are set explicitly */
+       struct efa_admin_acq_get_stats_resp resp; /* not initialized here; presumably written by efa_com_cmd_exec() on success -- confirm */
+       int err;
+
+       cmd.aq_common_descriptor.opcode = EFA_ADMIN_GET_STATS;
+       cmd.type = params->type; /* the caller's selector is forwarded unchanged: type, scope, scope_modifier */
+       cmd.scope = params->scope;
+       cmd.scope_modifier = params->scope_modifier;
+
+       err = efa_com_cmd_exec(aq,
+                              (struct efa_admin_aq_entry *)&cmd,
+                              sizeof(cmd),
+                              (struct efa_admin_acq_entry *)&resp,
+                              sizeof(resp));
+       if (err) {
+               ibdev_err_ratelimited(
+                       edev->efa_dev,
+                       "Failed to get stats type-%u scope-%u.%u [%d]\n",
+                       cmd.type, cmd.scope, cmd.scope_modifier, err);
+               return err; /* *result is left untouched on failure */
+       }
+
+       result->basic_stats.tx_bytes = resp.basic_stats.tx_bytes; /* basic_stats is copied unconditionally, whatever params->type requested */
+       result->basic_stats.tx_pkts = resp.basic_stats.tx_pkts;
+       result->basic_stats.rx_bytes = resp.basic_stats.rx_bytes;
+       result->basic_stats.rx_pkts = resp.basic_stats.rx_pkts;
+       result->basic_stats.rx_drops = resp.basic_stats.rx_drops;
+
+       return 0;
+}
index a117438..7f6c130 100644 (file)
@@ -225,6 +225,26 @@ struct efa_com_dealloc_uar_params {
        u16 uarn;
 };
 
+struct efa_com_get_stats_params { /* selector that efa_com_get_stats() copies field-by-field into the GET_STATS admin command */
+       /* see enum efa_admin_get_stats_type */
+       u8 type;
+       /* see enum efa_admin_get_stats_scope */
+       u8 scope;
+       u16 scope_modifier; /* forwarded as cmd.scope_modifier; meaning presumably depends on scope -- confirm against the admin API */
+};
+
+struct efa_com_basic_stats { /* counters that efa_com_get_stats() copies out of resp.basic_stats, one field per same-named response field */
+       u64 tx_bytes;
+       u64 tx_pkts;
+       u64 rx_bytes;
+       u64 rx_pkts;
+       u64 rx_drops;
+};
+
+union efa_com_get_stats_result { /* output of efa_com_get_stats(); basic_stats is currently the only member */
+       struct efa_com_basic_stats basic_stats;
+};
+
 void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low);
 int efa_com_create_qp(struct efa_com_dev *edev,
                      struct efa_com_create_qp_params *params,
@@ -266,5 +286,8 @@ int efa_com_alloc_uar(struct efa_com_dev *edev,
                      struct efa_com_alloc_uar_result *result);
 int efa_com_dealloc_uar(struct efa_com_dev *edev,
                        struct efa_com_dealloc_uar_params *params);
+int efa_com_get_stats(struct efa_com_dev *edev,
+                     struct efa_com_get_stats_params *params,
+                     union efa_com_get_stats_result *result);
 
 #endif /* _EFA_COM_CMD_H_ */
index dd1c6d4..83858f7 100644 (file)
@@ -201,6 +201,7 @@ static const struct ib_device_ops efa_dev_ops = {
        .driver_id = RDMA_DRIVER_EFA,
        .uverbs_abi_ver = EFA_UVERBS_ABI_VERSION,
 
+       .alloc_hw_stats = efa_alloc_hw_stats,
        .alloc_pd = efa_alloc_pd,
        .alloc_ucontext = efa_alloc_ucontext,
        .create_ah = efa_create_ah,
@@ -212,6 +213,7 @@ static const struct ib_device_ops efa_dev_ops = {
        .destroy_ah = efa_destroy_ah,
        .destroy_cq = efa_destroy_cq,
        .destroy_qp = efa_destroy_qp,
+       .get_hw_stats = efa_get_hw_stats,
        .get_link_layer = efa_port_link_layer,
        .get_port_immutable = efa_get_port_immutable,
        .mmap = efa_mmap,
index df77bc3..4edae89 100644 (file)
@@ -41,6 +41,33 @@ static inline u64 get_mmap_key(const struct efa_mmap_entry *efa)
               ((u64)efa->mmap_page << PAGE_SHIFT);
 }
 
+#define EFA_DEFINE_STATS(op) /* applies op(ENUM_NAME, "string") to every exported counter; entry order fixes the enum values */ \
+       op(EFA_TX_BYTES, "tx_bytes") \
+       op(EFA_TX_PKTS, "tx_pkts") \
+       op(EFA_RX_BYTES, "rx_bytes") \
+       op(EFA_RX_PKTS, "rx_pkts") \
+       op(EFA_RX_DROPS, "rx_drops") \
+       op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
+       op(EFA_COMPLETED_CMDS, "completed_cmds") \
+       op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
+       op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
+       op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
+       op(EFA_CREATE_QP_ERR, "create_qp_err") \
+       op(EFA_REG_MR_ERR, "reg_mr_err") \
+       op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
+       op(EFA_CREATE_AH_ERR, "create_ah_err")
+
+#define EFA_STATS_ENUM(ename, name) ename, /* op for EFA_DEFINE_STATS: emit the enumerator, ignore the string */
+#define EFA_STATS_STR(ename, name) [ename] = name, /* op for EFA_DEFINE_STATS: emit a designated initializer placing the string at index ename */
+
+enum efa_hw_stats { /* one enumerator per EFA_DEFINE_STATS entry, in list order starting with EFA_TX_BYTES */
+       EFA_DEFINE_STATS(EFA_STATS_ENUM)
+};
+
+static const char *const efa_stats_names[] = { /* counter name strings, indexed by the enumerators generated from the same EFA_DEFINE_STATS list */
+       EFA_DEFINE_STATS(EFA_STATS_STR)
+};
+
 #define EFA_CHUNK_PAYLOAD_SHIFT       12
 #define EFA_CHUNK_PAYLOAD_SIZE        BIT(EFA_CHUNK_PAYLOAD_SHIFT)
 #define EFA_CHUNK_PAYLOAD_PTR_SIZE    8
@@ -121,7 +148,7 @@ static inline struct efa_ah *to_eah(struct ib_ah *ibah)
 }
 
 #define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \
-                                sizeof(((typeof(x) *)0)->fld) <= (sz))
+                                FIELD_SIZEOF(typeof(x), fld) <= (sz))
 
 #define is_reserved_cleared(reserved) \
        !memchr_inv(reserved, 0, sizeof(reserved))
@@ -306,7 +333,7 @@ int efa_query_port(struct ib_device *ibdev, u8 port,
        props->lmc = 1;
 
        props->state = IB_PORT_ACTIVE;
-       props->phys_state = 5;
+       props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
        props->gid_tbl_len = 1;
        props->pkey_tbl_len = 1;
        props->active_speed = IB_SPEED_EDR;
@@ -1473,14 +1500,12 @@ int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 
        ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);
 
-       if (mr->umem) {
-               params.l_key = mr->ibmr.lkey;
-               err = efa_com_dereg_mr(&dev->edev, &params);
-               if (err)
-                       return err;
-       }
-       ib_umem_release(mr->umem);
+       params.l_key = mr->ibmr.lkey;
+       err = efa_com_dereg_mr(&dev->edev, &params);
+       if (err)
+               return err;
 
+       ib_umem_release(mr->umem);
        kfree(mr);
 
        return 0;
@@ -1727,6 +1752,54 @@ void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
        efa_ah_destroy(dev, ah);
 }
 
+struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num)
+{ /* ibdev and port_num are not used: the same name table is handed out for every call */
+       return rdma_alloc_hw_stats_struct(efa_stats_names, /* returns whatever rdma_alloc_hw_stats_struct() yields for the table, its length and the default lifespan */
+                                         ARRAY_SIZE(efa_stats_names),
+                                         RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+                    u8 port_num, int index)
+{ /* Fills stats->value[] for every enum efa_hw_stats slot; returns the number of counters, or the efa_com_get_stats() error. port_num and index are not used. */
+       struct efa_com_get_stats_params params = {};
+       union efa_com_get_stats_result result;
+       struct efa_dev *dev = to_edev(ibdev);
+       struct efa_com_basic_stats *bs;
+       struct efa_com_stats_admin *as;
+       struct efa_stats *s;
+       int err;
+
+       params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC; /* scope_modifier stays 0 from the = {} initializer */
+       params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
+
+       err = efa_com_get_stats(&dev->edev, &params, &result);
+       if (err)
+               return err; /* no stats->value[] entry has been written at this point */
+
+       bs = &result.basic_stats; /* counters obtained through the admin command above */
+       stats->value[EFA_TX_BYTES] = bs->tx_bytes;
+       stats->value[EFA_TX_PKTS] = bs->tx_pkts;
+       stats->value[EFA_RX_BYTES] = bs->rx_bytes;
+       stats->value[EFA_RX_PKTS] = bs->rx_pkts;
+       stats->value[EFA_RX_DROPS] = bs->rx_drops;
+
+       as = &dev->edev.aq.stats; /* atomic64 counters kept on the admin queue */
+       stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
+       stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
+       stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);
+
+       s = &dev->stats; /* atomic64 counters kept on the efa_dev itself */
+       stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
+       stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err);
+       stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err);
+       stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err);
+       stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err);
+       stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err);
+
+       return ARRAY_SIZE(efa_stats_names); /* count of entries in the name table, i.e. how many values were filled */
+}
+
 enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
                                         u8 port_num)
 {
index 67052dc..9b1fb84 100644 (file)
@@ -4101,6 +4101,7 @@ def_access_ibp_counter(rc_dupreq);
 def_access_ibp_counter(rdma_seq);
 def_access_ibp_counter(unaligned);
 def_access_ibp_counter(seq_naks);
+def_access_ibp_counter(rc_crwaits);
 
 static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
 [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
@@ -5119,6 +5120,7 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
 [C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq),
 [C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned),
 [C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks),
+[C_SW_IBP_RC_CRWAITS] = SW_IBP_CNTR(RcCrWait, rc_crwaits),
 [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
                               access_sw_cpu_rc_acks),
 [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
index b76cf81..4ca5ac8 100644 (file)
@@ -1245,6 +1245,7 @@ enum {
        C_SW_IBP_RDMA_SEQ,
        C_SW_IBP_UNALIGNED,
        C_SW_IBP_SEQ_NAK,
+       C_SW_IBP_RC_CRWAITS,
        C_SW_CPU_RC_ACKS,
        C_SW_CPU_RC_QACKS,
        C_SW_CPU_RC_DELAYED_COMP,
index 184dba3..d8ff063 100644 (file)
@@ -2326,7 +2326,7 @@ struct opa_port_status_req {
        __be32 vl_select_mask;
 };
 
-#define VL_MASK_ALL            0x000080ff
+#define VL_MASK_ALL            0x00000000000080ffUL
 
 struct opa_port_status_rsp {
        __u8 port_num;
@@ -2625,15 +2625,14 @@ static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
 }
 
 static void a0_portstatus(struct hfi1_pportdata *ppd,
-                         struct opa_port_status_rsp *rsp, u32 vl_select_mask)
+                         struct opa_port_status_rsp *rsp)
 {
        if (!is_bx(ppd->dd)) {
                unsigned long vl;
                u64 sum_vl_xmit_wait = 0;
-               u32 vl_all_mask = VL_MASK_ALL;
+               unsigned long vl_all_mask = VL_MASK_ALL;
 
-               for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
-                                8 * sizeof(vl_all_mask)) {
+               for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
                        u64 tmp = sum_vl_xmit_wait +
                                  read_port_cntr(ppd, C_TX_WAIT_VL,
                                                 idx_from_vl(vl));
@@ -2730,12 +2729,12 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
                (struct opa_port_status_req *)pmp->data;
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
        struct opa_port_status_rsp *rsp;
-       u32 vl_select_mask = be32_to_cpu(req->vl_select_mask);
+       unsigned long vl_select_mask = be32_to_cpu(req->vl_select_mask);
        unsigned long vl;
        size_t response_data_size;
        u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
        u8 port_num = req->port_num;
-       u8 num_vls = hweight32(vl_select_mask);
+       u8 num_vls = hweight64(vl_select_mask);
        struct _vls_pctrs *vlinfo;
        struct hfi1_ibport *ibp = to_iport(ibdev, port);
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -2770,7 +2769,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
 
        hfi1_read_link_quality(dd, &rsp->link_quality_indicator);
 
-       rsp->vl_select_mask = cpu_to_be32(vl_select_mask);
+       rsp->vl_select_mask = cpu_to_be32((u32)vl_select_mask);
        rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
                                          CNTR_INVALID_VL));
        rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
@@ -2841,8 +2840,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
         * So in the for_each_set_bit() loop below, we don't need
         * any additional checks for vl.
         */
-       for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
-                        8 * sizeof(vl_select_mask)) {
+       for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
                memset(vlinfo, 0, sizeof(*vlinfo));
 
                tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl));
@@ -2883,7 +2881,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
                vfi++;
        }
 
-       a0_portstatus(ppd, rsp, vl_select_mask);
+       a0_portstatus(ppd, rsp);
 
        if (resp_len)
                *resp_len += response_data_size;
@@ -2930,16 +2928,14 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
        return error_counter_summary;
 }
 
-static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp,
-                           u32 vl_select_mask)
+static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp)
 {
        if (!is_bx(ppd->dd)) {
                unsigned long vl;
                u64 sum_vl_xmit_wait = 0;
-               u32 vl_all_mask = VL_MASK_ALL;
+               unsigned long vl_all_mask = VL_MASK_ALL;
 
-               for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
-                                8 * sizeof(vl_all_mask)) {
+               for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
                        u64 tmp = sum_vl_xmit_wait +
                                  read_port_cntr(ppd, C_TX_WAIT_VL,
                                                 idx_from_vl(vl));
@@ -2994,7 +2990,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
        u64 port_mask;
        u8 port_num;
        unsigned long vl;
-       u32 vl_select_mask;
+       unsigned long vl_select_mask;
        int vfi;
        u16 link_width;
        u16 link_speed;
@@ -3071,8 +3067,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
         * So in the for_each_set_bit() loop below, we don't need
         * any additional checks for vl.
         */
-       for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
-                        8 * sizeof(req->vl_select_mask)) {
+       for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
                memset(vlinfo, 0, sizeof(*vlinfo));
 
                rsp->vls[vfi].port_vl_xmit_data =
@@ -3120,7 +3115,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
                vfi++;
        }
 
-       a0_datacounters(ppd, rsp, vl_select_mask);
+       a0_datacounters(ppd, rsp);
 
        if (resp_len)
                *resp_len += response_data_size;
@@ -3215,7 +3210,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
        struct _vls_ectrs *vlinfo;
        unsigned long vl;
        u64 port_mask, tmp;
-       u32 vl_select_mask;
+       unsigned long vl_select_mask;
        int vfi;
 
        req = (struct opa_port_error_counters64_msg *)pmp->data;
@@ -3273,8 +3268,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
        vlinfo = &rsp->vls[0];
        vfi = 0;
        vl_select_mask = be32_to_cpu(req->vl_select_mask);
-       for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
-                        8 * sizeof(req->vl_select_mask)) {
+       for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
                memset(vlinfo, 0, sizeof(*vlinfo));
                rsp->vls[vfi].port_vl_xmit_discards =
                        cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
@@ -3485,7 +3479,7 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
        u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
        u64 portn = be64_to_cpu(req->port_select_mask[3]);
        u32 counter_select = be32_to_cpu(req->counter_select_mask);
-       u32 vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
+       unsigned long vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
        unsigned long vl;
 
        if ((nports != 1) || (portn != 1 << port)) {
@@ -3579,8 +3573,7 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
        if (counter_select & CS_UNCORRECTABLE_ERRORS)
                write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0);
 
-       for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
-                        8 * sizeof(vl_select_mask)) {
+       for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
                if (counter_select & CS_PORT_XMIT_DATA)
                        write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
 
index 024a7c2..513a8aa 100644 (file)
@@ -595,11 +595,8 @@ check_s_state:
                case IB_WR_SEND_WITH_IMM:
                case IB_WR_SEND_WITH_INV:
                        /* If no credit, return. */
-                       if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
-                           rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
-                               qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+                       if (!rvt_rc_credit_avail(qp, wqe))
                                goto bail;
-                       }
                        if (len > pmtu) {
                                qp->s_state = OP(SEND_FIRST);
                                len = pmtu;
@@ -632,11 +629,8 @@ check_s_state:
                        goto no_flow_control;
                case IB_WR_RDMA_WRITE_WITH_IMM:
                        /* If no credit, return. */
-                       if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
-                           rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
-                               qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+                       if (!rvt_rc_credit_avail(qp, wqe))
                                goto bail;
-                       }
 no_flow_control:
                        put_ib_reth_vaddr(
                                wqe->rdma_wr.remote_addr,
@@ -1483,6 +1477,11 @@ static void update_num_rd_atomic(struct rvt_qp *qp, u32 psn,
                        req->ack_pending = cur_seg - req->comp_seg;
                        priv->pending_tid_r_segs += req->ack_pending;
                        qp->s_num_rd_atomic += req->ack_pending;
+                       trace_hfi1_tid_req_update_num_rd_atomic(qp, 0,
+                                                               wqe->wr.opcode,
+                                                               wqe->psn,
+                                                               wqe->lpsn,
+                                                               req);
                } else {
                        priv->pending_tid_r_segs += req->total_segs;
                        qp->s_num_rd_atomic += req->total_segs;
index 6141f4e..b4dcc4d 100644 (file)
@@ -2646,6 +2646,9 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
        u32 fpsn;
 
        lockdep_assert_held(&qp->r_lock);
+       trace_hfi1_rsp_read_kdeth_eflags(qp, ibpsn);
+       trace_hfi1_sender_read_kdeth_eflags(qp);
+       trace_hfi1_tid_read_sender_kdeth_eflags(qp, 0);
        spin_lock(&qp->s_lock);
        /* If the psn is out of valid range, drop the packet */
        if (cmp_psn(ibpsn, qp->s_last_psn) < 0 ||
@@ -2710,6 +2713,8 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
                goto s_unlock;
 
        req = wqe_to_tid_req(wqe);
+       trace_hfi1_tid_req_read_kdeth_eflags(qp, 0, wqe->wr.opcode, wqe->psn,
+                                            wqe->lpsn, req);
        switch (rcv_type) {
        case RHF_RCV_TYPE_EXPECTED:
                switch (rte) {
@@ -2724,6 +2729,9 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
                         * packets that could be still in the fabric.
                         */
                        flow = &req->flows[req->clear_tail];
+                       trace_hfi1_tid_flow_read_kdeth_eflags(qp,
+                                                             req->clear_tail,
+                                                             flow);
                        if (priv->s_flags & HFI1_R_TID_SW_PSN) {
                                diff = cmp_psn(psn,
                                               flow->flow_state.r_next_psn);
index 4388b59..343fb98 100644 (file)
@@ -627,6 +627,12 @@ DEFINE_EVENT(/* event */
        TP_ARGS(qp, index, flow)
 );
 
+DEFINE_EVENT(/* event */
+       hfi1_tid_flow_template, hfi1_tid_flow_read_kdeth_eflags,
+       TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
+       TP_ARGS(qp, index, flow)
+);
+
 DECLARE_EVENT_CLASS(/* tid_node */
        hfi1_tid_node_template,
        TP_PROTO(struct rvt_qp *qp, const char *msg, u32 index, u32 base,
@@ -851,6 +857,12 @@ DEFINE_EVENT(/* event */
        TP_ARGS(qp, psn)
 );
 
+DEFINE_EVENT(/* event */
+       hfi1_responder_info_template, hfi1_rsp_read_kdeth_eflags,
+       TP_PROTO(struct rvt_qp *qp, u32 psn),
+       TP_ARGS(qp, psn)
+);
+
 DECLARE_EVENT_CLASS(/* sender_info */
        hfi1_sender_info_template,
        TP_PROTO(struct rvt_qp *qp),
@@ -955,6 +967,12 @@ DEFINE_EVENT(/* event */
        TP_ARGS(qp)
 );
 
+DEFINE_EVENT(/* event */
+       hfi1_sender_info_template, hfi1_sender_read_kdeth_eflags,
+       TP_PROTO(struct rvt_qp *qp),
+       TP_ARGS(qp)
+);
+
 DECLARE_EVENT_CLASS(/* tid_read_sender */
        hfi1_tid_read_sender_template,
        TP_PROTO(struct rvt_qp *qp, char newreq),
@@ -1015,6 +1033,12 @@ DEFINE_EVENT(/* event */
        TP_ARGS(qp, newreq)
 );
 
+DEFINE_EVENT(/* event */
+       hfi1_tid_read_sender_template, hfi1_tid_read_sender_kdeth_eflags,
+       TP_PROTO(struct rvt_qp *qp, char newreq),
+       TP_ARGS(qp, newreq)
+);
+
 DECLARE_EVENT_CLASS(/* tid_rdma_request */
        hfi1_tid_rdma_request_template,
        TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
@@ -1216,6 +1240,13 @@ DEFINE_EVENT(/* event */
 );
 
 DEFINE_EVENT(/* event */
+       hfi1_tid_rdma_request_template, hfi1_tid_req_read_kdeth_eflags,
+       TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+                struct tid_rdma_request *req),
+       TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
        hfi1_tid_rdma_request_template, hfi1_tid_req_make_rc_ack_write,
        TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
                 struct tid_rdma_request *req),
@@ -1229,6 +1260,13 @@ DEFINE_EVENT(/* event */
        TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
 );
 
+DEFINE_EVENT(/* event */
+       hfi1_tid_rdma_request_template, hfi1_tid_req_update_num_rd_atomic,
+       TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+                struct tid_rdma_request *req),
+       TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
 DECLARE_EVENT_CLASS(/* rc_rcv_err */
        hfi1_rc_rcv_err_template,
        TP_PROTO(struct rvt_qp *qp, u32 opcode, u32 psn, int diff),
index b89a9b9..469acb9 100644 (file)
@@ -118,10 +118,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
 void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
                             size_t npages, bool dirty)
 {
-       if (dirty)
-               put_user_pages_dirty_lock(p, npages);
-       else
-               put_user_pages(p, npages);
+       put_user_pages_dirty_lock(p, npages, dirty);
 
        if (mm) { /* during close after signal, mm can be NULL */
                atomic64_sub(npages, &mm->pinned_vm);
index 4d8510b..9972e0e 100644 (file)
@@ -110,12 +110,6 @@ enum pkt_q_sdma_state {
        SDMA_PKT_Q_DEFERRED,
 };
 
-/*
- * Maximum retry attempts to submit a TX request
- * before putting the process to sleep.
- */
-#define MAX_DEFER_RETRY_COUNT 1
-
 #define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
 
 #define SDMA_DBG(req, fmt, ...)                                     \
index 646f615..7bff0a1 100644 (file)
@@ -874,16 +874,17 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
                        else
                                pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
 
-                       if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
-                               pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
                        pbc = create_pbc(ppd,
                                         pbc,
                                         qp->srate_mbps,
                                         vl,
                                         plen);
 
-                       /* Update HCRC based on packet opcode */
-                       pbc = update_hcrc(ps->opcode, pbc);
+                       if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
+                               pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
+                       else
+                               /* Update HCRC based on packet opcode */
+                               pbc = update_hcrc(ps->opcode, pbc);
                }
                tx->wqe = qp->s_wqe;
                ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
@@ -1030,17 +1031,17 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
                else
                        pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
 
+               pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
                if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
                        pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
-               pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
-
-               /* Update HCRC based on packet opcode */
-               pbc = update_hcrc(ps->opcode, pbc);
+               else
+                       /* Update HCRC based on packet opcode */
+                       pbc = update_hcrc(ps->opcode, pbc);
        }
        if (cb)
                iowait_pio_inc(&priv->s_iowait);
        pbuf = sc_buffer_alloc(sc, plen, cb, qp);
-       if (unlikely(IS_ERR_OR_NULL(pbuf))) {
+       if (IS_ERR_OR_NULL(pbuf)) {
                if (cb)
                        verbs_pio_complete(qp, 0);
                if (IS_ERR(pbuf)) {
index 5478219..d602b69 100644 (file)
@@ -8,8 +8,6 @@ config INFINIBAND_HNS
          is used in Hisilicon Hip06 and more further ICT SoC based on
          platform device.
 
-         To compile HIP06 or HIP08 driver as module, choose M here.
-
 config INFINIBAND_HNS_HIP06
        tristate "Hisilicon Hip06 Family RoCE support"
        depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET
@@ -17,15 +15,9 @@ config INFINIBAND_HNS_HIP06
          RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and
          Hip07 SoC. These RoCE engines are platform devices.
 
-         To compile this driver, choose Y here: if INFINIBAND_HNS is m, this
-         module will be called hns-roce-hw-v1
-
 config INFINIBAND_HNS_HIP08
        tristate "Hisilicon Hip08 Family RoCE support"
        depends on INFINIBAND_HNS && PCI && HNS3
        ---help---
          RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC.
          The RoCE engine is a PCI device.
-
-         To compile this driver, choose Y here: if INFINIBAND_HNS is m, this
-         module will be called hns-roce-hw-v2.
index cdd2ac2..90e08c0 100644 (file)
@@ -66,11 +66,9 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
                             HNS_ROCE_VLAN_SL_SHIFT;
        }
 
-       ah->av.port_pd = cpu_to_le32(to_hr_pd(ibah->pd)->pdn |
-                                    (rdma_ah_get_port_num(ah_attr) <<
-                                    HNS_ROCE_PORT_NUM_SHIFT));
+       ah->av.port = rdma_ah_get_port_num(ah_attr);
        ah->av.gid_index = grh->sgid_index;
-       ah->av.vlan = cpu_to_le16(vlan_tag);
+       ah->av.vlan = vlan_tag;
        ah->av.vlan_en = vlan_en;
        dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index,
                ah->av.vlan);
@@ -79,8 +77,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
                ah->av.stat_rate = IB_RATE_10_GBPS;
 
        memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE);
-       ah->av.sl_tclass_flowlabel = cpu_to_le32(rdma_ah_get_sl(ah_attr) <<
-                                                HNS_ROCE_SL_SHIFT);
+       ah->av.sl = rdma_ah_get_sl(ah_attr);
 
        return 0;
 }
@@ -91,17 +88,11 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
 
        memset(ah_attr, 0, sizeof(*ah_attr));
 
-       rdma_ah_set_sl(ah_attr, (le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
-                                HNS_ROCE_SL_SHIFT));
-       rdma_ah_set_port_num(ah_attr, (le32_to_cpu(ah->av.port_pd) >>
-                                      HNS_ROCE_PORT_NUM_SHIFT));
+       rdma_ah_set_sl(ah_attr, ah->av.sl);
+       rdma_ah_set_port_num(ah_attr, ah->av.port);
        rdma_ah_set_static_rate(ah_attr, ah->av.stat_rate);
-       rdma_ah_set_grh(ah_attr, NULL,
-                       (le32_to_cpu(ah->av.sl_tclass_flowlabel) &
-                        HNS_ROCE_FLOW_LABEL_MASK), ah->av.gid_index,
-                       ah->av.hop_limit,
-                       (le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
-                        HNS_ROCE_TCLASS_SHIFT));
+       rdma_ah_set_grh(ah_attr, NULL, ah->av.flowlabel,
+                       ah->av.gid_index, ah->av.hop_limit, ah->av.tclass);
        rdma_ah_set_dgid_raw(ah_attr, ah->av.dgid);
 
        return 0;
index 0cd09bf..455d533 100644 (file)
@@ -211,7 +211,6 @@ int hns_roce_cmd_init(struct hns_roce_dev *hr_dev)
        mutex_init(&hr_dev->cmd.hcr_mutex);
        sema_init(&hr_dev->cmd.poll_sem, 1);
        hr_dev->cmd.use_events = 0;
-       hr_dev->cmd.toggle = 1;
        hr_dev->cmd.max_cmds = CMD_MAX_NUM;
        hr_dev->cmd.pool = dma_pool_create("hns_roce_cmd", dev,
                                           HNS_ROCE_MAILBOX_SIZE,
@@ -252,23 +251,15 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev)
        hr_cmd->token_mask = CMD_TOKEN_MASK;
        hr_cmd->use_events = 1;
 
-       down(&hr_cmd->poll_sem);
-
        return 0;
 }
 
 void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev)
 {
        struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd;
-       int i;
-
-       hr_cmd->use_events = 0;
-
-       for (i = 0; i < hr_cmd->max_cmds; ++i)
-               down(&hr_cmd->event_sem);
 
        kfree(hr_cmd->context);
-       up(&hr_cmd->poll_sem);
+       hr_cmd->use_events = 0;
 }
 
 struct hns_roce_cmd_mailbox
index 4e50c22..22541d1 100644 (file)
@@ -83,7 +83,6 @@ static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev,
 
 static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
                             struct hns_roce_mtt *hr_mtt,
-                            struct hns_roce_uar *hr_uar,
                             struct hns_roce_cq *hr_cq, int vector)
 {
        struct hns_roce_cmd_mailbox *mailbox;
@@ -154,7 +153,6 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
 
        hr_cq->cons_index = 0;
        hr_cq->arm_sn = 1;
-       hr_cq->uar = hr_uar;
 
        atomic_set(&hr_cq->refcount, 1);
        init_completion(&hr_cq->free);
@@ -298,21 +296,127 @@ static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev,
                          &buf->hr_buf);
 }
 
+static int create_user_cq(struct hns_roce_dev *hr_dev,
+                         struct hns_roce_cq *hr_cq,
+                         struct ib_udata *udata,
+                         struct hns_roce_ib_create_cq_resp *resp,
+                         int cq_entries)
+{
+       struct hns_roce_ib_create_cq ucmd;
+       struct device *dev = hr_dev->dev;
+       int ret;
+       struct hns_roce_ucontext *context = rdma_udata_to_drv_context(
+                                  udata, struct hns_roce_ucontext, ibucontext);
+
+       if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+               dev_err(dev, "Failed to copy_from_udata.\n");
+               return -EFAULT;
+       }
+
+       /* Get user space address, write it into mtt table */
+       ret = hns_roce_ib_get_cq_umem(hr_dev, udata, &hr_cq->hr_buf,
+                                     &hr_cq->umem, ucmd.buf_addr,
+                                     cq_entries);
+       if (ret) {
+               dev_err(dev, "Failed to get_cq_umem.\n");
+               return ret;
+       }
+
+       if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
+           (udata->outlen >= sizeof(*resp))) {
+               ret = hns_roce_db_map_user(context, udata, ucmd.db_addr,
+                                          &hr_cq->db);
+               if (ret) {
+                       dev_err(dev, "cq record doorbell map failed!\n");
+                       goto err_mtt;
+               }
+               hr_cq->db_en = 1;
+               resp->cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB;
+       }
+
+       return 0;
+
+err_mtt:
+       hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+       ib_umem_release(hr_cq->umem);
+
+       return ret;
+}
+
+static int create_kernel_cq(struct hns_roce_dev *hr_dev,
+                           struct hns_roce_cq *hr_cq, int cq_entries)
+{
+       struct device *dev = hr_dev->dev;
+       struct hns_roce_uar *uar;
+       int ret;
+
+       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
+               ret = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1);
+               if (ret)
+                       return ret;
+
+               hr_cq->set_ci_db = hr_cq->db.db_record;
+               *hr_cq->set_ci_db = 0;
+               hr_cq->db_en = 1;
+       }
+
+       /* Init mtt table and write buff address to mtt table */
+       ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf, cq_entries);
+       if (ret) {
+               dev_err(dev, "Failed to alloc_cq_buf.\n");
+               goto err_db;
+       }
+
+       uar = &hr_dev->priv_uar;
+       hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset +
+                        DB_REG_OFFSET * uar->index;
+
+       return 0;
+
+err_db:
+       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)
+               hns_roce_free_db(hr_dev, &hr_cq->db);
+
+       return ret;
+}
+
+static void destroy_user_cq(struct hns_roce_dev *hr_dev,
+                           struct hns_roce_cq *hr_cq,
+                           struct ib_udata *udata,
+                           struct hns_roce_ib_create_cq_resp *resp)
+{
+       struct hns_roce_ucontext *context = rdma_udata_to_drv_context(
+                                  udata, struct hns_roce_ucontext, ibucontext);
+
+       if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
+           (udata->outlen >= sizeof(*resp)))
+               hns_roce_db_unmap_user(context, &hr_cq->db);
+
+       hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+       ib_umem_release(hr_cq->umem);
+}
+
+static void destroy_kernel_cq(struct hns_roce_dev *hr_dev,
+                             struct hns_roce_cq *hr_cq)
+{
+       hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+       hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, hr_cq->ib_cq.cqe);
+
+       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)
+               hns_roce_free_db(hr_dev, &hr_cq->db);
+}
+
 int hns_roce_ib_create_cq(struct ib_cq *ib_cq,
                          const struct ib_cq_init_attr *attr,
                          struct ib_udata *udata)
 {
        struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
        struct device *dev = hr_dev->dev;
-       struct hns_roce_ib_create_cq ucmd;
        struct hns_roce_ib_create_cq_resp resp = {};
        struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
-       struct hns_roce_uar *uar = NULL;
        int vector = attr->comp_vector;
        int cq_entries = attr->cqe;
        int ret;
-       struct hns_roce_ucontext *context = rdma_udata_to_drv_context(
-               udata, struct hns_roce_ucontext, ibucontext);
 
        if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) {
                dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n",
@@ -328,61 +432,21 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq,
        spin_lock_init(&hr_cq->lock);
 
        if (udata) {
-               if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
-                       dev_err(dev, "Failed to copy_from_udata.\n");
-                       ret = -EFAULT;
-                       goto err_cq;
-               }
-
-               /* Get user space address, write it into mtt table */
-               ret = hns_roce_ib_get_cq_umem(hr_dev, udata, &hr_cq->hr_buf,
-                                             &hr_cq->umem, ucmd.buf_addr,
-                                             cq_entries);
+               ret = create_user_cq(hr_dev, hr_cq, udata, &resp, cq_entries);
                if (ret) {
-                       dev_err(dev, "Failed to get_cq_umem.\n");
+                       dev_err(dev, "Create cq failed in user mode!\n");
                        goto err_cq;
                }
-
-               if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
-                   (udata->outlen >= sizeof(resp))) {
-                       ret = hns_roce_db_map_user(context, udata, ucmd.db_addr,
-                                                  &hr_cq->db);
-                       if (ret) {
-                               dev_err(dev, "cq record doorbell map failed!\n");
-                               goto err_mtt;
-                       }
-                       hr_cq->db_en = 1;
-                       resp.cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB;
-               }
-
-               /* Get user space parameters */
-               uar = &context->uar;
        } else {
-               if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
-                       ret = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1);
-                       if (ret)
-                               goto err_cq;
-
-                       hr_cq->set_ci_db = hr_cq->db.db_record;
-                       *hr_cq->set_ci_db = 0;
-                       hr_cq->db_en = 1;
-               }
-
-               /* Init mmt table and write buff address to mtt table */
-               ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf,
-                                              cq_entries);
+               ret = create_kernel_cq(hr_dev, hr_cq, cq_entries);
                if (ret) {
-                       dev_err(dev, "Failed to alloc_cq_buf.\n");
-                       goto err_db;
+                       dev_err(dev, "Create cq failed in kernel mode!\n");
+                       goto err_cq;
                }
-
-               uar = &hr_dev->priv_uar;
-               hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset +
-                               DB_REG_OFFSET * uar->index;
        }
 
        /* Allocate cq index, fill cq_context */
-       ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt, uar,
+       ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt,
                                hr_cq, vector);
        if (ret) {
                dev_err(dev, "Creat CQ .Failed to cq_alloc.\n");
@@ -416,20 +480,10 @@ err_cqc:
        hns_roce_free_cq(hr_dev, hr_cq);
 
 err_dbmap:
-       if (udata && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
-           (udata->outlen >= sizeof(resp)))
-               hns_roce_db_unmap_user(context, &hr_cq->db);
-
-err_mtt:
-       hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
-       ib_umem_release(hr_cq->umem);
-       if (!udata)
-               hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf,
-                                       hr_cq->ib_cq.cqe);
-
-err_db:
-       if (!udata && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB))
-               hns_roce_free_db(hr_dev, &hr_cq->db);
+       if (udata)
+               destroy_user_cq(hr_dev, hr_cq, udata, &resp);
+       else
+               destroy_kernel_cq(hr_dev, hr_cq);
 
 err_cq:
        return ret;
index a548b28..96d1302 100644 (file)
@@ -84,7 +84,6 @@
 #define HNS_ROCE_CEQ_ENTRY_SIZE                        0x4
 #define HNS_ROCE_AEQ_ENTRY_SIZE                        0x10
 
-/* 4G/4K = 1M */
 #define HNS_ROCE_SL_SHIFT                      28
 #define HNS_ROCE_TCLASS_SHIFT                  20
 #define HNS_ROCE_FLOW_LABEL_MASK               0xfffff
 #define HNS_ROCE_IDX_QUE_ENTRY_SZ              4
 #define SRQ_DB_REG                             0x230
 
+/* The chip implementation of the consumer index is calculated
+ * according to twice the actual EQ depth
+ */
+#define EQ_DEPTH_COEFF                         2
+
 enum {
        HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0,
        HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1,
@@ -322,7 +326,7 @@ struct hns_roce_hem_table {
        unsigned long   num_hem;
        /* HEM entry record obj total num */
        unsigned long   num_obj;
-       /*Single obj size */
+       /* Single obj size */
        unsigned long   obj_size;
        unsigned long   table_chunk_size;
        int             lowmem;
@@ -343,7 +347,7 @@ struct hns_roce_mtt {
 
 struct hns_roce_buf_region {
        int offset; /* page offset */
-       u32 count; /* page count*/
+       u32 count; /* page count */
        int hopnum; /* addressing hop num */
 };
 
@@ -384,25 +388,25 @@ struct hns_roce_mr {
        u64                     size; /* Address range of MR */
        u32                     key; /* Key of MR */
        u32                     pd;   /* PD num of MR */
-       u32                     access;/* Access permission of MR */
+       u32                     access; /* Access permission of MR */
        u32                     npages;
        int                     enabled; /* MR's active status */
        int                     type;   /* MR's register type */
-       u64                     *pbl_buf;/* MR's PBL space */
+       u64                     *pbl_buf;       /* MR's PBL space */
        dma_addr_t              pbl_dma_addr;   /* MR's PBL space PA */
-       u32                     pbl_size;/* PA number in the PBL */
-       u64                     pbl_ba;/* page table address */
-       u32                     l0_chunk_last_num;/* L0 last number */
-       u32                     l1_chunk_last_num;/* L1 last number */
-       u64                     **pbl_bt_l2;/* PBL BT L2 */
-       u64                     **pbl_bt_l1;/* PBL BT L1 */
-       u64                     *pbl_bt_l0;/* PBL BT L0 */
-       dma_addr_t              *pbl_l2_dma_addr;/* PBL BT L2 dma addr */
-       dma_addr_t              *pbl_l1_dma_addr;/* PBL BT L1 dma addr */
-       dma_addr_t              pbl_l0_dma_addr;/* PBL BT L0 dma addr */
-       u32                     pbl_ba_pg_sz;/* BT chunk page size */
-       u32                     pbl_buf_pg_sz;/* buf chunk page size */
-       u32                     pbl_hop_num;/* multi-hop number */
+       u32                     pbl_size;       /* PA number in the PBL */
+       u64                     pbl_ba;         /* page table address */
+       u32                     l0_chunk_last_num;      /* L0 last number */
+       u32                     l1_chunk_last_num;      /* L1 last number */
+       u64                     **pbl_bt_l2;    /* PBL BT L2 */
+       u64                     **pbl_bt_l1;    /* PBL BT L1 */
+       u64                     *pbl_bt_l0;     /* PBL BT L0 */
+       dma_addr_t              *pbl_l2_dma_addr;       /* PBL BT L2 dma addr */
+       dma_addr_t              *pbl_l1_dma_addr;       /* PBL BT L1 dma addr */
+       dma_addr_t              pbl_l0_dma_addr;        /* PBL BT L0 dma addr */
+       u32                     pbl_ba_pg_sz;   /* BT chunk page size */
+       u32                     pbl_buf_pg_sz;  /* buf chunk page size */
+       u32                     pbl_hop_num;    /* multi-hop number */
 };
 
 struct hns_roce_mr_table {
@@ -425,16 +429,16 @@ struct hns_roce_wq {
        u32             max_post;
        int             max_gs;
        int             offset;
-       int             wqe_shift;/* WQE size */
+       int             wqe_shift;      /* WQE size */
        u32             head;
        u32             tail;
        void __iomem    *db_reg_l;
 };
 
 struct hns_roce_sge {
-       int             sge_cnt;  /* SGE num */
+       int             sge_cnt;        /* SGE num */
        int             offset;
-       int             sge_shift;/* SGE size */
+       int             sge_shift;      /* SGE size */
 };
 
 struct hns_roce_buf_list {
@@ -569,14 +573,16 @@ struct hns_roce_raq_table {
 };
 
 struct hns_roce_av {
-       __le32      port_pd;
+       u8          port;
        u8          gid_index;
        u8          stat_rate;
        u8          hop_limit;
-       __le32      sl_tclass_flowlabel;
+       u32         flowlabel;
+       u8          sl;
+       u8          tclass;
        u8          dgid[HNS_ROCE_GID_SIZE];
        u8          mac[ETH_ALEN];
-       __le16      vlan;
+       u16         vlan;
        bool        vlan_en;
 };
 
@@ -618,7 +624,6 @@ struct hns_roce_cmdq {
         * close device, switch into poll mode(non event mode)
         */
        u8                      use_events;
-       u8                      toggle;
 };
 
 struct hns_roce_cmd_mailbox {
@@ -652,10 +657,8 @@ struct hns_roce_qp {
        u8                      rdb_en;
        u8                      sdb_en;
        u32                     doorbell_qpn;
-       __le32                  sq_signal_bits;
+       u32                     sq_signal_bits;
        u32                     sq_next_wqe;
-       int                     sq_max_wqes_per_wr;
-       int                     sq_spare_wqes;
        struct hns_roce_wq      sq;
 
        struct ib_umem          *umem;
@@ -709,7 +712,7 @@ enum {
 };
 
 struct hns_roce_ceqe {
-       u32                     comp;
+       __le32                  comp;
 };
 
 struct hns_roce_aeqe {
@@ -752,7 +755,7 @@ struct hns_roce_eq {
        struct hns_roce_dev             *hr_dev;
        void __iomem                    *doorbell;
 
-       int                             type_flag;/* Aeq:1 ceq:0 */
+       int                             type_flag; /* Aeq:1 ceq:0 */
        int                             eqn;
        u32                             entries;
        int                             log_entries;
@@ -798,22 +801,22 @@ struct hns_roce_caps {
        int             local_ca_ack_delay;
        int             num_uars;
        u32             phy_num_uars;
-       u32             max_sq_sg;      /* 2 */
-       u32             max_sq_inline;  /* 32 */
-       u32             max_rq_sg;      /* 2 */
+       u32             max_sq_sg;
+       u32             max_sq_inline;
+       u32             max_rq_sg;
        u32             max_extend_sg;
-       int             num_qps;        /* 256k */
+       int             num_qps;
        int             reserved_qps;
        int             num_qpc_timer;
        int             num_cqc_timer;
        u32             max_srq_sg;
        int             num_srqs;
-       u32             max_wqes;       /* 16k */
+       u32             max_wqes;
        u32             max_srqs;
        u32             max_srq_wrs;
        u32             max_srq_sges;
-       u32             max_sq_desc_sz; /* 64 */
-       u32             max_rq_desc_sz; /* 64 */
+       u32             max_sq_desc_sz;
+       u32             max_rq_desc_sz;
        u32             max_srq_desc_sz;
        int             max_qp_init_rdma;
        int             max_qp_dest_rdma;
@@ -824,7 +827,7 @@ struct hns_roce_caps {
        int             reserved_cqs;
        int             reserved_srqs;
        u32             max_srqwqes;
-       int             num_aeq_vectors;        /* 1 */
+       int             num_aeq_vectors;
        int             num_comp_vectors;
        int             num_other_vectors;
        int             num_mtpts;
@@ -905,7 +908,7 @@ struct hns_roce_caps {
        u32             sl_num;
        u32             tsq_buf_pg_sz;
        u32             tpq_buf_pg_sz;
-       u32             chunk_sz;       /* chunk size in non multihop mode*/
+       u32             chunk_sz;       /* chunk size in non multihop mode */
        u64             flags;
 };
 
@@ -991,16 +994,6 @@ struct hns_roce_hw {
        const struct ib_device_ops *hns_roce_dev_srq_ops;
 };
 
-enum hns_phy_state {
-       HNS_ROCE_PHY_SLEEP              = 1,
-       HNS_ROCE_PHY_POLLING            = 2,
-       HNS_ROCE_PHY_DISABLED           = 3,
-       HNS_ROCE_PHY_TRAINING           = 4,
-       HNS_ROCE_PHY_LINKUP             = 5,
-       HNS_ROCE_PHY_LINKERR            = 6,
-       HNS_ROCE_PHY_TEST               = 7
-};
-
 struct hns_roce_dev {
        struct ib_device        ib_dev;
        struct platform_device  *pdev;
@@ -1045,8 +1038,8 @@ struct hns_roce_dev {
        int                     loop_idc;
        u32                     sdb_offset;
        u32                     odb_offset;
-       dma_addr_t              tptr_dma_addr; /*only for hw v1*/
-       u32                     tptr_size; /*only for hw v1*/
+       dma_addr_t              tptr_dma_addr;  /* only for hw v1 */
+       u32                     tptr_size;      /* only for hw v1 */
        const struct hns_roce_hw *hw;
        void                    *priv;
        struct workqueue_struct *irq_workq;
index f4da5bd..e822157 100644 (file)
 
 bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
 {
-       if ((hr_dev->caps.qpc_hop_num && type == HEM_TYPE_QPC) ||
-           (hr_dev->caps.mpt_hop_num && type == HEM_TYPE_MTPT) ||
-           (hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) ||
-           (hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC) ||
-           (hr_dev->caps.sccc_hop_num && type == HEM_TYPE_SCCC) ||
-           (hr_dev->caps.qpc_timer_hop_num && type == HEM_TYPE_QPC_TIMER) ||
-           (hr_dev->caps.cqc_timer_hop_num && type == HEM_TYPE_CQC_TIMER) ||
-           (hr_dev->caps.cqe_hop_num && type == HEM_TYPE_CQE) ||
-           (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT) ||
-           (hr_dev->caps.srqwqe_hop_num && type == HEM_TYPE_SRQWQE) ||
-           (hr_dev->caps.idx_hop_num && type == HEM_TYPE_IDX))
-               return true;
-
-       return false;
+       int hop_num = 0;
+
+       switch (type) {
+       case HEM_TYPE_QPC:
+               hop_num = hr_dev->caps.qpc_hop_num;
+               break;
+       case HEM_TYPE_MTPT:
+               hop_num = hr_dev->caps.mpt_hop_num;
+               break;
+       case HEM_TYPE_CQC:
+               hop_num = hr_dev->caps.cqc_hop_num;
+               break;
+       case HEM_TYPE_SRQC:
+               hop_num = hr_dev->caps.srqc_hop_num;
+               break;
+       case HEM_TYPE_SCCC:
+               hop_num = hr_dev->caps.sccc_hop_num;
+               break;
+       case HEM_TYPE_QPC_TIMER:
+               hop_num = hr_dev->caps.qpc_timer_hop_num;
+               break;
+       case HEM_TYPE_CQC_TIMER:
+               hop_num = hr_dev->caps.cqc_timer_hop_num;
+               break;
+       case HEM_TYPE_CQE:
+               hop_num = hr_dev->caps.cqe_hop_num;
+               break;
+       case HEM_TYPE_MTT:
+               hop_num = hr_dev->caps.mtt_hop_num;
+               break;
+       case HEM_TYPE_SRQWQE:
+               hop_num = hr_dev->caps.srqwqe_hop_num;
+               break;
+       case HEM_TYPE_IDX:
+               hop_num = hr_dev->caps.idx_hop_num;
+               break;
+       default:
+               return false;
+       }
+
+       return hop_num ? true : false;
 }
 
 static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 start_idx,
-                           u32 bt_chunk_num)
+                           u32 bt_chunk_num, u64 hem_max_num)
 {
-       int i;
+       u64 check_max_num = start_idx + bt_chunk_num;
+       u64 i;
 
-       for (i = 0; i < bt_chunk_num; i++)
-               if (hem[start_idx + i])
+       for (i = start_idx; (i < check_max_num) && (i < hem_max_num); i++)
+               if (hem[i])
                        return false;
 
        return true;
@@ -92,17 +120,13 @@ static int hns_roce_get_bt_num(u32 table_type, u32 hop_num)
                return 0;
 }
 
-int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
-                          struct hns_roce_hem_table *table, unsigned long *obj,
-                          struct hns_roce_hem_mhop *mhop)
+static int get_hem_table_config(struct hns_roce_dev *hr_dev,
+                               struct hns_roce_hem_mhop *mhop,
+                               u32 type)
 {
        struct device *dev = hr_dev->dev;
-       u32 chunk_ba_num;
-       u32 table_idx;
-       u32 bt_num;
-       u32 chunk_size;
 
-       switch (table->type) {
+       switch (type) {
        case HEM_TYPE_QPC:
                mhop->buf_chunk_size = 1 << (hr_dev->caps.qpc_buf_pg_sz
                                             + PAGE_SHIFT);
@@ -193,10 +217,26 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
                break;
        default:
                dev_err(dev, "Table %d not support multi-hop addressing!\n",
-                        table->type);
+                       type);
                return -EINVAL;
        }
 
+       return 0;
+}
+
+int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
+                          struct hns_roce_hem_table *table, unsigned long *obj,
+                          struct hns_roce_hem_mhop *mhop)
+{
+       struct device *dev = hr_dev->dev;
+       u32 chunk_ba_num;
+       u32 table_idx;
+       u32 bt_num;
+       u32 chunk_size;
+
+       if (get_hem_table_config(hr_dev, mhop, table->type))
+               return -EINVAL;
+
        if (!obj)
                return 0;
 
@@ -324,13 +364,13 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
 {
        spinlock_t *lock = &hr_dev->bt_cmd_lock;
        struct device *dev = hr_dev->dev;
-       unsigned long end = 0;
+       long end;
        unsigned long flags;
        struct hns_roce_hem_iter iter;
        void __iomem *bt_cmd;
-       u32 bt_cmd_h_val = 0;
-       u32 bt_cmd_val[2];
-       u32 bt_cmd_l = 0;
+       __le32 bt_cmd_val[2];
+       __le32 bt_cmd_h = 0;
+       __le32 bt_cmd_l = 0;
        u64 bt_ba = 0;
        int ret = 0;
 
@@ -340,30 +380,20 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
 
        switch (table->type) {
        case HEM_TYPE_QPC:
-               roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
-                              ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_QPC);
-               break;
        case HEM_TYPE_MTPT:
-               roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
-                              ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S,
-                              HEM_TYPE_MTPT);
-               break;
        case HEM_TYPE_CQC:
-               roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
-                              ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_CQC);
-               break;
        case HEM_TYPE_SRQC:
-               roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
-                              ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S,
-                              HEM_TYPE_SRQC);
+               roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
+                       ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, table->type);
                break;
        default:
                return ret;
        }
-       roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M,
+
+       roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M,
                       ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj);
-       roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0);
-       roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1);
+       roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0);
+       roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1);
 
        /* Currently iter only a chunk */
        for (hns_roce_hem_first(table->hem[i], &iter);
@@ -375,7 +405,7 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
                bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;
 
                end = HW_SYNC_TIMEOUT_MSECS;
-               while (end) {
+               while (end > 0) {
                        if (!(readl(bt_cmd) >> BT_CMD_SYNC_SHIFT))
                                break;
 
@@ -389,13 +419,13 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
                        return -EBUSY;
                }
 
-               bt_cmd_l = (u32)bt_ba;
-               roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
+               bt_cmd_l = cpu_to_le32(bt_ba);
+               roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
                               ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S,
                               bt_ba >> BT_BA_SHIFT);
 
                bt_cmd_val[0] = bt_cmd_l;
-               bt_cmd_val[1] = bt_cmd_h_val;
+               bt_cmd_val[1] = bt_cmd_h;
                hns_roce_write64_k(bt_cmd_val,
                                   hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
                spin_unlock_irqrestore(lock, flags);
@@ -457,6 +487,12 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
                return -EINVAL;
        }
 
+       if (unlikely(hem_idx >= table->num_hem)) {
+               dev_err(dev, "Table %d exceed hem limt idx = %llu,max = %lu!\n",
+                            table->type, hem_idx, table->num_hem);
+               return -EINVAL;
+       }
+
        mutex_lock(&table->mutex);
 
        if (table->hem[hem_idx]) {
@@ -693,7 +729,7 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
        if (check_whether_bt_num_2(table->type, hop_num)) {
                start_idx = mhop.l0_idx * chunk_ba_num;
                if (hns_roce_check_hem_null(table->hem, start_idx,
-                                           chunk_ba_num)) {
+                                           chunk_ba_num, table->num_hem)) {
                        if (table->type < HEM_TYPE_MTT &&
                            hr_dev->hw->clear_hem(hr_dev, table, obj, 0))
                                dev_warn(dev, "Clear HEM base address failed.\n");
@@ -707,7 +743,7 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
                start_idx = mhop.l0_idx * chunk_ba_num * chunk_ba_num +
                            mhop.l1_idx * chunk_ba_num;
                if (hns_roce_check_hem_null(table->hem, start_idx,
-                                           chunk_ba_num)) {
+                                           chunk_ba_num, table->num_hem)) {
                        if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1))
                                dev_warn(dev, "Clear HEM base address failed.\n");
 
@@ -791,7 +827,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
        } else {
                u32 seg_size = 64; /* 8 bytes per BA and 8 BA per segment */
 
-               hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop);
+               if (hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop))
+                       goto out;
                /* mtt mhop */
                i = mhop.l0_idx;
                j = mhop.l1_idx;
@@ -840,11 +877,13 @@ int hns_roce_table_get_range(struct hns_roce_dev *hr_dev,
 {
        struct hns_roce_hem_mhop mhop;
        unsigned long inc = table->table_chunk_size / table->obj_size;
-       unsigned long i;
+       unsigned long i = 0;
        int ret;
 
        if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
-               hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop);
+               ret = hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop);
+               if (ret)
+                       goto fail;
                inc = mhop.bt_chunk_size / table->obj_size;
        }
 
@@ -874,7 +913,8 @@ void hns_roce_table_put_range(struct hns_roce_dev *hr_dev,
        unsigned long i;
 
        if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
-               hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop);
+               if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop))
+                       return;
                inc = mhop.bt_chunk_size / table->obj_size;
        }
 
@@ -887,7 +927,6 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
                            unsigned long obj_size, unsigned long nobj,
                            int use_lowmem)
 {
-       struct device *dev = hr_dev->dev;
        unsigned long obj_per_chunk;
        unsigned long num_hem;
 
@@ -900,99 +939,21 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
                if (!table->hem)
                        return -ENOMEM;
        } else {
+               struct hns_roce_hem_mhop mhop = {};
                unsigned long buf_chunk_size;
                unsigned long bt_chunk_size;
                unsigned long bt_chunk_num;
                unsigned long num_bt_l0 = 0;
                u32 hop_num;
 
-               switch (type) {
-               case HEM_TYPE_QPC:
-                       buf_chunk_size = 1 << (hr_dev->caps.qpc_buf_pg_sz
-                                       + PAGE_SHIFT);
-                       bt_chunk_size = 1 << (hr_dev->caps.qpc_ba_pg_sz
-                                       + PAGE_SHIFT);
-                       num_bt_l0 = hr_dev->caps.qpc_bt_num;
-                       hop_num = hr_dev->caps.qpc_hop_num;
-                       break;
-               case HEM_TYPE_MTPT:
-                       buf_chunk_size = 1 << (hr_dev->caps.mpt_buf_pg_sz
-                                       + PAGE_SHIFT);
-                       bt_chunk_size = 1 << (hr_dev->caps.mpt_ba_pg_sz
-                                       + PAGE_SHIFT);
-                       num_bt_l0 = hr_dev->caps.mpt_bt_num;
-                       hop_num = hr_dev->caps.mpt_hop_num;
-                       break;
-               case HEM_TYPE_CQC:
-                       buf_chunk_size = 1 << (hr_dev->caps.cqc_buf_pg_sz
-                                       + PAGE_SHIFT);
-                       bt_chunk_size = 1 << (hr_dev->caps.cqc_ba_pg_sz
-                                       + PAGE_SHIFT);
-                       num_bt_l0 = hr_dev->caps.cqc_bt_num;
-                       hop_num = hr_dev->caps.cqc_hop_num;
-                       break;
-               case HEM_TYPE_SCCC:
-                       buf_chunk_size = 1 << (hr_dev->caps.sccc_buf_pg_sz
-                                       + PAGE_SHIFT);
-                       bt_chunk_size = 1 << (hr_dev->caps.sccc_ba_pg_sz
-                                       + PAGE_SHIFT);
-                       num_bt_l0 = hr_dev->caps.sccc_bt_num;
-                       hop_num = hr_dev->caps.sccc_hop_num;
-                       break;
-               case HEM_TYPE_QPC_TIMER:
-                       buf_chunk_size = 1 << (hr_dev->caps.qpc_timer_buf_pg_sz
-                                       + PAGE_SHIFT);
-                       bt_chunk_size = 1 << (hr_dev->caps.qpc_timer_ba_pg_sz
-                                       + PAGE_SHIFT);
-                       num_bt_l0 = hr_dev->caps.qpc_timer_bt_num;
-                       hop_num = hr_dev->caps.qpc_timer_hop_num;
-                       break;
-               case HEM_TYPE_CQC_TIMER:
-                       buf_chunk_size = 1 << (hr_dev->caps.cqc_timer_buf_pg_sz
-                                       + PAGE_SHIFT);
-                       bt_chunk_size = 1 << (hr_dev->caps.cqc_timer_ba_pg_sz
-                                       + PAGE_SHIFT);
-                       num_bt_l0 = hr_dev->caps.cqc_timer_bt_num;
-                       hop_num = hr_dev->caps.cqc_timer_hop_num;
-                       break;
-               case HEM_TYPE_SRQC:
-                       buf_chunk_size = 1 << (hr_dev->caps.srqc_buf_pg_sz
-                                       + PAGE_SHIFT);
-                       bt_chunk_size = 1 << (hr_dev->caps.srqc_ba_pg_sz
-                                       + PAGE_SHIFT);
-                       num_bt_l0 = hr_dev->caps.srqc_bt_num;
-                       hop_num = hr_dev->caps.srqc_hop_num;
-                       break;
-               case HEM_TYPE_MTT:
-                       buf_chunk_size = 1 << (hr_dev->caps.mtt_ba_pg_sz
-                                       + PAGE_SHIFT);
-                       bt_chunk_size = buf_chunk_size;
-                       hop_num = hr_dev->caps.mtt_hop_num;
-                       break;
-               case HEM_TYPE_CQE:
-                       buf_chunk_size = 1 << (hr_dev->caps.cqe_ba_pg_sz
-                                       + PAGE_SHIFT);
-                       bt_chunk_size = buf_chunk_size;
-                       hop_num = hr_dev->caps.cqe_hop_num;
-                       break;
-               case HEM_TYPE_SRQWQE:
-                       buf_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz
-                                       + PAGE_SHIFT);
-                       bt_chunk_size = buf_chunk_size;
-                       hop_num = hr_dev->caps.srqwqe_hop_num;
-                       break;
-               case HEM_TYPE_IDX:
-                       buf_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz
-                                       + PAGE_SHIFT);
-                       bt_chunk_size = buf_chunk_size;
-                       hop_num = hr_dev->caps.idx_hop_num;
-                       break;
-               default:
-                       dev_err(dev,
-                         "Table %d not support to init hem table here!\n",
-                         type);
+               if (get_hem_table_config(hr_dev, &mhop, type))
                        return -EINVAL;
-               }
+
+               buf_chunk_size = mhop.buf_chunk_size;
+               bt_chunk_size = mhop.bt_chunk_size;
+               num_bt_l0 = mhop.ba_l0_num;
+               hop_num = mhop.hop_num;
+
                obj_per_chunk = buf_chunk_size / obj_size;
                num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk;
                bt_chunk_num = bt_chunk_size / BA_BYTE_LEN;
@@ -1075,7 +1036,8 @@ static void hns_roce_cleanup_mhop_hem_table(struct hns_roce_dev *hr_dev,
        int i;
        u64 obj;
 
-       hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop);
+       if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop))
+               return;
        buf_chunk_size = table->type < HEM_TYPE_MTT ? mhop.buf_chunk_size :
                                        mhop.bt_chunk_size;
 
index f1ccb8f..8678327 100644 (file)
@@ -102,9 +102,9 @@ struct hns_roce_hem_mhop {
        u32     buf_chunk_size;
        u32     bt_chunk_size;
        u32     ba_l0_num;
-       u32     l0_idx;/* level 0 base address table index */
-       u32     l1_idx;/* level 1 base address table index */
-       u32     l2_idx;/* level 2 base address table index */
+       u32     l0_idx; /* level 0 base address table index */
+       u32     l1_idx; /* level 1 base address table index */
+       u32     l2_idx; /* level 2 base address table index */
 };
 
 void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem);
index 141205e..5f74bf5 100644 (file)
@@ -73,7 +73,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
        int ps_opcode = 0, i = 0;
        unsigned long flags = 0;
        void *wqe = NULL;
-       u32 doorbell[2];
+       __le32 doorbell[2];
        int nreq = 0;
        u32 ind = 0;
        int ret = 0;
@@ -175,13 +175,11 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
                        roce_set_field(ud_sq_wqe->u32_36,
                                       UD_SEND_WQE_U32_36_FLOW_LABEL_M,
                                       UD_SEND_WQE_U32_36_FLOW_LABEL_S,
-                                      ah->av.sl_tclass_flowlabel &
-                                      HNS_ROCE_FLOW_LABEL_MASK);
+                                      ah->av.flowlabel);
                        roce_set_field(ud_sq_wqe->u32_36,
                                      UD_SEND_WQE_U32_36_PRIORITY_M,
                                      UD_SEND_WQE_U32_36_PRIORITY_S,
-                                     le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
-                                     HNS_ROCE_SL_SHIFT);
+                                     ah->av.sl);
                        roce_set_field(ud_sq_wqe->u32_36,
                                       UD_SEND_WQE_U32_36_SGID_INDEX_M,
                                       UD_SEND_WQE_U32_36_SGID_INDEX_S,
@@ -195,8 +193,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
                        roce_set_field(ud_sq_wqe->u32_40,
                                       UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M,
                                       UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S,
-                                      ah->av.sl_tclass_flowlabel >>
-                                      HNS_ROCE_TCLASS_SHIFT);
+                                      ah->av.tclass);
 
                        memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN);
 
@@ -335,10 +332,10 @@ out:
                               SQ_DOORBELL_U32_8_QPN_S, qp->doorbell_qpn);
                roce_set_bit(sq_db.u32_8, SQ_DOORBELL_HW_SYNC_S, 1);
 
-               doorbell[0] = le32_to_cpu(sq_db.u32_4);
-               doorbell[1] = le32_to_cpu(sq_db.u32_8);
+               doorbell[0] = sq_db.u32_4;
+               doorbell[1] = sq_db.u32_8;
 
-               hns_roce_write64_k((__le32 *)doorbell, qp->sq.db_reg_l);
+               hns_roce_write64_k(doorbell, qp->sq.db_reg_l);
                qp->sq_next_wqe = ind;
        }
 
@@ -363,7 +360,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
        struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
        struct device *dev = &hr_dev->pdev->dev;
        struct hns_roce_rq_db rq_db;
-       uint32_t doorbell[2] = {0};
+       __le32 doorbell[2] = {0};
 
        spin_lock_irqsave(&hr_qp->rq.lock, flags);
        ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1);
@@ -437,11 +434,10 @@ out:
                        roce_set_bit(rq_db.u32_8, RQ_DOORBELL_U32_8_HW_SYNC_S,
                                     1);
 
-                       doorbell[0] = le32_to_cpu(rq_db.u32_4);
-                       doorbell[1] = le32_to_cpu(rq_db.u32_8);
+                       doorbell[0] = rq_db.u32_4;
+                       doorbell[1] = rq_db.u32_8;
 
-                       hns_roce_write64_k((__le32 *)doorbell,
-                                          hr_qp->rq.db_reg_l);
+                       hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l);
                }
        }
        spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
@@ -715,7 +711,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
        struct ib_cq *cq;
        struct ib_pd *pd;
        union ib_gid dgid;
-       u64 subnet_prefix;
+       __be64 subnet_prefix;
        int attr_mask = 0;
        int ret;
        int i, j;
@@ -971,7 +967,7 @@ static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev)
        struct hns_roce_free_mr *free_mr;
        struct hns_roce_v1_priv *priv;
        struct completion comp;
-       unsigned long end = HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS;
+       long end = HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS;
 
        priv = (struct hns_roce_v1_priv *)hr_dev->priv;
        free_mr = &priv->free_mr;
@@ -991,7 +987,7 @@ static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev)
 
        queue_work(free_mr->free_mr_wq, &(lp_qp_work->work));
 
-       while (end) {
+       while (end > 0) {
                if (try_wait_for_completion(&comp))
                        return 0;
                msleep(HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE);
@@ -1109,7 +1105,7 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev,
        struct hns_roce_free_mr *free_mr;
        struct hns_roce_v1_priv *priv;
        struct completion comp;
-       unsigned long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS;
+       long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS;
        unsigned long start = jiffies;
        int npages;
        int ret = 0;
@@ -1139,7 +1135,7 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev,
 
        queue_work(free_mr->free_mr_wq, &(mr_work->work));
 
-       while (end) {
+       while (end > 0) {
                if (try_wait_for_completion(&comp))
                        goto free_mr;
                msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE);
@@ -2165,7 +2161,7 @@ static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq,
 {
        struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
        u32 notification_flag;
-       __le32 doorbell[2];
+       __le32 doorbell[2] = {};
 
        notification_flag = (flags & IB_CQ_SOLICITED_MASK) ==
                            IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL;
@@ -2430,7 +2426,8 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
 {
        struct device *dev = &hr_dev->pdev->dev;
        struct hns_roce_v1_priv *priv;
-       unsigned long end = 0, flags = 0;
+       unsigned long flags = 0;
+       long end = HW_SYNC_TIMEOUT_MSECS;
        __le32 bt_cmd_val[2] = {0};
        void __iomem *bt_cmd;
        u64 bt_ba = 0;
@@ -2439,18 +2436,12 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
 
        switch (table->type) {
        case HEM_TYPE_QPC:
-               roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
-                       ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_QPC);
                bt_ba = priv->bt_table.qpc_buf.map >> 12;
                break;
        case HEM_TYPE_MTPT:
-               roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
-                       ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_MTPT);
                bt_ba = priv->bt_table.mtpt_buf.map >> 12;
                break;
        case HEM_TYPE_CQC:
-               roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
-                       ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_CQC);
                bt_ba = priv->bt_table.cqc_buf.map >> 12;
                break;
        case HEM_TYPE_SRQC:
@@ -2459,6 +2450,8 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
        default:
                return 0;
        }
+       roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
+                       ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, table->type);
        roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M,
                ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj);
        roce_set_bit(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0);
@@ -2468,7 +2461,6 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
 
        bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;
 
-       end = HW_SYNC_TIMEOUT_MSECS;
        while (1) {
                if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) {
                        if (!end) {
@@ -2484,7 +2476,7 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
                end -= HW_SYNC_SLEEP_TIME_INTERVAL;
        }
 
-       bt_cmd_val[0] = (__le32)bt_ba;
+       bt_cmd_val[0] = cpu_to_le32(bt_ba);
        roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
                ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> 32);
        hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
@@ -2627,7 +2619,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
                               QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port);
                roce_set_bit(context->qp1c_bytes_16,
                             QP1C_BYTES_16_SIGNALING_TYPE_S,
-                            le32_to_cpu(hr_qp->sq_signal_bits));
+                            hr_qp->sq_signal_bits);
                roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S,
                             1);
                roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S,
@@ -2933,7 +2925,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
                             1);
                roce_set_bit(context->qpc_bytes_32,
                             QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S,
-                            le32_to_cpu(hr_qp->sq_signal_bits));
+                            hr_qp->sq_signal_bits);
 
                port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) :
                        hr_qp->port;
@@ -3578,7 +3570,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
        qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148,
                             QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M,
                             QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S);
-       qp_attr->rnr_retry = (u8)context->rnr_retry;
+       qp_attr->rnr_retry = (u8)le32_to_cpu(context->rnr_retry);
 
 done:
        qp_attr->cur_qp_state = qp_attr->qp_state;
@@ -4021,7 +4013,8 @@ static int hns_roce_v1_ceq_int(struct hns_roce_dev *hr_dev,
                ++eq->cons_index;
                ceqes_found = 1;
 
-               if (eq->cons_index > 2 * hr_dev->caps.ceqe_depth - 1) {
+               if (eq->cons_index >
+                   EQ_DEPTH_COEFF * hr_dev->caps.ceqe_depth - 1) {
                        dev_warn(&eq->hr_dev->pdev->dev,
                                "cons_index overflow, set back to 0.\n");
                        eq->cons_index = 0;
@@ -4518,7 +4511,6 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
        struct platform_device *pdev = NULL;
        struct net_device *netdev = NULL;
        struct device_node *net_node;
-       struct resource *res;
        int port_cnt = 0;
        u8 phy_port;
        int ret;
@@ -4557,8 +4549,7 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
        }
 
        /* get the mapped register base address */
-       res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0);
-       hr_dev->reg_base = devm_ioremap_resource(dev, res);
+       hr_dev->reg_base = devm_platform_ioremap_resource(hr_dev->pdev, 0);
        if (IS_ERR(hr_dev->reg_base))
                return PTR_ERR(hr_dev->reg_base);
 
@@ -4633,10 +4624,8 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
        /* fetch the interrupt numbers */
        for (i = 0; i < HNS_ROCE_V1_MAX_IRQ_NUM; i++) {
                hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i);
-               if (hr_dev->irq[i] <= 0) {
-                       dev_err(dev, "platform get of irq[=%d] failed!\n", i);
+               if (hr_dev->irq[i] <= 0)
                        return -EINVAL;
-               }
        }
 
        return 0;
index b76e3be..7a89d66 100644 (file)
@@ -239,7 +239,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
        struct device *dev = hr_dev->dev;
        struct hns_roce_v2_db sq_db;
        struct ib_qp_attr attr;
-       unsigned int sge_ind = 0;
+       unsigned int sge_ind;
        unsigned int owner_bit;
        unsigned long flags;
        unsigned int ind;
@@ -397,18 +397,15 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
                        roce_set_field(ud_sq_wqe->byte_36,
                                       V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
                                       V2_UD_SEND_WQE_BYTE_36_TCLASS_S,
-                                      ah->av.sl_tclass_flowlabel >>
-                                      HNS_ROCE_TCLASS_SHIFT);
+                                      ah->av.tclass);
                        roce_set_field(ud_sq_wqe->byte_40,
                                       V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M,
                                       V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S,
-                                      ah->av.sl_tclass_flowlabel &
-                                      HNS_ROCE_FLOW_LABEL_MASK);
+                                      ah->av.flowlabel);
                        roce_set_field(ud_sq_wqe->byte_40,
                                       V2_UD_SEND_WQE_BYTE_40_SL_M,
                                       V2_UD_SEND_WQE_BYTE_40_SL_S,
-                                     le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
-                                     HNS_ROCE_SL_SHIFT);
+                                      ah->av.sl);
                        roce_set_field(ud_sq_wqe->byte_40,
                                       V2_UD_SEND_WQE_BYTE_40_PORTN_M,
                                       V2_UD_SEND_WQE_BYTE_40_PORTN_S,
@@ -887,8 +884,7 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type)
                roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_H_REG,
                           upper_32_bits(dma));
                roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG,
-                         (ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S) |
-                          HNS_ROCE_CMQ_ENABLE);
+                          ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S);
                roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, 0);
                roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, 0);
        } else {
@@ -896,8 +892,7 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type)
                roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_H_REG,
                           upper_32_bits(dma));
                roce_write(hr_dev, ROCEE_RX_CMQ_DEPTH_REG,
-                         (ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S) |
-                          HNS_ROCE_CMQ_ENABLE);
+                          ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S);
                roce_write(hr_dev, ROCEE_RX_CMQ_HEAD_REG, 0);
                roce_write(hr_dev, ROCEE_RX_CMQ_TAIL_REG, 0);
        }
@@ -1044,7 +1039,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
         * If the command is sync, wait for the firmware to write back,
         * if multi descriptors to be sent, use the first one to check
         */
-       if ((desc->flag) & HNS_ROCE_CMD_FLAG_NO_INTR) {
+       if (le16_to_cpu(desc->flag) & HNS_ROCE_CMD_FLAG_NO_INTR) {
                do {
                        if (hns_roce_cmq_csq_done(hr_dev))
                                break;
@@ -1061,7 +1056,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
                        desc_to_use = &csq->desc[ntc];
                        desc[handle] = *desc_to_use;
                        dev_dbg(hr_dev->dev, "Get cmq desc:\n");
-                       desc_ret = desc[handle].retval;
+                       desc_ret = le16_to_cpu(desc[handle].retval);
                        if (desc_ret == CMD_EXEC_SUCCESS)
                                ret = 0;
                        else
@@ -1124,32 +1119,124 @@ static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev)
                return ret;
 
        resp = (struct hns_roce_query_version *)desc.data;
-       hr_dev->hw_rev = le32_to_cpu(resp->rocee_hw_version);
+       hr_dev->hw_rev = le16_to_cpu(resp->rocee_hw_version);
        hr_dev->vendor_id = hr_dev->pci_dev->vendor;
 
        return 0;
 }
 
+static bool hns_roce_func_clr_chk_rst(struct hns_roce_dev *hr_dev)
+{
+       struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
+       struct hnae3_handle *handle = priv->handle;
+       const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+       unsigned long reset_cnt;
+       bool sw_resetting;
+       bool hw_resetting;
+
+       reset_cnt = ops->ae_dev_reset_cnt(handle);
+       hw_resetting = ops->get_hw_reset_stat(handle);
+       sw_resetting = ops->ae_dev_resetting(handle);
+
+       if (reset_cnt != hr_dev->reset_cnt || hw_resetting || sw_resetting)
+               return true;
+
+       return false;
+}
+
+static void hns_roce_func_clr_rst_prc(struct hns_roce_dev *hr_dev, int retval,
+                                     int flag)
+{
+       struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
+       struct hnae3_handle *handle = priv->handle;
+       const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+       unsigned long instance_stage;
+       unsigned long reset_cnt;
+       unsigned long end;
+       bool sw_resetting;
+       bool hw_resetting;
+
+       instance_stage = handle->rinfo.instance_state;
+       reset_cnt = ops->ae_dev_reset_cnt(handle);
+       hw_resetting = ops->get_hw_reset_stat(handle);
+       sw_resetting = ops->ae_dev_resetting(handle);
+
+       if (reset_cnt != hr_dev->reset_cnt) {
+               hr_dev->dis_db = true;
+               hr_dev->is_reset = true;
+               dev_info(hr_dev->dev, "Func clear success after reset.\n");
+       } else if (hw_resetting) {
+               hr_dev->dis_db = true;
+
+               dev_warn(hr_dev->dev,
+                        "Func clear is pending, device in resetting state.\n");
+               end = HNS_ROCE_V2_HW_RST_TIMEOUT;
+               while (end) {
+                       if (!ops->get_hw_reset_stat(handle)) {
+                               hr_dev->is_reset = true;
+                               dev_info(hr_dev->dev,
+                                        "Func clear success after reset.\n");
+                               return;
+                       }
+                       msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT);
+                       end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT;
+               }
+
+               dev_warn(hr_dev->dev, "Func clear failed.\n");
+       } else if (sw_resetting && instance_stage == HNS_ROCE_STATE_INIT) {
+               hr_dev->dis_db = true;
+
+               dev_warn(hr_dev->dev,
+                        "Func clear is pending, device in resetting state.\n");
+               end = HNS_ROCE_V2_HW_RST_TIMEOUT;
+               while (end) {
+                       if (ops->ae_dev_reset_cnt(handle) !=
+                           hr_dev->reset_cnt) {
+                               hr_dev->is_reset = true;
+                               dev_info(hr_dev->dev,
+                                        "Func clear success after sw reset\n");
+                               return;
+                       }
+                       msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT);
+                       end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT;
+               }
+
+               dev_warn(hr_dev->dev, "Func clear failed because of unfinished sw reset\n");
+       } else {
+               if (retval && !flag)
+                       dev_warn(hr_dev->dev,
+                                "Func clear read failed, ret = %d.\n", retval);
+
+               dev_warn(hr_dev->dev, "Func clear failed.\n");
+       }
+}
 static void hns_roce_function_clear(struct hns_roce_dev *hr_dev)
 {
+       bool fclr_write_fail_flag = false;
        struct hns_roce_func_clear *resp;
        struct hns_roce_cmq_desc desc;
        unsigned long end;
-       int ret;
+       int ret = 0;
+
+       if (hns_roce_func_clr_chk_rst(hr_dev))
+               goto out;
 
        hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_FUNC_CLEAR, false);
        resp = (struct hns_roce_func_clear *)desc.data;
 
        ret = hns_roce_cmq_send(hr_dev, &desc, 1);
        if (ret) {
+               fclr_write_fail_flag = true;
                dev_err(hr_dev->dev, "Func clear write failed, ret = %d.\n",
                         ret);
-               return;
+               goto out;
        }
 
        msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL);
        end = HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS;
        while (end) {
+               if (hns_roce_func_clr_chk_rst(hr_dev))
+                       goto out;
                msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT);
                end -= HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT;
 
@@ -1166,7 +1253,9 @@ static void hns_roce_function_clear(struct hns_roce_dev *hr_dev)
                }
        }
 
+out:
        dev_err(hr_dev->dev, "Func clear fail.\n");
+       hns_roce_func_clr_rst_prc(hr_dev, ret, fclr_write_fail_flag);
 }
 
 static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev)
@@ -1298,7 +1387,7 @@ static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
 
        swt = (struct hns_roce_vf_switch *)desc.data;
        hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true);
-       swt->rocee_sel |= cpu_to_le16(HNS_ICL_SWITCH_CMD_ROCEE_SEL);
+       swt->rocee_sel |= cpu_to_le32(HNS_ICL_SWITCH_CMD_ROCEE_SEL);
        roce_set_field(swt->fun_id,
                        VF_SWITCH_DATA_FUN_ID_VF_ID_M,
                        VF_SWITCH_DATA_FUN_ID_VF_ID_S,
@@ -1310,7 +1399,7 @@ static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
                cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN);
        desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR);
        roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1);
-       roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 1);
+       roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 0);
        roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1);
 
        return hns_roce_cmq_send(hr_dev, &desc, 1);
@@ -1724,9 +1813,10 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev,
                        desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
 
                if (i == 0) {
-                       req_a->base_addr_l = link_tbl->table.map & 0xffffffff;
-                       req_a->base_addr_h = (link_tbl->table.map >> 32) &
-                                            0xffffffff;
+                       req_a->base_addr_l =
+                               cpu_to_le32(link_tbl->table.map & 0xffffffff);
+                       req_a->base_addr_h =
+                               cpu_to_le32(link_tbl->table.map >> 32);
                        roce_set_field(req_a->depth_pgsz_init_en,
                                       CFG_LLM_QUE_DEPTH_M,
                                       CFG_LLM_QUE_DEPTH_S,
@@ -1735,13 +1825,15 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev,
                                       CFG_LLM_QUE_PGSZ_M,
                                       CFG_LLM_QUE_PGSZ_S,
                                       link_tbl->pg_sz);
-                       req_a->head_ba_l = entry[0].blk_ba0;
-                       req_a->head_ba_h_nxtptr = entry[0].blk_ba1_nxt_ptr;
+                       req_a->head_ba_l = cpu_to_le32(entry[0].blk_ba0);
+                       req_a->head_ba_h_nxtptr =
+                               cpu_to_le32(entry[0].blk_ba1_nxt_ptr);
                        roce_set_field(req_a->head_ptr,
                                       CFG_LLM_HEAD_PTR_M,
                                       CFG_LLM_HEAD_PTR_S, 0);
                } else {
-                       req_b->tail_ba_l = entry[page_num - 1].blk_ba0;
+                       req_b->tail_ba_l =
+                               cpu_to_le32(entry[page_num - 1].blk_ba0);
                        roce_set_field(req_b->tail_ba_h,
                                       CFG_LLM_TAIL_BA_H_M,
                                       CFG_LLM_TAIL_BA_H_S,
@@ -1817,17 +1909,13 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev,
 
                link_tbl->pg_list[i].map = t;
 
-               entry[i].blk_ba0 = (t >> 12) & 0xffffffff;
-               roce_set_field(entry[i].blk_ba1_nxt_ptr,
-                              HNS_ROCE_LINK_TABLE_BA1_M,
-                              HNS_ROCE_LINK_TABLE_BA1_S,
-                              t >> 44);
+               entry[i].blk_ba0 = (u32)(t >> 12);
+               entry[i].blk_ba1_nxt_ptr = (u32)(t >> 44);
 
                if (i < (pg_num - 1))
-                       roce_set_field(entry[i].blk_ba1_nxt_ptr,
-                                      HNS_ROCE_LINK_TABLE_NXT_PTR_M,
-                                      HNS_ROCE_LINK_TABLE_NXT_PTR_S,
-                                      i + 1);
+                       entry[i].blk_ba1_nxt_ptr |=
+                               (i + 1) << HNS_ROCE_LINK_TABLE_NXT_PTR_S;
+
        }
        link_tbl->npages = pg_num;
        link_tbl->pg_sz = buf_chk_sz;
@@ -1888,7 +1976,7 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
                goto err_tpq_init_failed;
        }
 
-       /* Alloc memory for QPC Timer buffer space chunk*/
+       /* Alloc memory for QPC Timer buffer space chunk */
        for (qpc_count = 0; qpc_count < hr_dev->caps.qpc_timer_bt_num;
             qpc_count++) {
                ret = hns_roce_table_get(hr_dev, &hr_dev->qpc_timer_table,
@@ -1899,7 +1987,7 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
                }
        }
 
-       /* Alloc memory for CQC Timer buffer space chunk*/
+       /* Alloc memory for CQC Timer buffer space chunk */
        for (cqc_count = 0; cqc_count < hr_dev->caps.cqc_timer_bt_num;
             cqc_count++) {
                ret = hns_roce_table_get(hr_dev, &hr_dev->cqc_timer_table,
@@ -1952,7 +2040,7 @@ static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev)
        if (status)
                return status;
 
-       return cpu_to_le32(mb_st->mb_status_hw_run);
+       return le32_to_cpu(mb_st->mb_status_hw_run);
 }
 
 static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev)
@@ -1978,10 +2066,10 @@ static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, u64 in_param,
 
        hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false);
 
-       mb->in_param_l = cpu_to_le64(in_param);
-       mb->in_param_h = cpu_to_le64(in_param) >> 32;
-       mb->out_param_l = cpu_to_le64(out_param);
-       mb->out_param_h = cpu_to_le64(out_param) >> 32;
+       mb->in_param_l = cpu_to_le32(in_param);
+       mb->in_param_h = cpu_to_le32(in_param >> 32);
+       mb->out_param_l = cpu_to_le32(out_param);
+       mb->out_param_h = cpu_to_le32(out_param >> 32);
        mb->cmd_tag = cpu_to_le32(in_modifier << 8 | op);
        mb->token_event_en = cpu_to_le32(event << 16 | token);
 
@@ -2123,7 +2211,7 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
        roce_set_field(smac_tb->vf_smac_h_rsv,
                       CFG_SMAC_TB_VF_SMAC_H_M,
                       CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h);
-       smac_tb->vf_smac_l = reg_smac_l;
+       smac_tb->vf_smac_l = cpu_to_le32(reg_smac_l);
 
        return hns_roce_cmq_send(hr_dev, &desc, 1);
 }
@@ -2409,7 +2497,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
 
        for (prod_index = hr_cq->cons_index; get_sw_cqe_v2(hr_cq, prod_index);
             ++prod_index) {
-               if (prod_index == hr_cq->cons_index + hr_cq->ib_cq.cqe)
+               if (prod_index > hr_cq->cons_index + hr_cq->ib_cq.cqe)
                        break;
        }
 
@@ -2478,29 +2566,26 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
                       V2_CQC_BYTE_4_SHIFT_S, ilog2((unsigned int)nent));
        roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CEQN_M,
                       V2_CQC_BYTE_4_CEQN_S, vector);
-       cq_context->byte_4_pg_ceqn = cpu_to_le32(cq_context->byte_4_pg_ceqn);
 
        roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M,
                       V2_CQC_BYTE_8_CQN_S, hr_cq->cqn);
 
-       cq_context->cqe_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
-       cq_context->cqe_cur_blk_addr =
-                               cpu_to_le32(cq_context->cqe_cur_blk_addr);
+       cq_context->cqe_cur_blk_addr = cpu_to_le32(mtts[0] >> PAGE_ADDR_SHIFT);
 
        roce_set_field(cq_context->byte_16_hop_addr,
                       V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M,
                       V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S,
-                      cpu_to_le32((mtts[0]) >> (32 + PAGE_ADDR_SHIFT)));
+                      mtts[0] >> (32 + PAGE_ADDR_SHIFT));
        roce_set_field(cq_context->byte_16_hop_addr,
                       V2_CQC_BYTE_16_CQE_HOP_NUM_M,
                       V2_CQC_BYTE_16_CQE_HOP_NUM_S, hr_dev->caps.cqe_hop_num ==
                       HNS_ROCE_HOP_NUM_0 ? 0 : hr_dev->caps.cqe_hop_num);
 
-       cq_context->cqe_nxt_blk_addr = (u32)(mtts[1] >> PAGE_ADDR_SHIFT);
+       cq_context->cqe_nxt_blk_addr = cpu_to_le32(mtts[1] >> PAGE_ADDR_SHIFT);
        roce_set_field(cq_context->byte_24_pgsz_addr,
                       V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_M,
                       V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_S,
-                      cpu_to_le32((mtts[1]) >> (32 + PAGE_ADDR_SHIFT)));
+                      mtts[1] >> (32 + PAGE_ADDR_SHIFT));
        roce_set_field(cq_context->byte_24_pgsz_addr,
                       V2_CQC_BYTE_24_CQE_BA_PG_SZ_M,
                       V2_CQC_BYTE_24_CQE_BA_PG_SZ_S,
@@ -2510,7 +2595,7 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
                       V2_CQC_BYTE_24_CQE_BUF_PG_SZ_S,
                       hr_dev->caps.cqe_buf_pg_sz + PG_SHIFT_OFFSET);
 
-       cq_context->cqe_ba = (u32)(dma_handle >> 3);
+       cq_context->cqe_ba = cpu_to_le32(dma_handle >> 3);
 
        roce_set_field(cq_context->byte_40_cqe_ba, V2_CQC_BYTE_40_CQE_BA_M,
                       V2_CQC_BYTE_40_CQE_BA_S, (dma_handle >> (32 + 3)));
@@ -2523,7 +2608,7 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
                       V2_CQC_BYTE_44_DB_RECORD_ADDR_M,
                       V2_CQC_BYTE_44_DB_RECORD_ADDR_S,
                       ((u32)hr_cq->db.dma) >> 1);
-       cq_context->db_record_addr = hr_cq->db.dma >> 32;
+       cq_context->db_record_addr = cpu_to_le32(hr_cq->db.dma >> 32);
 
        roce_set_field(cq_context->byte_56_cqe_period_maxcnt,
                       V2_CQC_BYTE_56_CQ_MAX_CNT_M,
@@ -2541,7 +2626,7 @@ static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
        struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
        struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
        u32 notification_flag;
-       u32 doorbell[2];
+       __le32 doorbell[2];
 
        doorbell[0] = 0;
        doorbell[1] = 0;
@@ -2668,9 +2753,9 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
                ++wq->tail;
        } else if ((*cur_qp)->ibqp.srq) {
                srq = to_hr_srq((*cur_qp)->ibqp.srq);
-               wqe_ctr = le16_to_cpu(roce_get_field(cqe->byte_4,
-                                                    V2_CQE_BYTE_4_WQE_INDX_M,
-                                                    V2_CQE_BYTE_4_WQE_INDX_S));
+               wqe_ctr = (u16)roce_get_field(cqe->byte_4,
+                                             V2_CQE_BYTE_4_WQE_INDX_M,
+                                             V2_CQE_BYTE_4_WQE_INDX_S);
                wc->wr_id = srq->wrid[wqe_ctr];
                hns_roce_free_srq_wqe(srq, wqe_ctr);
        } else {
@@ -2862,15 +2947,16 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
                wc->smac[5] = roce_get_field(cqe->byte_28,
                                             V2_CQE_BYTE_28_SMAC_5_M,
                                             V2_CQE_BYTE_28_SMAC_5_S);
+               wc->wc_flags |= IB_WC_WITH_SMAC;
                if (roce_get_bit(cqe->byte_28, V2_CQE_BYTE_28_VID_VLD_S)) {
                        wc->vlan_id = (u16)roce_get_field(cqe->byte_28,
                                                          V2_CQE_BYTE_28_VID_M,
                                                          V2_CQE_BYTE_28_VID_S);
+                       wc->wc_flags |= IB_WC_WITH_VLAN;
                } else {
                        wc->vlan_id = 0xffff;
                }
 
-               wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
                wc->network_hdr_type = roce_get_field(cqe->byte_28,
                                                    V2_CQE_BYTE_28_PORT_TYPE_M,
                                                    V2_CQE_BYTE_28_PORT_TYPE_S);
@@ -2905,11 +2991,49 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
        return npolled;
 }
 
+static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type,
+                             int step_idx)
+{
+       int op;
+
+       if (type == HEM_TYPE_SCCC && step_idx)
+               return -EINVAL;
+
+       switch (type) {
+       case HEM_TYPE_QPC:
+               op = HNS_ROCE_CMD_WRITE_QPC_BT0;
+               break;
+       case HEM_TYPE_MTPT:
+               op = HNS_ROCE_CMD_WRITE_MPT_BT0;
+               break;
+       case HEM_TYPE_CQC:
+               op = HNS_ROCE_CMD_WRITE_CQC_BT0;
+               break;
+       case HEM_TYPE_SRQC:
+               op = HNS_ROCE_CMD_WRITE_SRQC_BT0;
+               break;
+       case HEM_TYPE_SCCC:
+               op = HNS_ROCE_CMD_WRITE_SCCC_BT0;
+               break;
+       case HEM_TYPE_QPC_TIMER:
+               op = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0;
+               break;
+       case HEM_TYPE_CQC_TIMER:
+               op = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0;
+               break;
+       default:
+               dev_warn(hr_dev->dev,
+                        "Table %d not to be written by mailbox!\n", type);
+               return -EINVAL;
+       }
+
+       return op + step_idx;
+}
+
 static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
                               struct hns_roce_hem_table *table, int obj,
                               int step_idx)
 {
-       struct device *dev = hr_dev->dev;
        struct hns_roce_cmd_mailbox *mailbox;
        struct hns_roce_hem_iter iter;
        struct hns_roce_hem_mhop mhop;
@@ -2922,7 +3046,7 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
        u64 bt_ba = 0;
        u32 chunk_ba_num;
        u32 hop_num;
-       u16 op = 0xff;
+       int op;
 
        if (!hns_roce_check_whether_mhop(hr_dev, table->type))
                return 0;
@@ -2944,39 +3068,10 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
                hem_idx = i;
        }
 
-       switch (table->type) {
-       case HEM_TYPE_QPC:
-               op = HNS_ROCE_CMD_WRITE_QPC_BT0;
-               break;
-       case HEM_TYPE_MTPT:
-               op = HNS_ROCE_CMD_WRITE_MPT_BT0;
-               break;
-       case HEM_TYPE_CQC:
-               op = HNS_ROCE_CMD_WRITE_CQC_BT0;
-               break;
-       case HEM_TYPE_SRQC:
-               op = HNS_ROCE_CMD_WRITE_SRQC_BT0;
-               break;
-       case HEM_TYPE_SCCC:
-               op = HNS_ROCE_CMD_WRITE_SCCC_BT0;
-               break;
-       case HEM_TYPE_QPC_TIMER:
-               op = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0;
-               break;
-       case HEM_TYPE_CQC_TIMER:
-               op = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0;
-               break;
-       default:
-               dev_warn(dev, "Table %d not to be written by mailbox!\n",
-                        table->type);
-               return 0;
-       }
-
-       if (table->type == HEM_TYPE_SCCC && step_idx)
+       op = get_op_for_set_hem(hr_dev, table->type, step_idx);
+       if (op == -EINVAL)
                return 0;
 
-       op += step_idx;
-
        mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
        if (IS_ERR(mailbox))
                return PTR_ERR(mailbox);
@@ -3118,6 +3213,43 @@ static void set_access_flags(struct hns_roce_qp *hr_qp,
        roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
 }
 
+static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
+                           struct hns_roce_v2_qp_context *context,
+                           struct hns_roce_v2_qp_context *qpc_mask)
+{
+       if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
+               roce_set_field(context->byte_4_sqpn_tst,
+                              V2_QPC_BYTE_4_SGE_SHIFT_M,
+                              V2_QPC_BYTE_4_SGE_SHIFT_S,
+                              ilog2((unsigned int)hr_qp->sge.sge_cnt));
+       else
+               roce_set_field(context->byte_4_sqpn_tst,
+                              V2_QPC_BYTE_4_SGE_SHIFT_M,
+                              V2_QPC_BYTE_4_SGE_SHIFT_S,
+                              hr_qp->sq.max_gs >
+                              HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ?
+                              ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
+
+       roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
+                      V2_QPC_BYTE_4_SGE_SHIFT_S, 0);
+
+       roce_set_field(context->byte_20_smac_sgid_idx,
+                      V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S,
+                      ilog2((unsigned int)hr_qp->sq.wqe_cnt));
+       roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
+                      V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0);
+
+       roce_set_field(context->byte_20_smac_sgid_idx,
+                      V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
+                      (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+                      hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT ||
+                      hr_qp->ibqp.srq) ? 0 :
+                      ilog2((unsigned int)hr_qp->rq.wqe_cnt));
+
+       roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
+                      V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
+}
+
 static void modify_qp_reset_to_init(struct ib_qp *ibqp,
                                    const struct ib_qp_attr *attr,
                                    int attr_mask,
@@ -3138,21 +3270,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
        roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
                       V2_QPC_BYTE_4_TST_S, 0);
 
-       if (ibqp->qp_type == IB_QPT_GSI)
-               roce_set_field(context->byte_4_sqpn_tst,
-                              V2_QPC_BYTE_4_SGE_SHIFT_M,
-                              V2_QPC_BYTE_4_SGE_SHIFT_S,
-                              ilog2((unsigned int)hr_qp->sge.sge_cnt));
-       else
-               roce_set_field(context->byte_4_sqpn_tst,
-                              V2_QPC_BYTE_4_SGE_SHIFT_M,
-                              V2_QPC_BYTE_4_SGE_SHIFT_S,
-                              hr_qp->sq.max_gs > 2 ?
-                              ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
-
-       roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
-                      V2_QPC_BYTE_4_SGE_SHIFT_S, 0);
-
        roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
                       V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn);
        roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
@@ -3168,19 +3285,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
        roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQWS_M,
                       V2_QPC_BYTE_20_RQWS_S, 0);
 
-       roce_set_field(context->byte_20_smac_sgid_idx,
-                      V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S,
-                      ilog2((unsigned int)hr_qp->sq.wqe_cnt));
-       roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
-                      V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0);
-
-       roce_set_field(context->byte_20_smac_sgid_idx,
-                      V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
-                      (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
-                      hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
-                      ilog2((unsigned int)hr_qp->rq.wqe_cnt));
-       roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
-                      V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
+       set_qpc_wqe_cnt(hr_qp, context, qpc_mask);
 
        /* No VLAN need to set 0xFFF */
        roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
@@ -3225,7 +3330,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
        roce_set_field(qpc_mask->byte_68_rq_db,
                       V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_M,
                       V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_S, 0);
-       context->rq_db_record_addr = hr_qp->rdb.dma >> 32;
+       context->rq_db_record_addr = cpu_to_le32(hr_qp->rdb.dma >> 32);
        qpc_mask->rq_db_record_addr = 0;
 
        roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S,
@@ -3456,22 +3561,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
        roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
                       V2_QPC_BYTE_4_TST_S, 0);
 
-       if (ibqp->qp_type == IB_QPT_GSI)
-               roce_set_field(context->byte_4_sqpn_tst,
-                              V2_QPC_BYTE_4_SGE_SHIFT_M,
-                              V2_QPC_BYTE_4_SGE_SHIFT_S,
-                              ilog2((unsigned int)hr_qp->sge.sge_cnt));
-       else
-               roce_set_field(context->byte_4_sqpn_tst,
-                              V2_QPC_BYTE_4_SGE_SHIFT_M,
-                              V2_QPC_BYTE_4_SGE_SHIFT_S,
-                              hr_qp->sq.max_gs >
-                              HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ?
-                              ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
-
-       roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
-                      V2_QPC_BYTE_4_SGE_SHIFT_S, 0);
-
        if (attr_mask & IB_QP_ACCESS_FLAGS) {
                roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
                             !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ));
@@ -3506,20 +3595,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
                             0);
        }
 
-       roce_set_field(context->byte_20_smac_sgid_idx,
-                      V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S,
-                      ilog2((unsigned int)hr_qp->sq.wqe_cnt));
-       roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
-                      V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0);
-
-       roce_set_field(context->byte_20_smac_sgid_idx,
-                      V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
-                      (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
-                      hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
-                      ilog2((unsigned int)hr_qp->rq.wqe_cnt));
-       roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
-                      V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
-
        roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
                       V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn);
        roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
@@ -3638,7 +3713,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
        }
 
        dmac = (u8 *)attr->ah_attr.roce.dmac;
-       context->wqe_sge_ba = (u32)(wqe_sge_ba >> 3);
+       context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3);
        qpc_mask->wqe_sge_ba = 0;
 
        /*
@@ -3694,7 +3769,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
                       V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M,
                       V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0);
 
-       context->rq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
+       context->rq_cur_blk_addr = cpu_to_le32(mtts[0] >> PAGE_ADDR_SHIFT);
        qpc_mask->rq_cur_blk_addr = 0;
 
        roce_set_field(context->byte_92_srq_info,
@@ -3705,7 +3780,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
                       V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M,
                       V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S, 0);
 
-       context->rq_nxt_blk_addr = (u32)(mtts[1] >> PAGE_ADDR_SHIFT);
+       context->rq_nxt_blk_addr = cpu_to_le32(mtts[1] >> PAGE_ADDR_SHIFT);
        qpc_mask->rq_nxt_blk_addr = 0;
 
        roce_set_field(context->byte_104_rq_sge,
@@ -3720,7 +3795,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
                       V2_QPC_BYTE_132_TRRL_BA_S, dma_handle_3 >> 4);
        roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_BA_M,
                       V2_QPC_BYTE_132_TRRL_BA_S, 0);
-       context->trrl_ba = (u32)(dma_handle_3 >> (16 + 4));
+       context->trrl_ba = cpu_to_le32(dma_handle_3 >> (16 + 4));
        qpc_mask->trrl_ba = 0;
        roce_set_field(context->byte_140_raq, V2_QPC_BYTE_140_TRRL_BA_M,
                       V2_QPC_BYTE_140_TRRL_BA_S,
@@ -3728,7 +3803,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
        roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_TRRL_BA_M,
                       V2_QPC_BYTE_140_TRRL_BA_S, 0);
 
-       context->irrl_ba = (u32)(dma_handle_2 >> 6);
+       context->irrl_ba = cpu_to_le32(dma_handle_2 >> 6);
        qpc_mask->irrl_ba = 0;
        roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_IRRL_BA_M,
                       V2_QPC_BYTE_208_IRRL_BA_S,
@@ -3876,7 +3951,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
         * we should set all bits of the relevant fields in context mask to
         * 0 at the same time, else set them to 0x1.
         */
-       context->sq_cur_blk_addr = (u32)(sq_cur_blk >> PAGE_ADDR_SHIFT);
+       context->sq_cur_blk_addr = cpu_to_le32(sq_cur_blk >> PAGE_ADDR_SHIFT);
        roce_set_field(context->byte_168_irrl_idx,
                       V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M,
                       V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S,
@@ -3888,8 +3963,8 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
 
        context->sq_cur_sge_blk_addr = ((ibqp->qp_type == IB_QPT_GSI) ||
                       hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ?
-                      ((u32)(sge_cur_blk >>
-                      PAGE_ADDR_SHIFT)) : 0;
+                      cpu_to_le32(sge_cur_blk >>
+                      PAGE_ADDR_SHIFT) : 0;
        roce_set_field(context->byte_184_irrl_idx,
                       V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M,
                       V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S,
@@ -3902,7 +3977,8 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
                       V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M,
                       V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, 0);
 
-       context->rx_sq_cur_blk_addr = (u32)(sq_cur_blk >> PAGE_ADDR_SHIFT);
+       context->rx_sq_cur_blk_addr =
+               cpu_to_le32(sq_cur_blk >> PAGE_ADDR_SHIFT);
        roce_set_field(context->byte_232_irrl_sge,
                       V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M,
                       V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_S,
@@ -3974,30 +4050,119 @@ static inline bool hns_roce_v2_check_qp_stat(enum ib_qp_state cur_state,
 
 }
 
-static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
-                                const struct ib_qp_attr *attr,
-                                int attr_mask, enum ib_qp_state cur_state,
-                                enum ib_qp_state new_state)
+static int hns_roce_v2_set_path(struct ib_qp *ibqp,
+                               const struct ib_qp_attr *attr,
+                               int attr_mask,
+                               struct hns_roce_v2_qp_context *context,
+                               struct hns_roce_v2_qp_context *qpc_mask)
 {
+       const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
        struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
        struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
-       struct hns_roce_v2_qp_context *context;
-       struct hns_roce_v2_qp_context *qpc_mask;
-       struct device *dev = hr_dev->dev;
-       int ret = -EINVAL;
+       const struct ib_gid_attr *gid_attr = NULL;
+       int is_roce_protocol;
+       bool is_udp = false;
+       u16 vlan = 0xffff;
+       u8 ib_port;
+       u8 hr_port;
+       int ret;
 
-       context = kcalloc(2, sizeof(*context), GFP_ATOMIC);
-       if (!context)
-               return -ENOMEM;
+       ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num : hr_qp->port + 1;
+       hr_port = ib_port - 1;
+       is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) &&
+                          rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
+
+       if (is_roce_protocol) {
+               gid_attr = attr->ah_attr.grh.sgid_attr;
+               ret = rdma_read_gid_l2_fields(gid_attr, &vlan, NULL);
+               if (ret)
+                       return ret;
+
+               if (gid_attr)
+                       is_udp = (gid_attr->gid_type ==
+                                IB_GID_TYPE_ROCE_UDP_ENCAP);
+       }
+
+       if (vlan < VLAN_CFI_MASK) {
+               roce_set_bit(context->byte_76_srqn_op_en,
+                            V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1);
+               roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+                            V2_QPC_BYTE_76_RQ_VLAN_EN_S, 0);
+               roce_set_bit(context->byte_168_irrl_idx,
+                            V2_QPC_BYTE_168_SQ_VLAN_EN_S, 1);
+               roce_set_bit(qpc_mask->byte_168_irrl_idx,
+                            V2_QPC_BYTE_168_SQ_VLAN_EN_S, 0);
+       }
+
+       roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
+                      V2_QPC_BYTE_24_VLAN_ID_S, vlan);
+       roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
+                      V2_QPC_BYTE_24_VLAN_ID_S, 0);
+
+       if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) {
+               dev_err(hr_dev->dev, "sgid_index(%u) too large. max is %d\n",
+                       grh->sgid_index, hr_dev->caps.gid_table_len[hr_port]);
+               return -EINVAL;
+       }
+
+       if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) {
+               dev_err(hr_dev->dev, "ah attr is not RDMA roce type\n");
+               return -EINVAL;
+       }
+
+       roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M,
+                      V2_QPC_BYTE_52_UDPSPN_S,
+                      is_udp ? 0x12b7 : 0);
+
+       roce_set_field(qpc_mask->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M,
+                      V2_QPC_BYTE_52_UDPSPN_S, 0);
+
+       roce_set_field(context->byte_20_smac_sgid_idx,
+                      V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S,
+                      grh->sgid_index);
+
+       roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
+                      V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0);
+
+       roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M,
+                      V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit);
+       roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M,
+                      V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
+
+       if (hr_dev->pci_dev->revision == 0x21 && is_udp)
+               roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
+                              V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2);
+       else
+               roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
+                              V2_QPC_BYTE_24_TC_S, grh->traffic_class);
+       roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
+                      V2_QPC_BYTE_24_TC_S, 0);
+       roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
+                      V2_QPC_BYTE_28_FL_S, grh->flow_label);
+       roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
+                      V2_QPC_BYTE_28_FL_S, 0);
+       memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
+       memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
+       roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
+                      V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr));
+       roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
+                      V2_QPC_BYTE_28_SL_S, 0);
+       hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+
+       return 0;
+}
+
+static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
+                                     const struct ib_qp_attr *attr,
+                                     int attr_mask,
+                                     enum ib_qp_state cur_state,
+                                     enum ib_qp_state new_state,
+                                     struct hns_roce_v2_qp_context *context,
+                                     struct hns_roce_v2_qp_context *qpc_mask)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+       int ret = 0;
 
-       qpc_mask = context + 1;
-       /*
-        * In v2 engine, software pass context and context mask to hardware
-        * when modifying qp. If software need modify some fields in context,
-        * we should set all bits of the relevant fields in context mask to
-        * 0 at the same time, else set them to 0x1.
-        */
-       memset(qpc_mask, 0xff, sizeof(*qpc_mask));
        if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
                memset(qpc_mask, 0, sizeof(*qpc_mask));
                modify_qp_reset_to_init(ibqp, attr, attr_mask, context,
@@ -4019,134 +4184,30 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
                /* Nothing */
                ;
        } else {
-               dev_err(dev, "Illegal state for QP!\n");
+               dev_err(hr_dev->dev, "Illegal state for QP!\n");
                ret = -EINVAL;
                goto out;
        }
 
-       /* When QP state is err, SQ and RQ WQE should be flushed */
-       if (new_state == IB_QPS_ERR) {
-               roce_set_field(context->byte_160_sq_ci_pi,
-                              V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
-                              V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S,
-                              hr_qp->sq.head);
-               roce_set_field(qpc_mask->byte_160_sq_ci_pi,
-                              V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
-                              V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
+out:
+       return ret;
+}
 
-               if (!ibqp->srq) {
-                       roce_set_field(context->byte_84_rq_ci_pi,
-                              V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
-                              V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S,
-                              hr_qp->rq.head);
-                       roce_set_field(qpc_mask->byte_84_rq_ci_pi,
-                              V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
-                              V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0);
-               }
-       }
+static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
+                                     const struct ib_qp_attr *attr,
+                                     int attr_mask,
+                                     struct hns_roce_v2_qp_context *context,
+                                     struct hns_roce_v2_qp_context *qpc_mask)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+       struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+       int ret = 0;
 
        if (attr_mask & IB_QP_AV) {
-               const struct ib_global_route *grh =
-                                           rdma_ah_read_grh(&attr->ah_attr);
-               const struct ib_gid_attr *gid_attr = NULL;
-               int is_roce_protocol;
-               u16 vlan = 0xffff;
-               u8 ib_port;
-               u8 hr_port;
-
-               ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num :
-                          hr_qp->port + 1;
-               hr_port = ib_port - 1;
-               is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) &&
-                              rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
-
-               if (is_roce_protocol) {
-                       gid_attr = attr->ah_attr.grh.sgid_attr;
-                       ret = rdma_read_gid_l2_fields(gid_attr, &vlan, NULL);
-                       if (ret)
-                               goto out;
-               }
-
-               if (vlan < VLAN_CFI_MASK) {
-                       roce_set_bit(context->byte_76_srqn_op_en,
-                                    V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1);
-                       roce_set_bit(qpc_mask->byte_76_srqn_op_en,
-                                    V2_QPC_BYTE_76_RQ_VLAN_EN_S, 0);
-                       roce_set_bit(context->byte_168_irrl_idx,
-                                    V2_QPC_BYTE_168_SQ_VLAN_EN_S, 1);
-                       roce_set_bit(qpc_mask->byte_168_irrl_idx,
-                                    V2_QPC_BYTE_168_SQ_VLAN_EN_S, 0);
-               }
-
-               roce_set_field(context->byte_24_mtu_tc,
-                              V2_QPC_BYTE_24_VLAN_ID_M,
-                              V2_QPC_BYTE_24_VLAN_ID_S, vlan);
-               roce_set_field(qpc_mask->byte_24_mtu_tc,
-                              V2_QPC_BYTE_24_VLAN_ID_M,
-                              V2_QPC_BYTE_24_VLAN_ID_S, 0);
-
-               if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) {
-                       dev_err(hr_dev->dev,
-                               "sgid_index(%u) too large. max is %d\n",
-                               grh->sgid_index,
-                               hr_dev->caps.gid_table_len[hr_port]);
-                       ret = -EINVAL;
-                       goto out;
-               }
-
-               if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) {
-                       dev_err(hr_dev->dev, "ah attr is not RDMA roce type\n");
-                       ret = -EINVAL;
-                       goto out;
-               }
-
-               roce_set_field(context->byte_52_udpspn_dmac,
-                          V2_QPC_BYTE_52_UDPSPN_M, V2_QPC_BYTE_52_UDPSPN_S,
-                          (gid_attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) ?
-                          0 : 0x12b7);
-
-               roce_set_field(qpc_mask->byte_52_udpspn_dmac,
-                              V2_QPC_BYTE_52_UDPSPN_M,
-                              V2_QPC_BYTE_52_UDPSPN_S, 0);
-
-               roce_set_field(context->byte_20_smac_sgid_idx,
-                              V2_QPC_BYTE_20_SGID_IDX_M,
-                              V2_QPC_BYTE_20_SGID_IDX_S, grh->sgid_index);
-
-               roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
-                              V2_QPC_BYTE_20_SGID_IDX_M,
-                              V2_QPC_BYTE_20_SGID_IDX_S, 0);
-
-               roce_set_field(context->byte_24_mtu_tc,
-                              V2_QPC_BYTE_24_HOP_LIMIT_M,
-                              V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit);
-               roce_set_field(qpc_mask->byte_24_mtu_tc,
-                              V2_QPC_BYTE_24_HOP_LIMIT_M,
-                              V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
-
-               if (hr_dev->pci_dev->revision == 0x21 &&
-                   gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
-                       roce_set_field(context->byte_24_mtu_tc,
-                                      V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
-                                      grh->traffic_class >> 2);
-               else
-                       roce_set_field(context->byte_24_mtu_tc,
-                                      V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
-                                      grh->traffic_class);
-               roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
-                              V2_QPC_BYTE_24_TC_S, 0);
-               roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
-                              V2_QPC_BYTE_28_FL_S, grh->flow_label);
-               roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
-                              V2_QPC_BYTE_28_FL_S, 0);
-               memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
-               memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
-               roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
-                              V2_QPC_BYTE_28_SL_S,
-                              rdma_ah_get_sl(&attr->ah_attr));
-               roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
-                              V2_QPC_BYTE_28_SL_S, 0);
-               hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+               ret = hns_roce_v2_set_path(ibqp, attr, attr_mask, context,
+                                          qpc_mask);
+               if (ret)
+                       return ret;
        }
 
        if (attr_mask & IB_QP_TIMEOUT) {
@@ -4158,7 +4219,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
                                       V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S,
                                       0);
                } else {
-                       dev_warn(dev, "Local ACK timeout shall be 0 to 30.\n");
+                       dev_warn(hr_dev->dev,
+                                "Local ACK timeout shall be 0 to 30.\n");
                }
        }
 
@@ -4196,6 +4258,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
                               V2_QPC_BYTE_244_RNR_CNT_S, 0);
        }
 
+       /* RC&UC&UD required attr */
        if (attr_mask & IB_QP_SQ_PSN) {
                roce_set_field(context->byte_172_sq_psn,
                               V2_QPC_BYTE_172_SQ_CUR_PSN_M,
@@ -4290,11 +4353,85 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
        }
 
        if (attr_mask & IB_QP_QKEY) {
-               context->qkey_xrcd = attr->qkey;
+               context->qkey_xrcd = cpu_to_le32(attr->qkey);
                qpc_mask->qkey_xrcd = 0;
                hr_qp->qkey = attr->qkey;
        }
 
+       return ret;
+}
+
+static void hns_roce_v2_record_opt_fields(struct ib_qp *ibqp,
+                                         const struct ib_qp_attr *attr,
+                                         int attr_mask)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+       struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+
+       if (attr_mask & IB_QP_ACCESS_FLAGS)
+               hr_qp->atomic_rd_en = attr->qp_access_flags;
+
+       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+               hr_qp->resp_depth = attr->max_dest_rd_atomic;
+       if (attr_mask & IB_QP_PORT) {
+               hr_qp->port = attr->port_num - 1;
+               hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
+       }
+}
+
+static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
+                                const struct ib_qp_attr *attr,
+                                int attr_mask, enum ib_qp_state cur_state,
+                                enum ib_qp_state new_state)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+       struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+       struct hns_roce_v2_qp_context ctx[2];
+       struct hns_roce_v2_qp_context *context = ctx;
+       struct hns_roce_v2_qp_context *qpc_mask = ctx + 1;
+       struct device *dev = hr_dev->dev;
+       int ret;
+
+       /*
+        * In v2 engine, software pass context and context mask to hardware
+        * when modifying qp. If software need modify some fields in context,
+        * we should set all bits of the relevant fields in context mask to
+        * 0 at the same time, else set them to 0x1.
+        */
+       memset(context, 0, sizeof(*context));
+       memset(qpc_mask, 0xff, sizeof(*qpc_mask));
+       ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state,
+                                        new_state, context, qpc_mask);
+       if (ret)
+               goto out;
+
+       /* When QP state is err, SQ and RQ WQE should be flushed */
+       if (new_state == IB_QPS_ERR) {
+               roce_set_field(context->byte_160_sq_ci_pi,
+                              V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
+                              V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S,
+                              hr_qp->sq.head);
+               roce_set_field(qpc_mask->byte_160_sq_ci_pi,
+                              V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
+                              V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
+
+               if (!ibqp->srq) {
+                       roce_set_field(context->byte_84_rq_ci_pi,
+                              V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
+                              V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S,
+                              hr_qp->rq.head);
+                       roce_set_field(qpc_mask->byte_84_rq_ci_pi,
+                              V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
+                              V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0);
+               }
+       }
+
+       /* Configure the optional fields */
+       ret = hns_roce_v2_set_opt_fields(ibqp, attr, attr_mask, context,
+                                        qpc_mask);
+       if (ret)
+               goto out;
+
        roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S,
                     ibqp->srq ? 1 : 0);
        roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
@@ -4307,8 +4444,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
                       V2_QPC_BYTE_60_QP_ST_S, 0);
 
        /* SW pass context to HW */
-       ret = hns_roce_v2_qp_modify(hr_dev, cur_state, new_state,
-                                   context, hr_qp);
+       ret = hns_roce_v2_qp_modify(hr_dev, cur_state, new_state, ctx, hr_qp);
        if (ret) {
                dev_err(dev, "hns_roce_qp_modify failed(%d)\n", ret);
                goto out;
@@ -4316,15 +4452,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 
        hr_qp->state = new_state;
 
-       if (attr_mask & IB_QP_ACCESS_FLAGS)
-               hr_qp->atomic_rd_en = attr->qp_access_flags;
-
-       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-               hr_qp->resp_depth = attr->max_dest_rd_atomic;
-       if (attr_mask & IB_QP_PORT) {
-               hr_qp->port = attr->port_num - 1;
-               hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
-       }
+       hns_roce_v2_record_opt_fields(ibqp, attr, attr_mask);
 
        if (new_state == IB_QPS_RESET && !ibqp->uobject) {
                hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn,
@@ -4344,7 +4472,6 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
        }
 
 out:
-       kfree(context);
        return ret;
 }
 
@@ -4395,16 +4522,12 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 {
        struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
        struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
-       struct hns_roce_v2_qp_context *context;
+       struct hns_roce_v2_qp_context context = {};
        struct device *dev = hr_dev->dev;
        int tmp_qp_state;
        int state;
        int ret;
 
-       context = kzalloc(sizeof(*context), GFP_KERNEL);
-       if (!context)
-               return -ENOMEM;
-
        memset(qp_attr, 0, sizeof(*qp_attr));
        memset(qp_init_attr, 0, sizeof(*qp_init_attr));
 
@@ -4416,14 +4539,14 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
                goto done;
        }
 
-       ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, context);
+       ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, &context);
        if (ret) {
                dev_err(dev, "query qpc error\n");
                ret = -EINVAL;
                goto out;
        }
 
-       state = roce_get_field(context->byte_60_qpst_tempid,
+       state = roce_get_field(context.byte_60_qpst_tempid,
                               V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S);
        tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
        if (tmp_qp_state == -1) {
@@ -4433,7 +4556,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
        }
        hr_qp->state = (u8)tmp_qp_state;
        qp_attr->qp_state = (enum ib_qp_state)hr_qp->state;
-       qp_attr->path_mtu = (enum ib_mtu)roce_get_field(context->byte_24_mtu_tc,
+       qp_attr->path_mtu = (enum ib_mtu)roce_get_field(context.byte_24_mtu_tc,
                                                        V2_QPC_BYTE_24_MTU_M,
                                                        V2_QPC_BYTE_24_MTU_S);
        qp_attr->path_mig_state = IB_MIG_ARMED;
@@ -4441,20 +4564,20 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
        if (hr_qp->ibqp.qp_type == IB_QPT_UD)
                qp_attr->qkey = V2_QKEY_VAL;
 
-       qp_attr->rq_psn = roce_get_field(context->byte_108_rx_reqepsn,
+       qp_attr->rq_psn = roce_get_field(context.byte_108_rx_reqepsn,
                                         V2_QPC_BYTE_108_RX_REQ_EPSN_M,
                                         V2_QPC_BYTE_108_RX_REQ_EPSN_S);
-       qp_attr->sq_psn = (u32)roce_get_field(context->byte_172_sq_psn,
+       qp_attr->sq_psn = (u32)roce_get_field(context.byte_172_sq_psn,
                                              V2_QPC_BYTE_172_SQ_CUR_PSN_M,
                                              V2_QPC_BYTE_172_SQ_CUR_PSN_S);
-       qp_attr->dest_qp_num = (u8)roce_get_field(context->byte_56_dqpn_err,
+       qp_attr->dest_qp_num = (u8)roce_get_field(context.byte_56_dqpn_err,
                                                  V2_QPC_BYTE_56_DQPN_M,
                                                  V2_QPC_BYTE_56_DQPN_S);
-       qp_attr->qp_access_flags = ((roce_get_bit(context->byte_76_srqn_op_en,
-                                   V2_QPC_BYTE_76_RRE_S)) << V2_QP_RWE_S) |
-                                   ((roce_get_bit(context->byte_76_srqn_op_en,
-                                   V2_QPC_BYTE_76_RWE_S)) << V2_QP_RRE_S) |
-                                   ((roce_get_bit(context->byte_76_srqn_op_en,
+       qp_attr->qp_access_flags = ((roce_get_bit(context.byte_76_srqn_op_en,
+                                   V2_QPC_BYTE_76_RRE_S)) << V2_QP_RRE_S) |
+                                   ((roce_get_bit(context.byte_76_srqn_op_en,
+                                   V2_QPC_BYTE_76_RWE_S)) << V2_QP_RWE_S) |
+                                   ((roce_get_bit(context.byte_76_srqn_op_en,
                                    V2_QPC_BYTE_76_ATE_S)) << V2_QP_ATE_S);
 
        if (hr_qp->ibqp.qp_type == IB_QPT_RC ||
@@ -4463,43 +4586,43 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
                                rdma_ah_retrieve_grh(&qp_attr->ah_attr);
 
                rdma_ah_set_sl(&qp_attr->ah_attr,
-                              roce_get_field(context->byte_28_at_fl,
+                              roce_get_field(context.byte_28_at_fl,
                                              V2_QPC_BYTE_28_SL_M,
                                              V2_QPC_BYTE_28_SL_S));
-               grh->flow_label = roce_get_field(context->byte_28_at_fl,
+               grh->flow_label = roce_get_field(context.byte_28_at_fl,
                                                 V2_QPC_BYTE_28_FL_M,
                                                 V2_QPC_BYTE_28_FL_S);
-               grh->sgid_index = roce_get_field(context->byte_20_smac_sgid_idx,
+               grh->sgid_index = roce_get_field(context.byte_20_smac_sgid_idx,
                                                 V2_QPC_BYTE_20_SGID_IDX_M,
                                                 V2_QPC_BYTE_20_SGID_IDX_S);
-               grh->hop_limit = roce_get_field(context->byte_24_mtu_tc,
+               grh->hop_limit = roce_get_field(context.byte_24_mtu_tc,
                                                V2_QPC_BYTE_24_HOP_LIMIT_M,
                                                V2_QPC_BYTE_24_HOP_LIMIT_S);
-               grh->traffic_class = roce_get_field(context->byte_24_mtu_tc,
+               grh->traffic_class = roce_get_field(context.byte_24_mtu_tc,
                                                    V2_QPC_BYTE_24_TC_M,
                                                    V2_QPC_BYTE_24_TC_S);
 
-               memcpy(grh->dgid.raw, context->dgid, sizeof(grh->dgid.raw));
+               memcpy(grh->dgid.raw, context.dgid, sizeof(grh->dgid.raw));
        }
 
        qp_attr->port_num = hr_qp->port + 1;
        qp_attr->sq_draining = 0;
-       qp_attr->max_rd_atomic = 1 << roce_get_field(context->byte_208_irrl,
+       qp_attr->max_rd_atomic = 1 << roce_get_field(context.byte_208_irrl,
                                                     V2_QPC_BYTE_208_SR_MAX_M,
                                                     V2_QPC_BYTE_208_SR_MAX_S);
-       qp_attr->max_dest_rd_atomic = 1 << roce_get_field(context->byte_140_raq,
+       qp_attr->max_dest_rd_atomic = 1 << roce_get_field(context.byte_140_raq,
                                                     V2_QPC_BYTE_140_RR_MAX_M,
                                                     V2_QPC_BYTE_140_RR_MAX_S);
-       qp_attr->min_rnr_timer = (u8)roce_get_field(context->byte_80_rnr_rx_cqn,
+       qp_attr->min_rnr_timer = (u8)roce_get_field(context.byte_80_rnr_rx_cqn,
                                                 V2_QPC_BYTE_80_MIN_RNR_TIME_M,
                                                 V2_QPC_BYTE_80_MIN_RNR_TIME_S);
-       qp_attr->timeout = (u8)roce_get_field(context->byte_28_at_fl,
+       qp_attr->timeout = (u8)roce_get_field(context.byte_28_at_fl,
                                              V2_QPC_BYTE_28_AT_M,
                                              V2_QPC_BYTE_28_AT_S);
-       qp_attr->retry_cnt = roce_get_field(context->byte_212_lsn,
+       qp_attr->retry_cnt = roce_get_field(context.byte_212_lsn,
                                            V2_QPC_BYTE_212_RETRY_CNT_M,
                                            V2_QPC_BYTE_212_RETRY_CNT_S);
-       qp_attr->rnr_retry = context->rq_rnr_timer;
+       qp_attr->rnr_retry = le32_to_cpu(context.rq_rnr_timer);
 
 done:
        qp_attr->cur_qp_state = qp_attr->qp_state;
@@ -4518,7 +4641,6 @@ done:
 
 out:
        mutex_unlock(&hr_qp->mutex);
-       kfree(context);
        return ret;
 }
 
@@ -4527,7 +4649,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
                                         struct ib_udata *udata)
 {
        struct hns_roce_cq *send_cq, *recv_cq;
-       struct device *dev = hr_dev->dev;
+       struct ib_device *ibdev = &hr_dev->ib_dev;
        int ret;
 
        if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) {
@@ -4535,8 +4657,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
                ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0,
                                            hr_qp->state, IB_QPS_RESET);
                if (ret) {
-                       dev_err(dev, "modify QP %06lx to ERR failed.\n",
-                               hr_qp->qpn);
+                       ibdev_err(ibdev, "modify QP to Reset failed.\n");
                        return ret;
                }
        }
@@ -4605,7 +4726,8 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 
        ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata);
        if (ret) {
-               dev_err(hr_dev->dev, "Destroy qp failed(%d)\n", ret);
+               ibdev_err(&hr_dev->ib_dev, "Destroy qp 0x%06lx failed(%d)\n",
+                         hr_qp->qpn, ret);
                return ret;
        }
 
@@ -4829,7 +4951,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
 static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
 {
        struct hns_roce_dev *hr_dev = eq->hr_dev;
-       u32 doorbell[2];
+       __le32 doorbell[2];
 
        doorbell[0] = 0;
        doorbell[1] = 0;
@@ -4904,7 +5026,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
                               struct hns_roce_eq *eq)
 {
        struct device *dev = hr_dev->dev;
-       struct hns_roce_aeqe *aeqe;
+       struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq);
        int aeqe_found = 0;
        int event_type;
        int sub_type;
@@ -4912,8 +5034,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
        u32 qpn;
        u32 cqn;
 
-       while ((aeqe = next_aeqe_sw_v2(eq))) {
-
+       while (aeqe) {
                /* Make sure we read AEQ entry after we have checked the
                 * ownership bit
                 */
@@ -4977,11 +5098,12 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
                ++eq->cons_index;
                aeqe_found = 1;
 
-               if (eq->cons_index > (2 * eq->entries - 1)) {
-                       dev_warn(dev, "cons_index overflow, set back to 0.\n");
+               if (eq->cons_index > (2 * eq->entries - 1))
                        eq->cons_index = 0;
-               }
+
                hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn);
+
+               aeqe = next_aeqe_sw_v2(eq);
        }
 
        set_eq_cons_index_v2(eq);
@@ -5034,12 +5156,11 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
                               struct hns_roce_eq *eq)
 {
        struct device *dev = hr_dev->dev;
-       struct hns_roce_ceqe *ceqe;
+       struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
        int ceqe_found = 0;
        u32 cqn;
 
-       while ((ceqe = next_ceqe_sw_v2(eq))) {
-
+       while (ceqe) {
                /* Make sure we read CEQ entry after we have checked the
                 * ownership bit
                 */
@@ -5054,10 +5175,12 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
                ++eq->cons_index;
                ceqe_found = 1;
 
-               if (eq->cons_index > (2 * eq->entries - 1)) {
+               if (eq->cons_index > (EQ_DEPTH_COEFF * eq->entries - 1)) {
                        dev_warn(dev, "cons_index overflow, set back to 0.\n");
                        eq->cons_index = 0;
                }
+
+               ceqe = next_ceqe_sw_v2(eq);
        }
 
        set_eq_cons_index_v2(eq);
@@ -5093,14 +5216,14 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
        int_st = roce_read(hr_dev, ROCEE_VF_ABN_INT_ST_REG);
        int_en = roce_read(hr_dev, ROCEE_VF_ABN_INT_EN_REG);
 
-       if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) {
+       if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) {
                struct pci_dev *pdev = hr_dev->pci_dev;
                struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
                const struct hnae3_ae_ops *ops = ae_dev->ops;
 
                dev_err(dev, "AEQ overflow!\n");
 
-               roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S, 1);
+               int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S;
                roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
 
                /* Set reset level for reset_event() */
@@ -5110,27 +5233,27 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
                if (ops->reset_event)
                        ops->reset_event(pdev, NULL);
 
-               roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1);
+               int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S;
                roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
 
                int_work = 1;
-       } else if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S)) {
+       } else if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S)) {
                dev_err(dev, "BUS ERR!\n");
 
-               roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S, 1);
+               int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S;
                roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
 
-               roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1);
+               int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S;
                roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
 
                int_work = 1;
-       } else if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S)) {
+       } else if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S)) {
                dev_err(dev, "OTHER ERR!\n");
 
-               roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S, 1);
+               int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S;
                roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
 
-               roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1);
+               int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S;
                roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
 
                int_work = 1;
@@ -5202,14 +5325,12 @@ static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
        buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
        bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
 
-       /* hop_num = 0 */
        if (mhop_num == HNS_ROCE_HOP_NUM_0) {
                dma_free_coherent(dev, (unsigned int)(eq->entries *
                                  eq->eqe_size), eq->bt_l0, eq->l0_dma);
                return;
        }
 
-       /* hop_num = 1 or hop = 2 */
        dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
        if (mhop_num == 1) {
                for (i = 0; i < eq->l0_last_num; i++) {
@@ -5449,7 +5570,6 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
                              buf_chk_sz);
        bt_num = DIV_ROUND_UP(ba_num, bt_chk_sz / BA_BYTE_LEN);
 
-       /* hop_num = 0 */
        if (mhop_num == HNS_ROCE_HOP_NUM_0) {
                if (eq->entries > buf_chk_sz / eq->eqe_size) {
                        dev_err(dev, "eq entries %d is larger than buf_pg_sz!",
@@ -5515,7 +5635,8 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
                                break;
                }
                eq->cur_eqe_ba = eq->buf_dma[0];
-               eq->nxt_eqe_ba = eq->buf_dma[1];
+               if (ba_num > 1)
+                       eq->nxt_eqe_ba = eq->buf_dma[1];
 
        } else if (mhop_num == 2) {
                /* alloc L1 BT and buf */
@@ -5556,7 +5677,8 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
                                break;
                }
                eq->cur_eqe_ba = eq->buf_dma[0];
-               eq->nxt_eqe_ba = eq->buf_dma[1];
+               if (ba_num > 1)
+                       eq->nxt_eqe_ba = eq->buf_dma[1];
        }
 
        eq->l0_last_num = i + 1;
@@ -5699,6 +5821,95 @@ free_cmd_mbox:
        return ret;
 }
 
+static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
+                                 int comp_num, int aeq_num, int other_num)
+{
+       struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
+       int i, j;
+       int ret;
+
+       for (i = 0; i < irq_num; i++) {
+               hr_dev->irq_names[i] = kzalloc(HNS_ROCE_INT_NAME_LEN,
+                                              GFP_KERNEL);
+               if (!hr_dev->irq_names[i]) {
+                       ret = -ENOMEM;
+                       goto err_kzalloc_failed;
+               }
+       }
+
+       /* irq contains: abnormal + AEQ + CEQ */
+       for (j = 0; j < other_num; j++)
+               snprintf((char *)hr_dev->irq_names[j],
+                        HNS_ROCE_INT_NAME_LEN, "hns-abn-%d", j);
+
+       for (j = other_num; j < (other_num + aeq_num); j++)
+               snprintf((char *)hr_dev->irq_names[j],
+                        HNS_ROCE_INT_NAME_LEN, "hns-aeq-%d",
+                        j - other_num);
+
+       for (j = (other_num + aeq_num); j < irq_num; j++)
+               snprintf((char *)hr_dev->irq_names[j],
+                        HNS_ROCE_INT_NAME_LEN, "hns-ceq-%d",
+                        j - other_num - aeq_num);
+
+       for (j = 0; j < irq_num; j++) {
+               if (j < other_num)
+                       ret = request_irq(hr_dev->irq[j],
+                                         hns_roce_v2_msix_interrupt_abn,
+                                         0, hr_dev->irq_names[j], hr_dev);
+
+               else if (j < (other_num + comp_num))
+                       ret = request_irq(eq_table->eq[j - other_num].irq,
+                                         hns_roce_v2_msix_interrupt_eq,
+                                         0, hr_dev->irq_names[j + aeq_num],
+                                         &eq_table->eq[j - other_num]);
+               else
+                       ret = request_irq(eq_table->eq[j - other_num].irq,
+                                         hns_roce_v2_msix_interrupt_eq,
+                                         0, hr_dev->irq_names[j - comp_num],
+                                         &eq_table->eq[j - other_num]);
+               if (ret) {
+                       dev_err(hr_dev->dev, "Request irq error!\n");
+                       goto err_request_failed;
+               }
+       }
+
+       return 0;
+
+err_request_failed:
+       for (j -= 1; j >= 0; j--)
+               if (j < other_num)
+                       free_irq(hr_dev->irq[j], hr_dev);
+               else
+                       free_irq(eq_table->eq[j - other_num].irq,
+                                &eq_table->eq[j - other_num]);
+
+err_kzalloc_failed:
+       for (i -= 1; i >= 0; i--)
+               kfree(hr_dev->irq_names[i]);
+
+       return ret;
+}
+
+static void __hns_roce_free_irq(struct hns_roce_dev *hr_dev)
+{
+       int irq_num;
+       int eq_num;
+       int i;
+
+       eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
+       irq_num = eq_num + hr_dev->caps.num_other_vectors;
+
+       for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
+               free_irq(hr_dev->irq[i], hr_dev);
+
+       for (i = 0; i < eq_num; i++)
+               free_irq(hr_dev->eq_table.eq[i].irq, &hr_dev->eq_table.eq[i]);
+
+       for (i = 0; i < irq_num; i++)
+               kfree(hr_dev->irq_names[i]);
+}
+
 static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
 {
        struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
@@ -5710,7 +5921,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
        int other_num;
        int comp_num;
        int aeq_num;
-       int i, j, k;
+       int i;
        int ret;
 
        other_num = hr_dev->caps.num_other_vectors;
@@ -5724,27 +5935,18 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
        if (!eq_table->eq)
                return -ENOMEM;
 
-       for (i = 0; i < irq_num; i++) {
-               hr_dev->irq_names[i] = kzalloc(HNS_ROCE_INT_NAME_LEN,
-                                              GFP_KERNEL);
-               if (!hr_dev->irq_names[i]) {
-                       ret = -ENOMEM;
-                       goto err_failed_kzalloc;
-               }
-       }
-
        /* create eq */
-       for (j = 0; j < eq_num; j++) {
-               eq = &eq_table->eq[j];
+       for (i = 0; i < eq_num; i++) {
+               eq = &eq_table->eq[i];
                eq->hr_dev = hr_dev;
-               eq->eqn = j;
-               if (j < comp_num) {
+               eq->eqn = i;
+               if (i < comp_num) {
                        /* CEQ */
                        eq_cmd = HNS_ROCE_CMD_CREATE_CEQC;
                        eq->type_flag = HNS_ROCE_CEQ;
                        eq->entries = hr_dev->caps.ceqe_depth;
                        eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE;
-                       eq->irq = hr_dev->irq[j + other_num + aeq_num];
+                       eq->irq = hr_dev->irq[i + other_num + aeq_num];
                        eq->eq_max_cnt = HNS_ROCE_CEQ_DEFAULT_BURST_NUM;
                        eq->eq_period = HNS_ROCE_CEQ_DEFAULT_INTERVAL;
                } else {
@@ -5753,7 +5955,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
                        eq->type_flag = HNS_ROCE_AEQ;
                        eq->entries = hr_dev->caps.aeqe_depth;
                        eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE;
-                       eq->irq = hr_dev->irq[j - comp_num + other_num];
+                       eq->irq = hr_dev->irq[i - comp_num + other_num];
                        eq->eq_max_cnt = HNS_ROCE_AEQ_DEFAULT_BURST_NUM;
                        eq->eq_period = HNS_ROCE_AEQ_DEFAULT_INTERVAL;
                }
@@ -5768,40 +5970,11 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
        /* enable irq */
        hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_ENABLE);
 
-       /* irq contains: abnormal + AEQ + CEQ*/
-       for (k = 0; k < irq_num; k++)
-               if (k < other_num)
-                       snprintf((char *)hr_dev->irq_names[k],
-                                HNS_ROCE_INT_NAME_LEN, "hns-abn-%d", k);
-               else if (k < (other_num + aeq_num))
-                       snprintf((char *)hr_dev->irq_names[k],
-                                HNS_ROCE_INT_NAME_LEN, "hns-aeq-%d",
-                                k - other_num);
-               else
-                       snprintf((char *)hr_dev->irq_names[k],
-                                HNS_ROCE_INT_NAME_LEN, "hns-ceq-%d",
-                                k - other_num - aeq_num);
-
-       for (k = 0; k < irq_num; k++) {
-               if (k < other_num)
-                       ret = request_irq(hr_dev->irq[k],
-                                         hns_roce_v2_msix_interrupt_abn,
-                                         0, hr_dev->irq_names[k], hr_dev);
-
-               else if (k < (other_num + comp_num))
-                       ret = request_irq(eq_table->eq[k - other_num].irq,
-                                         hns_roce_v2_msix_interrupt_eq,
-                                         0, hr_dev->irq_names[k + aeq_num],
-                                         &eq_table->eq[k - other_num]);
-               else
-                       ret = request_irq(eq_table->eq[k - other_num].irq,
-                                         hns_roce_v2_msix_interrupt_eq,
-                                         0, hr_dev->irq_names[k - comp_num],
-                                         &eq_table->eq[k - other_num]);
-               if (ret) {
-                       dev_err(dev, "Request irq error!\n");
-                       goto err_request_irq_fail;
-               }
+       ret = __hns_roce_request_irq(hr_dev, irq_num, comp_num,
+                                    aeq_num, other_num);
+       if (ret) {
+               dev_err(dev, "Request irq failed.\n");
+               goto err_request_irq_fail;
        }
 
        hr_dev->irq_workq =
@@ -5809,26 +5982,20 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
        if (!hr_dev->irq_workq) {
                dev_err(dev, "Create irq workqueue failed!\n");
                ret = -ENOMEM;
-               goto err_request_irq_fail;
+               goto err_create_wq_fail;
        }
 
        return 0;
 
+err_create_wq_fail:
+       __hns_roce_free_irq(hr_dev);
+
 err_request_irq_fail:
-       for (k -= 1; k >= 0; k--)
-               if (k < other_num)
-                       free_irq(hr_dev->irq[k], hr_dev);
-               else
-                       free_irq(eq_table->eq[k - other_num].irq,
-                                &eq_table->eq[k - other_num]);
+       hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_DISABLE);
 
 err_create_eq_fail:
-       for (j -= 1; j >= 0; j--)
-               hns_roce_v2_free_eq(hr_dev, &eq_table->eq[j]);
-
-err_failed_kzalloc:
        for (i -= 1; i >= 0; i--)
-               kfree(hr_dev->irq_names[i]);
+               hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]);
        kfree(eq_table->eq);
 
        return ret;
@@ -5837,30 +6004,22 @@ err_failed_kzalloc:
 static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
 {
        struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
-       int irq_num;
        int eq_num;
        int i;
 
        eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
-       irq_num = eq_num + hr_dev->caps.num_other_vectors;
 
        /* Disable irq */
        hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_DISABLE);
 
-       for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
-               free_irq(hr_dev->irq[i], hr_dev);
+       __hns_roce_free_irq(hr_dev);
 
        for (i = 0; i < eq_num; i++) {
                hns_roce_v2_destroy_eqc(hr_dev, i);
 
-               free_irq(eq_table->eq[i].irq, &eq_table->eq[i]);
-
                hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]);
        }
 
-       for (i = 0; i < irq_num; i++)
-               kfree(hr_dev->irq_names[i]);
-
        kfree(eq_table->eq);
 
        flush_workqueue(hr_dev->irq_workq);
@@ -5904,7 +6063,7 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
        roce_set_field(srq_context->byte_24_wqe_bt_ba,
                       SRQC_BYTE_24_SRQ_WQE_BT_BA_M,
                       SRQC_BYTE_24_SRQ_WQE_BT_BA_S,
-                      cpu_to_le32(dma_handle_wqe >> 35));
+                      dma_handle_wqe >> 35);
 
        roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M,
                       SRQC_BYTE_28_PD_S, pdn);
@@ -5912,20 +6071,18 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
                       SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 :
                       fls(srq->max_gs - 1));
 
-       srq_context->idx_bt_ba = (u32)(dma_handle_idx >> 3);
-       srq_context->idx_bt_ba = cpu_to_le32(srq_context->idx_bt_ba);
+       srq_context->idx_bt_ba = cpu_to_le32(dma_handle_idx >> 3);
        roce_set_field(srq_context->rsv_idx_bt_ba,
                       SRQC_BYTE_36_SRQ_IDX_BT_BA_M,
                       SRQC_BYTE_36_SRQ_IDX_BT_BA_S,
-                      cpu_to_le32(dma_handle_idx >> 35));
+                      dma_handle_idx >> 35);
 
-       srq_context->idx_cur_blk_addr = (u32)(mtts_idx[0] >> PAGE_ADDR_SHIFT);
        srq_context->idx_cur_blk_addr =
-                                    cpu_to_le32(srq_context->idx_cur_blk_addr);
+               cpu_to_le32(mtts_idx[0] >> PAGE_ADDR_SHIFT);
        roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
                       SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M,
                       SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S,
-                      cpu_to_le32((mtts_idx[0]) >> (32 + PAGE_ADDR_SHIFT)));
+                      mtts_idx[0] >> (32 + PAGE_ADDR_SHIFT));
        roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
                       SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M,
                       SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S,
@@ -5941,13 +6098,12 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
                       SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S,
                       hr_dev->caps.idx_buf_pg_sz);
 
-       srq_context->idx_nxt_blk_addr = (u32)(mtts_idx[1] >> PAGE_ADDR_SHIFT);
        srq_context->idx_nxt_blk_addr =
-                                  cpu_to_le32(srq_context->idx_nxt_blk_addr);
+               cpu_to_le32(mtts_idx[1] >> PAGE_ADDR_SHIFT);
        roce_set_field(srq_context->rsv_idxnxtblkaddr,
                       SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M,
                       SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S,
-                      cpu_to_le32((mtts_idx[1]) >> (32 + PAGE_ADDR_SHIFT)));
+                      mtts_idx[1] >> (32 + PAGE_ADDR_SHIFT));
        roce_set_field(srq_context->byte_56_xrc_cqn,
                       SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S,
                       cqn);
@@ -6141,9 +6297,10 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
                 */
                wmb();
 
-               srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S |
-                               (srq->srqn & V2_DB_BYTE_4_TAG_M);
-               srq_db.parameter = srq->head;
+               srq_db.byte_4 =
+                       cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S |
+                                   (srq->srqn & V2_DB_BYTE_4_TAG_M));
+               srq_db.parameter = cpu_to_le32(srq->head);
 
                hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l);
 
@@ -6433,7 +6590,7 @@ static int hns_roce_hw_v2_reset_notify_uninit(struct hnae3_handle *handle)
 
        handle->rinfo.reset_state = HNS_ROCE_STATE_RST_UNINIT;
        dev_info(&handle->pdev->dev, "In reset process RoCE client uninit.\n");
-       msleep(100);
+       msleep(HNS_ROCE_V2_HW_RST_UNINT_DELAY);
        __hns_roce_hw_v2_uninit_instance(handle, false);
 
        return 0;
index 478f5a5..43219d2 100644 (file)
 #define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE       2
 #define HNS_ROCE_V2_RSV_QPS                    8
 
-#define HNS_ROCE_V2_HW_RST_TIMEOUT             1000
+#define HNS_ROCE_V2_HW_RST_TIMEOUT             1000
+#define HNS_ROCE_V2_HW_RST_UNINT_DELAY         100
+
+#define HNS_ROCE_V2_HW_RST_COMPLETION_WAIT     20
 
 #define HNS_ROCE_CONTEXT_HOP_NUM               1
 #define HNS_ROCE_SCCC_HOP_NUM                  1
 #define HNS_ROCE_CMD_FLAG_ERR_INTR     BIT(HNS_ROCE_CMD_FLAG_ERR_INTR_SHIFT)
 
 #define HNS_ROCE_CMQ_DESC_NUM_S                3
-#define HNS_ROCE_CMQ_EN_B              16
-#define HNS_ROCE_CMQ_ENABLE            BIT(HNS_ROCE_CMQ_EN_B)
 
 #define HNS_ROCE_CMQ_SCC_CLR_DONE_CNT          5
 
index 1e4ba48..b5d196c 100644 (file)
@@ -262,7 +262,8 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
        props->state = (netif_running(net_dev) && netif_carrier_ok(net_dev)) ?
                        IB_PORT_ACTIVE : IB_PORT_DOWN;
        props->phys_state = (props->state == IB_PORT_ACTIVE) ?
-                            HNS_ROCE_PHY_LINKUP : HNS_ROCE_PHY_DISABLED;
+                            IB_PORT_PHYS_STATE_LINK_UP :
+                            IB_PORT_PHYS_STATE_DISABLED;
 
        spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
 
@@ -901,6 +902,7 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
                goto error_failed_cmd_init;
        }
 
+       /* EQ depends on poll mode, event mode depends on EQ */
        ret = hr_dev->hw->init_eq(hr_dev);
        if (ret) {
                dev_err(dev, "eq init failed!\n");
@@ -910,8 +912,9 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
        if (hr_dev->cmd_mod) {
                ret = hns_roce_cmd_use_events(hr_dev);
                if (ret) {
-                       dev_err(dev, "Switch to event-driven cmd failed!\n");
-                       goto error_failed_use_event;
+                       dev_warn(dev,
+                                "Cmd event  mode failed, set back to poll!\n");
+                       hns_roce_cmd_use_polling(hr_dev);
                }
        }
 
@@ -954,8 +957,6 @@ error_failed_setup_hca:
 error_failed_init_hem:
        if (hr_dev->cmd_mod)
                hns_roce_cmd_use_polling(hr_dev);
-
-error_failed_use_event:
        hr_dev->hw->cleanup_eq(hr_dev);
 
 error_failed_eq_table:
index 549e1a3..5f8416b 100644 (file)
@@ -347,155 +347,207 @@ static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
        mr->pbl_bt_l0 = NULL;
        mr->pbl_l0_dma_addr = 0;
 }
+static int pbl_1hop_alloc(struct hns_roce_dev *hr_dev, int npages,
+                              struct hns_roce_mr *mr, u32 pbl_bt_sz)
+{
+       struct device *dev = hr_dev->dev;
 
-/* PBL multi hop addressing */
-static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
-                              struct hns_roce_mr *mr)
+       if (npages > pbl_bt_sz / 8) {
+               dev_err(dev, "npages %d is larger than buf_pg_sz!",
+                       npages);
+               return -EINVAL;
+       }
+       mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
+                                        &(mr->pbl_dma_addr),
+                                        GFP_KERNEL);
+       if (!mr->pbl_buf)
+               return -ENOMEM;
+
+       mr->pbl_size = npages;
+       mr->pbl_ba = mr->pbl_dma_addr;
+       mr->pbl_hop_num = 1;
+       mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
+       mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
+       return 0;
+
+}
+
+
+static int pbl_2hop_alloc(struct hns_roce_dev *hr_dev, int npages,
+                              struct hns_roce_mr *mr, u32 pbl_bt_sz)
 {
        struct device *dev = hr_dev->dev;
-       int mr_alloc_done = 0;
        int npages_allocated;
-       int i = 0, j = 0;
-       u32 pbl_bt_sz;
-       u32 mhop_num;
        u64 pbl_last_bt_num;
        u64 pbl_bt_cnt = 0;
-       u64 bt_idx;
        u64 size;
+       int i;
 
-       mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
-       pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
        pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
 
-       if (mhop_num == HNS_ROCE_HOP_NUM_0)
-               return 0;
-
-       /* hop_num = 1 */
-       if (mhop_num == 1) {
-               if (npages > pbl_bt_sz / 8) {
-                       dev_err(dev, "npages %d is larger than buf_pg_sz!",
-                               npages);
-                       return -EINVAL;
+       /* alloc L1 BT */
+       for (i = 0; i < pbl_bt_sz / 8; i++) {
+               if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
+                       size = pbl_bt_sz;
+               } else {
+                       npages_allocated = i * (pbl_bt_sz / 8);
+                       size = (npages - npages_allocated) * 8;
                }
-               mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
-                                                &(mr->pbl_dma_addr),
-                                                GFP_KERNEL);
-               if (!mr->pbl_buf)
+               mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
+                                           &(mr->pbl_l1_dma_addr[i]),
+                                           GFP_KERNEL);
+               if (!mr->pbl_bt_l1[i]) {
+                       hns_roce_loop_free(hr_dev, mr, 1, i, 0);
                        return -ENOMEM;
+               }
 
-               mr->pbl_size = npages;
-               mr->pbl_ba = mr->pbl_dma_addr;
-               mr->pbl_hop_num = mhop_num;
-               mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
-               mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
-               return 0;
+               *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
+
+               pbl_bt_cnt++;
+               if (pbl_bt_cnt >= pbl_last_bt_num)
+                       break;
        }
 
-       mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
-                                     sizeof(*mr->pbl_l1_dma_addr),
+       mr->l0_chunk_last_num = i + 1;
+
+       return 0;
+}
+
+static int pbl_3hop_alloc(struct hns_roce_dev *hr_dev, int npages,
+                              struct hns_roce_mr *mr, u32 pbl_bt_sz)
+{
+       struct device *dev = hr_dev->dev;
+       int mr_alloc_done = 0;
+       int npages_allocated;
+       u64 pbl_last_bt_num;
+       u64 pbl_bt_cnt = 0;
+       u64 bt_idx;
+       u64 size;
+       int i;
+       int j = 0;
+
+       pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
+
+       mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
+                                     sizeof(*mr->pbl_l2_dma_addr),
                                      GFP_KERNEL);
-       if (!mr->pbl_l1_dma_addr)
+       if (!mr->pbl_l2_dma_addr)
                return -ENOMEM;
 
-       mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
+       mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
+                               sizeof(*mr->pbl_bt_l2),
                                GFP_KERNEL);
-       if (!mr->pbl_bt_l1)
-               goto err_kcalloc_bt_l1;
-
-       if (mhop_num == 3) {
-               mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
-                                             sizeof(*mr->pbl_l2_dma_addr),
-                                             GFP_KERNEL);
-               if (!mr->pbl_l2_dma_addr)
-                       goto err_kcalloc_l2_dma;
+       if (!mr->pbl_bt_l2)
+               goto err_kcalloc_bt_l2;
+
+       /* alloc L1, L2 BT */
+       for (i = 0; i < pbl_bt_sz / 8; i++) {
+               mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
+                                           &(mr->pbl_l1_dma_addr[i]),
+                                           GFP_KERNEL);
+               if (!mr->pbl_bt_l1[i]) {
+                       hns_roce_loop_free(hr_dev, mr, 1, i, 0);
+                       goto err_dma_alloc_l0;
+               }
 
-               mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
-                                       sizeof(*mr->pbl_bt_l2),
-                                       GFP_KERNEL);
-               if (!mr->pbl_bt_l2)
-                       goto err_kcalloc_bt_l2;
-       }
+               *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
 
-       /* alloc L0 BT */
-       mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
-                                          &(mr->pbl_l0_dma_addr),
-                                          GFP_KERNEL);
-       if (!mr->pbl_bt_l0)
-               goto err_dma_alloc_l0;
+               for (j = 0; j < pbl_bt_sz / 8; j++) {
+                       bt_idx = i * pbl_bt_sz / 8 + j;
 
-       if (mhop_num == 2) {
-               /* alloc L1 BT */
-               for (i = 0; i < pbl_bt_sz / 8; i++) {
                        if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
                                size = pbl_bt_sz;
                        } else {
-                               npages_allocated = i * (pbl_bt_sz / 8);
+                               npages_allocated = bt_idx *
+                                                  (pbl_bt_sz / 8);
                                size = (npages - npages_allocated) * 8;
                        }
-                       mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
-                                                   &(mr->pbl_l1_dma_addr[i]),
-                                                   GFP_KERNEL);
-                       if (!mr->pbl_bt_l1[i]) {
-                               hns_roce_loop_free(hr_dev, mr, 1, i, 0);
+                       mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
+                                     dev, size,
+                                     &(mr->pbl_l2_dma_addr[bt_idx]),
+                                     GFP_KERNEL);
+                       if (!mr->pbl_bt_l2[bt_idx]) {
+                               hns_roce_loop_free(hr_dev, mr, 2, i, j);
                                goto err_dma_alloc_l0;
                        }
 
-                       *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
+                       *(mr->pbl_bt_l1[i] + j) =
+                                       mr->pbl_l2_dma_addr[bt_idx];
 
                        pbl_bt_cnt++;
-                       if (pbl_bt_cnt >= pbl_last_bt_num)
+                       if (pbl_bt_cnt >= pbl_last_bt_num) {
+                               mr_alloc_done = 1;
                                break;
-               }
-       } else if (mhop_num == 3) {
-               /* alloc L1, L2 BT */
-               for (i = 0; i < pbl_bt_sz / 8; i++) {
-                       mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
-                                                   &(mr->pbl_l1_dma_addr[i]),
-                                                   GFP_KERNEL);
-                       if (!mr->pbl_bt_l1[i]) {
-                               hns_roce_loop_free(hr_dev, mr, 1, i, 0);
-                               goto err_dma_alloc_l0;
                        }
+               }
 
-                       *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
+               if (mr_alloc_done)
+                       break;
+       }
 
-                       for (j = 0; j < pbl_bt_sz / 8; j++) {
-                               bt_idx = i * pbl_bt_sz / 8 + j;
+       mr->l0_chunk_last_num = i + 1;
+       mr->l1_chunk_last_num = j + 1;
 
-                               if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
-                                       size = pbl_bt_sz;
-                               } else {
-                                       npages_allocated = bt_idx *
-                                                          (pbl_bt_sz / 8);
-                                       size = (npages - npages_allocated) * 8;
-                               }
-                               mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
-                                             dev, size,
-                                             &(mr->pbl_l2_dma_addr[bt_idx]),
-                                             GFP_KERNEL);
-                               if (!mr->pbl_bt_l2[bt_idx]) {
-                                       hns_roce_loop_free(hr_dev, mr, 2, i, j);
-                                       goto err_dma_alloc_l0;
-                               }
 
-                               *(mr->pbl_bt_l1[i] + j) =
-                                               mr->pbl_l2_dma_addr[bt_idx];
+       return 0;
 
-                               pbl_bt_cnt++;
-                               if (pbl_bt_cnt >= pbl_last_bt_num) {
-                                       mr_alloc_done = 1;
-                                       break;
-                               }
-                       }
+err_dma_alloc_l0:
+       kfree(mr->pbl_bt_l2);
+       mr->pbl_bt_l2 = NULL;
 
-                       if (mr_alloc_done)
-                               break;
-               }
+err_kcalloc_bt_l2:
+       kfree(mr->pbl_l2_dma_addr);
+       mr->pbl_l2_dma_addr = NULL;
+
+       return -ENOMEM;
+}
+
+
+/* PBL multi hop addressing */
+static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
+                              struct hns_roce_mr *mr)
+{
+       struct device *dev = hr_dev->dev;
+       u32 pbl_bt_sz;
+       u32 mhop_num;
+
+       mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
+       pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
+
+       if (mhop_num == HNS_ROCE_HOP_NUM_0)
+               return 0;
+
+       if (mhop_num == 1)
+               return pbl_1hop_alloc(hr_dev, npages, mr, pbl_bt_sz);
+
+       mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
+                                     sizeof(*mr->pbl_l1_dma_addr),
+                                     GFP_KERNEL);
+       if (!mr->pbl_l1_dma_addr)
+               return -ENOMEM;
+
+       mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
+                               GFP_KERNEL);
+       if (!mr->pbl_bt_l1)
+               goto err_kcalloc_bt_l1;
+
+       /* alloc L0 BT */
+       mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
+                                          &(mr->pbl_l0_dma_addr),
+                                          GFP_KERNEL);
+       if (!mr->pbl_bt_l0)
+               goto err_kcalloc_l2_dma;
+
+       if (mhop_num == 2) {
+               if (pbl_2hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
+                       goto err_kcalloc_l2_dma;
+       }
+
+       if (mhop_num == 3) {
+               if (pbl_3hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
+                       goto err_kcalloc_l2_dma;
        }
 
-       mr->l0_chunk_last_num = i + 1;
-       if (mhop_num == 3)
-               mr->l1_chunk_last_num = j + 1;
 
        mr->pbl_size = npages;
        mr->pbl_ba = mr->pbl_l0_dma_addr;
@@ -505,14 +557,6 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
 
        return 0;
 
-err_dma_alloc_l0:
-       kfree(mr->pbl_bt_l2);
-       mr->pbl_bt_l2 = NULL;
-
-err_kcalloc_bt_l2:
-       kfree(mr->pbl_l2_dma_addr);
-       mr->pbl_l2_dma_addr = NULL;
-
 err_kcalloc_l2_dma:
        kfree(mr->pbl_bt_l1);
        mr->pbl_bt_l1 = NULL;
@@ -1161,6 +1205,83 @@ err_free:
        return ERR_PTR(ret);
 }
 
+/*
+ * rereg_mr_trans() - replace the user memory range backing an MR.
+ *
+ * Drops the MR's current PBL (skipped when mr->size is ~0ULL) and its umem,
+ * obtains a new umem for @start/@length with ib_umem_get(), allocates a PBL
+ * sized for the new page count, calls the hardware rereg_write_mtpt() hook
+ * with the buffer of @mailbox and finally writes the page list through
+ * hns_roce_ib_umem_write_mr().
+ *
+ * Return: 0 on success, otherwise a negative errno. If a step after
+ * ib_umem_get() fails, the new umem is released before returning.
+ */
+static int rereg_mr_trans(struct ib_mr *ibmr, int flags,
+                         u64 start, u64 length,
+                         u64 virt_addr, int mr_access_flags,
+                         struct hns_roce_cmd_mailbox *mailbox,
+                         u32 pdn, struct ib_udata *udata)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+       struct hns_roce_mr *mr = to_hr_mr(ibmr);
+       struct device *dev = hr_dev->dev;
+       int npages;
+       int ret;
+
+       /* Tear down the old translation before getting the new umem */
+       if (mr->size != ~0ULL) {
+               npages = ib_umem_page_count(mr->umem);
+
+               if (hr_dev->caps.pbl_hop_num)
+                       hns_roce_mhop_free(hr_dev, mr);
+               else
+                       dma_free_coherent(dev, npages * 8,
+                                         mr->pbl_buf, mr->pbl_dma_addr);
+       }
+       ib_umem_release(mr->umem);
+
+       mr->umem = ib_umem_get(udata, start, length, mr_access_flags, 0);
+       if (IS_ERR(mr->umem)) {
+               /* Report the real failure reason, not a blanket -ENOMEM */
+               ret = PTR_ERR(mr->umem);
+               mr->umem = NULL;
+               return ret;
+       }
+       npages = ib_umem_page_count(mr->umem);
+
+       /* Allocate the PBL: multi-hop tables or one flat coherent buffer */
+       if (hr_dev->caps.pbl_hop_num) {
+               ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
+               if (ret)
+                       goto release_umem;
+       } else {
+               mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
+                                                &(mr->pbl_dma_addr),
+                                                GFP_KERNEL);
+               if (!mr->pbl_buf) {
+                       ret = -ENOMEM;
+                       goto release_umem;
+               }
+       }
+
+       ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
+                                          mr_access_flags, virt_addr,
+                                          length, mailbox->buf);
+       if (ret)
+               goto release_umem;
+
+       ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
+       if (ret) {
+               /* Writing the page list failed: give the new PBL back */
+               if (mr->size != ~0ULL) {
+                       npages = ib_umem_page_count(mr->umem);
+
+                       if (hr_dev->caps.pbl_hop_num)
+                               hns_roce_mhop_free(hr_dev, mr);
+                       else
+                               dma_free_coherent(dev, npages * 8,
+                                                 mr->pbl_buf,
+                                                 mr->pbl_dma_addr);
+               }
+
+               goto release_umem;
+       }
+
+       return 0;
+
+release_umem:
+       ib_umem_release(mr->umem);
+       return ret;
+}
+
+
 int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
                           u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
                           struct ib_udata *udata)
@@ -1171,7 +1292,6 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
        struct device *dev = hr_dev->dev;
        unsigned long mtpt_idx;
        u32 pdn = 0;
-       int npages;
        int ret;
 
        if (!mr->enabled)
@@ -1198,73 +1318,25 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
                pdn = to_hr_pd(pd)->pdn;
 
        if (flags & IB_MR_REREG_TRANS) {
-               if (mr->size != ~0ULL) {
-                       npages = ib_umem_page_count(mr->umem);
-
-                       if (hr_dev->caps.pbl_hop_num)
-                               hns_roce_mhop_free(hr_dev, mr);
-                       else
-                               dma_free_coherent(dev, npages * 8, mr->pbl_buf,
-                                                 mr->pbl_dma_addr);
-               }
-               ib_umem_release(mr->umem);
-
-               mr->umem =
-                       ib_umem_get(udata, start, length, mr_access_flags, 0);
-               if (IS_ERR(mr->umem)) {
-                       ret = PTR_ERR(mr->umem);
-                       mr->umem = NULL;
+               ret = rereg_mr_trans(ibmr, flags,
+                                    start, length,
+                                    virt_addr, mr_access_flags,
+                                    mailbox, pdn, udata);
+               if (ret)
                        goto free_cmd_mbox;
-               }
-               npages = ib_umem_page_count(mr->umem);
-
-               if (hr_dev->caps.pbl_hop_num) {
-                       ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
-                       if (ret)
-                               goto release_umem;
-               } else {
-                       mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
-                                                        &(mr->pbl_dma_addr),
-                                                        GFP_KERNEL);
-                       if (!mr->pbl_buf) {
-                               ret = -ENOMEM;
-                               goto release_umem;
-                       }
-               }
-       }
-
-       ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
-                                          mr_access_flags, virt_addr,
-                                          length, mailbox->buf);
-       if (ret) {
-               if (flags & IB_MR_REREG_TRANS)
-                       goto release_umem;
-               else
+       } else {
+               ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
+                                                  mr_access_flags, virt_addr,
+                                                  length, mailbox->buf);
+               if (ret)
                        goto free_cmd_mbox;
        }
 
-       if (flags & IB_MR_REREG_TRANS) {
-               ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
-               if (ret) {
-                       if (mr->size != ~0ULL) {
-                               npages = ib_umem_page_count(mr->umem);
-
-                               if (hr_dev->caps.pbl_hop_num)
-                                       hns_roce_mhop_free(hr_dev, mr);
-                               else
-                                       dma_free_coherent(dev, npages * 8,
-                                                         mr->pbl_buf,
-                                                         mr->pbl_dma_addr);
-                       }
-
-                       goto release_umem;
-               }
-       }
-
        ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, mtpt_idx);
        if (ret) {
                dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
-               goto release_umem;
+               ib_umem_release(mr->umem);
+               goto free_cmd_mbox;
        }
 
        mr->enabled = 1;
@@ -1275,9 +1347,6 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
 
        return 0;
 
-release_umem:
-       ib_umem_release(mr->umem);
-
 free_cmd_mbox:
        hns_roce_free_cmd_mailbox(hr_dev, mailbox);
 
@@ -1357,7 +1426,7 @@ static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
 {
        struct hns_roce_mr *mr = to_hr_mr(ibmr);
 
-       mr->pbl_buf[mr->npages++] = cpu_to_le64(addr);
+       mr->pbl_buf[mr->npages++] = addr;
 
        return 0;
 }
@@ -1528,10 +1597,9 @@ static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev,
                /* Save page addr, low 12 bits : 0 */
                for (i = 0; i < count; i++) {
                        if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
-                               mtts[i] = cpu_to_le64(bufs[npage] >>
-                                                       PAGE_ADDR_SHIFT);
+                               mtts[i] = bufs[npage] >> PAGE_ADDR_SHIFT;
                        else
-                               mtts[i] = cpu_to_le64(bufs[npage]);
+                               mtts[i] = bufs[npage];
 
                        npage++;
                }
index e042402..bd78ff9 100644 (file)
@@ -324,31 +324,46 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
        return 0;
 }
 
-static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
-                                    struct ib_qp_cap *cap,
-                                    struct hns_roce_qp *hr_qp,
-                                    struct hns_roce_ib_create_qp *ucmd)
+static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
+                                       struct ib_qp_cap *cap,
+                                       struct hns_roce_ib_create_qp *ucmd)
 {
        u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
        u8 max_sq_stride = ilog2(roundup_sq_stride);
-       u32 ex_sge_num;
-       u32 page_size;
-       u32 max_cnt;
 
        /* Sanity check SQ size before proceeding */
        if ((u32)(1 << ucmd->log_sq_bb_count) > hr_dev->caps.max_wqes ||
             ucmd->log_sq_stride > max_sq_stride ||
             ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) {
-               dev_err(hr_dev->dev, "check SQ size error!\n");
+               ibdev_err(&hr_dev->ib_dev, "check SQ size error!\n");
                return -EINVAL;
        }
 
        if (cap->max_send_sge > hr_dev->caps.max_sq_sg) {
-               dev_err(hr_dev->dev, "SQ sge error! max_send_sge=%d\n",
-                       cap->max_send_sge);
+               ibdev_err(&hr_dev->ib_dev, "SQ sge error! max_send_sge=%d\n",
+                         cap->max_send_sge);
                return -EINVAL;
        }
 
+       return 0;
+}
+
+static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
+                                    struct ib_qp_cap *cap,
+                                    struct hns_roce_qp *hr_qp,
+                                    struct hns_roce_ib_create_qp *ucmd)
+{
+       u32 ex_sge_num;
+       u32 page_size;
+       u32 max_cnt;
+       int ret;
+
+       ret = check_sq_size_with_integrity(hr_dev, cap, ucmd);
+       if (ret) {
+               ibdev_err(&hr_dev->ib_dev, "Sanity check sq size failed\n");
+               return ret;
+       }
+
        hr_qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count;
        hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
 
@@ -501,6 +516,35 @@ static int calc_wqe_bt_page_shift(struct hns_roce_dev *hr_dev,
        return bt_pg_shift - PAGE_SHIFT;
 }
 
+/*
+ * Fill in the extended SGE parameters (sge_cnt, sge_shift) of @hr_qp from
+ * its send queue settings and check the result against the device limit.
+ *
+ * Return: 0 on success; -EINVAL when the SQ has more than two SGEs per WQE,
+ * the PCI revision is 0x20 and sge_cnt exceeds caps.max_extend_sg.
+ */
+static int set_extend_sge_param(struct hns_roce_dev *hr_dev,
+                               struct hns_roce_qp *hr_qp)
+{
+       if (hr_dev->caps.max_sq_sg > 2 && hr_qp->ibqp.qp_type == IB_QPT_GSI) {
+               /* UD send WQEs keep all of their SGEs in the extended area */
+               hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
+                                                       hr_qp->sq.max_gs);
+               hr_qp->sge.sge_shift = 4;
+       } else if (hr_qp->sq.max_gs > 2) {
+               /* Otherwise two SGEs per WQE are left out of the count */
+               hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
+                                                       (hr_qp->sq.max_gs - 2));
+               hr_qp->sge.sge_shift = 4;
+       }
+
+       if (hr_qp->sq.max_gs > 2 && hr_dev->pci_dev->revision == 0x20 &&
+           hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
+               dev_err(hr_dev->dev, "The extended sge cnt error! sge_cnt=%d\n",
+                       hr_qp->sge.sge_cnt);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
                                       struct ib_qp_cap *cap,
                                       struct hns_roce_qp *hr_qp)
@@ -509,6 +553,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
        u32 page_size;
        u32 max_cnt;
        int size;
+       int ret;
 
        if (cap->max_send_wr  > hr_dev->caps.max_wqes  ||
            cap->max_send_sge > hr_dev->caps.max_sq_sg ||
@@ -518,8 +563,6 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
        }
 
        hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
-       hr_qp->sq_max_wqes_per_wr = 1;
-       hr_qp->sq_spare_wqes = 0;
 
        if (hr_dev->caps.min_wqes)
                max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes);
@@ -539,25 +582,10 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
        else
                hr_qp->sq.max_gs = max_cnt;
 
-       if (hr_qp->sq.max_gs > 2) {
-               hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
-                                    (hr_qp->sq.max_gs - 2));
-               hr_qp->sge.sge_shift = 4;
-       }
-
-       /* ud sqwqe's sge use extend sge */
-       if (hr_dev->caps.max_sq_sg > 2 && hr_qp->ibqp.qp_type == IB_QPT_GSI) {
-               hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
-                                    hr_qp->sq.max_gs);
-               hr_qp->sge.sge_shift = 4;
-       }
-
-       if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) {
-               if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
-                       dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n",
-                               hr_qp->sge.sge_cnt);
-                       return -EINVAL;
-               }
+       ret = set_extend_sge_param(hr_dev, hr_qp);
+       if (ret) {
+               dev_err(dev, "set extend sge parameters fail\n");
+               return ret;
        }
 
        /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
@@ -607,13 +635,57 @@ static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr)
        return 1;
 }
 
+/*
+ * Allocate the receive-inline bookkeeping of @hr_qp: one WQE descriptor per
+ * RQ entry plus a single contiguous SGE array that is carved into
+ * max_recv_sge-sized slices, one slice per WQE.
+ *
+ * On success the list and its length are stored in hr_qp->rq_inl_buf.
+ *
+ * Return: 0 on success, -ENOMEM if either allocation fails (nothing is left
+ * allocated in that case).
+ */
+static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp,
+                              struct ib_qp_init_attr *init_attr)
+{
+       u32 sge_per_wqe = init_attr->cap.max_recv_sge;
+       u32 nr_wqe = hr_qp->rq.wqe_cnt;
+       struct hns_roce_rinl_wqe *wqes;
+       u32 idx;
+
+       wqes = kcalloc(nr_wqe, sizeof(*wqes), GFP_KERNEL);
+       if (!wqes)
+               return -ENOMEM;
+
+       /* One backing array holds the inline SGEs of every WQE */
+       wqes[0].sg_list = kcalloc(nr_wqe,
+                                 sge_per_wqe *
+                                 sizeof(struct hns_roce_rinl_sge),
+                                 GFP_KERNEL);
+       if (!wqes[0].sg_list) {
+               kfree(wqes);
+               return -ENOMEM;
+       }
+
+       /* Entry 0 owns the array; the others point at their own slice */
+       for (idx = 1; idx < nr_wqe; idx++)
+               wqes[idx].sg_list = wqes[0].sg_list + idx * sge_per_wqe;
+
+       hr_qp->rq_inl_buf.wqe_cnt = nr_wqe;
+       hr_qp->rq_inl_buf.wqe_list = wqes;
+
+       return 0;
+}
+
+/*
+ * Release the receive-inline buffers of @hr_qp: first the SGE array hanging
+ * off WQE entry 0, then the WQE list itself.
+ */
+static void free_rq_inline_buf(struct hns_roce_qp *hr_qp)
+{
+       struct hns_roce_rinl_wqe *wqes = hr_qp->rq_inl_buf.wqe_list;
+
+       kfree(wqes[0].sg_list);
+       kfree(wqes);
+}
+
 static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
                                     struct ib_pd *ib_pd,
                                     struct ib_qp_init_attr *init_attr,
                                     struct ib_udata *udata, unsigned long sqpn,
                                     struct hns_roce_qp *hr_qp)
 {
-       dma_addr_t *buf_list[ARRAY_SIZE(hr_qp->regions)] = { 0 };
+       dma_addr_t *buf_list[ARRAY_SIZE(hr_qp->regions)] = { NULL };
        struct device *dev = hr_dev->dev;
        struct hns_roce_ib_create_qp ucmd;
        struct hns_roce_ib_create_qp_resp resp = {};
@@ -635,9 +707,9 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
        hr_qp->ibqp.qp_type = init_attr->qp_type;
 
        if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
-               hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_ALL_WR);
+               hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
        else
-               hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_REQ_WR);
+               hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
 
        ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, udata,
                                   hns_roce_qp_has_rq(init_attr), hr_qp);
@@ -648,33 +720,11 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 
        if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
            hns_roce_qp_has_rq(init_attr)) {
-               /* allocate recv inline buf */
-               hr_qp->rq_inl_buf.wqe_list = kcalloc(hr_qp->rq.wqe_cnt,
-                                              sizeof(struct hns_roce_rinl_wqe),
-                                              GFP_KERNEL);
-               if (!hr_qp->rq_inl_buf.wqe_list) {
-                       ret = -ENOMEM;
+               ret = alloc_rq_inline_buf(hr_qp, init_attr);
+               if (ret) {
+                       dev_err(dev, "allocate receive inline buffer failed\n");
                        goto err_out;
                }
-
-               hr_qp->rq_inl_buf.wqe_cnt = hr_qp->rq.wqe_cnt;
-
-               /* Firstly, allocate a list of sge space buffer */
-               hr_qp->rq_inl_buf.wqe_list[0].sg_list =
-                                       kcalloc(hr_qp->rq_inl_buf.wqe_cnt,
-                                              init_attr->cap.max_recv_sge *
-                                              sizeof(struct hns_roce_rinl_sge),
-                                              GFP_KERNEL);
-               if (!hr_qp->rq_inl_buf.wqe_list[0].sg_list) {
-                       ret = -ENOMEM;
-                       goto err_wqe_list;
-               }
-
-               for (i = 1; i < hr_qp->rq_inl_buf.wqe_cnt; i++)
-                       /* Secondly, reallocate the buffer */
-                       hr_qp->rq_inl_buf.wqe_list[i].sg_list =
-                               &hr_qp->rq_inl_buf.wqe_list[0].sg_list[i *
-                               init_attr->cap.max_recv_sge];
        }
 
        page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
@@ -682,14 +732,14 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
                if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
                        dev_err(dev, "ib_copy_from_udata error for create qp\n");
                        ret = -EFAULT;
-                       goto err_rq_sge_list;
+                       goto err_alloc_rq_inline_buf;
                }
 
                ret = hns_roce_set_user_sq_size(hr_dev, &init_attr->cap, hr_qp,
                                                &ucmd);
                if (ret) {
                        dev_err(dev, "hns_roce_set_user_sq_size error for create qp\n");
-                       goto err_rq_sge_list;
+                       goto err_alloc_rq_inline_buf;
                }
 
                hr_qp->umem = ib_umem_get(udata, ucmd.buf_addr,
@@ -697,7 +747,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
                if (IS_ERR(hr_qp->umem)) {
                        dev_err(dev, "ib_umem_get error for create qp\n");
                        ret = PTR_ERR(hr_qp->umem);
-                       goto err_rq_sge_list;
+                       goto err_alloc_rq_inline_buf;
                }
                hr_qp->region_cnt = split_wqe_buf_region(hr_dev, hr_qp,
                                hr_qp->regions, ARRAY_SIZE(hr_qp->regions),
@@ -758,13 +808,13 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
                    IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
                        dev_err(dev, "init_attr->create_flags error!\n");
                        ret = -EINVAL;
-                       goto err_rq_sge_list;
+                       goto err_alloc_rq_inline_buf;
                }
 
                if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) {
                        dev_err(dev, "init_attr->create_flags error!\n");
                        ret = -EINVAL;
-                       goto err_rq_sge_list;
+                       goto err_alloc_rq_inline_buf;
                }
 
                /* Set SQ size */
@@ -772,7 +822,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
                                                  hr_qp);
                if (ret) {
                        dev_err(dev, "hns_roce_set_kernel_sq_size error!\n");
-                       goto err_rq_sge_list;
+                       goto err_alloc_rq_inline_buf;
                }
 
                /* QP doorbell register address */
@@ -786,7 +836,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
                        ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0);
                        if (ret) {
                                dev_err(dev, "rq record doorbell alloc failed!\n");
-                               goto err_rq_sge_list;
+                               goto err_alloc_rq_inline_buf;
                        }
                        *hr_qp->rdb.db_record = 0;
                        hr_qp->rdb_en = 1;
@@ -826,11 +876,18 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 
                hr_qp->sq.wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64),
                                         GFP_KERNEL);
-               hr_qp->rq.wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64),
-                                        GFP_KERNEL);
-               if (!hr_qp->sq.wrid || !hr_qp->rq.wrid) {
+               if (ZERO_OR_NULL_PTR(hr_qp->sq.wrid)) {
                        ret = -ENOMEM;
-                       goto err_wrid;
+                       goto err_get_bufs;
+               }
+
+               if (hr_qp->rq.wqe_cnt) {
+                       hr_qp->rq.wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64),
+                                                GFP_KERNEL);
+                       if (ZERO_OR_NULL_PTR(hr_qp->rq.wrid)) {
+                               ret = -ENOMEM;
+                               goto err_sq_wrid;
+                       }
                }
        }
 
@@ -875,7 +932,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
        if (sqpn)
                hr_qp->doorbell_qpn = 1;
        else
-               hr_qp->doorbell_qpn = cpu_to_le64(hr_qp->qpn);
+               hr_qp->doorbell_qpn = (u32)hr_qp->qpn;
 
        if (udata) {
                ret = ib_copy_to_udata(udata, &resp,
@@ -916,8 +973,8 @@ err_wrid:
                    hns_roce_qp_has_rq(init_attr))
                        hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
        } else {
-               kfree(hr_qp->sq.wrid);
-               kfree(hr_qp->rq.wrid);
+               if (hr_qp->rq.wqe_cnt)
+                       kfree(hr_qp->rq.wrid);
        }
 
 err_sq_dbmap:
@@ -928,6 +985,10 @@ err_sq_dbmap:
                    hns_roce_qp_has_sq(init_attr))
                        hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
 
+err_sq_wrid:
+       if (!udata)
+               kfree(hr_qp->sq.wrid);
+
 err_get_bufs:
        hns_roce_free_buf_list(buf_list, hr_qp->region_cnt);
 
@@ -941,13 +1002,10 @@ err_db:
            (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB))
                hns_roce_free_db(hr_dev, &hr_qp->rdb);
 
-err_rq_sge_list:
-       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
-               kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
-
-err_wqe_list:
-       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
-               kfree(hr_qp->rq_inl_buf.wqe_list);
+err_alloc_rq_inline_buf:
+       if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
+            hns_roce_qp_has_rq(init_attr))
+               free_rq_inline_buf(hr_qp);
 
 err_out:
        return ret;
@@ -958,7 +1016,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
                                 struct ib_udata *udata)
 {
        struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
-       struct device *dev = hr_dev->dev;
+       struct ib_device *ibdev = &hr_dev->ib_dev;
        struct hns_roce_sqp *hr_sqp;
        struct hns_roce_qp *hr_qp;
        int ret;
@@ -972,7 +1030,8 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
                ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, 0,
                                                hr_qp);
                if (ret) {
-                       dev_err(dev, "Create RC QP failed\n");
+                       ibdev_err(ibdev, "Create RC QP 0x%06lx failed(%d)\n",
+                                 hr_qp->qpn, ret);
                        kfree(hr_qp);
                        return ERR_PTR(ret);
                }
@@ -984,7 +1043,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
        case IB_QPT_GSI: {
                /* Userspace is not allowed to create special QPs: */
                if (udata) {
-                       dev_err(dev, "not support usr space GSI\n");
+                       ibdev_err(ibdev, "not support usr space GSI\n");
                        return ERR_PTR(-EINVAL);
                }
 
@@ -1006,7 +1065,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
                ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata,
                                                hr_qp->ibqp.qp_num, hr_qp);
                if (ret) {
-                       dev_err(dev, "Create GSI QP failed!\n");
+                       ibdev_err(ibdev, "Create GSI QP failed!\n");
                        kfree(hr_sqp);
                        return ERR_PTR(ret);
                }
@@ -1014,7 +1073,8 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
                break;
        }
        default:{
-               dev_err(dev, "not support QP type %d\n", init_attr->qp_type);
+               ibdev_err(ibdev, "not support QP type %d\n",
+                         init_attr->qp_type);
                return ERR_PTR(-EINVAL);
        }
        }
@@ -1040,23 +1100,88 @@ int to_hr_qp_type(int qp_type)
        return transport_type;
 }
 
+/*
+ * Validate attr->path_mtu for a modify-QP request.
+ *
+ * The port is taken from attr->port_num when IB_QP_PORT is set in
+ * @attr_mask, otherwise from the QP's current port. The path MTU is rejected
+ * when it is below IB_MTU_256, above the MTU derived from that port's
+ * netdev, or (for devices whose max_mtu is at least IB_MTU_2048) above the
+ * device's max_mtu.
+ *
+ * Return: 0 if the path MTU is acceptable, -EINVAL otherwise.
+ */
+static int check_mtu_validate(struct hns_roce_dev *hr_dev,
+                             struct hns_roce_qp *hr_qp,
+                             struct ib_qp_attr *attr, int attr_mask)
+{
+       enum ib_mtu active_mtu;
+       bool above_dev_cap;
+       int port_idx;
+
+       if (attr_mask & IB_QP_PORT)
+               port_idx = attr->port_num - 1;
+       else
+               port_idx = hr_qp->port;
+
+       active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[port_idx]->mtu);
+
+       above_dev_cap = hr_dev->caps.max_mtu >= IB_MTU_2048 &&
+                       attr->path_mtu > hr_dev->caps.max_mtu;
+
+       if (above_dev_cap || attr->path_mtu < IB_MTU_256 ||
+           attr->path_mtu > active_mtu) {
+               /* Newline-terminated like the other messages in this file */
+               ibdev_err(&hr_dev->ib_dev,
+                         "attr path_mtu(%d)invalid while modify qp\n",
+                         attr->path_mtu);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Sanity-check the attributes of a modify-QP request against the device
+ * capabilities. Only the fields selected by @attr_mask are examined: port
+ * number, P_Key index, max_rd_atomic, max_dest_rd_atomic and, last, the path
+ * MTU (delegated to check_mtu_validate()).
+ *
+ * Return: 0 when every selected attribute is valid, -EINVAL otherwise.
+ */
+static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+                                 int attr_mask)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+       struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+       struct ib_device *ibdev = &hr_dev->ib_dev;
+       int port_idx;
+
+       if ((attr_mask & IB_QP_PORT) &&
+           (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) {
+               ibdev_err(ibdev, "attr port_num invalid.attr->port_num=%d\n",
+                         attr->port_num);
+               return -EINVAL;
+       }
+
+       if (attr_mask & IB_QP_PKEY_INDEX) {
+               /* Use the requested port if one is given, else the current */
+               port_idx = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) :
+                                                     hr_qp->port;
+
+               if (attr->pkey_index >= hr_dev->caps.pkey_table_len[port_idx]) {
+                       ibdev_err(ibdev,
+                                 "attr pkey_index invalid.attr->pkey_index=%d\n",
+                                 attr->pkey_index);
+                       return -EINVAL;
+               }
+       }
+
+       if ((attr_mask & IB_QP_MAX_QP_RD_ATOMIC) &&
+           attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) {
+               ibdev_err(ibdev,
+                         "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n",
+                         attr->max_rd_atomic);
+               return -EINVAL;
+       }
+
+       if ((attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) &&
+           attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) {
+               ibdev_err(ibdev,
+                         "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n",
+                         attr->max_dest_rd_atomic);
+               return -EINVAL;
+       }
+
+       if (!(attr_mask & IB_QP_PATH_MTU))
+               return 0;
+
+       return check_mtu_validate(hr_dev, hr_qp, attr, attr_mask);
+}
+
 int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                       int attr_mask, struct ib_udata *udata)
 {
        struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
        struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
        enum ib_qp_state cur_state, new_state;
-       struct device *dev = hr_dev->dev;
        int ret = -EINVAL;
-       int p;
-       enum ib_mtu active_mtu;
 
        mutex_lock(&hr_qp->mutex);
 
        cur_state = attr_mask & IB_QP_CUR_STATE ?
                    attr->cur_qp_state : (enum ib_qp_state)hr_qp->state;
-       new_state = attr_mask & IB_QP_STATE ?
-                   attr->qp_state : cur_state;
+       new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
 
        if (ibqp->uobject &&
            (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) {
@@ -1066,67 +1191,27 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                        if (hr_qp->rdb_en == 1)
                                hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
                } else {
-                       dev_warn(dev, "flush cqe is not supported in userspace!\n");
+                       ibdev_warn(&hr_dev->ib_dev,
+                                 "flush cqe is not supported in userspace!\n");
                        goto out;
                }
        }
 
        if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
                                attr_mask)) {
-               dev_err(dev, "ib_modify_qp_is_ok failed\n");
-               goto out;
-       }
-
-       if ((attr_mask & IB_QP_PORT) &&
-           (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) {
-               dev_err(dev, "attr port_num invalid.attr->port_num=%d\n",
-                       attr->port_num);
+               ibdev_err(&hr_dev->ib_dev, "ib_modify_qp_is_ok failed\n");
                goto out;
        }
 
-       if (attr_mask & IB_QP_PKEY_INDEX) {
-               p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
-               if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) {
-                       dev_err(dev, "attr pkey_index invalid.attr->pkey_index=%d\n",
-                               attr->pkey_index);
-                       goto out;
-               }
-       }
-
-       if (attr_mask & IB_QP_PATH_MTU) {
-               p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
-               active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu);
-
-               if ((hr_dev->caps.max_mtu == IB_MTU_4096 &&
-                   attr->path_mtu > IB_MTU_4096) ||
-                   (hr_dev->caps.max_mtu == IB_MTU_2048 &&
-                   attr->path_mtu > IB_MTU_2048) ||
-                   attr->path_mtu < IB_MTU_256 ||
-                   attr->path_mtu > active_mtu) {
-                       dev_err(dev, "attr path_mtu(%d)invalid while modify qp",
-                               attr->path_mtu);
-                       goto out;
-               }
-       }
-
-       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
-           attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) {
-               dev_err(dev, "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n",
-                       attr->max_rd_atomic);
-               goto out;
-       }
-
-       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
-           attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) {
-               dev_err(dev, "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n",
-                       attr->max_dest_rd_atomic);
+       ret = hns_roce_check_qp_attr(ibqp, attr, attr_mask);
+       if (ret)
                goto out;
-       }
 
        if (cur_state == new_state && cur_state == IB_QPS_RESET) {
                if (hr_dev->caps.min_wqes) {
                        ret = -EPERM;
-                       dev_err(dev, "cur_state=%d new_state=%d\n", cur_state,
+                       ibdev_err(&hr_dev->ib_dev,
+                               "cur_state=%d new_state=%d\n", cur_state,
                                new_state);
                } else {
                        ret = 0;
index 38bb548..9591457 100644 (file)
@@ -175,6 +175,76 @@ static void hns_roce_srq_free(struct hns_roce_dev *hr_dev,
        hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
 }
 
+static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata,
+                          int srq_buf_size)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+       struct hns_roce_ib_create_srq  ucmd;
+       u32 page_shift;
+       u32 npages;
+       int ret;
+
+       if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+               return -EFAULT;
+
+       srq->umem = ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0);
+       if (IS_ERR(srq->umem))
+               return PTR_ERR(srq->umem);
+
+       npages = (ib_umem_page_count(srq->umem) +
+               (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) /
+               (1 << hr_dev->caps.srqwqe_buf_pg_sz);
+       page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+       ret = hns_roce_mtt_init(hr_dev, npages, page_shift, &srq->mtt);
+       if (ret)
+               goto err_user_buf;
+
+       ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem);
+       if (ret)
+               goto err_user_srq_mtt;
+
+       /* config index queue BA */
+       srq->idx_que.umem = ib_umem_get(udata, ucmd.que_addr,
+                                       srq->idx_que.buf_size, 0, 0);
+       if (IS_ERR(srq->idx_que.umem)) {
+               dev_err(hr_dev->dev, "ib_umem_get error for index queue\n");
+               ret = PTR_ERR(srq->idx_que.umem);
+               goto err_user_srq_mtt;
+       }
+
+       ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(srq->idx_que.umem),
+                               PAGE_SHIFT, &srq->idx_que.mtt);
+
+       if (ret) {
+               dev_err(hr_dev->dev, "hns_roce_mtt_init error for idx que\n");
+               goto err_user_idx_mtt;
+       }
+
+       ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt,
+                                        srq->idx_que.umem);
+       if (ret) {
+               dev_err(hr_dev->dev,
+                       "hns_roce_ib_umem_write_mtt error for idx que\n");
+               goto err_user_idx_buf;
+       }
+
+       return 0;
+
+err_user_idx_buf:
+       hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+
+err_user_idx_mtt:
+       ib_umem_release(srq->idx_que.umem);
+
+err_user_srq_mtt:
+       hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+
+err_user_buf:
+       ib_umem_release(srq->umem);
+
+       return ret;
+}
+
 static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
                                   u32 page_shift)
 {
@@ -196,6 +266,93 @@ static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
        return 0;
 }
 
+static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+       u32 page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+       int ret;
+
+       if (hns_roce_buf_alloc(hr_dev, srq_buf_size, (1 << page_shift) * 2,
+                              &srq->buf, page_shift))
+               return -ENOMEM;
+
+       srq->head = 0;
+       srq->tail = srq->max - 1;
+
+       ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, srq->buf.page_shift,
+                               &srq->mtt);
+       if (ret)
+               goto err_kernel_buf;
+
+       ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf);
+       if (ret)
+               goto err_kernel_srq_mtt;
+
+       page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
+       ret = hns_roce_create_idx_que(srq->ibsrq.pd, srq, page_shift);
+       if (ret) {
+               dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n", ret);
+               goto err_kernel_srq_mtt;
+       }
+
+       /* Init mtt table for idx_que */
+       ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages,
+                               srq->idx_que.idx_buf.page_shift,
+                               &srq->idx_que.mtt);
+       if (ret)
+               goto err_kernel_create_idx;
+
+       /* Write buffer address into the mtt table */
+       ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt,
+                                    &srq->idx_que.idx_buf);
+       if (ret)
+               goto err_kernel_idx_buf;
+
+       srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL);
+       if (!srq->wrid) {
+               ret = -ENOMEM;
+               goto err_kernel_idx_buf;
+       }
+
+       return 0;
+
+err_kernel_idx_buf:
+       hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+
+err_kernel_create_idx:
+       hns_roce_buf_free(hr_dev, srq->idx_que.buf_size,
+                         &srq->idx_que.idx_buf);
+       kfree(srq->idx_que.bitmap);
+
+err_kernel_srq_mtt:
+       hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+
+err_kernel_buf:
+       hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+
+       return ret;
+}
+
+static void destroy_user_srq(struct hns_roce_dev *hr_dev,
+                            struct hns_roce_srq *srq)
+{
+       hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+       ib_umem_release(srq->idx_que.umem);
+       hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+       ib_umem_release(srq->umem);
+}
+
+static void destroy_kernel_srq(struct hns_roce_dev *hr_dev,
+                              struct hns_roce_srq *srq, int srq_buf_size)
+{
+       kvfree(srq->wrid);
+       hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+       hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, &srq->idx_que.idx_buf);
+       kfree(srq->idx_que.bitmap);
+       hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+       hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+}
+
 int hns_roce_create_srq(struct ib_srq *ib_srq,
                        struct ib_srq_init_attr *srq_init_attr,
                        struct ib_udata *udata)
@@ -205,9 +362,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
        struct hns_roce_srq *srq = to_hr_srq(ib_srq);
        int srq_desc_size;
        int srq_buf_size;
-       u32 page_shift;
        int ret = 0;
-       u32 npages;
        u32 cqn;
 
        /* Check the actual SRQ wqe and SRQ sge num */
@@ -233,115 +388,16 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
        srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX;
 
        if (udata) {
-               struct hns_roce_ib_create_srq  ucmd;
-
-               if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
-                       return -EFAULT;
-
-               srq->umem =
-                       ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0);
-               if (IS_ERR(srq->umem))
-                       return PTR_ERR(srq->umem);
-
-               if (hr_dev->caps.srqwqe_buf_pg_sz) {
-                       npages = (ib_umem_page_count(srq->umem) +
-                                 (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) /
-                                 (1 << hr_dev->caps.srqwqe_buf_pg_sz);
-                       page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
-                       ret = hns_roce_mtt_init(hr_dev, npages,
-                                               page_shift,
-                                               &srq->mtt);
-               } else
-                       ret = hns_roce_mtt_init(hr_dev,
-                                               ib_umem_page_count(srq->umem),
-                                               PAGE_SHIFT, &srq->mtt);
-               if (ret)
-                       goto err_buf;
-
-               ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem);
-               if (ret)
-                       goto err_srq_mtt;
-
-               /* config index queue BA */
-               srq->idx_que.umem = ib_umem_get(udata, ucmd.que_addr,
-                                               srq->idx_que.buf_size, 0, 0);
-               if (IS_ERR(srq->idx_que.umem)) {
-                       dev_err(hr_dev->dev,
-                               "ib_umem_get error for index queue\n");
-                       ret = PTR_ERR(srq->idx_que.umem);
-                       goto err_srq_mtt;
-               }
-
-               if (hr_dev->caps.idx_buf_pg_sz) {
-                       npages = (ib_umem_page_count(srq->idx_que.umem) +
-                                 (1 << hr_dev->caps.idx_buf_pg_sz) - 1) /
-                                 (1 << hr_dev->caps.idx_buf_pg_sz);
-                       page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
-                       ret = hns_roce_mtt_init(hr_dev, npages,
-                                               page_shift, &srq->idx_que.mtt);
-               } else {
-                       ret = hns_roce_mtt_init(
-                               hr_dev, ib_umem_page_count(srq->idx_que.umem),
-                               PAGE_SHIFT, &srq->idx_que.mtt);
-               }
-
+               ret = create_user_srq(srq, udata, srq_buf_size);
                if (ret) {
-                       dev_err(hr_dev->dev,
-                               "hns_roce_mtt_init error for idx que\n");
-                       goto err_idx_mtt;
-               }
-
-               ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt,
-                                                srq->idx_que.umem);
-               if (ret) {
-                       dev_err(hr_dev->dev,
-                             "hns_roce_ib_umem_write_mtt error for idx que\n");
-                       goto err_idx_buf;
+                       dev_err(hr_dev->dev, "Create user srq failed\n");
+                       goto err_srq;
                }
        } else {
-               page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
-               if (hns_roce_buf_alloc(hr_dev, srq_buf_size,
-                                      (1 << page_shift) * 2, &srq->buf,
-                                      page_shift))
-                       return -ENOMEM;
-
-               srq->head = 0;
-               srq->tail = srq->max - 1;
-
-               ret = hns_roce_mtt_init(hr_dev, srq->buf.npages,
-                                       srq->buf.page_shift, &srq->mtt);
-               if (ret)
-                       goto err_buf;
-
-               ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf);
-               if (ret)
-                       goto err_srq_mtt;
-
-               page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
-               ret = hns_roce_create_idx_que(ib_srq->pd, srq, page_shift);
+               ret = create_kernel_srq(srq, srq_buf_size);
                if (ret) {
-                       dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n",
-                               ret);
-                       goto err_srq_mtt;
-               }
-
-               /* Init mtt table for idx_que */
-               ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages,
-                                       srq->idx_que.idx_buf.page_shift,
-                                       &srq->idx_que.mtt);
-               if (ret)
-                       goto err_create_idx;
-
-               /* Write buffer address into the mtt table */
-               ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt,
-                                            &srq->idx_que.idx_buf);
-               if (ret)
-                       goto err_idx_buf;
-
-               srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL);
-               if (!srq->wrid) {
-                       ret = -ENOMEM;
-                       goto err_idx_buf;
+                       dev_err(hr_dev->dev, "Create kernel srq failed\n");
+                       goto err_srq;
                }
        }
 
@@ -356,7 +412,6 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
                goto err_wrid;
 
        srq->event = hns_roce_ib_srq_event;
-       srq->ibsrq.ext.xrc.srq_num = srq->srqn;
        resp.srqn = srq->srqn;
 
        if (udata) {
@@ -373,27 +428,12 @@ err_srqc_alloc:
        hns_roce_srq_free(hr_dev, srq);
 
 err_wrid:
-       kvfree(srq->wrid);
-
-err_idx_buf:
-       hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
-
-err_idx_mtt:
-       ib_umem_release(srq->idx_que.umem);
-
-err_create_idx:
-       hns_roce_buf_free(hr_dev, srq->idx_que.buf_size,
-                         &srq->idx_que.idx_buf);
-       bitmap_free(srq->idx_que.bitmap);
-
-err_srq_mtt:
-       hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
-
-err_buf:
-       ib_umem_release(srq->umem);
-       if (!udata)
-               hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+       if (udata)
+               destroy_user_srq(hr_dev, srq);
+       else
+               destroy_kernel_srq(hr_dev, srq, srq_buf_size);
 
+err_srq:
        return ret;
 }
 
index d169a80..8056930 100644 (file)
@@ -97,18 +97,7 @@ static int i40iw_query_port(struct ib_device *ibdev,
                            u8 port,
                            struct ib_port_attr *props)
 {
-       struct i40iw_device *iwdev = to_iwdev(ibdev);
-       struct net_device *netdev = iwdev->netdev;
-
-       /* props being zeroed by the caller, avoid zeroing it here */
-       props->max_mtu = IB_MTU_4096;
-       props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
-
        props->lid = 1;
-       if (netif_carrier_ok(iwdev->netdev))
-               props->state = IB_PORT_ACTIVE;
-       else
-               props->state = IB_PORT_DOWN;
        props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
                IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
        props->gid_tbl_len = 1;
index 8790101..8d2f1e3 100644 (file)
@@ -734,7 +734,8 @@ out:
 
 static u8 state_to_phys_state(enum ib_port_state state)
 {
-       return state == IB_PORT_ACTIVE ? 5 : 3;
+       return state == IB_PORT_ACTIVE ?
+               IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
 }
 
 static int eth_link_query_port(struct ib_device *ibdev, u8 port,
index 7534792..6ae503c 100644 (file)
@@ -377,6 +377,7 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start,
         * again
         */
        if (!ib_access_writable(access_flags)) {
+               unsigned long untagged_start = untagged_addr(start);
                struct vm_area_struct *vma;
 
                down_read(&current->mm->mmap_sem);
@@ -385,9 +386,9 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start,
                 * cover the memory, but for now it requires a single vma to
                 * entirely cover the MR to support RO mappings.
                 */
-               vma = find_vma(current->mm, start);
-               if (vma && vma->vm_end >= start + length &&
-                   vma->vm_start <= start) {
+               vma = find_vma(current->mm, untagged_start);
+               if (vma && vma->vm_end >= untagged_start + length &&
+                   vma->vm_start <= untagged_start) {
                        if (vma->vm_flags & VM_WRITE)
                                access_flags |= IB_ACCESS_LOCAL_WRITE;
                } else {
index 82aff2f..bd4aa04 100644 (file)
@@ -325,7 +325,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
 }
 
 static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
-                      bool is_user, int has_rq, struct mlx4_ib_qp *qp,
+                      bool is_user, bool has_rq, struct mlx4_ib_qp *qp,
                       u32 inl_recv_sz)
 {
        /* Sanity check RQ size before proceeding */
@@ -506,10 +506,10 @@ static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
        kfree(qp->sqp_proxy_rcv);
 }
 
-static int qp_has_rq(struct ib_qp_init_attr *attr)
+static bool qp_has_rq(struct ib_qp_init_attr *attr)
 {
        if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
-               return 0;
+               return false;
 
        return !attr->srq;
 }
@@ -855,12 +855,143 @@ static void mlx4_ib_release_wqn(struct mlx4_ib_ucontext *context,
        mutex_unlock(&context->wqn_ranges_mutex);
 }
 
-static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
-                           enum mlx4_ib_source_type src,
-                           struct ib_qp_init_attr *init_attr,
+static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
+                    struct ib_udata *udata, struct mlx4_ib_qp *qp)
+{
+       struct mlx4_ib_dev *dev = to_mdev(pd->device);
+       int qpn;
+       int err;
+       struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context(
+               udata, struct mlx4_ib_ucontext, ibucontext);
+       struct mlx4_ib_cq *mcq;
+       unsigned long flags;
+       int range_size;
+       struct mlx4_ib_create_wq wq;
+       size_t copy_len;
+       int shift;
+       int n;
+
+       qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_PACKET;
+
+       mutex_init(&qp->mutex);
+       spin_lock_init(&qp->sq.lock);
+       spin_lock_init(&qp->rq.lock);
+       INIT_LIST_HEAD(&qp->gid_list);
+       INIT_LIST_HEAD(&qp->steering_rules);
+
+       qp->state = IB_QPS_RESET;
+
+       copy_len = min(sizeof(struct mlx4_ib_create_wq), udata->inlen);
+
+       if (ib_copy_from_udata(&wq, udata, copy_len)) {
+               err = -EFAULT;
+               goto err;
+       }
+
+       if (wq.comp_mask || wq.reserved[0] || wq.reserved[1] ||
+           wq.reserved[2]) {
+               pr_debug("user command isn't supported\n");
+               err = -EOPNOTSUPP;
+               goto err;
+       }
+
+       if (wq.log_range_size > ilog2(dev->dev->caps.max_rss_tbl_sz)) {
+               pr_debug("WQN range size must be equal or smaller than %d\n",
+                        dev->dev->caps.max_rss_tbl_sz);
+               err = -EOPNOTSUPP;
+               goto err;
+       }
+       range_size = 1 << wq.log_range_size;
+
+       if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS)
+               qp->flags |= MLX4_IB_QP_SCATTER_FCS;
+
+       err = set_rq_size(dev, &init_attr->cap, true, true, qp, qp->inl_recv_sz);
+       if (err)
+               goto err;
+
+       qp->sq_no_prefetch = 1;
+       qp->sq.wqe_cnt = 1;
+       qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE;
+       qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+                      (qp->sq.wqe_cnt << qp->sq.wqe_shift);
+
+       qp->umem = ib_umem_get(udata, wq.buf_addr, qp->buf_size, 0, 0);
+       if (IS_ERR(qp->umem)) {
+               err = PTR_ERR(qp->umem);
+               goto err;
+       }
+
+       n = ib_umem_page_count(qp->umem);
+       shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
+       err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
+
+       if (err)
+               goto err_buf;
+
+       err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
+       if (err)
+               goto err_mtt;
+
+       err = mlx4_ib_db_map_user(udata, wq.db_addr, &qp->db);
+       if (err)
+               goto err_mtt;
+       qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS;
+
+       err = mlx4_ib_alloc_wqn(context, qp, range_size, &qpn);
+       if (err)
+               goto err_wrid;
+
+       err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
+       if (err)
+               goto err_qpn;
+
+       /*
+        * Hardware wants QPN written in big-endian order (after
+        * shifting) for send doorbell.  Precompute this value to save
+        * a little bit when posting sends.
+        */
+       qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
+
+       qp->mqp.event = mlx4_ib_wq_event;
+
+       spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+       mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq),
+                        to_mcq(init_attr->recv_cq));
+       /* Maintain device to QPs access, needed for further handling
+        * via reset flow
+        */
+       list_add_tail(&qp->qps_list, &dev->qp_list);
+       /* Maintain CQ to QPs access, needed for further handling
+        * via reset flow
+        */
+       mcq = to_mcq(init_attr->send_cq);
+       list_add_tail(&qp->cq_send_list, &mcq->send_qp_list);
+       mcq = to_mcq(init_attr->recv_cq);
+       list_add_tail(&qp->cq_recv_list, &mcq->recv_qp_list);
+       mlx4_ib_unlock_cqs(to_mcq(init_attr->send_cq),
+                          to_mcq(init_attr->recv_cq));
+       spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+       return 0;
+
+err_qpn:
+       mlx4_ib_release_wqn(context, qp, 0);
+err_wrid:
+       mlx4_ib_db_unmap_user(context, &qp->db);
+
+err_mtt:
+       mlx4_mtt_cleanup(dev->dev, &qp->mtt);
+err_buf:
+       ib_umem_release(qp->umem);
+err:
+       return err;
+}
+
+static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
                            struct ib_udata *udata, int sqpn,
                            struct mlx4_ib_qp **caller_qp)
 {
+       struct mlx4_ib_dev *dev = to_mdev(pd->device);
        int qpn;
        int err;
        struct mlx4_ib_sqp *sqp = NULL;
@@ -870,7 +1001,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
        enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
        struct mlx4_ib_cq *mcq;
        unsigned long flags;
-       int range_size = 0;
 
        /* When tunneling special qps, we use a plain UD qp */
        if (sqpn) {
@@ -921,15 +1051,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                        if (!sqp)
                                return -ENOMEM;
                        qp = &sqp->qp;
-                       qp->pri.vid = 0xFFFF;
-                       qp->alt.vid = 0xFFFF;
                } else {
                        qp = kzalloc(sizeof(struct mlx4_ib_qp), GFP_KERNEL);
                        if (!qp)
                                return -ENOMEM;
-                       qp->pri.vid = 0xFFFF;
-                       qp->alt.vid = 0xFFFF;
                }
+               qp->pri.vid = 0xFFFF;
+               qp->alt.vid = 0xFFFF;
        } else
                qp = *caller_qp;
 
@@ -941,48 +1069,24 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
        INIT_LIST_HEAD(&qp->gid_list);
        INIT_LIST_HEAD(&qp->steering_rules);
 
-       qp->state        = IB_QPS_RESET;
+       qp->state = IB_QPS_RESET;
        if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
                qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
 
-
        if (udata) {
-               union {
-                       struct mlx4_ib_create_qp qp;
-                       struct mlx4_ib_create_wq wq;
-               } ucmd;
+               struct mlx4_ib_create_qp ucmd;
                size_t copy_len;
                int shift;
                int n;
 
-               copy_len = (src == MLX4_IB_QP_SRC) ?
-                          sizeof(struct mlx4_ib_create_qp) :
-                          min(sizeof(struct mlx4_ib_create_wq), udata->inlen);
+               copy_len = sizeof(struct mlx4_ib_create_qp);
 
                if (ib_copy_from_udata(&ucmd, udata, copy_len)) {
                        err = -EFAULT;
                        goto err;
                }
 
-               if (src == MLX4_IB_RWQ_SRC) {
-                       if (ucmd.wq.comp_mask || ucmd.wq.reserved[0] ||
-                           ucmd.wq.reserved[1] || ucmd.wq.reserved[2]) {
-                               pr_debug("user command isn't supported\n");
-                               err = -EOPNOTSUPP;
-                               goto err;
-                       }
-
-                       if (ucmd.wq.log_range_size >
-                           ilog2(dev->dev->caps.max_rss_tbl_sz)) {
-                               pr_debug("WQN range size must be equal or smaller than %d\n",
-                                        dev->dev->caps.max_rss_tbl_sz);
-                               err = -EOPNOTSUPP;
-                               goto err;
-                       }
-                       range_size = 1 << ucmd.wq.log_range_size;
-               } else {
-                       qp->inl_recv_sz = ucmd.qp.inl_recv_sz;
-               }
+               qp->inl_recv_sz = ucmd.inl_recv_sz;
 
                if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) {
                        if (!(dev->dev->caps.flags &
@@ -1000,30 +1104,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                if (err)
                        goto err;
 
-               if (src == MLX4_IB_QP_SRC) {
-                       qp->sq_no_prefetch = ucmd.qp.sq_no_prefetch;
+               qp->sq_no_prefetch = ucmd.sq_no_prefetch;
 
-                       err = set_user_sq_size(dev, qp,
-                                              (struct mlx4_ib_create_qp *)
-                                              &ucmd);
-                       if (err)
-                               goto err;
-               } else {
-                       qp->sq_no_prefetch = 1;
-                       qp->sq.wqe_cnt = 1;
-                       qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE;
-                       /* Allocated buffer expects to have at least that SQ
-                        * size.
-                        */
-                       qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
-                               (qp->sq.wqe_cnt << qp->sq.wqe_shift);
-               }
+               err = set_user_sq_size(dev, qp, &ucmd);
+               if (err)
+                       goto err;
 
                qp->umem =
-                       ib_umem_get(udata,
-                                   (src == MLX4_IB_QP_SRC) ? ucmd.qp.buf_addr :
-                                                             ucmd.wq.buf_addr,
-                                   qp->buf_size, 0, 0);
+                       ib_umem_get(udata, ucmd.buf_addr, qp->buf_size, 0, 0);
                if (IS_ERR(qp->umem)) {
                        err = PTR_ERR(qp->umem);
                        goto err;
@@ -1041,11 +1129,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                        goto err_mtt;
 
                if (qp_has_rq(init_attr)) {
-                       err = mlx4_ib_db_map_user(udata,
-                                                 (src == MLX4_IB_QP_SRC) ?
-                                                         ucmd.qp.db_addr :
-                                                         ucmd.wq.db_addr,
-                                                 &qp->db);
+                       err = mlx4_ib_db_map_user(udata, ucmd.db_addr, &qp->db);
                        if (err)
                                goto err_mtt;
                }
@@ -1115,10 +1199,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                                goto err_wrid;
                        }
                }
-       } else if (src == MLX4_IB_RWQ_SRC) {
-               err = mlx4_ib_alloc_wqn(context, qp, range_size, &qpn);
-               if (err)
-                       goto err_wrid;
        } else {
                /* Raw packet QPNs may not have bits 6,7 set in their qp_num;
                 * otherwise, the WQE BlueFlame setup flow wrongly causes
@@ -1157,8 +1237,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
         */
        qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
 
-       qp->mqp.event = (src == MLX4_IB_QP_SRC) ? mlx4_ib_qp_event :
-                                                 mlx4_ib_wq_event;
+       qp->mqp.event = mlx4_ib_qp_event;
 
        if (!*caller_qp)
                *caller_qp = qp;
@@ -1186,8 +1265,6 @@ err_qpn:
        if (!sqpn) {
                if (qp->flags & MLX4_IB_QP_NETIF)
                        mlx4_ib_steer_qp_free(dev, qpn, 1);
-               else if (src == MLX4_IB_RWQ_SRC)
-                       mlx4_ib_release_wqn(context, qp, 0);
                else
                        mlx4_qp_release_range(dev->dev, qpn, 1);
        }
@@ -1518,8 +1595,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
                /* fall through */
        case IB_QPT_UD:
        {
-               err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC,
-                                      init_attr, udata, 0, &qp);
+               err = create_qp_common(pd, init_attr, udata, 0, &qp);
                if (err) {
                        kfree(qp);
                        return ERR_PTR(err);
@@ -1549,8 +1625,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
                        sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
                }
 
-               err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC,
-                                      init_attr, udata, sqpn, &qp);
+               err = create_qp_common(pd, init_attr, udata, sqpn, &qp);
                if (err)
                        return ERR_PTR(err);
 
@@ -4047,8 +4122,8 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
                                struct ib_wq_init_attr *init_attr,
                                struct ib_udata *udata)
 {
-       struct mlx4_ib_dev *dev;
-       struct ib_qp_init_attr ib_qp_init_attr;
+       struct mlx4_dev *dev = to_mdev(pd->device)->dev;
+       struct ib_qp_init_attr ib_qp_init_attr = {};
        struct mlx4_ib_qp *qp;
        struct mlx4_ib_create_wq ucmd;
        int err, required_cmd_sz;
@@ -4073,14 +4148,13 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
        if (udata->outlen)
                return ERR_PTR(-EOPNOTSUPP);
 
-       dev = to_mdev(pd->device);
-
        if (init_attr->wq_type != IB_WQT_RQ) {
                pr_debug("unsupported wq type %d\n", init_attr->wq_type);
                return ERR_PTR(-EOPNOTSUPP);
        }
 
-       if (init_attr->create_flags & ~IB_WQ_FLAGS_SCATTER_FCS) {
+       if (init_attr->create_flags & ~IB_WQ_FLAGS_SCATTER_FCS ||
+           !(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
                pr_debug("unsupported create_flags %u\n",
                         init_attr->create_flags);
                return ERR_PTR(-EOPNOTSUPP);
@@ -4093,7 +4167,6 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
        qp->pri.vid = 0xFFFF;
        qp->alt.vid = 0xFFFF;
 
-       memset(&ib_qp_init_attr, 0, sizeof(ib_qp_init_attr));
        ib_qp_init_attr.qp_context = init_attr->wq_context;
        ib_qp_init_attr.qp_type = IB_QPT_RAW_PACKET;
        ib_qp_init_attr.cap.max_recv_wr = init_attr->max_wr;
@@ -4104,8 +4177,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
        if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS)
                ib_qp_init_attr.create_flags |= IB_QP_CREATE_SCATTER_FCS;
 
-       err = create_qp_common(dev, pd, MLX4_IB_RWQ_SRC, &ib_qp_init_attr,
-                              udata, 0, &qp);
+       err = create_rq(pd, &ib_qp_init_attr, udata, qp);
        if (err) {
                kfree(qp);
                return ERR_PTR(err);
index 25b6482..59022b7 100644 (file)
@@ -233,6 +233,8 @@ static bool is_legacy_obj_event_num(u16 event_num)
        case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
        case MLX5_EVENT_TYPE_DCT_DRAINED:
        case MLX5_EVENT_TYPE_COMP:
+       case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
+       case MLX5_EVENT_TYPE_XRQ_ERROR:
                return true;
        default:
                return false;
@@ -315,8 +317,10 @@ static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe)
        case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
                return eqe->data.qp_srq.type;
        case MLX5_EVENT_TYPE_CQ_ERROR:
+       case MLX5_EVENT_TYPE_XRQ_ERROR:
                return 0;
        case MLX5_EVENT_TYPE_DCT_DRAINED:
+       case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
                return MLX5_EVENT_QUEUE_TYPE_DCT;
        default:
                return MLX5_GET(affiliated_event_header, &eqe->data, obj_type);
@@ -542,6 +546,8 @@ static u64 devx_get_obj_id(const void *in)
                break;
        case MLX5_CMD_OP_ARM_XRQ:
        case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
+       case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
+       case MLX5_CMD_OP_MODIFY_XRQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
                                        MLX5_GET(arm_xrq_in, in, xrqn));
                break;
@@ -776,6 +782,14 @@ static bool devx_is_obj_create_cmd(const void *in, u16 *opcode)
                        return true;
                return false;
        }
+       case MLX5_CMD_OP_CREATE_PSV:
+       {
+               u8 num_psv = MLX5_GET(create_psv_in, in, num_psv);
+
+               if (num_psv == 1)
+                       return true;
+               return false;
+       }
        default:
                return false;
        }
@@ -810,6 +824,8 @@ static bool devx_is_obj_modify_cmd(const void *in)
        case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
        case MLX5_CMD_OP_ARM_XRQ:
        case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
+       case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
+       case MLX5_CMD_OP_MODIFY_XRQ:
                return true;
        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
        {
@@ -1216,6 +1232,12 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
        case MLX5_CMD_OP_ALLOC_XRCD:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
                break;
+       case MLX5_CMD_OP_CREATE_PSV:
+               MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+                        MLX5_CMD_OP_DESTROY_PSV);
+               MLX5_SET(destroy_psv_in, din, psvn,
+                        MLX5_GET(create_psv_out, out, psv0_index));
+               break;
        default:
                /* The entry must match to one of the devx_is_obj_create_cmd */
                WARN_ON(true);
@@ -2286,7 +2308,11 @@ static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data)
        case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
                obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
                break;
+       case MLX5_EVENT_TYPE_XRQ_ERROR:
+               obj_id = be32_to_cpu(eqe->data.xrq_err.type_xrqn) & 0xffffff;
+               break;
        case MLX5_EVENT_TYPE_DCT_DRAINED:
+       case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
                obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
                break;
        case MLX5_EVENT_TYPE_CQ_ERROR:
index 1c8f04a..b198ff1 100644 (file)
@@ -32,6 +32,9 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
        case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
                *namespace = MLX5_FLOW_NAMESPACE_FDB;
                break;
+       case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
+               *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
+               break;
        default:
                return -EINVAL;
        }
@@ -101,6 +104,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
        if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !dest_devx)
                return -EINVAL;
 
+       /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
+       if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+           ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
+               return -EINVAL;
+
        if (dest_devx) {
                devx_obj = uverbs_attr_get_obj(
                        attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
@@ -112,8 +120,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
                 */
                if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type))
                        return -EINVAL;
-               /* Allow only flow table as dest when inserting to FDB */
-               if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
+               /* Allow only flow table as dest when inserting to FDB or RDMA_RX */
+               if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB ||
+                    fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
                    dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
                        return -EINVAL;
        } else if (dest_qp) {
index 4e9f150..8315394 100644 (file)
@@ -535,7 +535,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
        props->max_msg_sz       = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
        props->pkey_tbl_len     = 1;
        props->state            = IB_PORT_DOWN;
-       props->phys_state       = 3;
+       props->phys_state       = IB_PORT_PHYS_STATE_DISABLED;
 
        mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
        props->qkey_viol_cntr = qkey_viol_cntr;
@@ -561,7 +561,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
 
        if (netif_running(ndev) && netif_carrier_ok(ndev)) {
                props->state      = IB_PORT_ACTIVE;
-               props->phys_state = 5;
+               props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
        }
 
        ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
@@ -1867,10 +1867,6 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
        if (err)
                goto out_sys_pages;
 
-       if (ibdev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)
-               context->ibucontext.invalidate_range =
-                       &mlx5_ib_invalidate_range;
-
        if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
                err = mlx5_ib_devx_create(dev, true);
                if (err < 0)
@@ -1999,11 +1995,6 @@ static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
        struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
        struct mlx5_bfreg_info *bfregi;
 
-       /* All umem's must be destroyed before destroying the ucontext. */
-       mutex_lock(&ibcontext->per_mm_list_lock);
-       WARN_ON(!list_empty(&ibcontext->per_mm_list));
-       mutex_unlock(&ibcontext->per_mm_list_lock);
-
        bfregi = &context->bfregi;
        mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
 
@@ -3980,6 +3971,11 @@ _get_flow_table(struct mlx5_ib_dev *dev,
                    esw_encap)
                        flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
                priority = FDB_BYPASS_PATH;
+       } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
+               max_table_size =
+                       BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+                                                      log_max_ft_size));
+               priority = fs_matcher->priority;
        }
 
        max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
@@ -3994,6 +3990,8 @@ _get_flow_table(struct mlx5_ib_dev *dev,
                prio = &dev->flow_db->egress_prios[priority];
        else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB)
                prio = &dev->flow_db->fdb;
+       else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX)
+               prio = &dev->flow_db->rdma_rx[priority];
 
        if (!prio)
                return ERR_PTR(-EINVAL);
@@ -5335,11 +5333,21 @@ static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
        INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
 };
 
+static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
+{
+       return MLX5_ESWITCH_MANAGER(mdev) &&
+              mlx5_ib_eswitch_mode(mdev->priv.eswitch) ==
+                      MLX5_ESWITCH_OFFLOADS;
+}
+
 static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
 {
+       int num_cnt_ports;
        int i;
 
-       for (i = 0; i < dev->num_ports; i++) {
+       num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
+
+       for (i = 0; i < num_cnt_ports; i++) {
                if (dev->port[i].cnts.set_id_valid)
                        mlx5_core_dealloc_q_counter(dev->mdev,
                                                    dev->port[i].cnts.set_id);
@@ -5441,13 +5449,15 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
 
 static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
 {
+       int num_cnt_ports;
        int err = 0;
        int i;
        bool is_shared;
 
        is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
+       num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
 
-       for (i = 0; i < dev->num_ports; i++) {
+       for (i = 0; i < num_cnt_ports; i++) {
                err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
                if (err)
                        goto err_alloc;
@@ -5467,7 +5477,6 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
                }
                dev->port[i].cnts.set_id_valid = true;
        }
-
        return 0;
 
 err_alloc:
@@ -5475,25 +5484,50 @@ err_alloc:
        return err;
 }
 
+static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
+                                                  u8 port_num)
+{
+       return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
+                                                  &dev->port[port_num].cnts;
+}
+
+/**
+ * mlx5_ib_get_counters_id - Return the counters set id to use for device+port
+ * @dev:       Pointer to mlx5 IB device
+ * @port_num:  Zero based port number
+ *
+ * The counters are looked up through get_counters(): when the parent
+ * device is in switchdev mode the counters of port index 0 are used and
+ * @port_num is ignored; otherwise the counters of dev->port[@port_num]
+ * are used.
+ *
+ * Return: the set_id stored in the selected counters structure.
+ */
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
+{
+       const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
+
+       return cnts->set_id;
+}
+
 static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
                                                    u8 port_num)
 {
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
-       struct mlx5_ib_port *port = &dev->port[port_num - 1];
+       const struct mlx5_ib_counters *cnts;
+       bool is_switchdev = is_mdev_switchdev_mode(dev->mdev);
 
-       /* We support only per port stats */
-       if (port_num == 0)
+       if ((is_switchdev && port_num) || (!is_switchdev && !port_num))
                return NULL;
 
-       return rdma_alloc_hw_stats_struct(port->cnts.names,
-                                         port->cnts.num_q_counters +
-                                         port->cnts.num_cong_counters +
-                                         port->cnts.num_ext_ppcnt_counters,
+       cnts = get_counters(dev, port_num - 1);
+
+       return rdma_alloc_hw_stats_struct(cnts->names,
+                                         cnts->num_q_counters +
+                                         cnts->num_cong_counters +
+                                         cnts->num_ext_ppcnt_counters,
                                          RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
 static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
-                                   struct mlx5_ib_port *port,
+                                   const struct mlx5_ib_counters *cnts,
                                    struct rdma_hw_stats *stats,
                                    u16 set_id)
 {
@@ -5510,8 +5544,8 @@ static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
        if (ret)
                goto free;
 
-       for (i = 0; i < port->cnts.num_q_counters; i++) {
-               val = *(__be32 *)(out + port->cnts.offsets[i]);
+       for (i = 0; i < cnts->num_q_counters; i++) {
+               val = *(__be32 *)(out + cnts->offsets[i]);
                stats->value[i] = (u64)be32_to_cpu(val);
        }
 
@@ -5521,10 +5555,10 @@ free:
 }
 
 static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
-                                         struct mlx5_ib_port *port,
-                                         struct rdma_hw_stats *stats)
+                                           const struct mlx5_ib_counters *cnts,
+                                           struct rdma_hw_stats *stats)
 {
-       int offset = port->cnts.num_q_counters + port->cnts.num_cong_counters;
+       int offset = cnts->num_q_counters + cnts->num_cong_counters;
        int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
        int ret, i;
        void *out;
@@ -5537,12 +5571,10 @@ static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
        if (ret)
                goto free;
 
-       for (i = 0; i < port->cnts.num_ext_ppcnt_counters; i++) {
+       for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
                stats->value[i + offset] =
                        be64_to_cpup((__be64 *)(out +
-                                   port->cnts.offsets[i + offset]));
-       }
-
+                                   cnts->offsets[i + offset]));
 free:
        kvfree(out);
        return ret;
@@ -5553,7 +5585,7 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
                                u8 port_num, int index)
 {
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
-       struct mlx5_ib_port *port = &dev->port[port_num - 1];
+       const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
        struct mlx5_core_dev *mdev;
        int ret, num_counters;
        u8 mdev_port_num;
@@ -5561,18 +5593,17 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
        if (!stats)
                return -EINVAL;
 
-       num_counters = port->cnts.num_q_counters +
-                      port->cnts.num_cong_counters +
-                      port->cnts.num_ext_ppcnt_counters;
+       num_counters = cnts->num_q_counters +
+                      cnts->num_cong_counters +
+                      cnts->num_ext_ppcnt_counters;
 
        /* q_counters are per IB device, query the master mdev */
-       ret = mlx5_ib_query_q_counters(dev->mdev, port, stats,
-                                      port->cnts.set_id);
+       ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
        if (ret)
                return ret;
 
        if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
-               ret =  mlx5_ib_query_ext_ppcnt_counters(dev, port, stats);
+               ret =  mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
                if (ret)
                        return ret;
        }
@@ -5589,10 +5620,10 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
                }
                ret = mlx5_lag_query_cong_counters(dev->mdev,
                                                   stats->value +
-                                                  port->cnts.num_q_counters,
-                                                  port->cnts.num_cong_counters,
-                                                  port->cnts.offsets +
-                                                  port->cnts.num_q_counters);
+                                                  cnts->num_q_counters,
+                                                  cnts->num_cong_counters,
+                                                  cnts->offsets +
+                                                  cnts->num_q_counters);
 
                mlx5_ib_put_native_port_mdev(dev, port_num);
                if (ret)
@@ -5607,20 +5638,22 @@ static struct rdma_hw_stats *
 mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
 {
        struct mlx5_ib_dev *dev = to_mdev(counter->device);
-       struct mlx5_ib_port *port = &dev->port[counter->port - 1];
+       const struct mlx5_ib_counters *cnts =
+               get_counters(dev, counter->port - 1);
 
        /* Q counters are in the beginning of all counters */
-       return rdma_alloc_hw_stats_struct(port->cnts.names,
-                                         port->cnts.num_q_counters,
+       return rdma_alloc_hw_stats_struct(cnts->names,
+                                         cnts->num_q_counters,
                                          RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
 static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
 {
        struct mlx5_ib_dev *dev = to_mdev(counter->device);
-       struct mlx5_ib_port *port = &dev->port[counter->port - 1];
+       const struct mlx5_ib_counters *cnts =
+               get_counters(dev, counter->port - 1);
 
-       return mlx5_ib_query_q_counters(dev->mdev, port,
+       return mlx5_ib_query_q_counters(dev->mdev, cnts,
                                        counter->stats, counter->id);
 }
 
@@ -5797,7 +5830,6 @@ static void init_delay_drop(struct mlx5_ib_dev *dev)
                mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
 }
 
-/* The mlx5_ib_multiport_mutex should be held when calling this function */
 static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
                                      struct mlx5_ib_multiport_info *mpi)
 {
@@ -5807,6 +5839,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
        int err;
        int i;
 
+       lockdep_assert_held(&mlx5_ib_multiport_mutex);
+
        mlx5_ib_cleanup_cong_debugfs(ibdev, port_num);
 
        spin_lock(&port->mp.mpi_lock);
@@ -5856,13 +5890,14 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
        ibdev->port[port_num].roce.last_port_state = IB_PORT_DOWN;
 }
 
-/* The mlx5_ib_multiport_mutex should be held when calling this function */
 static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
                                    struct mlx5_ib_multiport_info *mpi)
 {
        u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
        int err;
 
+       lockdep_assert_held(&mlx5_ib_multiport_mutex);
+
        spin_lock(&ibdev->port[port_num].mp.mpi_lock);
        if (ibdev->port[port_num].mp.mpi) {
                mlx5_ib_dbg(ibdev, "port %d already affiliated.\n",
@@ -6891,7 +6926,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
        dev->port = kcalloc(num_ports, sizeof(*dev->port),
                             GFP_KERNEL);
        if (!dev->port) {
-               ib_dealloc_device((struct ib_device *)dev);
+               ib_dealloc_device(&dev->ib_dev);
                return NULL;
        }
 
@@ -6918,6 +6953,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
                        mlx5_ib_unbind_slave_port(mpi->ibdev, mpi);
                list_del(&mpi->list);
                mutex_unlock(&mlx5_ib_multiport_mutex);
+               kfree(mpi);
                return;
        }
 
index a40e0ab..b5aece7 100644 (file)
@@ -56,19 +56,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
        struct scatterlist *sg;
        int entry;
 
-       if (umem->is_odp) {
-               struct ib_umem_odp *odp = to_ib_umem_odp(umem);
-               unsigned int page_shift = odp->page_shift;
-
-               *ncont = ib_umem_odp_num_pages(odp);
-               *count = *ncont << (page_shift - PAGE_SHIFT);
-               *shift = page_shift;
-               if (order)
-                       *order = ilog2(roundup_pow_of_two(*ncont));
-
-               return;
-       }
-
        addr = addr >> PAGE_SHIFT;
        tmp = (unsigned long)addr;
        m = find_first_bit(&tmp, BITS_PER_LONG);
index 125a507..2ceaef3 100644 (file)
@@ -200,6 +200,7 @@ struct mlx5_ib_flow_db {
        struct mlx5_ib_flow_prio        sniffer[MLX5_IB_NUM_SNIFFER_FTS];
        struct mlx5_ib_flow_prio        egress[MLX5_IB_NUM_EGRESS_FTS];
        struct mlx5_ib_flow_prio        fdb;
+       struct mlx5_ib_flow_prio        rdma_rx[MLX5_IB_NUM_FLOW_FT];
        struct mlx5_flow_table          *lag_demux_ft;
        /* Protect flow steering bypass flow tables
         * when add/del flow rules.
@@ -1476,6 +1477,7 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
                        bool dyn_bfreg);
 
 int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num);
 
 static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev,
                                       bool do_modify_atomic)
index 3401f5f..1eff031 100644 (file)
@@ -784,19 +784,37 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
                       int *ncont, int *order)
 {
        struct ib_umem *u;
-       int err;
 
        *umem = NULL;
 
-       u = ib_umem_get(udata, start, length, access_flags, 0);
-       err = PTR_ERR_OR_ZERO(u);
-       if (err) {
-               mlx5_ib_dbg(dev, "umem get failed (%d)\n", err);
-               return err;
+       if (access_flags & IB_ACCESS_ON_DEMAND) {
+               struct ib_umem_odp *odp;
+
+               odp = ib_umem_odp_get(udata, start, length, access_flags);
+               if (IS_ERR(odp)) {
+                       mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
+                                   PTR_ERR(odp));
+                       return PTR_ERR(odp);
+               }
+
+               u = &odp->umem;
+
+               *page_shift = odp->page_shift;
+               *ncont = ib_umem_odp_num_pages(odp);
+               *npages = *ncont << (*page_shift - PAGE_SHIFT);
+               if (order)
+                       *order = ilog2(roundup_pow_of_two(*ncont));
+       } else {
+               u = ib_umem_get(udata, start, length, access_flags, 0);
+               if (IS_ERR(u)) {
+                       mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
+                       return PTR_ERR(u);
+               }
+
+               mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
+                                  page_shift, ncont, order);
        }
 
-       mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
-                          page_shift, ncont, order);
        if (!*npages) {
                mlx5_ib_warn(dev, "avoid zero region\n");
                ib_umem_release(u);
@@ -1599,7 +1617,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
                /* Wait for all running page-fault handlers to finish. */
                synchronize_srcu(&dev->mr_srcu);
                /* Destroy all page mappings */
-               if (umem_odp->page_list)
+               if (!umem_odp->is_implicit_odp)
                        mlx5_ib_invalidate_range(umem_odp,
                                                 ib_umem_start(umem_odp),
                                                 ib_umem_end(umem_odp));
@@ -1610,7 +1628,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
                 * so that there will not be any invalidations in
                 * flight, looking at the *mr struct.
                 */
-               ib_umem_release(umem);
+               ib_umem_odp_release(umem_odp);
                atomic_sub(npages, &dev->mdev->priv.reg_pages);
 
                /* Avoid double-freeing the umem. */
index 0a59912..2e9b430 100644 (file)
@@ -184,7 +184,7 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
        for (i = 0; i < nentries; i++, pklm++) {
                pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
                va = (offset + i) * MLX5_IMR_MTT_SIZE;
-               if (odp && odp->umem.address == va) {
+               if (odp && ib_umem_start(odp) == va) {
                        struct mlx5_ib_mr *mtt = odp->private;
 
                        pklm->key = cpu_to_be32(mtt->ibmr.lkey);
@@ -206,7 +206,7 @@ static void mr_leaf_free_action(struct work_struct *work)
        mr->parent = NULL;
        synchronize_srcu(&mr->dev->mr_srcu);
 
-       ib_umem_release(&odp->umem);
+       ib_umem_odp_release(odp);
        if (imr->live)
                mlx5_ib_update_xlt(imr, idx, 1, 0,
                                   MLX5_IB_UPD_XLT_INDIRECT |
@@ -386,7 +386,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
 }
 
 static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
-                                           struct ib_umem *umem,
+                                           struct ib_umem_odp *umem_odp,
                                            bool ksm, int access_flags)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -404,7 +404,7 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
        mr->dev = dev;
        mr->access_flags = access_flags;
        mr->mmkey.iova = 0;
-       mr->umem = umem;
+       mr->umem = &umem_odp->umem;
 
        if (ksm) {
                err = mlx5_ib_update_xlt(mr, 0,
@@ -464,18 +464,17 @@ next_mr:
                if (nentries)
                        nentries++;
        } else {
-               odp = ib_alloc_odp_umem(odp_mr, addr,
-                                       MLX5_IMR_MTT_SIZE);
+               odp = ib_umem_odp_alloc_child(odp_mr, addr, MLX5_IMR_MTT_SIZE);
                if (IS_ERR(odp)) {
                        mutex_unlock(&odp_mr->umem_mutex);
                        return ERR_CAST(odp);
                }
 
-               mtt = implicit_mr_alloc(mr->ibmr.pd, &odp->umem, 0,
+               mtt = implicit_mr_alloc(mr->ibmr.pd, odp, 0,
                                        mr->access_flags);
                if (IS_ERR(mtt)) {
                        mutex_unlock(&odp_mr->umem_mutex);
-                       ib_umem_release(&odp->umem);
+                       ib_umem_odp_release(odp);
                        return ERR_CAST(mtt);
                }
 
@@ -497,7 +496,7 @@ next_mr:
        addr += MLX5_IMR_MTT_SIZE;
        if (unlikely(addr < io_virt + bcnt)) {
                odp = odp_next(odp);
-               if (odp && odp->umem.address != addr)
+               if (odp && ib_umem_start(odp) != addr)
                        odp = NULL;
                goto next_mr;
        }
@@ -521,19 +520,19 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
                                             int access_flags)
 {
        struct mlx5_ib_mr *imr;
-       struct ib_umem *umem;
+       struct ib_umem_odp *umem_odp;
 
-       umem = ib_umem_get(udata, 0, 0, access_flags, 0);
-       if (IS_ERR(umem))
-               return ERR_CAST(umem);
+       umem_odp = ib_umem_odp_alloc_implicit(udata, access_flags);
+       if (IS_ERR(umem_odp))
+               return ERR_CAST(umem_odp);
 
-       imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags);
+       imr = implicit_mr_alloc(&pd->ibpd, umem_odp, 1, access_flags);
        if (IS_ERR(imr)) {
-               ib_umem_release(umem);
+               ib_umem_odp_release(umem_odp);
                return ERR_CAST(imr);
        }
 
-       imr->umem = umem;
+       imr->umem = &umem_odp->umem;
        init_waitqueue_head(&imr->q_leaf_free);
        atomic_set(&imr->num_leaf_free, 0);
        atomic_set(&imr->num_pending_prefetch, 0);
@@ -541,34 +540,31 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
        return imr;
 }
 
-static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end,
-                       void *cookie)
+void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
 {
-       struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie;
-
-       if (mr->parent != imr)
-               return 0;
-
-       ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
-                                   ib_umem_end(umem_odp));
+       struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr);
+       struct rb_node *node;
 
-       if (umem_odp->dying)
-               return 0;
+       down_read(&per_mm->umem_rwsem);
+       for (node = rb_first_cached(&per_mm->umem_tree); node;
+            node = rb_next(node)) {
+               struct ib_umem_odp *umem_odp =
+                       rb_entry(node, struct ib_umem_odp, interval_tree.rb);
+               struct mlx5_ib_mr *mr = umem_odp->private;
 
-       WRITE_ONCE(umem_odp->dying, 1);
-       atomic_inc(&imr->num_leaf_free);
-       schedule_work(&umem_odp->work);
+               if (mr->parent != imr)
+                       continue;
 
-       return 0;
-}
+               ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+                                           ib_umem_end(umem_odp));
 
-void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
-{
-       struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr);
+               if (umem_odp->dying)
+                       continue;
 
-       down_read(&per_mm->umem_rwsem);
-       rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, 0, ULLONG_MAX,
-                                     mr_leaf_free, true, imr);
+               WRITE_ONCE(umem_odp->dying, 1);
+               atomic_inc(&imr->num_leaf_free);
+               schedule_work(&umem_odp->work);
+       }
        up_read(&per_mm->umem_rwsem);
 
        wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
@@ -589,7 +585,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
        struct ib_umem_odp *odp;
        size_t size;
 
-       if (!odp_mr->page_list) {
+       if (odp_mr->is_implicit_odp) {
                odp = implicit_mr_get_data(mr, io_virt, bcnt);
 
                if (IS_ERR(odp))
@@ -607,7 +603,7 @@ next_mr:
        start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
        access_mask = ODP_READ_ALLOWED_BIT;
 
-       if (prefetch && !downgrade && !mr->umem->writable) {
+       if (prefetch && !downgrade && !odp->umem.writable) {
                /* prefetch with write-access must
                 * be supported by the MR
                 */
@@ -615,7 +611,7 @@ next_mr:
                goto out;
        }
 
-       if (mr->umem->writable && !downgrade)
+       if (odp->umem.writable && !downgrade)
                access_mask |= ODP_WRITE_ALLOWED_BIT;
 
        current_seq = READ_ONCE(odp->notifiers_seq);
@@ -625,8 +621,8 @@ next_mr:
         */
        smp_rmb();
 
-       ret = ib_umem_odp_map_dma_pages(to_ib_umem_odp(mr->umem), io_virt, size,
-                                       access_mask, current_seq);
+       ret = ib_umem_odp_map_dma_pages(odp, io_virt, size, access_mask,
+                                       current_seq);
 
        if (ret < 0)
                goto out;
@@ -634,8 +630,7 @@ next_mr:
        np = ret;
 
        mutex_lock(&odp->umem_mutex);
-       if (!ib_umem_mmu_notifier_retry(to_ib_umem_odp(mr->umem),
-                                       current_seq)) {
+       if (!ib_umem_mmu_notifier_retry(odp, current_seq)) {
                /*
                 * No need to check whether the MTTs really belong to
                 * this MR, since ib_umem_odp_map_dma_pages already
@@ -668,7 +663,7 @@ next_mr:
 
                io_virt += size;
                next = odp_next(odp);
-               if (unlikely(!next || next->umem.address != io_virt)) {
+               if (unlikely(!next || ib_umem_start(next) != io_virt)) {
                        mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
                                    io_virt, next);
                        return -EAGAIN;
@@ -987,17 +982,6 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
        return ret < 0 ? ret : npages;
 }
 
-static const u32 mlx5_ib_odp_opcode_cap[] = {
-       [MLX5_OPCODE_SEND]             = IB_ODP_SUPPORT_SEND,
-       [MLX5_OPCODE_SEND_IMM]         = IB_ODP_SUPPORT_SEND,
-       [MLX5_OPCODE_SEND_INVAL]       = IB_ODP_SUPPORT_SEND,
-       [MLX5_OPCODE_RDMA_WRITE]       = IB_ODP_SUPPORT_WRITE,
-       [MLX5_OPCODE_RDMA_WRITE_IMM]   = IB_ODP_SUPPORT_WRITE,
-       [MLX5_OPCODE_RDMA_READ]        = IB_ODP_SUPPORT_READ,
-       [MLX5_OPCODE_ATOMIC_CS]        = IB_ODP_SUPPORT_ATOMIC,
-       [MLX5_OPCODE_ATOMIC_FA]        = IB_ODP_SUPPORT_ATOMIC,
-};
-
 /*
  * Parse initiator WQE. Advances the wqe pointer to point at the
  * scatter-gather list, and set wqe_end to the end of the WQE.
@@ -1008,12 +992,8 @@ static int mlx5_ib_mr_initiator_pfault_handler(
 {
        struct mlx5_wqe_ctrl_seg *ctrl = *wqe;
        u16 wqe_index = pfault->wqe.wqe_index;
-       u32 transport_caps;
        struct mlx5_base_av *av;
        unsigned ds, opcode;
-#if defined(DEBUG)
-       u32 ctrl_wqe_index, ctrl_qpn;
-#endif
        u32 qpn = qp->trans_qp.base.mqp.qpn;
 
        ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
@@ -1029,58 +1009,17 @@ static int mlx5_ib_mr_initiator_pfault_handler(
                return -EFAULT;
        }
 
-#if defined(DEBUG)
-       ctrl_wqe_index = (be32_to_cpu(ctrl->opmod_idx_opcode) &
-                       MLX5_WQE_CTRL_WQE_INDEX_MASK) >>
-                       MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
-       if (wqe_index != ctrl_wqe_index) {
-               mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
-                           wqe_index, qpn,
-                           ctrl_wqe_index);
-               return -EFAULT;
-       }
-
-       ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
-               MLX5_WQE_CTRL_QPN_SHIFT;
-       if (qpn != ctrl_qpn) {
-               mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
-                           wqe_index, qpn,
-                           ctrl_qpn);
-               return -EFAULT;
-       }
-#endif /* DEBUG */
-
        *wqe_end = *wqe + ds * MLX5_WQE_DS_UNITS;
        *wqe += sizeof(*ctrl);
 
        opcode = be32_to_cpu(ctrl->opmod_idx_opcode) &
                 MLX5_WQE_CTRL_OPCODE_MASK;
 
-       switch (qp->ibqp.qp_type) {
-       case IB_QPT_XRC_INI:
+       if (qp->ibqp.qp_type == IB_QPT_XRC_INI)
                *wqe += sizeof(struct mlx5_wqe_xrc_seg);
-               transport_caps = dev->odp_caps.per_transport_caps.xrc_odp_caps;
-               break;
-       case IB_QPT_RC:
-               transport_caps = dev->odp_caps.per_transport_caps.rc_odp_caps;
-               break;
-       case IB_QPT_UD:
-               transport_caps = dev->odp_caps.per_transport_caps.ud_odp_caps;
-               break;
-       default:
-               mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport 0x%x\n",
-                           qp->ibqp.qp_type);
-               return -EFAULT;
-       }
 
-       if (unlikely(opcode >= ARRAY_SIZE(mlx5_ib_odp_opcode_cap) ||
-                    !(transport_caps & mlx5_ib_odp_opcode_cap[opcode]))) {
-               mlx5_ib_err(dev, "ODP fault on QP of an unsupported opcode 0x%x\n",
-                           opcode);
-               return -EFAULT;
-       }
-
-       if (qp->ibqp.qp_type == IB_QPT_UD) {
+       if (qp->ibqp.qp_type == IB_QPT_UD ||
+           qp->qp_sub_type == MLX5_IB_QPT_DCI) {
                av = *wqe;
                if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
                        *wqe += sizeof(struct mlx5_av);
@@ -1143,19 +1082,6 @@ static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev,
                return -EFAULT;
        }
 
-       switch (qp->ibqp.qp_type) {
-       case IB_QPT_RC:
-               if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
-                     IB_ODP_SUPPORT_RECV))
-                       goto invalid_transport_or_opcode;
-               break;
-       default:
-invalid_transport_or_opcode:
-               mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport. transport: 0x%x\n",
-                           qp->ibqp.qp_type);
-               return -EFAULT;
-       }
-
        *wqe_end = wqe + wqe_size;
 
        return 0;
@@ -1205,7 +1131,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
 {
        bool sq = pfault->type & MLX5_PFAULT_REQUESTOR;
        u16 wqe_index = pfault->wqe.wqe_index;
-       void *wqe = NULL, *wqe_end = NULL;
+       void *wqe, *wqe_start = NULL, *wqe_end = NULL;
        u32 bytes_mapped, total_wqe_bytes;
        struct mlx5_core_rsc_common *res;
        int resume_with_error = 1;
@@ -1226,12 +1152,13 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
                goto resolve_page_fault;
        }
 
-       wqe = (void *)__get_free_page(GFP_KERNEL);
-       if (!wqe) {
+       wqe_start = (void *)__get_free_page(GFP_KERNEL);
+       if (!wqe_start) {
                mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
                goto resolve_page_fault;
        }
 
+       wqe = wqe_start;
        qp = (res->res == MLX5_RES_QP) ? res_to_qp(res) : NULL;
        if (qp && sq) {
                ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index, wqe, PAGE_SIZE,
@@ -1286,7 +1213,7 @@ resolve_page_fault:
                    pfault->wqe.wq_num, resume_with_error,
                    pfault->type);
        mlx5_core_res_put(res);
-       free_page((unsigned long)wqe);
+       free_page((unsigned long)wqe_start);
 }
 
 static int pages_in_range(u64 address, u32 length)
@@ -1618,6 +1545,7 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
 
 static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
        .advise_mr = mlx5_ib_advise_mr,
+       .invalidate_range = mlx5_ib_invalidate_range,
 };
 
 int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
index 72869ff..8937d72 100644 (file)
@@ -3386,19 +3386,16 @@ static int __mlx5_ib_qp_set_counter(struct ib_qp *qp,
        struct mlx5_ib_dev *dev = to_mdev(qp->device);
        struct mlx5_ib_qp *mqp = to_mqp(qp);
        struct mlx5_qp_context context = {};
-       struct mlx5_ib_port *mibport = NULL;
        struct mlx5_ib_qp_base *base;
        u32 set_id;
 
        if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id))
                return 0;
 
-       if (counter) {
+       if (counter)
                set_id = counter->id;
-       } else {
-               mibport = &dev->port[mqp->port - 1];
-               set_id = mibport->cnts.set_id;
-       }
+       else
+               set_id = mlx5_ib_get_counters_id(dev, mqp->port - 1);
 
        base = &mqp->trans_qp.base;
        context.qp_counter_set_usr_page &= cpu_to_be32(0xffffff);
@@ -3459,7 +3456,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
        struct mlx5_ib_cq *send_cq, *recv_cq;
        struct mlx5_qp_context *context;
        struct mlx5_ib_pd *pd;
-       struct mlx5_ib_port *mibport = NULL;
        enum mlx5_qp_state mlx5_cur, mlx5_new;
        enum mlx5_qp_optpar optpar;
        u32 set_id = 0;
@@ -3624,11 +3620,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
                if (qp->flags & MLX5_IB_QP_UNDERLAY)
                        port_num = 0;
 
-               mibport = &dev->port[port_num];
                if (ibqp->counter)
                        set_id = ibqp->counter->id;
                else
-                       set_id = mibport->cnts.set_id;
+                       set_id = mlx5_ib_get_counters_id(dev, port_num);
                context->qp_counter_set_usr_page |=
                        cpu_to_be32(set_id << 24);
        }
@@ -3817,6 +3812,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
        dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
        if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+               u16 set_id;
+
                required |= IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
                if (!is_valid_mask(attr_mask, required, 0))
                        return -EINVAL;
@@ -3843,7 +3840,9 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                }
                MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index);
                MLX5_SET(dctc, dctc, port, attr->port_num);
-               MLX5_SET(dctc, dctc, counter_set_id, dev->port[attr->port_num - 1].cnts.set_id);
+
+               set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1);
+               MLX5_SET(dctc, dctc, counter_set_id, set_id);
 
        } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
                struct mlx5_ib_modify_qp_resp resp = {};
@@ -6345,11 +6344,13 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
        }
 
        if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) {
+               u16 set_id;
+
+               set_id = mlx5_ib_get_counters_id(dev, 0);
                if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
                        MLX5_SET64(modify_rq_in, in, modify_bitmask,
                                   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
-                       MLX5_SET(rqc, rqc, counter_set_id,
-                                dev->port->cnts.set_id);
+                       MLX5_SET(rqc, rqc, counter_set_id, set_id);
                } else
                        dev_info_once(
                                &dev->ib_dev.dev,
index bccc113..e8267e5 100644 (file)
@@ -163,10 +163,10 @@ int ocrdma_query_port(struct ib_device *ibdev,
        netdev = dev->nic_info.netdev;
        if (netif_running(netdev) && netif_oper_up(netdev)) {
                port_state = IB_PORT_ACTIVE;
-               props->phys_state = 5;
+               props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
        } else {
                port_state = IB_PORT_DOWN;
-               props->phys_state = 3;
+               props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
        }
        props->max_mtu = IB_MTU_4096;
        props->active_mtu = iboe_get_mtu(netdev->mtu);
index f97b3d6..5136b83 100644 (file)
@@ -826,7 +826,7 @@ static int qedr_init_hw(struct qedr_dev *dev)
        if (rc)
                goto out;
 
-       dev->db_addr = (void __iomem *)(uintptr_t)out_params.dpi_addr;
+       dev->db_addr = out_params.dpi_addr;
        dev->db_phys_addr = out_params.dpi_phys_addr;
        dev->db_size = out_params.dpi_size;
        dev->dpi = out_params.dpi;
index a92ca22..0cfd849 100644 (file)
@@ -229,7 +229,7 @@ struct qedr_ucontext {
        struct ib_ucontext ibucontext;
        struct qedr_dev *dev;
        struct qedr_pd *pd;
-       u64 dpi_addr;
+       void __iomem *dpi_addr;
        u64 dpi_phys_addr;
        u32 dpi_size;
        u16 dpi;
index 27d90a8..6f3ce86 100644 (file)
@@ -221,10 +221,10 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
        /* *attr being zeroed by the caller, avoid zeroing it here */
        if (rdma_port->port_state == QED_RDMA_PORT_UP) {
                attr->state = IB_PORT_ACTIVE;
-               attr->phys_state = 5;
+               attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
        } else {
                attr->state = IB_PORT_DOWN;
-               attr->phys_state = 3;
+               attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
        }
        attr->max_mtu = IB_MTU_4096;
        attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
@@ -2451,7 +2451,6 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
        struct qedr_dev *dev = qp->dev;
        struct ib_qp_attr attr;
        int attr_mask = 0;
-       int rc = 0;
 
        DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
                 qp, qp->qp_type);
@@ -2496,7 +2495,7 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
                xa_erase_irq(&dev->qps, qp->qp_id);
                kfree(qp);
        }
-       return rc;
+       return 0;
 }
 
 int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags,
index 27b6e66..b014422 100644 (file)
@@ -1789,7 +1789,6 @@ static void unlock_expected_tids(struct qib_ctxtdata *rcd)
 
 static int qib_close(struct inode *in, struct file *fp)
 {
-       int ret = 0;
        struct qib_filedata *fd;
        struct qib_ctxtdata *rcd;
        struct qib_devdata *dd;
@@ -1873,7 +1872,7 @@ static int qib_close(struct inode *in, struct file *fp)
 
 bail:
        kfree(fd);
-       return ret;
+       return 0;
 }
 
 static int qib_ctxt_info(struct file *fp, struct qib_ctxt_info __user *uinfo)
index 1d5e2d4..aaf7438 100644 (file)
@@ -313,11 +313,8 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
                case IB_WR_SEND:
                case IB_WR_SEND_WITH_IMM:
                        /* If no credit, return. */
-                       if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
-                           rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
-                               qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+                       if (!rvt_rc_credit_avail(qp, wqe))
                                goto bail;
-                       }
                        if (len > pmtu) {
                                qp->s_state = OP(SEND_FIRST);
                                len = pmtu;
@@ -344,11 +341,8 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
                        goto no_flow_control;
                case IB_WR_RDMA_WRITE_WITH_IMM:
                        /* If no credit, return. */
-                       if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
-                           rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
-                               qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+                       if (!rvt_rc_credit_avail(qp, wqe))
                                goto bail;
-                       }
 no_flow_control:
                        ohdr->u.rc.reth.vaddr =
                                cpu_to_be64(wqe->rdma_wr.remote_addr);
index 905206a..3926be7 100644 (file)
@@ -436,6 +436,7 @@ QIB_DIAGC_ATTR(dmawait);
 QIB_DIAGC_ATTR(unaligned);
 QIB_DIAGC_ATTR(rc_dupreq);
 QIB_DIAGC_ATTR(rc_seqnak);
+QIB_DIAGC_ATTR(rc_crwaits);
 
 static struct attribute *diagc_default_attributes[] = {
        &qib_diagc_attr_rc_resends.attr,
@@ -453,6 +454,7 @@ static struct attribute *diagc_default_attributes[] = {
        &qib_diagc_attr_unaligned.attr,
        &qib_diagc_attr_rc_dupreq.attr,
        &qib_diagc_attr_rc_seqnak.attr,
+       &qib_diagc_attr_rc_crwaits.attr,
        NULL
 };
 
index bfbfbb7..6bf764e 100644 (file)
 static void __qib_release_user_pages(struct page **p, size_t num_pages,
                                     int dirty)
 {
-       if (dirty)
-               put_user_pages_dirty_lock(p, num_pages);
-       else
-               put_user_pages(p, num_pages);
+       put_user_pages_dirty_lock(p, num_pages, dirty);
 }
 
 /**
index 03f54eb..c9abe1c 100644 (file)
@@ -89,9 +89,15 @@ static void usnic_ib_dump_vf(struct usnic_ib_vf *vf, char *buf, int buf_sz)
 
 void usnic_ib_log_vf(struct usnic_ib_vf *vf)
 {
-       char buf[1000];
-       usnic_ib_dump_vf(vf, buf, sizeof(buf));
+       char *buf = kzalloc(1000, GFP_KERNEL);
+
+       if (!buf)
+               return;
+
+       usnic_ib_dump_vf(vf, buf, 1000);
        usnic_dbg("%s\n", buf);
+
+       kfree(buf);
 }
 
 /* Start of netdev section */
index eeb07b2..556b8e4 100644 (file)
@@ -194,7 +194,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
                        return ERR_CAST(dev_list);
                for (i = 0; dev_list[i]; i++) {
                        dev = dev_list[i];
-                       vf = pci_get_drvdata(to_pci_dev(dev));
+                       vf = dev_get_drvdata(dev);
                        spin_lock(&vf->lock);
                        vnic = vf->vnic;
                        if (!usnic_vnic_check_room(vnic, res_spec)) {
@@ -356,13 +356,14 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
 
        if (!us_ibdev->ufdev->link_up) {
                props->state = IB_PORT_DOWN;
-               props->phys_state = 3;
+               props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
        } else if (!us_ibdev->ufdev->inaddr) {
                props->state = IB_PORT_INIT;
-               props->phys_state = 4;
+               props->phys_state =
+                       IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING;
        } else {
                props->state = IB_PORT_ACTIVE;
-               props->phys_state = 5;
+               props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
        }
 
        props->port_cap_flags = 0;
index 0b0237d..62e6ffa 100644 (file)
@@ -75,10 +75,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
                for_each_sg(chunk->page_list, sg, chunk->nents, i) {
                        page = sg_page(sg);
                        pa = sg_phys(sg);
-                       if (dirty)
-                               put_user_pages_dirty_lock(&page, 1);
-                       else
-                               put_user_page(page);
+                       put_user_pages_dirty_lock(&page, 1, dirty);
                        usnic_dbg("pa: %pa\n", &pa);
                }
                kfree(chunk);
index ecf6e65..fb07eed 100644 (file)
  */
 #define RXE_UVERBS_ABI_VERSION         2
 
-#define RDMA_LINK_PHYS_STATE_LINK_UP   (5)
-#define RDMA_LINK_PHYS_STATE_DISABLED  (3)
-#define RDMA_LINK_PHYS_STATE_POLLING   (2)
-
 #define RXE_ROCE_V2_SPORT              (0xc000)
 
 static inline u32 rxe_crc32(struct rxe_dev *rxe,
index 1abed47..fe52073 100644 (file)
@@ -154,7 +154,7 @@ enum rxe_port_param {
        RXE_PORT_ACTIVE_WIDTH           = IB_WIDTH_1X,
        RXE_PORT_ACTIVE_SPEED           = 1,
        RXE_PORT_PKEY_TBL_LEN           = 64,
-       RXE_PORT_PHYS_STATE             = 2,
+       RXE_PORT_PHYS_STATE             = IB_PORT_PHYS_STATE_POLLING,
        RXE_PORT_SUBNET_PREFIX          = 0xfe80000000000000ULL,
 };
 
index 4ebdfcf..623129f 100644 (file)
@@ -69,11 +69,11 @@ static int rxe_query_port(struct ib_device *dev,
                              &attr->active_width);
 
        if (attr->state == IB_PORT_ACTIVE)
-               attr->phys_state = RDMA_LINK_PHYS_STATE_LINK_UP;
+               attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
        else if (dev_get_flags(rxe->ndev) & IFF_UP)
-               attr->phys_state = RDMA_LINK_PHYS_STATE_POLLING;
+               attr->phys_state = IB_PORT_PHYS_STATE_POLLING;
        else
-               attr->phys_state = RDMA_LINK_PHYS_STATE_DISABLED;
+               attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
 
        mutex_unlock(&rxe->usdev_lock);
 
index 87a5603..e99983f 100644 (file)
@@ -63,15 +63,7 @@ struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
 static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
                           bool dirty)
 {
-       struct page **p = chunk->plist;
-
-       while (num_pages--) {
-               if (!PageDirty(*p) && dirty)
-                       put_user_pages_dirty_lock(p, 1);
-               else
-                       put_user_page(*p);
-               p++;
-       }
+       put_user_pages_dirty_lock(chunk->plist, num_pages, dirty);
 }
 
 void siw_umem_release(struct siw_umem *umem, bool dirty)
index 438a291..5d97bba 100644 (file)
@@ -76,16 +76,15 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr)
                        if (unlikely(!p))
                                return -EFAULT;
 
-                       buffer = kmap_atomic(p);
+                       buffer = kmap(p);
 
                        if (likely(PAGE_SIZE - off >= bytes)) {
                                memcpy(paddr, buffer + off, bytes);
-                               kunmap_atomic(buffer);
                        } else {
                                unsigned long part = bytes - (PAGE_SIZE - off);
 
                                memcpy(paddr, buffer + off, part);
-                               kunmap_atomic(buffer);
+                               kunmap(p);
 
                                if (!mem->is_pbl)
                                        p = siw_get_upage(mem->umem,
@@ -97,11 +96,10 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr)
                                if (unlikely(!p))
                                        return -EFAULT;
 
-                               buffer = kmap_atomic(p);
-                               memcpy(paddr + part, buffer,
-                                      bytes - part);
-                               kunmap_atomic(buffer);
+                               buffer = kmap(p);
+                               memcpy(paddr + part, buffer, bytes - part);
                        }
+                       kunmap(p);
                }
        }
        return (int)bytes;
@@ -518,11 +516,12 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
                                                        c_tx->mpa_crc_hd,
                                                        iov[seg].iov_base,
                                                        plen);
-                               } else if (do_crc)
-                                       crypto_shash_update(
-                                               c_tx->mpa_crc_hd,
-                                               page_address(p) + fp_off,
-                                               plen);
+                               } else if (do_crc) {
+                                       crypto_shash_update(c_tx->mpa_crc_hd,
+                                                           kmap(p) + fp_off,
+                                                           plen);
+                                       kunmap(p);
+                               }
                        } else {
                                u64 va = sge->laddr + sge_off;
 
index da52c90..869e02b 100644 (file)
@@ -206,7 +206,8 @@ int siw_query_port(struct ib_device *base_dev, u8 port,
        attr->gid_tbl_len = 1;
        attr->max_msg_sz = -1;
        attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
-       attr->phys_state = sdev->state == IB_PORT_ACTIVE ? 5 : 3;
+       attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
+               IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
        attr->pkey_tbl_len = 1;
        attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
        attr->state = sdev->state;
index 39bf213..52ce635 100644 (file)
 
 /* Default support is 512KB I/O size */
 #define ISER_DEF_MAX_SECTORS           1024
-#define ISCSI_ISER_DEF_SG_TABLESIZE    ((ISER_DEF_MAX_SECTORS * 512) >> SHIFT_4K)
-/* Maximum support is 8MB I/O size */
-#define ISCSI_ISER_MAX_SG_TABLESIZE    ((16384 * 512) >> SHIFT_4K)
+#define ISCSI_ISER_DEF_SG_TABLESIZE                                            \
+       ((ISER_DEF_MAX_SECTORS * SECTOR_SIZE) >> SHIFT_4K)
+/* Maximum support is 16MB I/O size */
+#define ISCSI_ISER_MAX_SG_TABLESIZE    ((32768 * SECTOR_SIZE) >> SHIFT_4K)
 
 #define ISER_DEF_XMIT_CMDS_DEFAULT             512
 #if ISCSI_DEF_XMIT_CMDS_MAX > ISER_DEF_XMIT_CMDS_DEFAULT
index 1a039f1..e25c70a 100644 (file)
@@ -1767,8 +1767,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
                goto out;
 
 retry:
-       ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + sq_size,
-                       0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE);
+       ch->cq = ib_alloc_cq_any(sdev->device, ch, ch->rq_size + sq_size,
+                                IB_POLL_WORKQUEUE);
        if (IS_ERR(ch->cq)) {
                ret = PTR_ERR(ch->cq);
                pr_err("failed to create CQ cqe= %d ret= %d\n",
index 80e10f4..ccbb897 100644 (file)
@@ -315,6 +315,18 @@ config INGENIC_IRQ
        depends on MACH_INGENIC
        default y
 
+config INGENIC_TCU_IRQ
+       bool "Ingenic JZ47xx TCU interrupt controller"
+       default MACH_INGENIC
+       depends on MIPS || COMPILE_TEST
+       select MFD_SYSCON
+       select GENERIC_IRQ_CHIP
+       help
+         Support for interrupts in the Timer/Counter Unit (TCU) of the Ingenic
+         JZ47xx SoCs.
+
+         If unsure, say N.
+
 config RENESAS_H8300H_INTC
         bool
        select IRQ_DOMAIN
index 8d0fcec..cc7c439 100644 (file)
@@ -75,6 +75,7 @@ obj-$(CONFIG_RENESAS_H8300H_INTC)     += irq-renesas-h8300h.o
 obj-$(CONFIG_RENESAS_H8S_INTC)         += irq-renesas-h8s.o
 obj-$(CONFIG_ARCH_SA1100)              += irq-sa11x0.o
 obj-$(CONFIG_INGENIC_IRQ)              += irq-ingenic.o
+obj-$(CONFIG_INGENIC_TCU_IRQ)          += irq-ingenic-tcu.o
 obj-$(CONFIG_IMX_GPCV2)                        += irq-imx-gpcv2.o
 obj-$(CONFIG_PIC32_EVIC)               += irq-pic32-evic.o
 obj-$(CONFIG_MSCC_OCELOT_IRQ)          += irq-mscc-ocelot.o
diff --git a/drivers/irqchip/irq-ingenic-tcu.c b/drivers/irqchip/irq-ingenic-tcu.c
new file mode 100644 (file)
index 0000000..6d05cef
--- /dev/null
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * JZ47xx SoCs TCU IRQ driver
+ * Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net>
+ */
+
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/mfd/ingenic-tcu.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of_irq.h>
+#include <linux/regmap.h>
+
+struct ingenic_tcu {                   /* TCU irqchip state filled in by ingenic_tcu_irq_init() */
+       struct regmap *map;             /* TCU register map, from device_node_to_regmap() */
+       struct clk *clk;                /* not set or read anywhere in this file */
+       struct irq_domain *domain;      /* 32-entry linear IRQ domain for the TCU interrupts */
+       unsigned int nb_parent_irqs;    /* count of u32 elements in the node's "interrupts" property */
+       u32 parent_irqs[3];             /* virqs returned by irq_of_parse_and_map(), one per parent */
+};
+
+/*
+ * Chained handler installed on the TCU parent interrupt(s).
+ *
+ * Reads TCU_REG_TFR and TCU_REG_TMR, drops every bit that is set in the
+ * TMR value, and calls generic_handle_irq() for each remaining bit using
+ * the virq mapped at that index in the TCU IRQ domain.
+ *
+ * @desc: descriptor of the parent interrupt. Its handler data is the TCU
+ *        irq_domain; generic chip 0 of that domain holds the regmap in
+ *        gc->private.
+ */
+static void ingenic_tcu_intc_cascade(struct irq_desc *desc)
+{
+       struct irq_chip *irq_chip = irq_data_get_irq_chip(&desc->irq_data);
+       struct irq_domain *domain = irq_desc_get_handler_data(desc);
+       struct irq_chip_generic *gc = irq_get_domain_generic_chip(domain, 0);
+       struct regmap *map = gc->private;
+       uint32_t irq_reg, irq_mask;
+       unsigned long pending;
+       unsigned int i;
+
+       regmap_read(map, TCU_REG_TFR, &irq_reg);
+       regmap_read(map, TCU_REG_TMR, &irq_mask);
+
+       chained_irq_enter(irq_chip, desc);
+
+       /*
+        * The bitmap helpers walk unsigned long words. Widen the value into
+        * a real unsigned long instead of casting the address of a 32-bit
+        * variable: the cast over-reads on 64-bit builds and would select
+        * the wrong half of the word on 64-bit big-endian ones.
+        */
+       pending = irq_reg & ~irq_mask;
+
+       for_each_set_bit(i, &pending, 32)
+               generic_handle_irq(irq_linear_revmap(domain, i));
+
+       chained_irq_exit(irq_chip, desc);
+}
+
+/*
+ * irq_unmask callback of the TCU generic chip.
+ *
+ * Under the generic-chip lock, writes the interrupt's bit to the chip
+ * type's ack register and then to its enable register, and sets the same
+ * bit in the cached mask.
+ *
+ * @d: irq_data of the TCU interrupt to unmask; d->mask is its bit.
+ */
+static void ingenic_tcu_gc_unmask_enable_reg(struct irq_data *d)
+{
+       struct irq_chip_type *type = irq_data_get_chip_type(d);
+       struct irq_chip_generic *chip = irq_data_get_irq_chip_data(d);
+       struct regmap *regs = chip->private;
+       const u32 bit = d->mask;
+
+       irq_gc_lock(chip);
+
+       /*
+        * Ack before enabling - presumably so that a stale flag does not
+        * fire as soon as the line is enabled (TODO confirm).
+        */
+       regmap_write(regs, type->regs.ack, bit);
+       regmap_write(regs, type->regs.enable, bit);
+       *type->mask_cache = *type->mask_cache | bit;
+
+       irq_gc_unlock(chip);
+}
+
+/*
+ * irq_mask callback of the TCU generic chip.
+ *
+ * Under the generic-chip lock, writes the interrupt's bit to the chip
+ * type's disable register and clears that bit in the cached mask.
+ *
+ * @d: irq_data of the TCU interrupt to mask; d->mask is its bit.
+ */
+static void ingenic_tcu_gc_mask_disable_reg(struct irq_data *d)
+{
+       struct irq_chip_type *type = irq_data_get_chip_type(d);
+       struct irq_chip_generic *chip = irq_data_get_irq_chip_data(d);
+       struct regmap *regs = chip->private;
+       const u32 bit = d->mask;
+
+       irq_gc_lock(chip);
+
+       regmap_write(regs, type->regs.disable, bit);
+       *type->mask_cache = *type->mask_cache & ~bit;
+
+       irq_gc_unlock(chip);
+}
+
+/*
+ * irq_mask_ack callback of the TCU generic chip.
+ *
+ * Under the generic-chip lock, writes the interrupt's bit to the chip
+ * type's ack register and then to its disable register, and clears that
+ * bit in the cached mask so the cache matches what
+ * ingenic_tcu_gc_mask_disable_reg() records for a masked line.
+ *
+ * @d: irq_data of the TCU interrupt to ack and mask; d->mask is its bit.
+ */
+static void ingenic_tcu_gc_mask_disable_reg_and_ack(struct irq_data *d)
+{
+       struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+       struct irq_chip_type *ct = irq_data_get_chip_type(d);
+       struct regmap *map = gc->private;
+       u32 mask = d->mask;
+
+       irq_gc_lock(gc);
+       regmap_write(map, ct->regs.ack, mask);
+       regmap_write(map, ct->regs.disable, mask);
+       /* The line is masked now: keep the cached mask in step with it */
+       *ct->mask_cache &= ~mask;
+       irq_gc_unlock(gc);
+}
+
+/*
+ * Set up the TCU interrupt controller described by device-tree node @np.
+ *
+ * Creates a 32-entry linear IRQ domain backed by one generic chip whose
+ * mask/unmask/mask_ack callbacks go through the TCU regmap, writes
+ * IRQ_MSK(32) to TCU_REG_TMSR, then installs ingenic_tcu_intc_cascade()
+ * as chained handler on every parent interrupt of the node.
+ *
+ * @np:     device-tree node of the TCU
+ * @parent: parent node passed by the irqchip core; not used here
+ *
+ * Return: 0 on success. On failure: the error from
+ * device_node_to_regmap(), -ENOMEM if an allocation fails, -EINVAL for a
+ * bad "interrupts" property or an unmappable parent interrupt, or the
+ * error returned by irq_alloc_domain_generic_chips().
+ */
+static int __init ingenic_tcu_irq_init(struct device_node *np,
+                                      struct device_node *parent)
+{
+       struct irq_chip_generic *gc;
+       struct irq_chip_type *ct;
+       struct ingenic_tcu *tcu;
+       struct regmap *map;
+       unsigned int i;
+       int ret, irqs;
+
+       map = device_node_to_regmap(np);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
+
+       /*
+        * NOTE(review): nothing visible here keeps a reference to tcu after
+        * a successful return (the handler data is tcu->domain and
+        * gc->private is the regmap) - confirm whether it should be stored
+        * somewhere or released.
+        */
+       tcu = kzalloc(sizeof(*tcu), GFP_KERNEL);
+       if (!tcu)
+               return -ENOMEM;
+
+       tcu->map = map;
+
+       irqs = of_property_count_elems_of_size(np, "interrupts", sizeof(u32));
+       if (irqs < 0 || irqs > ARRAY_SIZE(tcu->parent_irqs)) {
+               pr_crit("%s: Invalid 'interrupts' property\n", __func__);
+               ret = -EINVAL;
+               goto err_free_tcu;
+       }
+
+       tcu->nb_parent_irqs = irqs;
+
+       tcu->domain = irq_domain_add_linear(np, 32, &irq_generic_chip_ops,
+                                           NULL);
+       if (!tcu->domain) {
+               ret = -ENOMEM;
+               goto err_free_tcu;
+       }
+
+       ret = irq_alloc_domain_generic_chips(tcu->domain, 32, 1, "TCU",
+                                            handle_level_irq, 0,
+                                            IRQ_NOPROBE | IRQ_LEVEL, 0);
+       if (ret) {
+               pr_crit("%s: Unable to allocate generic IRQ chips\n",
+                       __func__);
+               goto out_domain_remove;
+       }
+
+       gc = irq_get_domain_generic_chip(tcu->domain, 0);
+       ct = gc->chip_types;
+
+       gc->wake_enabled = IRQ_MSK(32);
+       /* The chip callbacks and the cascade handler fetch the regmap here */
+       gc->private = tcu->map;
+
+       ct->regs.disable = TCU_REG_TMSR;
+       ct->regs.enable = TCU_REG_TMCR;
+       ct->regs.ack = TCU_REG_TFCR;
+       ct->chip.irq_unmask = ingenic_tcu_gc_unmask_enable_reg;
+       ct->chip.irq_mask = ingenic_tcu_gc_mask_disable_reg;
+       ct->chip.irq_mask_ack = ingenic_tcu_gc_mask_disable_reg_and_ack;
+       ct->chip.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE;
+
+       /* Mask all IRQs by default */
+       regmap_write(tcu->map, TCU_REG_TMSR, IRQ_MSK(32));
+
+       /*
+        * On JZ4740, timer 0 and timer 1 have their own interrupt line;
+        * timers 2-7 share one interrupt.
+        * On SoCs >= JZ4770, timer 5 has its own interrupt line;
+        * timers 0-4 and 6-7 share one single interrupt.
+        *
+        * To keep things simple, we just register the same handler to
+        * all parent interrupts. The handler will properly detect which
+        * channel fired the interrupt.
+        */
+       for (i = 0; i < irqs; i++) {
+               tcu->parent_irqs[i] = irq_of_parse_and_map(np, i);
+               if (!tcu->parent_irqs[i]) {
+                       ret = -EINVAL;
+                       goto out_unmap_irqs;
+               }
+
+               irq_set_chained_handler_and_data(tcu->parent_irqs[i],
+                                                ingenic_tcu_intc_cascade,
+                                                tcu->domain);
+       }
+
+       return 0;
+
+out_unmap_irqs:
+       /*
+        * Entries [0, i) were mapped and had the cascade handler installed.
+        * Detach the handler before disposing each mapping: its handler
+        * data is the domain that is removed just below.
+        */
+       for (; i > 0; i--) {
+               irq_set_chained_handler_and_data(tcu->parent_irqs[i - 1],
+                                                NULL, NULL);
+               irq_dispose_mapping(tcu->parent_irqs[i - 1]);
+       }
+out_domain_remove:
+       irq_domain_remove(tcu->domain);
+err_free_tcu:
+       kfree(tcu);
+       return ret;
+}
+IRQCHIP_DECLARE(jz4740_tcu_irq, "ingenic,jz4740-tcu", ingenic_tcu_irq_init);
+IRQCHIP_DECLARE(jz4725b_tcu_irq, "ingenic,jz4725b-tcu", ingenic_tcu_irq_init);
+IRQCHIP_DECLARE(jz4770_tcu_irq, "ingenic,jz4770-tcu", ingenic_tcu_irq_init);
index 3834332..aa98953 100644 (file)
@@ -271,6 +271,7 @@ config DM_CRYPT
        depends on BLK_DEV_DM
        select CRYPTO
        select CRYPTO_CBC
+       select CRYPTO_ESSIV
        ---help---
          This device-mapper target allows you to create a device that
          transparently encrypts the data on it. You'll need to activate
@@ -346,6 +347,20 @@ config DM_ERA
          over time.  Useful for maintaining cache coherency when using
          vendor snapshots.
 
+config DM_CLONE
+       tristate "Clone target (EXPERIMENTAL)"
+       depends on BLK_DEV_DM
+       default n
+       select DM_PERSISTENT_DATA
+       ---help---
+         dm-clone produces a one-to-one copy of an existing, read-only source
+         device into a writable destination device. The cloned device is
+         visible/mountable immediately and the copy of the source device to the
+         destination device happens in the background, in parallel with user
+         I/O.
+
+         If unsure, say N.
+
 config DM_MIRROR
        tristate "Mirror target"
        depends on BLK_DEV_DM
@@ -490,6 +505,18 @@ config DM_VERITY
 
          If unsure, say N.
 
+config DM_VERITY_VERIFY_ROOTHASH_SIG
+       def_bool n
+       bool "Verity data device root hash signature verification support"
+       depends on DM_VERITY
+       select SYSTEM_DATA_VERIFICATION
+       help
+         Add ability for dm-verity device to be validated if the
+         pre-generated tree of cryptographic checksums passed has a pkcs#7
+         signature file that can validate the roothash of the tree.
+
+         If unsure, say N.
+
 config DM_VERITY_FEC
        bool "Verity forward error correction support"
        depends on DM_VERITY
index be7a6eb..d91a7ed 100644 (file)
@@ -18,6 +18,7 @@ dm-cache-y    += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o \
                    dm-cache-background-tracker.o
 dm-cache-smq-y   += dm-cache-policy-smq.o
 dm-era-y       += dm-era-target.o
+dm-clone-y     += dm-clone-target.o dm-clone-metadata.o
 dm-verity-y    += dm-verity-target.o
 md-mod-y       += md.o md-bitmap.o
 raid456-y      += raid5.o raid5-cache.o raid5-ppl.o
@@ -65,6 +66,7 @@ obj-$(CONFIG_DM_VERITY)               += dm-verity.o
 obj-$(CONFIG_DM_CACHE)         += dm-cache.o
 obj-$(CONFIG_DM_CACHE_SMQ)     += dm-cache-smq.o
 obj-$(CONFIG_DM_ERA)           += dm-era.o
+obj-$(CONFIG_DM_CLONE)         += dm-clone.o
 obj-$(CONFIG_DM_LOG_WRITES)    += dm-log-writes.o
 obj-$(CONFIG_DM_INTEGRITY)     += dm-integrity.o
 obj-$(CONFIG_DM_ZONED)         += dm-zoned.o
@@ -81,3 +83,7 @@ endif
 ifeq ($(CONFIG_DM_VERITY_FEC),y)
 dm-verity-objs                 += dm-verity-fec.o
 endif
+
+ifeq ($(CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG),y)
+dm-verity-objs                 += dm-verity-verify-sig.o
+endif
index 2a48ea3..2d519c2 100644 (file)
@@ -33,7 +33,8 @@
 
 #define DM_BUFIO_MEMORY_PERCENT                2
 #define DM_BUFIO_VMALLOC_PERCENT       25
-#define DM_BUFIO_WRITEBACK_PERCENT     75
+#define DM_BUFIO_WRITEBACK_RATIO       3
+#define DM_BUFIO_LOW_WATERMARK_RATIO   16
 
 /*
  * Check buffer ages in this interval (seconds)
@@ -132,12 +133,14 @@ enum data_mode {
 struct dm_buffer {
        struct rb_node node;
        struct list_head lru_list;
+       struct list_head global_list;
        sector_t block;
        void *data;
        unsigned char data_mode;                /* DATA_MODE_* */
        unsigned char list_mode;                /* LIST_* */
        blk_status_t read_error;
        blk_status_t write_error;
+       unsigned accessed;
        unsigned hold_count;
        unsigned long state;
        unsigned long last_accessed;
@@ -192,7 +195,11 @@ static unsigned long dm_bufio_cache_size;
  */
 static unsigned long dm_bufio_cache_size_latch;
 
-static DEFINE_SPINLOCK(param_spinlock);
+static DEFINE_SPINLOCK(global_spinlock);
+
+static LIST_HEAD(global_queue);
+
+static unsigned long global_num = 0;
 
 /*
  * Buffers are freed after this timeout
@@ -209,11 +216,6 @@ static unsigned long dm_bufio_current_allocated;
 /*----------------------------------------------------------------*/
 
 /*
- * Per-client cache: dm_bufio_cache_size / dm_bufio_client_count
- */
-static unsigned long dm_bufio_cache_size_per_client;
-
-/*
  * The current number of clients.
  */
 static int dm_bufio_client_count;
@@ -224,11 +226,15 @@ static int dm_bufio_client_count;
 static LIST_HEAD(dm_bufio_all_clients);
 
 /*
- * This mutex protects dm_bufio_cache_size_latch,
- * dm_bufio_cache_size_per_client and dm_bufio_client_count
+ * This mutex protects dm_bufio_cache_size_latch and dm_bufio_client_count
  */
 static DEFINE_MUTEX(dm_bufio_clients_lock);
 
+static struct workqueue_struct *dm_bufio_wq;
+static struct delayed_work dm_bufio_cleanup_old_work;
+static struct work_struct dm_bufio_replacement_work;
+
+
 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
 static void buffer_record_stack(struct dm_buffer *b)
 {
@@ -285,15 +291,23 @@ static void __remove(struct dm_bufio_client *c, struct dm_buffer *b)
 
 /*----------------------------------------------------------------*/
 
-static void adjust_total_allocated(unsigned char data_mode, long diff)
+static void adjust_total_allocated(struct dm_buffer *b, bool unlink)
 {
+       unsigned char data_mode;
+       long diff;
+
        static unsigned long * const class_ptr[DATA_MODE_LIMIT] = {
                &dm_bufio_allocated_kmem_cache,
                &dm_bufio_allocated_get_free_pages,
                &dm_bufio_allocated_vmalloc,
        };
 
-       spin_lock(&param_spinlock);
+       data_mode = b->data_mode;
+       diff = (long)b->c->block_size;
+       if (unlink)
+               diff = -diff;
+
+       spin_lock(&global_spinlock);
 
        *class_ptr[data_mode] += diff;
 
@@ -302,7 +316,19 @@ static void adjust_total_allocated(unsigned char data_mode, long diff)
        if (dm_bufio_current_allocated > dm_bufio_peak_allocated)
                dm_bufio_peak_allocated = dm_bufio_current_allocated;
 
-       spin_unlock(&param_spinlock);
+       b->accessed = 1;
+
+       if (!unlink) {
+               list_add(&b->global_list, &global_queue);
+               global_num++;
+               if (dm_bufio_current_allocated > dm_bufio_cache_size)
+                       queue_work(dm_bufio_wq, &dm_bufio_replacement_work);
+       } else {
+               list_del(&b->global_list);
+               global_num--;
+       }
+
+       spin_unlock(&global_spinlock);
 }
 
 /*
@@ -323,9 +349,6 @@ static void __cache_size_refresh(void)
                              dm_bufio_default_cache_size);
                dm_bufio_cache_size_latch = dm_bufio_default_cache_size;
        }
-
-       dm_bufio_cache_size_per_client = dm_bufio_cache_size_latch /
-                                        (dm_bufio_client_count ? : 1);
 }
 
 /*
@@ -431,8 +454,6 @@ static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
                return NULL;
        }
 
-       adjust_total_allocated(b->data_mode, (long)c->block_size);
-
 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
        b->stack_len = 0;
 #endif
@@ -446,8 +467,6 @@ static void free_buffer(struct dm_buffer *b)
 {
        struct dm_bufio_client *c = b->c;
 
-       adjust_total_allocated(b->data_mode, -(long)c->block_size);
-
        free_buffer_data(c, b->data, b->data_mode);
        kmem_cache_free(c->slab_buffer, b);
 }
@@ -465,6 +484,8 @@ static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
        list_add(&b->lru_list, &c->lru[dirty]);
        __insert(b->c, b);
        b->last_accessed = jiffies;
+
+       adjust_total_allocated(b, false);
 }
 
 /*
@@ -479,6 +500,8 @@ static void __unlink_buffer(struct dm_buffer *b)
        c->n_buffers[b->list_mode]--;
        __remove(b->c, b);
        list_del(&b->lru_list);
+
+       adjust_total_allocated(b, true);
 }
 
 /*
@@ -488,6 +511,8 @@ static void __relink_lru(struct dm_buffer *b, int dirty)
 {
        struct dm_bufio_client *c = b->c;
 
+       b->accessed = 1;
+
        BUG_ON(!c->n_buffers[b->list_mode]);
 
        c->n_buffers[b->list_mode]--;
@@ -907,36 +932,6 @@ static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait,
 }
 
 /*
- * Get writeback threshold and buffer limit for a given client.
- */
-static void __get_memory_limit(struct dm_bufio_client *c,
-                              unsigned long *threshold_buffers,
-                              unsigned long *limit_buffers)
-{
-       unsigned long buffers;
-
-       if (unlikely(READ_ONCE(dm_bufio_cache_size) != dm_bufio_cache_size_latch)) {
-               if (mutex_trylock(&dm_bufio_clients_lock)) {
-                       __cache_size_refresh();
-                       mutex_unlock(&dm_bufio_clients_lock);
-               }
-       }
-
-       buffers = dm_bufio_cache_size_per_client;
-       if (likely(c->sectors_per_block_bits >= 0))
-               buffers >>= c->sectors_per_block_bits + SECTOR_SHIFT;
-       else
-               buffers /= c->block_size;
-
-       if (buffers < c->minimum_buffers)
-               buffers = c->minimum_buffers;
-
-       *limit_buffers = buffers;
-       *threshold_buffers = mult_frac(buffers,
-                                      DM_BUFIO_WRITEBACK_PERCENT, 100);
-}
-
-/*
  * Check if we're over watermark.
  * If we are over threshold_buffers, start freeing buffers.
  * If we're over "limit_buffers", block until we get under the limit.
@@ -944,23 +939,7 @@ static void __get_memory_limit(struct dm_bufio_client *c,
 static void __check_watermark(struct dm_bufio_client *c,
                              struct list_head *write_list)
 {
-       unsigned long threshold_buffers, limit_buffers;
-
-       __get_memory_limit(c, &threshold_buffers, &limit_buffers);
-
-       while (c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY] >
-              limit_buffers) {
-
-               struct dm_buffer *b = __get_unclaimed_buffer(c);
-
-               if (!b)
-                       return;
-
-               __free_buffer_wake(b);
-               cond_resched();
-       }
-
-       if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
+       if (c->n_buffers[LIST_DIRTY] > c->n_buffers[LIST_CLEAN] * DM_BUFIO_WRITEBACK_RATIO)
                __write_dirty_buffers_async(c, 1, write_list);
 }
 
@@ -1841,6 +1820,74 @@ static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
        dm_bufio_unlock(c);
 }
 
+static void do_global_cleanup(struct work_struct *w)
+{
+       struct dm_bufio_client *locked_client = NULL;   /* client whose lock is currently held, if any */
+       struct dm_bufio_client *current_client;
+       struct dm_buffer *b;
+       unsigned spinlock_hold_count;
+       unsigned long threshold = dm_bufio_cache_size -
+               dm_bufio_cache_size / DM_BUFIO_LOW_WATERMARK_RATIO;     /* stop once allocation is at or below this */
+       unsigned long loops = global_num * 2;   /* cap on queue entries examined in this run */
+
+       mutex_lock(&dm_bufio_clients_lock);
+
+       while (1) {     /* evict from the tail of global_queue until under threshold or out of loops */
+               cond_resched();
+
+               spin_lock(&global_spinlock);
+               if (unlikely(dm_bufio_current_allocated <= threshold))
+                       break;
+
+               spinlock_hold_count = 0;
+get_next:
+               if (!loops--)
+                       break;
+               if (unlikely(list_empty(&global_queue)))
+                       break;
+               b = list_entry(global_queue.prev, struct dm_buffer, global_list);       /* entry at the queue tail */
+
+               if (b->accessed) {      /* accessed flag set: clear it and requeue at the head */
+                       b->accessed = 0;
+                       list_move(&b->global_list, &global_queue);
+                       if (likely(++spinlock_hold_count < 16)) /* after 16 in a row, release the spinlock */
+                               goto get_next;
+                       spin_unlock(&global_spinlock);
+                       continue;
+               }
+
+               current_client = b->c;
+               if (unlikely(current_client != locked_client)) {
+                       if (locked_client)
+                               dm_bufio_unlock(locked_client);
+
+                       if (!dm_bufio_trylock(current_client)) {        /* trylock failed: drop the spinlock before dm_bufio_lock() */
+                               spin_unlock(&global_spinlock);
+                               dm_bufio_lock(current_client);
+                               locked_client = current_client;
+                               continue;       /* b is picked again from the queue tail */
+                       }
+
+                       locked_client = current_client;
+               }
+
+               spin_unlock(&global_spinlock);
+
+               if (unlikely(!__try_evict_buffer(b, GFP_KERNEL))) {     /* returned false: requeue b at the head */
+                       spin_lock(&global_spinlock);
+                       list_move(&b->global_list, &global_queue);
+                       spin_unlock(&global_spinlock);
+               }
+       }
+
+       spin_unlock(&global_spinlock);  /* every break above exits with the spinlock held */
+
+       if (locked_client)
+               dm_bufio_unlock(locked_client);
+
+       mutex_unlock(&dm_bufio_clients_lock);
+}
+
 static void cleanup_old_buffers(void)
 {
        unsigned long max_age_hz = get_max_age_hz();
@@ -1856,14 +1903,11 @@ static void cleanup_old_buffers(void)
        mutex_unlock(&dm_bufio_clients_lock);
 }
 
-static struct workqueue_struct *dm_bufio_wq;
-static struct delayed_work dm_bufio_work;
-
 static void work_fn(struct work_struct *w)
 {
        cleanup_old_buffers();
 
-       queue_delayed_work(dm_bufio_wq, &dm_bufio_work,
+       queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
                           DM_BUFIO_WORK_TIMER_SECS * HZ);
 }
 
@@ -1905,8 +1949,9 @@ static int __init dm_bufio_init(void)
        if (!dm_bufio_wq)
                return -ENOMEM;
 
-       INIT_DELAYED_WORK(&dm_bufio_work, work_fn);
-       queue_delayed_work(dm_bufio_wq, &dm_bufio_work,
+       INIT_DELAYED_WORK(&dm_bufio_cleanup_old_work, work_fn);
+       INIT_WORK(&dm_bufio_replacement_work, do_global_cleanup);
+       queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
                           DM_BUFIO_WORK_TIMER_SECS * HZ);
 
        return 0;
@@ -1919,7 +1964,8 @@ static void __exit dm_bufio_exit(void)
 {
        int bug = 0;
 
-       cancel_delayed_work_sync(&dm_bufio_work);
+       cancel_delayed_work_sync(&dm_bufio_cleanup_old_work);
+       flush_workqueue(dm_bufio_wq);
        destroy_workqueue(dm_bufio_wq);
 
        if (dm_bufio_client_count) {
diff --git a/drivers/md/dm-clone-metadata.c b/drivers/md/dm-clone-metadata.c
new file mode 100644 (file)
index 0000000..6bc8c1d
--- /dev/null
@@ -0,0 +1,964 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved.
+ */
+
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/rwsem.h>
+#include <linux/bitops.h>
+#include <linux/bitmap.h>
+#include <linux/device-mapper.h>
+
+#include "persistent-data/dm-bitset.h"
+#include "persistent-data/dm-space-map.h"
+#include "persistent-data/dm-block-manager.h"
+#include "persistent-data/dm-transaction-manager.h"
+
+#include "dm-clone-metadata.h"
+
+#define DM_MSG_PREFIX "clone metadata"
+
+#define SUPERBLOCK_LOCATION 0
+#define SUPERBLOCK_MAGIC 0x8af27f64
+#define SUPERBLOCK_CSUM_XOR 257649492
+
+#define DM_CLONE_MAX_CONCURRENT_LOCKS 5
+
+#define UUID_LEN 16
+
+/* Min and max dm-clone metadata versions supported */
+#define DM_CLONE_MIN_METADATA_VERSION 1
+#define DM_CLONE_MAX_METADATA_VERSION 1
+
+/*
+ * On-disk metadata layout
+ */
+struct superblock_disk {
+       __le32 csum;    /* checksum of the block from ->flags onwards */
+       __le32 flags;
+       __le64 blocknr; /* location of this block; verified by sb_check() */
+
+       __u8 uuid[UUID_LEN];    /* zero-filled by __prepare_superblock() */
+       __le64 magic;
+       __le32 version;
+
+       __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
+
+       __le64 region_size;
+       __le64 target_size;
+
+       __le64 bitset_root;
+} __packed;
+
+/*
+ * Region and Dirty bitmaps.
+ *
+ * dm-clone logically splits the source and destination devices in regions of
+ * fixed size. The destination device's regions are gradually hydrated, i.e.,
+ * we copy (clone) the source's regions to the destination device. Eventually,
+ * all regions will get hydrated and all I/O will be served from the
+ * destination device.
+ *
+ * We maintain an on-disk bitmap which tracks the state of each of the
+ * destination device's regions, i.e., whether they are hydrated or not.
+ *
+ * To save constantly doing look ups on disk we keep an in core copy of the
+ * on-disk bitmap, the region_map.
+ *
+ * To further reduce metadata I/O overhead we use a second bitmap, the dmap
+ * (dirty bitmap), which tracks the dirty words, i.e. longs, of the region_map.
+ *
+ * When a region finishes hydrating dm-clone calls
+ * dm_clone_set_region_hydrated(), or for discard requests
+ * dm_clone_cond_set_range(), which sets the corresponding bits in region_map
+ * and dmap.
+ *
+ * During a metadata commit we scan the dmap for dirty region_map words (longs)
+ * and update accordingly the on-disk metadata. Thus, we don't have to flush to
+ * disk the whole region_map. We can just flush the dirty region_map words.
+ *
+ * We use a dirty bitmap, which is smaller than the original region_map, to
+ * reduce the amount of memory accesses during a metadata commit. As dm-bitset
+ * accesses the on-disk bitmap in 64-bit word granularity, there is no
+ * significant benefit in tracking the dirty region_map bits with a smaller
+ * granularity.
+ *
+ * We could update directly the on-disk bitmap, when dm-clone calls either
+ * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), but this
+ * inserts significant metadata I/O overhead in dm-clone's I/O path. Also, as
+ * these two functions don't block, we can call them in interrupt context,
+ * e.g., in a hooked overwrite bio's completion routine, and further reduce the
+ * I/O completion latency.
+ *
+ * We maintain two dirty bitmaps. During a metadata commit we atomically swap
+ * the currently used dmap with the unused one. This allows the metadata update
+ * functions to run concurrently with an ongoing commit.
+ */
+struct dirty_map {
+       unsigned long *dirty_words;
+       unsigned int changed;
+};
+
+struct dm_clone_metadata {
+       /* The metadata block device */
+       struct block_device *bdev;
+
+       sector_t target_size;
+       sector_t region_size;
+       unsigned long nr_regions;
+       unsigned long nr_words;
+
+       /* Spinlock protecting the region and dirty bitmaps. */
+       spinlock_t bitmap_lock;
+       struct dirty_map dmap[2];
+       struct dirty_map *current_dmap;
+
+       /*
+        * In core copy of the on-disk bitmap to save constantly doing look ups
+        * on disk.
+        */
+       unsigned long *region_map;
+
+       /* Protected by bitmap_lock */
+       unsigned int read_only;
+
+       struct dm_block_manager *bm;
+       struct dm_space_map *sm;
+       struct dm_transaction_manager *tm;
+
+       struct rw_semaphore lock;
+
+       struct dm_disk_bitset bitset_info;
+       dm_block_t bitset_root;
+
+       /*
+        * Reading the space map root can fail, so we read it into this
+        * buffer before the superblock is locked and updated.
+        */
+       __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
+
+       bool hydration_done:1;
+       bool fail_io:1;
+};
+
+/*---------------------------------------------------------------------------*/
+
+/*
+ * Superblock validation.
+ */
+static void sb_prepare_for_write(struct dm_block_validator *v,
+                                struct dm_block *b, size_t sb_block_size)
+{
+       struct superblock_disk *sb;
+       u32 csum;
+
+       sb = dm_block_data(b);
+       sb->blocknr = cpu_to_le64(dm_block_location(b));        /* record where this block is located */
+
+       csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32),
+                             SUPERBLOCK_CSUM_XOR);     /* covers everything after the leading csum field */
+       sb->csum = cpu_to_le32(csum);
+}
+
+static int sb_check(struct dm_block_validator *v, struct dm_block *b,
+                   size_t sb_block_size)
+{
+       struct superblock_disk *sb;
+       u32 csum, metadata_version;
+
+       sb = dm_block_data(b);
+
+       if (dm_block_location(b) != le64_to_cpu(sb->blocknr)) { /* recorded location differs from the actual one */
+               DMERR("Superblock check failed: blocknr %llu, expected %llu",
+                     le64_to_cpu(sb->blocknr),
+                     (unsigned long long)dm_block_location(b));
+               return -ENOTBLK;
+       }
+
+       if (le64_to_cpu(sb->magic) != SUPERBLOCK_MAGIC) {
+               DMERR("Superblock check failed: magic %llu, expected %llu",
+                     le64_to_cpu(sb->magic),
+                     (unsigned long long)SUPERBLOCK_MAGIC);
+               return -EILSEQ;
+       }
+
+       csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32),
+                             SUPERBLOCK_CSUM_XOR);     /* same span as in sb_prepare_for_write() */
+       if (sb->csum != cpu_to_le32(csum)) {
+               DMERR("Superblock check failed: checksum %u, expected %u",
+                     csum, le32_to_cpu(sb->csum));     /* computed value first, stored value second */
+               return -EILSEQ;
+       }
+
+       /* Reject metadata versions outside the supported [MIN, MAX] range */
+       metadata_version = le32_to_cpu(sb->version);
+       if (metadata_version < DM_CLONE_MIN_METADATA_VERSION ||
+           metadata_version > DM_CLONE_MAX_METADATA_VERSION) {
+               DMERR("Clone metadata version %u found, but only versions between %u and %u supported.",
+                     metadata_version, DM_CLONE_MIN_METADATA_VERSION,
+                     DM_CLONE_MAX_METADATA_VERSION);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static struct dm_block_validator sb_validator = {
+       .name = "superblock",
+       .prepare_for_write = sb_prepare_for_write,
+       .check = sb_check
+};
+
+/*
+ * Scan the superblock and set *formatted to true if any 64-bit word in it is
+ * non-zero, false otherwise. Returns 0, or the dm_bm_read_lock() error.
+ */
+static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *formatted)
+{
+       int r;
+       unsigned int i, nr_words;
+       struct dm_block *sblock;
+       __le64 *data_le, zero = cpu_to_le64(0);
+
+       /*
+        * We don't use a validator here because the superblock could be all
+        * zeroes.
+        */
+       r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &sblock);
+       if (r) {
+               DMERR("Failed to read_lock superblock");
+               return r;       /* *formatted is left untouched on this path */
+       }
+
+       data_le = dm_block_data(sblock);
+       *formatted = false;
+
+       /* This assumes that the block size is a multiple of 8 bytes */
+       BUG_ON(dm_bm_block_size(bm) % sizeof(__le64));
+       nr_words = dm_bm_block_size(bm) / sizeof(__le64);
+       for (i = 0; i < nr_words; i++) {
+               if (data_le[i] != zero) {       /* first non-zero word decides; stop scanning */
+                       *formatted = true;
+                       break;
+               }
+       }
+
+       dm_bm_unlock(sblock);
+
+       return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+
+/*
+ * Low-level metadata handling.
+ */
+static inline int superblock_read_lock(struct dm_clone_metadata *cmd,
+                                      struct dm_block **sblock)
+{
+       return dm_bm_read_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
+}
+
+static inline int superblock_write_lock(struct dm_clone_metadata *cmd,
+                                       struct dm_block **sblock)
+{
+       return dm_bm_write_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
+}
+
+static inline int superblock_write_lock_zero(struct dm_clone_metadata *cmd,
+                                            struct dm_block **sblock)
+{
+       return dm_bm_write_lock_zero(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
+}
+
+static int __copy_sm_root(struct dm_clone_metadata *cmd)
+{
+       int r;
+       size_t root_size;
+
+       r = dm_sm_root_size(cmd->sm, &root_size);
+       if (r)
+               return r;
+
+       return dm_sm_copy_root(cmd->sm, &cmd->metadata_space_map_root, root_size);
+}
+
+/* Fill in @sb from @cmd; csum/blocknr are left to sb_prepare_for_write() */
+static void __prepare_superblock(struct dm_clone_metadata *cmd,
+                                struct superblock_disk *sb)
+{
+       sb->flags = cpu_to_le32(0UL);
+
+       /* FIXME: UUID is currently unused */
+       memset(sb->uuid, 0, sizeof(sb->uuid));
+
+       sb->magic = cpu_to_le64(SUPERBLOCK_MAGIC);
+       sb->version = cpu_to_le32(DM_CLONE_MAX_METADATA_VERSION);
+
+       /* Save the metadata space_map root (buffered earlier in cmd by __copy_sm_root()) */
+       memcpy(&sb->metadata_space_map_root, &cmd->metadata_space_map_root,
+              sizeof(cmd->metadata_space_map_root));
+
+       sb->region_size = cpu_to_le64(cmd->region_size);
+       sb->target_size = cpu_to_le64(cmd->target_size);
+       sb->bitset_root = cpu_to_le64(cmd->bitset_root);
+}
+
+static int __open_metadata(struct dm_clone_metadata *cmd)
+{
+       int r;
+       struct dm_block *sblock;
+       struct superblock_disk *sb;
+
+       r = superblock_read_lock(cmd, &sblock); /* read lock taken with sb_validator */
+
+       if (r) {
+               DMERR("Failed to read_lock superblock");
+               return r;
+       }
+
+       sb = dm_block_data(sblock);
+
+       /* Verify that target_size and region_size haven't changed. */
+       if (cmd->region_size != le64_to_cpu(sb->region_size) ||
+           cmd->target_size != le64_to_cpu(sb->target_size)) {
+               DMERR("Region and/or target size don't match the ones in metadata");
+               r = -EINVAL;
+               goto out_with_lock;
+       }
+
+       r = dm_tm_open_with_sm(cmd->bm, SUPERBLOCK_LOCATION,
+                              sb->metadata_space_map_root,
+                              sizeof(sb->metadata_space_map_root),
+                              &cmd->tm, &cmd->sm);     /* fills in cmd->tm and cmd->sm */
+
+       if (r) {
+               DMERR("dm_tm_open_with_sm failed");
+               goto out_with_lock;
+       }
+
+       dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
+       cmd->bitset_root = le64_to_cpu(sb->bitset_root);        /* bitset root as stored in the superblock */
+
+out_with_lock: /* also reached on success: the superblock is always unlocked */
+       dm_bm_unlock(sblock);
+
+       return r;
+}
+
+static int __format_metadata(struct dm_clone_metadata *cmd)
+{
+       int r;
+       struct dm_block *sblock;
+       struct superblock_disk *sb;
+
+       r = dm_tm_create_with_sm(cmd->bm, SUPERBLOCK_LOCATION, &cmd->tm, &cmd->sm);
+       if (r) {
+               DMERR("Failed to create transaction manager");
+               return r;       /* nothing to tear down yet */
+       }
+
+       dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
+
+       r = dm_bitset_empty(&cmd->bitset_info, &cmd->bitset_root);
+       if (r) {
+               DMERR("Failed to create empty on-disk bitset");
+               goto err_with_tm;
+       }
+
+       r = dm_bitset_resize(&cmd->bitset_info, cmd->bitset_root, 0,
+                            cmd->nr_regions, false, &cmd->bitset_root);        /* size the bitset to nr_regions entries */
+       if (r) {
+               DMERR("Failed to resize on-disk bitset to %lu entries", cmd->nr_regions);
+               goto err_with_tm;
+       }
+
+       /* Flush to disk all blocks, except the superblock */
+       r = dm_tm_pre_commit(cmd->tm);
+       if (r) {
+               DMERR("dm_tm_pre_commit failed");
+               goto err_with_tm;
+       }
+
+       r = __copy_sm_root(cmd);        /* buffer the space map root in cmd before locking the superblock */
+       if (r) {
+               DMERR("__copy_sm_root failed");
+               goto err_with_tm;
+       }
+
+       r = superblock_write_lock_zero(cmd, &sblock);
+       if (r) {
+               DMERR("Failed to write_lock superblock");
+               goto err_with_tm;
+       }
+
+       sb = dm_block_data(sblock);
+       __prepare_superblock(cmd, sb);
+       r = dm_tm_commit(cmd->tm, sblock);
+       if (r) {
+               DMERR("Failed to commit superblock");
+               goto err_with_tm;
+       }
+
+       return 0;
+
+err_with_tm:   /* destroy the space map and transaction manager created above */
+       dm_sm_destroy(cmd->sm);
+       dm_tm_destroy(cmd->tm);
+
+       return r;
+}
+
+static int __open_or_format_metadata(struct dm_clone_metadata *cmd, bool may_format_device)
+{
+       int r;
+       bool formatted = false;
+
+       r = __superblock_all_zeroes(cmd->bm, &formatted);       /* formatted := superblock has a non-zero word */
+       if (r)
+               return r;
+
+       if (!formatted) /* all-zero superblock: format if allowed, otherwise -EPERM */
+               return may_format_device ? __format_metadata(cmd) : -EPERM;
+
+       return __open_metadata(cmd);
+}
+
+static int __create_persistent_data_structures(struct dm_clone_metadata *cmd,
+                                              bool may_format_device)
+{
+       int r;
+
+       /* Create block manager */
+       cmd->bm = dm_block_manager_create(cmd->bdev,
+                                        DM_CLONE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
+                                        DM_CLONE_MAX_CONCURRENT_LOCKS);
+       if (IS_ERR(cmd->bm)) {
+               DMERR("Failed to create block manager");
+               return PTR_ERR(cmd->bm);
+       }
+
+       r = __open_or_format_metadata(cmd, may_format_device);
+       if (r)
+               dm_block_manager_destroy(cmd->bm);
+
+       return r;
+}
+
+static void __destroy_persistent_data_structures(struct dm_clone_metadata *cmd)
+{
+       dm_sm_destroy(cmd->sm);
+       dm_tm_destroy(cmd->tm);
+       dm_block_manager_destroy(cmd->bm);
+}
+
+/*---------------------------------------------------------------------------*/
+
+static size_t bitmap_size(unsigned long nr_bits)
+{
+       return BITS_TO_LONGS(nr_bits) * sizeof(long);
+}
+
+static int dirty_map_init(struct dm_clone_metadata *cmd)
+{
+       cmd->dmap[0].changed = 0;
+       cmd->dmap[0].dirty_words = kvzalloc(bitmap_size(cmd->nr_words), GFP_KERNEL);    /* nr_words bits, zeroed */
+
+       if (!cmd->dmap[0].dirty_words) {
+               DMERR("Failed to allocate dirty bitmap");
+               return -ENOMEM;
+       }
+
+       cmd->dmap[1].changed = 0;
+       cmd->dmap[1].dirty_words = kvzalloc(bitmap_size(cmd->nr_words), GFP_KERNEL);
+
+       if (!cmd->dmap[1].dirty_words) {
+               DMERR("Failed to allocate dirty bitmap");
+               kvfree(cmd->dmap[0].dirty_words);       /* undo the first allocation */
+               return -ENOMEM;
+       }
+
+       cmd->current_dmap = &cmd->dmap[0];      /* dmap[0] is the one in use initially */
+
+       return 0;
+}
+
+static void dirty_map_exit(struct dm_clone_metadata *cmd)
+{
+       kvfree(cmd->dmap[0].dirty_words);
+       kvfree(cmd->dmap[1].dirty_words);
+}
+
+static int __load_bitset_in_core(struct dm_clone_metadata *cmd)
+{
+       int r;
+       unsigned long i;
+       struct dm_bitset_cursor c;
+
+       /* Flush bitset cache */
+       r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
+       if (r)
+               return r;
+
+       r = dm_bitset_cursor_begin(&cmd->bitset_info, cmd->bitset_root, cmd->nr_regions, &c);
+       if (r)
+               return r;
+
+       for (i = 0; ; i++) {    /* mirror each cursor value into bit i of region_map */
+               if (dm_bitset_cursor_get_value(&c))
+                       __set_bit(i, cmd->region_map);
+               else
+                       __clear_bit(i, cmd->region_map);
+
+               if (i >= (cmd->nr_regions - 1)) /* last region done: leave without advancing the cursor */
+                       break;
+
+               r = dm_bitset_cursor_next(&c);
+
+               if (r)  /* cursor error: returned to the caller after cursor_end */
+                       break;
+       }
+
+       dm_bitset_cursor_end(&c);
+
+       return r;
+}
+
+struct dm_clone_metadata *dm_clone_metadata_open(struct block_device *bdev,
+                                                sector_t target_size,
+                                                sector_t region_size)
+{
+       int r;
+       struct dm_clone_metadata *cmd;
+
+       cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+       if (!cmd) {
+               DMERR("Failed to allocate memory for dm-clone metadata");
+               return ERR_PTR(-ENOMEM);
+       }
+
+       cmd->bdev = bdev;
+       cmd->target_size = target_size;
+       cmd->region_size = region_size;
+       cmd->nr_regions = dm_sector_div_up(cmd->target_size, cmd->region_size);
+       cmd->nr_words = BITS_TO_LONGS(cmd->nr_regions); /* number of longs backing region_map */
+
+       init_rwsem(&cmd->lock);
+       spin_lock_init(&cmd->bitmap_lock);
+       cmd->read_only = 0;
+       cmd->fail_io = false;
+       cmd->hydration_done = false;
+
+       cmd->region_map = kvmalloc(bitmap_size(cmd->nr_regions), GFP_KERNEL);   /* not zeroed; filled by __load_bitset_in_core() */
+       if (!cmd->region_map) {
+               DMERR("Failed to allocate memory for region bitmap");
+               r = -ENOMEM;
+               goto out_with_md;
+       }
+
+       r = __create_persistent_data_structures(cmd, true);     /* true: an all-zero superblock may be formatted */
+       if (r)
+               goto out_with_region_map;
+
+       r = __load_bitset_in_core(cmd);
+       if (r) {
+               DMERR("Failed to load on-disk region map");
+               goto out_with_pds;
+       }
+
+       r = dirty_map_init(cmd);
+       if (r)
+               goto out_with_pds;
+
+       if (bitmap_full(cmd->region_map, cmd->nr_regions))      /* every region already hydrated */
+               cmd->hydration_done = true;
+
+       return cmd;
+
+out_with_pds:
+       __destroy_persistent_data_structures(cmd);
+
+out_with_region_map:
+       kvfree(cmd->region_map);
+
+out_with_md:
+       kfree(cmd);
+
+       return ERR_PTR(r);      /* failures are reported as ERR_PTR, never NULL */
+}
+
+void dm_clone_metadata_close(struct dm_clone_metadata *cmd)
+{
+       if (!cmd->fail_io)      /* NOTE(review): presumably the pds are already gone once fail_io is set - confirm */
+               __destroy_persistent_data_structures(cmd);
+
+       dirty_map_exit(cmd);    /* frees both dirty bitmaps */
+       kvfree(cmd->region_map);
+       kfree(cmd);
+}
+
+bool dm_clone_is_hydration_done(struct dm_clone_metadata *cmd)
+{
+       return cmd->hydration_done;
+}
+
+/*
+ * A region counts as hydrated when hydration of the whole device is done, or
+ * when the region's bit is set in the in-core region bitmap.
+ */
+bool dm_clone_is_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
+{
+       if (dm_clone_is_hydration_done(cmd))
+               return true;
+
+       return test_bit(region_nr, cmd->region_map);
+}
+
+/*
+ * Check whether all @nr_regions regions starting at @start are hydrated.
+ *
+ * The range is hydrated when hydration of the whole device is done, or when
+ * the first clear bit at or after @start in the region bitmap lies at or
+ * beyond the end of the range.
+ */
+bool dm_clone_is_range_hydrated(struct dm_clone_metadata *cmd,
+                               unsigned long start, unsigned long nr_regions)
+{
+       unsigned long first_unhydrated, range_end;
+
+       if (dm_clone_is_hydration_done(cmd))
+               return true;
+
+       range_end = start + nr_regions;
+       first_unhydrated = find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
+
+       if (first_unhydrated < range_end)
+               return false;
+
+       return true;
+}
+
+/* Count the bits set in the in-core region bitmap. */
+unsigned long dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata *cmd)
+{
+       unsigned long nr_hydrated;
+
+       nr_hydrated = bitmap_weight(cmd->region_map, cmd->nr_regions);
+
+       return nr_hydrated;
+}
+
+/*
+ * Find the first clear bit in the in-core region bitmap, searching from
+ * @start up to the total number of regions.
+ */
+unsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd,
+                                                  unsigned long start)
+{
+       unsigned long next;
+
+       next = find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
+
+       return next;
+}
+
+/*
+ * Propagate one word of the in-core region bitmap to the on-disk bitset.
+ *
+ * Every bit that is set in word number @word of cmd->region_map is also set
+ * in the on-disk bitset. Bits at or beyond cmd->nr_regions are ignored.
+ *
+ * Returns 0 on success, or the error reported by dm_bitset_set_bit().
+ */
+static int __update_metadata_word(struct dm_clone_metadata *cmd, unsigned long word)
+{
+       int r;
+       unsigned long bit;
+       unsigned long first = word * BITS_PER_LONG;
+       unsigned long end = min(cmd->nr_regions, (word + 1) * BITS_PER_LONG);
+
+       for (bit = first; bit < end; bit++) {
+               if (!test_bit(bit, cmd->region_map))
+                       continue;
+
+               r = dm_bitset_set_bit(&cmd->bitset_info, cmd->bitset_root,
+                                     bit, &cmd->bitset_root);
+               if (r) {
+                       DMERR("dm_bitset_set_bit failed");
+                       return r;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Commit the current metadata transaction.
+ *
+ * The steps run in a fixed order: flush the bitset cache, pre-commit the
+ * transaction manager, save the space map root, write-lock and fill in the
+ * superblock, and finally commit it. The first failing step ends the sequence
+ * and its error code is returned.
+ *
+ * After a successful commit, hydration_done is set if every region is marked
+ * in the in-core region bitmap.
+ */
+static int __metadata_commit(struct dm_clone_metadata *cmd)
+{
+       int r;
+       struct dm_block *sb_block;
+       struct superblock_disk *disk_sb;
+
+       /* Step 1: flush the bitset cache */
+       r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
+       if (r) {
+               DMERR("dm_bitset_flush failed");
+               return r;
+       }
+
+       /* Step 2: write out every block but the superblock */
+       r = dm_tm_pre_commit(cmd->tm);
+       if (r) {
+               DMERR("dm_tm_pre_commit failed");
+               return r;
+       }
+
+       /* Step 3: stash the space map root in cmd->metadata_space_map_root */
+       r = __copy_sm_root(cmd);
+       if (r) {
+               DMERR("__copy_sm_root failed");
+               return r;
+       }
+
+       /* Step 4: take the superblock write lock */
+       r = superblock_write_lock_zero(cmd, &sb_block);
+       if (r) {
+               DMERR("Failed to write_lock superblock");
+               return r;
+       }
+
+       /* Step 5: fill in the superblock from the in-core metadata */
+       disk_sb = dm_block_data(sb_block);
+       __prepare_superblock(cmd, disk_sb);
+
+       /* Step 6: release the superblock and commit it to disk */
+       r = dm_tm_commit(cmd->tm, sb_block);
+       if (r) {
+               DMERR("Failed to commit superblock");
+               return r;
+       }
+
+       /*
+        * FIXME: Scanning the whole bitmap on every commit is wasteful; find
+        * a cheaper way to detect that hydration has finished.
+        */
+       if (bitmap_full(cmd->region_map, cmd->nr_regions))
+               cmd->hydration_done = true;
+
+       return 0;
+}
+
+/*
+ * Flush a dirty map to disk.
+ *
+ * Each word flagged in dmap->dirty_words is written to the on-disk bitset and
+ * its dirty flag is cleared. The metadata is then committed and, on success,
+ * dmap->changed is reset under the bitmap spinlock.
+ *
+ * Returns 0 on success, or the first error encountered. On error the words
+ * not yet processed stay flagged as dirty and dmap->changed is left untouched.
+ */
+static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
+{
+       int r;
+       unsigned long w, irq_flags;
+
+       for (w = find_next_bit(dmap->dirty_words, cmd->nr_words, 0);
+            w < cmd->nr_words;
+            w = find_next_bit(dmap->dirty_words, cmd->nr_words, w + 1)) {
+               r = __update_metadata_word(cmd, w);
+               if (r)
+                       return r;
+
+               __clear_bit(w, dmap->dirty_words);
+       }
+
+       r = __metadata_commit(cmd);
+       if (r)
+               return r;
+
+       /* The dirty map is clean again */
+       spin_lock_irqsave(&cmd->bitmap_lock, irq_flags);
+       dmap->changed = 0;
+       spin_unlock_irqrestore(&cmd->bitmap_lock, irq_flags);
+
+       return 0;
+}
+
+/*
+ * Commit the metadata to disk.
+ *
+ * The two dirty maps are used alternately: the current one is swapped out
+ * under the bitmap spinlock and then flushed, while new updates go to the
+ * other one.
+ *
+ * Returns -EPERM if the metadata is in fail_io state or the block manager is
+ * read-only, -EINVAL if the other dirty map still has unflushed changes, and
+ * otherwise the result of flushing the swapped-out dirty map.
+ */
+int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
+{
+       int r;
+       unsigned long irq_flags;
+       struct dirty_map *old_dmap, *new_dmap;
+
+       down_write(&cmd->lock);
+
+       if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
+               r = -EPERM;
+               goto unlock;
+       }
+
+       /* Pick the dirty map to flush and the one that will replace it */
+       old_dmap = cmd->current_dmap;
+       if (old_dmap == &cmd->dmap[0])
+               new_dmap = &cmd->dmap[1];
+       else
+               new_dmap = &cmd->dmap[0];
+
+       /*
+        * If the replacement map is still marked as changed, the previous
+        * commit failed and there is no clean dirty map to switch to.
+        */
+       if (WARN_ON(new_dmap->changed)) {
+               r = -EINVAL;
+               goto unlock;
+       }
+
+       /* Make the replacement map current */
+       spin_lock_irqsave(&cmd->bitmap_lock, irq_flags);
+       cmd->current_dmap = new_dmap;
+       spin_unlock_irqrestore(&cmd->bitmap_lock, irq_flags);
+
+       /* Nobody updates the old dirty map any longer, so flush it */
+       r = __flush_dmap(cmd, old_dmap);
+
+unlock:
+       up_write(&cmd->lock);
+
+       return r;
+}
+
+/*
+ * Mark region @region_nr as hydrated in the in-core bitmap and flag the
+ * bitmap word containing it as dirty in the current dirty map.
+ *
+ * Returns 0 on success, or -EPERM if the metadata is in read-only mode.
+ */
+int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
+{
+       int r = -EPERM;
+       unsigned long irq_flags;
+       struct dirty_map *dmap;
+
+       spin_lock_irqsave(&cmd->bitmap_lock, irq_flags);
+
+       if (!cmd->read_only) {
+               dmap = cmd->current_dmap;
+
+               __set_bit(region_nr / BITS_PER_LONG, dmap->dirty_words);
+               __set_bit(region_nr, cmd->region_map);
+               dmap->changed = 1;
+
+               r = 0;
+       }
+
+       spin_unlock_irqrestore(&cmd->bitmap_lock, irq_flags);
+
+       return r;
+}
+
+/*
+ * Mark every not yet hydrated region in [@start, @start + @nr_regions) as
+ * hydrated. Regions that are already hydrated are left alone, so their bitmap
+ * words are not flagged as dirty.
+ *
+ * Returns 0 on success, or -EPERM if the metadata is in read-only mode.
+ */
+int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
+                           unsigned long nr_regions)
+{
+       int r = -EPERM;
+       struct dirty_map *dmap;
+       unsigned long nr, end, irq_flags;
+
+       spin_lock_irqsave(&cmd->bitmap_lock, irq_flags);
+
+       if (!cmd->read_only) {
+               dmap = cmd->current_dmap;
+               end = start + nr_regions;
+
+               for (nr = start; nr < end; nr++) {
+                       if (test_bit(nr, cmd->region_map))
+                               continue;
+
+                       __set_bit(nr / BITS_PER_LONG, dmap->dirty_words);
+                       __set_bit(nr, cmd->region_map);
+                       dmap->changed = 1;
+               }
+
+               r = 0;
+       }
+
+       spin_unlock_irqrestore(&cmd->bitmap_lock, irq_flags);
+
+       return r;
+}
+
+/*
+ * Reload the in-core region bitmap from the on-disk bitset.
+ *
+ * WARNING: cmd->region_map is modified here without holding the
+ * cmd->bitmap_lock spinlock, because __load_bitset_in_core() does I/O and may
+ * block. Callers must therefore make sure that this does not run concurrently
+ * with dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), unless
+ * the metadata has first been switched to read-only mode with
+ * dm_clone_metadata_set_read_only().
+ *
+ * Returns -EINVAL if the metadata is in fail_io state, otherwise the result
+ * of __load_bitset_in_core().
+ */
+int dm_clone_reload_in_core_bitset(struct dm_clone_metadata *cmd)
+{
+       int r;
+
+       down_write(&cmd->lock);
+
+       if (cmd->fail_io)
+               r = -EINVAL;
+       else
+               r = __load_bitset_in_core(cmd);
+
+       up_write(&cmd->lock);
+
+       return r;
+}
+
+/*
+ * Returns true if either of the two dirty maps is marked as changed. The
+ * flags are sampled under the bitmap spinlock.
+ */
+bool dm_clone_changed_this_transaction(struct dm_clone_metadata *cmd)
+{
+       bool changed;
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(&cmd->bitmap_lock, irq_flags);
+       changed = cmd->dmap[0].changed || cmd->dmap[1].changed;
+       spin_unlock_irqrestore(&cmd->bitmap_lock, irq_flags);
+
+       return changed;
+}
+
+/*
+ * Abort the current transaction by destroying the persistent data structures
+ * and recreating them.
+ *
+ * Returns -EPERM if the metadata is in fail_io state or the block manager is
+ * read-only. If recreating the persistent data structures fails, fail_io is
+ * set and the error is returned.
+ */
+int dm_clone_metadata_abort(struct dm_clone_metadata *cmd)
+{
+       int r = -EPERM;
+
+       down_write(&cmd->lock);
+
+       if (!cmd->fail_io && !dm_bm_is_read_only(cmd->bm)) {
+               __destroy_persistent_data_structures(cmd);
+
+               r = __create_persistent_data_structures(cmd, false);
+
+               /* Without them the metadata can be neither read nor written */
+               if (r)
+                       cmd->fail_io = true;
+       }
+
+       up_write(&cmd->lock);
+
+       return r;
+}
+
+/*
+ * Switch the metadata to read-only mode.
+ *
+ * The read_only flag is set under the bitmap spinlock. The block manager is
+ * also made read-only, unless the metadata is in fail_io state.
+ */
+void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd)
+{
+       unsigned long irq_flags;
+
+       down_write(&cmd->lock);
+
+       spin_lock_irqsave(&cmd->bitmap_lock, irq_flags);
+       cmd->read_only = 1;
+       spin_unlock_irqrestore(&cmd->bitmap_lock, irq_flags);
+
+       if (!cmd->fail_io)
+               dm_bm_set_read_only(cmd->bm);
+
+       up_write(&cmd->lock);
+}
+
+/*
+ * Switch the metadata back to read-write mode.
+ *
+ * The read_only flag is cleared under the bitmap spinlock. The block manager
+ * is also made read-write, unless the metadata is in fail_io state.
+ */
+void dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd)
+{
+       unsigned long irq_flags;
+
+       down_write(&cmd->lock);
+
+       spin_lock_irqsave(&cmd->bitmap_lock, irq_flags);
+       cmd->read_only = 0;
+       spin_unlock_irqrestore(&cmd->bitmap_lock, irq_flags);
+
+       if (!cmd->fail_io)
+               dm_bm_set_read_write(cmd->bm);
+
+       up_write(&cmd->lock);
+}
+
+/*
+ * Query the space map for its number of free blocks and store it in @result.
+ *
+ * Returns -EINVAL if the metadata is in fail_io state, otherwise the result
+ * of dm_sm_get_nr_free().
+ */
+int dm_clone_get_free_metadata_block_count(struct dm_clone_metadata *cmd,
+                                          dm_block_t *result)
+{
+       int r;
+
+       down_read(&cmd->lock);
+
+       if (cmd->fail_io)
+               r = -EINVAL;
+       else
+               r = dm_sm_get_nr_free(cmd->sm, result);
+
+       up_read(&cmd->lock);
+
+       return r;
+}
+
+/*
+ * Query the space map for its total number of blocks and store it in @result.
+ *
+ * Returns -EINVAL if the metadata is in fail_io state, otherwise the result
+ * of dm_sm_get_nr_blocks().
+ */
+int dm_clone_get_metadata_dev_size(struct dm_clone_metadata *cmd,
+                                  dm_block_t *result)
+{
+       int r;
+
+       down_read(&cmd->lock);
+
+       if (cmd->fail_io)
+               r = -EINVAL;
+       else
+               r = dm_sm_get_nr_blocks(cmd->sm, result);
+
+       up_read(&cmd->lock);
+
+       return r;
+}
diff --git a/drivers/md/dm-clone-metadata.h b/drivers/md/dm-clone-metadata.h
new file mode 100644 (file)
index 0000000..434bff0
--- /dev/null
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved.
+ */
+
+#ifndef DM_CLONE_METADATA_H
+#define DM_CLONE_METADATA_H
+
+#include "persistent-data/dm-block-manager.h"
+#include "persistent-data/dm-space-map-metadata.h"
+
+#define DM_CLONE_METADATA_BLOCK_SIZE DM_SM_METADATA_BLOCK_SIZE
+
+/*
+ * The metadata device is currently limited in size.
+ */
+#define DM_CLONE_METADATA_MAX_SECTORS DM_SM_METADATA_MAX_SECTORS
+
+/*
+ * A metadata device larger than 16GB triggers a warning.
+ */
+#define DM_CLONE_METADATA_MAX_SECTORS_WARNING (16 * (1024 * 1024 * 1024 >> SECTOR_SHIFT))
+
+#define SPACE_MAP_ROOT_SIZE 128
+
+/* dm-clone metadata */
+struct dm_clone_metadata;
+
+/*
+ * Set region status to hydrated.
+ *
+ * @cmd: The dm-clone metadata
+ * @region_nr: The region number
+ *
+ * This function doesn't block, so it's safe to call it from interrupt context.
+ */
+int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr);
+
+/*
+ * Set status of all regions in the provided range to hydrated, if not already
+ * hydrated.
+ *
+ * @cmd: The dm-clone metadata
+ * @start: Starting region number
+ * @nr_regions: Number of regions in the range
+ *
+ * This function doesn't block, so it's safe to call it from interrupt context.
+ */
+int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
+                           unsigned long nr_regions);
+
+/*
+ * Read existing or create fresh metadata.
+ *
+ * @bdev: The device storing the metadata
+ * @target_size: The target size
+ * @region_size: The region size
+ *
+ * @returns: The dm-clone metadata on success, or an ERR_PTR() encoded error
+ * on failure
+ *
+ * This function reads the superblock of @bdev and checks if it's all zeroes.
+ * If it is, it formats @bdev and creates fresh metadata. If it isn't, it
+ * validates the metadata stored in @bdev.
+ */
+struct dm_clone_metadata *dm_clone_metadata_open(struct block_device *bdev,
+                                                sector_t target_size,
+                                                sector_t region_size);
+
+/*
+ * Free the resources related to metadata management.
+ */
+void dm_clone_metadata_close(struct dm_clone_metadata *cmd);
+
+/*
+ * Commit dm-clone metadata to disk.
+ */
+int dm_clone_metadata_commit(struct dm_clone_metadata *cmd);
+
+/*
+ * Reload the in core copy of the on-disk bitmap.
+ *
+ * This should be used after aborting a metadata transaction and setting the
+ * metadata to read-only, to invalidate the in-core cache and make it match the
+ * on-disk metadata.
+ *
+ * WARNING: It must not be called concurrently with either
+ * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), as it updates
+ * the region bitmap without taking the relevant spinlock. We don't take the
+ * spinlock because dm_clone_reload_in_core_bitset() does I/O, so it may block.
+ *
+ * But, it's safe to use it after calling dm_clone_metadata_set_read_only(),
+ * because the latter sets the metadata to read-only mode. Both
+ * dm_clone_set_region_hydrated() and dm_clone_cond_set_range() refuse to touch
+ * the region bitmap, after calling dm_clone_metadata_set_read_only().
+ */
+int dm_clone_reload_in_core_bitset(struct dm_clone_metadata *cmd);
+
+/*
+ * Check whether dm-clone's metadata changed this transaction.
+ */
+bool dm_clone_changed_this_transaction(struct dm_clone_metadata *cmd);
+
+/*
+ * Abort current metadata transaction and rollback metadata to the last
+ * committed transaction.
+ */
+int dm_clone_metadata_abort(struct dm_clone_metadata *cmd);
+
+/*
+ * Switches metadata to a read only mode. Once read-only mode has been entered
+ * the following functions will return -EPERM:
+ *
+ *   dm_clone_metadata_commit()
+ *   dm_clone_set_region_hydrated()
+ *   dm_clone_cond_set_range()
+ *   dm_clone_metadata_abort()
+ */
+void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd);
+void dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd);
+
+/*
+ * Returns true if the hydration of the destination device is finished.
+ */
+bool dm_clone_is_hydration_done(struct dm_clone_metadata *cmd);
+
+/*
+ * Returns true if region @region_nr is hydrated.
+ */
+bool dm_clone_is_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr);
+
+/*
+ * Returns true if all the regions in the range are hydrated.
+ */
+bool dm_clone_is_range_hydrated(struct dm_clone_metadata *cmd,
+                               unsigned long start, unsigned long nr_regions);
+
+/*
+ * Returns the number of hydrated regions.
+ */
+unsigned long dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata *cmd);
+
+/*
+ * Returns the first unhydrated region with region_nr >= @start, or the total
+ * number of regions if no such region exists.
+ */
+unsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd,
+                                                  unsigned long start);
+
+/*
+ * Get the number of free metadata blocks.
+ */
+int dm_clone_get_free_metadata_block_count(struct dm_clone_metadata *cmd, dm_block_t *result);
+
+/*
+ * Get the total number of metadata blocks.
+ */
+int dm_clone_get_metadata_dev_size(struct dm_clone_metadata *cmd, dm_block_t *result);
+
+#endif /* DM_CLONE_METADATA_H */
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
new file mode 100644 (file)
index 0000000..cd6f9e9
--- /dev/null
@@ -0,0 +1,2191 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved.
+ */
+
+#include <linux/mm.h>
+#include <linux/bio.h>
+#include <linux/err.h>
+#include <linux/hash.h>
+#include <linux/list.h>
+#include <linux/log2.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/dm-io.h>
+#include <linux/mutex.h>
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/blkdev.h>
+#include <linux/kdev_t.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/jiffies.h>
+#include <linux/mempool.h>
+#include <linux/spinlock.h>
+#include <linux/blk_types.h>
+#include <linux/dm-kcopyd.h>
+#include <linux/workqueue.h>
+#include <linux/backing-dev.h>
+#include <linux/device-mapper.h>
+
+#include "dm.h"
+#include "dm-clone-metadata.h"
+
+#define DM_MSG_PREFIX "clone"
+
+/*
+ * Minimum and maximum allowed region sizes
+ */
+#define MIN_REGION_SIZE (1 << 3)  /* 4KB */
+#define MAX_REGION_SIZE (1 << 21) /* 1GB */
+
+#define MIN_HYDRATIONS 256 /* Size of hydration mempool */
+#define DEFAULT_HYDRATION_THRESHOLD 1 /* 1 region */
+#define DEFAULT_HYDRATION_BATCH_SIZE 1 /* Hydrate in batches of 1 region */
+
+#define COMMIT_PERIOD HZ /* 1 sec */
+
+/*
+ * Hydration hash table size: 1 << HASH_TABLE_BITS
+ */
+#define HASH_TABLE_BITS 15
+
+DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(clone_hydration_throttle,
+       "A percentage of time allocated for hydrating regions");
+
+/* Slab cache for struct dm_clone_region_hydration */
+static struct kmem_cache *_hydration_cache;
+
+/*
+ * dm-clone metadata modes.
+ *
+ * The numeric order is relied upon: callers test "mode >= CM_READ_ONLY" to
+ * catch both the read-only and the fail mode, and __set_clone_mode() uses the
+ * value as an index into its table of mode descriptions.
+ */
+enum clone_metadata_mode {
+       CM_WRITE,               /* metadata may be changed */
+       CM_READ_ONLY,           /* metadata may not be changed */
+       CM_FAIL,                /* all metadata I/O fails */
+};
+
+struct hash_table_bucket;
+
+struct clone {
+       struct dm_target *ti;
+       struct dm_target_callbacks callbacks;
+
+       struct dm_dev *metadata_dev;
+       struct dm_dev *dest_dev;
+       struct dm_dev *source_dev;
+
+       unsigned long nr_regions;
+       sector_t region_size; /* in sectors */
+       unsigned int region_shift; /* presumably log2(region_size) - TODO confirm */
+
+       /*
+        * A metadata commit and the actions taken in case it fails should run
+        * as a single atomic step.
+        */
+       struct mutex commit_lock;
+
+       struct dm_clone_metadata *cmd;
+
+       /* Region hydration hash table */
+       struct hash_table_bucket *ht;
+
+       atomic_t ios_in_flight;
+
+       wait_queue_head_t hydration_stopped;
+
+       mempool_t hydration_pool;
+
+       unsigned long last_commit_jiffies;
+
+       /*
+        * We defer incoming WRITE bios for regions that are not hydrated,
+        * until after these regions have been hydrated.
+        *
+        * Also, we defer REQ_FUA and REQ_PREFLUSH bios, until after the
+        * metadata have been committed.
+        */
+       spinlock_t lock; /* taken around updates of the deferred bio lists */
+       struct bio_list deferred_bios;
+       struct bio_list deferred_discard_bios;
+       struct bio_list deferred_flush_bios;
+       struct bio_list deferred_flush_completions;
+
+       /* Maximum number of regions being copied during background hydration. */
+       unsigned int hydration_threshold;
+
+       /* Number of regions to batch together during background hydration. */
+       unsigned int hydration_batch_size;
+
+       /* Which region to hydrate next */
+       unsigned long hydration_offset;
+
+       atomic_t hydrations_in_flight;
+
+       /*
+        * Save a copy of the table line rather than reconstructing it for the
+        * status.
+        */
+       unsigned int nr_ctr_args;
+       const char **ctr_args;
+
+       struct workqueue_struct *wq;
+       struct work_struct worker; /* queued on wq by wake_worker() */
+       struct delayed_work waker;
+
+       struct dm_kcopyd_client *kcopyd_client;
+
+       enum clone_metadata_mode mode; /* accessed with READ_ONCE()/WRITE_ONCE() */
+       unsigned long flags; /* DM_CLONE_* bit numbers, used with test_bit() */
+};
+
+/*
+ * dm-clone flags
+ */
+#define DM_CLONE_DISCARD_PASSDOWN 0
+#define DM_CLONE_HYDRATION_ENABLED 1
+#define DM_CLONE_HYDRATION_SUSPENDED 2
+
+/*---------------------------------------------------------------------------*/
+
+/*
+ * Metadata failure handling.
+ */
+/* Read the current metadata mode; pairs with WRITE_ONCE() in __set_clone_mode(). */
+static enum clone_metadata_mode get_clone_mode(struct clone *clone)
+{
+       enum clone_metadata_mode mode = READ_ONCE(clone->mode);
+
+       return mode;
+}
+
+/* Name of the device of the table that @clone's target belongs to. */
+static const char *clone_device_name(struct clone *clone)
+{
+       struct dm_table *table = clone->ti->table;
+
+       return dm_table_device_name(table);
+}
+
+/*
+ * Switch the clone to @new_mode.
+ *
+ * Fail mode is sticky: once in CM_FAIL, any requested mode is replaced by
+ * CM_FAIL. The metadata is set read-only for CM_READ_ONLY and CM_FAIL and
+ * read-write for CM_WRITE. If the mode actually changes, a table event is
+ * raised and the transition is logged.
+ */
+static void __set_clone_mode(struct clone *clone, enum clone_metadata_mode new_mode)
+{
+       /* Indexed by enum clone_metadata_mode */
+       const char *descs[] = {
+               "read-write",
+               "read-only",
+               "fail"
+       };
+       enum clone_metadata_mode prev_mode = get_clone_mode(clone);
+
+       /* There is no way out of fail mode */
+       if (prev_mode == CM_FAIL)
+               new_mode = CM_FAIL;
+
+       if (new_mode == CM_WRITE)
+               dm_clone_metadata_set_read_write(clone->cmd);
+       else if (new_mode == CM_READ_ONLY || new_mode == CM_FAIL)
+               dm_clone_metadata_set_read_only(clone->cmd);
+
+       WRITE_ONCE(clone->mode, new_mode);
+
+       if (new_mode == prev_mode)
+               return;
+
+       dm_table_event(clone->ti->table);
+       DMINFO("%s: Switching to %s mode", clone_device_name(clone),
+              descs[(int)new_mode]);
+}
+
+/*
+ * Abort the current metadata transaction, unless the clone is already in
+ * read-only or fail mode. If the abort itself fails, switch to fail mode.
+ */
+static void __abort_transaction(struct clone *clone)
+{
+       const char *name = clone_device_name(clone);
+
+       if (get_clone_mode(clone) < CM_READ_ONLY) {
+               DMERR("%s: Aborting current metadata transaction", name);
+
+               if (dm_clone_metadata_abort(clone->cmd)) {
+                       DMERR("%s: Failed to abort metadata transaction", name);
+                       __set_clone_mode(clone, CM_FAIL);
+               }
+       }
+}
+
+/*
+ * Reload the in-core region bitmap from disk, unless the clone is in fail
+ * mode. If the reload fails, switch to fail mode.
+ */
+static void __reload_in_core_bitset(struct clone *clone)
+{
+       const char *name = clone_device_name(clone);
+
+       if (get_clone_mode(clone) != CM_FAIL) {
+               DMINFO("%s: Reloading on-disk bitmap", name);
+
+               if (dm_clone_reload_in_core_bitset(clone->cmd)) {
+                       DMERR("%s: Failed to reload on-disk bitmap", name);
+                       __set_clone_mode(clone, CM_FAIL);
+               }
+       }
+}
+
+/*
+ * Handle the failure of metadata operation @op, which returned error @r: log
+ * it, abort the current transaction, switch to read-only mode and reload the
+ * in-core bitmap from disk.
+ */
+static void __metadata_operation_failed(struct clone *clone, const char *op, int r)
+{
+       const char *name = clone_device_name(clone);
+
+       DMERR("%s: Metadata operation `%s' failed: error = %d", name, op, r);
+
+       __abort_transaction(clone);
+       __set_clone_mode(clone, CM_READ_ONLY);
+
+       /*
+        * The reload below may race with dm_clone_set_region_hydrated() or
+        * dm_clone_cond_set_range(). That is harmless, because the metadata
+        * has already been switched to read-only mode above.
+        */
+       __reload_in_core_bitset(clone);
+}
+
+/*---------------------------------------------------------------------------*/
+
+/* Wake every waiter sleeping on the hydration_stopped wait queue */
+static inline void wakeup_hydration_waiters(struct clone *clone)
+{
+       wait_queue_head_t *waiters = &clone->hydration_stopped;
+
+       wake_up_all(waiters);
+}
+
+/* Queue the clone's worker on its workqueue */
+static inline void wake_worker(struct clone *clone)
+{
+       struct work_struct *work = &clone->worker;
+
+       queue_work(clone->wq, work);
+}
+
+/*---------------------------------------------------------------------------*/
+
+/*
+ * bio helper functions.
+ */
+/* Redirect @bio to the source device */
+static inline void remap_to_source(struct clone *clone, struct bio *bio)
+{
+       struct block_device *src = clone->source_dev->bdev;
+
+       bio_set_dev(bio, src);
+}
+
+/* Redirect @bio to the destination device */
+static inline void remap_to_dest(struct clone *clone, struct bio *bio)
+{
+       struct block_device *dest = clone->dest_dev->bdev;
+
+       bio_set_dev(bio, dest);
+}
+
+/*
+ * A bio triggers a metadata commit when it is a flush (as reported by
+ * op_is_flush()) and the metadata changed during the current transaction.
+ */
+static bool bio_triggers_commit(struct clone *clone, struct bio *bio)
+{
+       if (!op_is_flush(bio->bi_opf))
+               return false;
+
+       return dm_clone_changed_this_transaction(clone->cmd);
+}
+
+/*
+ * Get the address of the region in sectors.
+ *
+ * @region_nr is an unsigned long, which can be narrower than sector_t (e.g.
+ * on 32-bit builds), so it is widened before shifting. Otherwise the result
+ * would be truncated for regions located far enough into a large device.
+ */
+static inline sector_t region_to_sector(struct clone *clone, unsigned long region_nr)
+{
+       return ((sector_t)region_nr << clone->region_shift);
+}
+
+/* Region number containing the first sector of @bio */
+static inline unsigned long bio_to_region(struct clone *clone, struct bio *bio)
+{
+       sector_t first_sector = bio->bi_iter.bi_sector;
+
+       return first_sector >> clone->region_shift;
+}
+
+/*
+ * Get the range of regions that are fully covered by the bio.
+ *
+ * On return, [*rs, *re) is the half-open range of whole regions inside the
+ * bio: the start sector is rounded up to the next region boundary and the end
+ * sector is rounded down.
+ *
+ * A bio that does not contain any whole region (e.g. one that lies entirely
+ * inside a single region without starting on its boundary) would round to
+ * *rs > *re. Such a bio is reported as the empty range *rs == *re, so that
+ * callers computing "*re - *rs" never see an underflowed region count.
+ */
+static void bio_region_range(struct clone *clone, struct bio *bio,
+                            unsigned long *rs, unsigned long *re)
+{
+       *rs = dm_sector_div_up(bio->bi_iter.bi_sector, clone->region_size);
+       *re = bio_end_sector(bio) >> clone->region_shift;
+
+       /* No whole region is covered: clamp to an empty range */
+       if (*re < *rs)
+               *re = *rs;
+}
+
+/*
+ * A bio overwrites a region when it is a WRITE whose size is exactly one
+ * region.
+ */
+static inline bool is_overwrite_bio(struct clone *clone, struct bio *bio)
+{
+       if (bio_data_dir(bio) != WRITE)
+               return false;
+
+       return bio_sectors(bio) == clone->region_size;
+}
+
+/* Drain @bios, completing every bio with @status */
+static void fail_bios(struct bio_list *bios, blk_status_t status)
+{
+       struct bio *bio;
+
+       for (bio = bio_list_pop(bios); bio; bio = bio_list_pop(bios)) {
+               bio->bi_status = status;
+               bio_endio(bio);
+       }
+}
+
+/* Drain @bios, submitting every bio; the submissions are plugged together */
+static void submit_bios(struct bio_list *bios)
+{
+       struct blk_plug plug;
+       struct bio *bio;
+
+       blk_start_plug(&plug);
+
+       for (bio = bio_list_pop(bios); bio; bio = bio_list_pop(bios))
+               generic_make_request(bio);
+
+       blk_finish_plug(&plug);
+}
+
+/*
+ * Submit an already remapped bio to the underlying device.
+ *
+ * A bio that triggers a metadata commit is not submitted right away. It is
+ * queued on deferred_flush_bios and handed to the worker, which submits it
+ * after the metadata have been committed.
+ *
+ * NOTE: Remapping the bio is the caller's job.
+ */
+static void issue_bio(struct clone *clone, struct bio *bio)
+{
+       unsigned long irq_flags;
+
+       if (!bio_triggers_commit(clone, bio)) {
+               /* No commit needed: submit directly */
+               generic_make_request(bio);
+       } else if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) {
+               /*
+                * In RO or FAIL mode the metadata cannot be committed, so the
+                * bio is completed with an error.
+                */
+               bio_io_error(bio);
+       } else {
+               /*
+                * Commit-triggering bios are batched, so that a single commit
+                * in process_deferred_flush_bios() serves all of them.
+                */
+               spin_lock_irqsave(&clone->lock, irq_flags);
+               bio_list_add(&clone->deferred_flush_bios, bio);
+               spin_unlock_irqrestore(&clone->lock, irq_flags);
+
+               wake_worker(clone);
+       }
+}
+
+/*
+ * Point the bio at the destination device and hand it to issue_bio(), which
+ * delays it until after the metadata commit if the bio triggers one.
+ */
+static void remap_and_issue(struct clone *clone, struct bio *bio)
+{
+       bio_set_dev(bio, clone->dest_dev->bdev);
+       issue_bio(clone, bio);
+}
+
+/*
+ * Issue the bios that were deferred until their region finished hydrating.
+ *
+ * The bios are not submitted here. They are sorted into those that trigger a
+ * metadata commit and those that don't, appended to the matching deferred
+ * lists of the clone, and left for the worker to submit. This makes the
+ * function safe to call from interrupt context.
+ */
+static void issue_deferred_bios(struct clone *clone, struct bio_list *bios)
+{
+       struct bio *bio;
+       struct bio_list *target;
+       unsigned long irq_flags;
+       struct bio_list commit_bios = BIO_EMPTY_LIST;
+       struct bio_list plain_bios = BIO_EMPTY_LIST;
+
+       if (bio_list_empty(bios))
+               return;
+
+       for (bio = bio_list_pop(bios); bio; bio = bio_list_pop(bios)) {
+               target = bio_triggers_commit(clone, bio) ? &commit_bios : &plain_bios;
+               bio_list_add(target, bio);
+       }
+
+       spin_lock_irqsave(&clone->lock, irq_flags);
+       bio_list_merge(&clone->deferred_bios, &plain_bios);
+       bio_list_merge(&clone->deferred_flush_bios, &commit_bios);
+       spin_unlock_irqrestore(&clone->lock, irq_flags);
+
+       wake_worker(clone);
+}
+
+/*
+ * Complete a bio that overwrote a whole region.
+ *
+ * A bio carrying REQ_FUA may only be completed after the metadata have been
+ * committed, so it is queued on deferred_flush_completions for the worker.
+ *
+ * This is only called by hydration_complete(), after the metadata have been
+ * updated successfully. There is thus no need to ask
+ * dm_clone_changed_this_transaction() whether the metadata changed, which
+ * saves taking the metadata spinlock.
+ */
+static void complete_overwrite_bio(struct clone *clone, struct bio *bio)
+{
+       unsigned long irq_flags;
+
+       if (!(bio->bi_opf & REQ_FUA)) {
+               /* No commit required before completion */
+               bio_endio(bio);
+       } else if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) {
+               /*
+                * In RO or FAIL mode the metadata cannot be committed, so the
+                * bio is completed with an error.
+                */
+               bio_io_error(bio);
+       } else {
+               /*
+                * Commit-triggering bios are batched, so that a single commit
+                * in process_deferred_flush_bios() serves all of them.
+                */
+               spin_lock_irqsave(&clone->lock, irq_flags);
+               bio_list_add(&clone->deferred_flush_completions, bio);
+               spin_unlock_irqrestore(&clone->lock, irq_flags);
+
+               wake_worker(clone);
+       }
+}
+
+/* Make @bio start at @sector and span @len sectors */
+static void trim_bio(struct bio *bio, sector_t sector, unsigned int len)
+{
+       struct bvec_iter *iter = &bio->bi_iter;
+
+       iter->bi_sector = sector;
+       iter->bi_size = to_bytes(len);
+}
+
+/*
+ * Finish processing a discard bio.
+ *
+ * If discard passdown is enabled and @success is true, the bio is remapped to
+ * the destination device, trimmed to the whole regions it covers and
+ * submitted. Otherwise the bio is completed immediately.
+ */
+static void complete_discard_bio(struct clone *clone, struct bio *bio, bool success)
+{
+       unsigned long rs, re;
+
+       if (test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags) && success) {
+               remap_to_dest(clone, bio);
+               bio_region_range(clone, bio, &rs, &re);
+               /*
+                * Widen rs to sector_t before shifting: an unsigned long can
+                * be narrower than sector_t (e.g. on 32-bit builds), in which
+                * case the start sector would be truncated for regions far
+                * enough into a large device.
+                */
+               trim_bio(bio, (sector_t)rs << clone->region_shift,
+                        (re - rs) << clone->region_shift);
+               generic_make_request(bio);
+       } else {
+               bio_endio(bio);
+       }
+}
+
+/*
+ * Handle an incoming discard bio.
+ *
+ * - A discard that does not cover any whole region is completed right away.
+ * - If all covered regions are already hydrated, the discard is completed
+ *   (and possibly passed down) by complete_discard_bio().
+ * - In RO or FAIL metadata mode the discard is ignored and completed.
+ * - Otherwise the bio is queued on deferred_discard_bios for the worker.
+ */
+static void process_discard_bio(struct clone *clone, struct bio *bio)
+{
+       unsigned long rs, re, flags;
+
+       bio_region_range(clone, bio, &rs, &re);
+       BUG_ON(re > clone->nr_regions);
+
+       /*
+        * The start of the range is rounded up and its end is rounded down,
+        * so a discard that lies inside a single region can produce rs > re.
+        * Treat that like an empty range; "re - rs" below must not underflow.
+        */
+       if (unlikely(rs >= re)) {
+               bio_endio(bio);
+               return;
+       }
+
+       /*
+        * The covered regions are already hydrated so we just need to pass
+        * down the discard.
+        */
+       if (dm_clone_is_range_hydrated(clone->cmd, rs, re - rs)) {
+               complete_discard_bio(clone, bio, true);
+               return;
+       }
+
+       /*
+        * If the metadata mode is RO or FAIL we won't be able to update the
+        * metadata for the regions covered by the discard so we just ignore
+        * it.
+        */
+       if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) {
+               bio_endio(bio);
+               return;
+       }
+
+       /*
+        * Defer discard processing.
+        */
+       spin_lock_irqsave(&clone->lock, flags);
+       bio_list_add(&clone->deferred_discard_bios, bio);
+       spin_unlock_irqrestore(&clone->lock, flags);
+
+       wake_worker(clone);
+}
+
+/*---------------------------------------------------------------------------*/
+
+/*
+ * dm-clone region hydrations.
+ */
+struct dm_clone_region_hydration {
+       struct clone *clone;
+       unsigned long region_nr; /* lookup key used by __hash_find() */
+
+       struct bio *overwrite_bio;
+       bio_end_io_t *overwrite_bio_end_io;
+
+       struct bio_list deferred_bios;
+
+       blk_status_t status;
+
+       /* Used by hydration batching */
+       struct list_head list;
+
+       /* Used by hydration hash table (linked into hash_table_bucket.head) */
+       struct hlist_node h;
+};
+
+/*
+ * Hydration hash table implementation.
+ *
+ * Ideally we would like to use list_bl, which uses bit spin locks and employs
+ * the least significant bit of the list head to lock the corresponding bucket,
+ * reducing the memory overhead for the locks. But, currently, list_bl and bit
+ * spin locks don't support IRQ safe versions. Since we have to take the lock
+ * in both process and interrupt context, we must fall back to using regular
+ * spin locks; one per hash table bucket.
+ *
+ * bucket_lock_irqsave() and bucket_unlock_irqrestore() below are thin
+ * wrappers around spin_lock_irqsave() and spin_unlock_irqrestore() on the
+ * bucket's lock.
+ */
+struct hash_table_bucket {
+       struct hlist_head head; /* hydrations that hash to this bucket */
+
+       /* Spinlock protecting the bucket */
+       spinlock_t lock;
+};
+
+#define bucket_lock_irqsave(bucket, flags) \
+       spin_lock_irqsave(&(bucket)->lock, flags)
+
+#define bucket_unlock_irqrestore(bucket, flags) \
+       spin_unlock_irqrestore(&(bucket)->lock, flags)
+
+/*
+ * Allocate the hydration hash table (1 << HASH_TABLE_BITS buckets) and
+ * initialise the list head and spinlock of every bucket.
+ *
+ * Returns 0 on success, or -ENOMEM if the table cannot be allocated.
+ */
+static int hash_table_init(struct clone *clone)
+{
+       unsigned int i, sz;
+       struct hash_table_bucket *bucket;
+
+       sz = 1 << HASH_TABLE_BITS;
+
+       /* kvmalloc_array() checks the size multiplication for overflow */
+       clone->ht = kvmalloc_array(sz, sizeof(struct hash_table_bucket), GFP_KERNEL);
+       if (!clone->ht)
+               return -ENOMEM;
+
+       for (i = 0; i < sz; i++) {
+               bucket = clone->ht + i;
+
+               INIT_HLIST_HEAD(&bucket->head);
+               spin_lock_init(&bucket->lock);
+       }
+
+       return 0;
+}
+
+/* Release the bucket array allocated by hash_table_init(). */
+static void hash_table_exit(struct clone *clone)
+{
+       struct hash_table_bucket *table = clone->ht;
+
+       kvfree(table);
+}
+
+/* Map a region number to the hash table bucket its hydration hashes to. */
+static struct hash_table_bucket *get_hash_table_bucket(struct clone *clone,
+                                                      unsigned long region_nr)
+{
+       unsigned long idx = hash_long(region_nr, HASH_TABLE_BITS);
+
+       return clone->ht + idx;
+}
+
+/*
+ * Search a hash table bucket for a hydration with hd->region_nr == region_nr.
+ *
+ * Returns the matching hydration, or NULL if the bucket holds no hydration
+ * for this region.
+ *
+ * NOTE: Must be called with the bucket lock held.
+ */
+static struct dm_clone_region_hydration *
+__hash_find(struct hash_table_bucket *bucket, unsigned long region_nr)
+{
+       struct dm_clone_region_hydration *hd;
+
+       hlist_for_each_entry(hd, &bucket->head, h) {
+               if (hd->region_nr == region_nr)
+                       return hd;
+       }
+
+       return NULL;
+}
+
+/*
+ * Add hydration @hd to the front of @bucket's list.
+ *
+ * NOTE: The caller must hold the bucket lock.
+ */
+static inline void __insert_region_hydration(struct hash_table_bucket *bucket,
+                                            struct dm_clone_region_hydration *hd)
+{
+       struct hlist_head *chain = &bucket->head;
+
+       hlist_add_head(&hd->h, chain);
+}
+
+/*
+ * Insert @hd into @bucket, unless the bucket already holds a hydration for
+ * the same region.
+ *
+ * Returns @hd if it was inserted, otherwise the hydration that was already
+ * present for this region.
+ *
+ * NOTE: The caller must hold the bucket lock.
+ */
+static struct dm_clone_region_hydration *
+__find_or_insert_region_hydration(struct hash_table_bucket *bucket,
+                                 struct dm_clone_region_hydration *hd)
+{
+       struct dm_clone_region_hydration *found = __hash_find(bucket, hd->region_nr);
+
+       if (!found) {
+               __insert_region_hydration(bucket, hd);
+               found = hd;
+       }
+
+       return found;
+}
+
+/*---------------------------------------------------------------------------*/
+
+/*
+ * Take a hydration descriptor from the clone's hydration mempool and bind it
+ * to @clone. The allocation might block but it can't fail.
+ */
+static struct dm_clone_region_hydration *alloc_hydration(struct clone *clone)
+{
+       struct dm_clone_region_hydration *new_hd =
+               mempool_alloc(&clone->hydration_pool, GFP_NOIO);
+
+       new_hd->clone = clone;
+
+       return new_hd;
+}
+
+/* Give a hydration descriptor back to the mempool of the clone it belongs to. */
+static inline void free_hydration(struct dm_clone_region_hydration *hd)
+{
+       struct clone *owner = hd->clone;
+
+       mempool_free(hd, &owner->hydration_pool);
+}
+
+/*
+ * Reset a hydration descriptor for region @region_nr: no overwrite bio, no
+ * deferred bios, zero status, and unlinked list and hash table nodes.
+ */
+static void hydration_init(struct dm_clone_region_hydration *hd, unsigned long region_nr)
+{
+       /* Linkage for batching and for the hash table */
+       INIT_HLIST_NODE(&hd->h);
+       INIT_LIST_HEAD(&hd->list);
+
+       /* Per-hydration state */
+       hd->status = 0;
+       bio_list_init(&hd->deferred_bios);
+       hd->overwrite_bio = NULL;
+       hd->region_nr = region_nr;
+}
+
+/*---------------------------------------------------------------------------*/
+
+/*
+ * Record the outcome of a finished hydration in dm-clone's metadata and take
+ * the hydration out of the hash table.
+ *
+ * The region is marked as hydrated only if the hydration succeeded and the
+ * clone is not in read-only or fail mode. Returns -EPERM in read-only or fail
+ * mode, otherwise the result of the metadata update (0 if none was needed).
+ */
+static int hydration_update_metadata(struct dm_clone_region_hydration *hd)
+{
+       int r;
+       unsigned long flags;
+       struct clone *clone = hd->clone;
+       struct hash_table_bucket *bucket;
+
+       if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY))
+               r = -EPERM;
+       else if (hd->status == BLK_STS_OK)
+               r = dm_clone_set_region_hydrated(clone->cmd, hd->region_nr);
+       else
+               r = 0;
+
+       /* The hydration leaves the hash table whatever the outcome */
+       bucket = get_hash_table_bucket(clone, hd->region_nr);
+
+       bucket_lock_irqsave(bucket, flags);
+       hlist_del(&hd->h);
+       bucket_unlock_irqrestore(bucket, flags);
+
+       return r;
+}
+
+/*
+ * Finish the hydration of a region:
+ *
+ *     1. Update dm-clone's metadata and remove the hydration from the hash
+ *        table.
+ *     2. On success, complete the overwrite bio, if any, and issue the
+ *        deferred bios. On failure, fail the overwrite bio and the deferred
+ *        bios.
+ *     3. Free the hydration descriptor.
+ *     4. If this was the last hydration, wake up anyone waiting for
+ *        hydrations to finish.
+ */
+static void hydration_complete(struct dm_clone_region_hydration *hd)
+{
+       struct clone *clone = hd->clone;
+       int r = hydration_update_metadata(hd);
+       bool failed = !(hd->status == BLK_STS_OK && likely(!r));
+
+       if (failed) {
+               /* A metadata error is reported to the bios as an I/O error */
+               blk_status_t status = r ? BLK_STS_IOERR : hd->status;
+
+               if (hd->overwrite_bio)
+                       bio_list_add(&hd->deferred_bios, hd->overwrite_bio);
+
+               fail_bios(&hd->deferred_bios, status);
+       } else {
+               if (hd->overwrite_bio)
+                       complete_overwrite_bio(clone, hd->overwrite_bio);
+
+               issue_deferred_bios(clone, &hd->deferred_bios);
+       }
+
+       free_hydration(hd);
+
+       if (atomic_dec_and_test(&clone->hydrations_in_flight))
+               wakeup_hydration_waiters(clone);
+}
+
+/*
+ * kcopyd completion callback for a hydration copy.
+ *
+ * @context is the hydration that hydration_copy() was called with. That
+ * hydration and every hydration batched on its list are completed with the
+ * same status: BLK_STS_IOERR if the copy reported a read or write error,
+ * BLK_STS_OK otherwise.
+ */
+static void hydration_kcopyd_callback(int read_err, unsigned long write_err, void *context)
+{
+       struct dm_clone_region_hydration *head = context;
+       struct dm_clone_region_hydration *cur, *next;
+       /* Read the clone now: completing the head frees its descriptor */
+       struct clone *clone = head->clone;
+       blk_status_t status = BLK_STS_OK;
+
+       LIST_HEAD(batched_hydrations);
+
+       if (read_err || write_err) {
+               DMERR_LIMIT("%s: hydration failed", clone_device_name(clone));
+               status = BLK_STS_IOERR;
+       }
+
+       /* Detach the rest of the batch before the head goes away */
+       list_splice_tail(&head->list, &batched_hydrations);
+
+       head->status = status;
+       hydration_complete(head);
+
+       /* Complete batched hydrations */
+       list_for_each_entry_safe(cur, next, &batched_hydrations, list) {
+               cur->status = status;
+               hydration_complete(cur);
+       }
+
+       /* Continue background hydration, if there is no I/O in-flight */
+       if (test_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags) &&
+           !atomic_read(&clone->ios_in_flight))
+               wake_worker(clone);
+}
+
+/*
+ * Copy @nr_regions consecutive regions, starting at hd->region_nr, from the
+ * source device to the same sectors of the destination device using kcopyd.
+ *
+ * hydration_kcopyd_callback() is invoked with @hd when the copy finishes.
+ * The in-flight hydration count is raised by @nr_regions before the copy is
+ * issued.
+ */
+static void hydration_copy(struct dm_clone_region_hydration *hd, unsigned int nr_regions)
+{
+       unsigned long region_start, region_end;
+       sector_t tail_size, region_size, total_size;
+       struct dm_io_region from, to;
+       struct clone *clone = hd->clone;
+
+       region_size = clone->region_size;
+       region_start = hd->region_nr;
+       region_end = region_start + nr_regions - 1;
+
+       /*
+        * Widen to sector_t before shifting: nr_regions is an unsigned int, so
+        * shifting it directly can overflow before the result is stored in
+        * total_size.
+        */
+       total_size = (sector_t)(nr_regions - 1) << clone->region_shift;
+
+       if (region_end == clone->nr_regions - 1) {
+               /*
+                * The last region of the target might be smaller than
+                * region_size.
+                */
+               tail_size = clone->ti->len & (region_size - 1);
+               if (!tail_size)
+                       tail_size = region_size;
+       } else {
+               tail_size = region_size;
+       }
+
+       total_size += tail_size;
+
+       from.bdev = clone->source_dev->bdev;
+       from.sector = region_to_sector(clone, region_start);
+       from.count = total_size;
+
+       to.bdev = clone->dest_dev->bdev;
+       to.sector = from.sector;
+       to.count = from.count;
+
+       /* Issue copy */
+       atomic_add(nr_regions, &clone->hydrations_in_flight);
+       dm_kcopyd_copy(clone->kcopyd_client, &from, 1, &to, 0,
+                      hydration_kcopyd_callback, hd);
+}
+
+/*
+ * Completion handler installed on an overwrite bio: record the bio's status
+ * in its hydration, put back the bio's original completion handler and
+ * complete the hydration.
+ */
+static void overwrite_endio(struct bio *bio)
+{
+       struct dm_clone_region_hydration *hd = bio->bi_private;
+
+       hd->status = bio->bi_status;
+       bio->bi_end_io = hd->overwrite_bio_end_io;
+
+       hydration_complete(hd);
+}
+
+/*
+ * Hydrate a region by submitting @bio, which overwrites it, instead of
+ * copying the region. overwrite_endio() completes the hydration when the bio
+ * finishes.
+ */
+static void hydration_overwrite(struct dm_clone_region_hydration *hd, struct bio *bio)
+{
+       struct clone *clone = hd->clone;
+
+       /* Remember the bio and its original completion handler */
+       hd->overwrite_bio_end_io = bio->bi_end_io;
+       hd->overwrite_bio = bio;
+
+       /*
+        * bi_private is not saved and restored: device mapper core generates
+        * a new bio for us to use, with clean bi_private.
+        */
+       bio->bi_private = hd;
+       bio->bi_end_io = overwrite_endio;
+
+       atomic_inc(&clone->hydrations_in_flight);
+       generic_make_request(bio);
+}
+
+/*
+ * Hydrate bio's region.
+ *
+ * This function starts the hydration of the bio's region and puts the bio in
+ * the list of deferred bios for this region. If the region has already
+ * finished hydrating by the time this function is called, the bio is issued
+ * right away. If a hydration of the region is already in progress, the bio is
+ * only added to that hydration's deferred bios.
+ *
+ * The bucket lock is dropped while the hydration descriptor is allocated, so
+ * the state of the region is checked again after the lock is retaken.
+ *
+ * NOTE: The bio remapping must be performed by the caller.
+ */
+static void hydrate_bio_region(struct clone *clone, struct bio *bio)
+{
+       unsigned long flags;
+       unsigned long region_nr;
+       struct hash_table_bucket *bucket;
+       struct dm_clone_region_hydration *hd, *hd2;
+
+       region_nr = bio_to_region(clone, bio);
+       bucket = get_hash_table_bucket(clone, region_nr);
+
+       bucket_lock_irqsave(bucket, flags);
+
+       hd = __hash_find(bucket, region_nr);
+       if (hd) {
+               /* Someone else is hydrating the region */
+               bio_list_add(&hd->deferred_bios, bio);
+               bucket_unlock_irqrestore(bucket, flags);
+               return;
+       }
+
+       if (dm_clone_is_region_hydrated(clone->cmd, region_nr)) {
+               /* The region has been hydrated */
+               bucket_unlock_irqrestore(bucket, flags);
+               issue_bio(clone, bio);
+               return;
+       }
+
+       /*
+        * We must allocate a hydration descriptor and start the hydration of
+        * the corresponding region.
+        */
+       bucket_unlock_irqrestore(bucket, flags);
+
+       hd = alloc_hydration(clone);
+       hydration_init(hd, region_nr);
+
+       bucket_lock_irqsave(bucket, flags);
+
+       /* Check if the region has been hydrated in the meantime. */
+       if (dm_clone_is_region_hydrated(clone->cmd, region_nr)) {
+               bucket_unlock_irqrestore(bucket, flags);
+               free_hydration(hd);
+               issue_bio(clone, bio);
+               return;
+       }
+
+       hd2 = __find_or_insert_region_hydration(bucket, hd);
+       if (hd2 != hd) {
+               /* Someone else started the region's hydration. */
+               bio_list_add(&hd2->deferred_bios, bio);
+               bucket_unlock_irqrestore(bucket, flags);
+               free_hydration(hd);
+               return;
+       }
+
+       /*
+        * If the metadata mode is RO or FAIL then there is no point starting a
+        * hydration, since we will not be able to update the metadata when the
+        * hydration finishes.
+        */
+       if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) {
+               hlist_del(&hd->h);
+               bucket_unlock_irqrestore(bucket, flags);
+               free_hydration(hd);
+               bio_io_error(bio);
+               return;
+       }
+
+       /*
+        * Start region hydration.
+        *
+        * If a bio overwrites a region, i.e., its size is equal to the
+        * region's size, then we don't need to copy the region from the source
+        * to the destination device.
+        */
+       if (is_overwrite_bio(clone, bio)) {
+               bucket_unlock_irqrestore(bucket, flags);
+               hydration_overwrite(hd, bio);
+       } else {
+               bio_list_add(&hd->deferred_bios, bio);
+               bucket_unlock_irqrestore(bucket, flags);
+               hydration_copy(hd, 1);
+       }
+}
+
+/*---------------------------------------------------------------------------*/
+
+/*
+ * Background hydrations.
+ */
+
+/*
+ * Batch region hydrations.
+ *
+ * To better utilize device bandwidth we batch together the hydration of
+ * adjacent regions. This allows us to use small region sizes, e.g., 4KB, which
+ * is good for small, random write performance (because of the overwriting of
+ * un-hydrated regions) and at the same time issue big copy requests to kcopyd
+ * to achieve high hydration bandwidth.
+ *
+ * head is the hydration of the first region of the current batch, or NULL
+ * when no batch is open. nr_batched_regions is the number of regions in the
+ * batch, head included.
+ */
+struct batch_info {
+       struct dm_clone_region_hydration *head;
+       unsigned int nr_batched_regions;
+};
+
+/*
+ * Add hydration @hd to the current batch, issuing batches as needed.
+ *
+ * @hd joins the open batch if the batch has room and @hd's region directly
+ * follows the batch's last region. The open batch is issued once it is full
+ * or when @hd cannot join it; in the latter case @hd starts a new batch,
+ * unless the maximum batch size is zero or one, in which case @hd is issued
+ * on its own.
+ */
+static void __batch_hydration(struct batch_info *batch,
+                             struct dm_clone_region_hydration *hd)
+{
+       struct clone *clone = hd->clone;
+       unsigned int limit = READ_ONCE(clone->hydration_batch_size);
+
+       if (batch->head) {
+               unsigned long next_region = batch->head->region_nr +
+                                           batch->nr_batched_regions;
+
+               /* Try to extend the current batch */
+               if (batch->nr_batched_regions < limit &&
+                   next_region == hd->region_nr) {
+                       list_add_tail(&hd->list, &batch->head->list);
+                       batch->nr_batched_regions++;
+                       hd = NULL;
+               }
+
+               /* Issue a batch that is full or that hd could not join */
+               if (hd || batch->nr_batched_regions >= limit) {
+                       hydration_copy(batch->head, batch->nr_batched_regions);
+                       batch->head = NULL;
+                       batch->nr_batched_regions = 0;
+               }
+       }
+
+       /* hd was absorbed by the batch */
+       if (!hd)
+               return;
+
+       /* We treat max batch sizes of zero and one equivalently */
+       if (limit <= 1) {
+               hydration_copy(hd, 1);
+               return;
+       }
+
+       /* Start a new batch */
+       BUG_ON(!list_empty(&hd->list));
+       batch->nr_batched_regions = 1;
+       batch->head = hd;
+}
+
+/*
+ * Find the next region, at or after @offset, that is neither hydrated nor
+ * already being hydrated, insert a hydration for it into the hash table and
+ * hand it to the current batch.
+ *
+ * Returns the offset from which the next search should start.
+ */
+static unsigned long __start_next_hydration(struct clone *clone,
+                                           unsigned long offset,
+                                           struct batch_info *batch)
+{
+       unsigned long flags;
+       struct hash_table_bucket *bucket;
+       unsigned long nr_regions = clone->nr_regions;
+       /* Allocated before any bucket lock is taken */
+       struct dm_clone_region_hydration *hd = alloc_hydration(clone);
+
+       /* Try to find a region to hydrate. */
+       do {
+               offset = dm_clone_find_next_unhydrated_region(clone->cmd, offset);
+               if (offset == nr_regions)
+                       break;
+
+               bucket = get_hash_table_bucket(clone, offset);
+               bucket_lock_irqsave(bucket, flags);
+
+               if (dm_clone_is_region_hydrated(clone->cmd, offset) ||
+                   __hash_find(bucket, offset)) {
+                       /* Already hydrated or hydration in progress: skip */
+                       bucket_unlock_irqrestore(bucket, flags);
+                       continue;
+               }
+
+               hydration_init(hd, offset);
+               __insert_region_hydration(bucket, hd);
+               bucket_unlock_irqrestore(bucket, flags);
+
+               /* Batch hydration */
+               __batch_hydration(batch, hd);
+
+               return offset + 1;
+       } while (++offset < nr_regions);
+
+       /* No region was found, so the descriptor was not used */
+       if (hd)
+               free_hydration(hd);
+
+       return offset;
+}
+
+/*
+ * This function searches for regions that still reside in the source device
+ * and starts their hydration.
+ *
+ * Nothing is done in read-only or fail mode, or when the hydration is done.
+ * The search starts at clone->hydration_offset and stops when hydration is
+ * suspended or disabled, when there is I/O in flight, when the last region
+ * has been reached, or when the number of in-flight plus batched hydrations
+ * exceeds clone->hydration_threshold. The offset to resume from is stored
+ * back in clone->hydration_offset and wraps to zero past the last region.
+ *
+ * clone->hydrations_in_flight is held elevated while the function runs and
+ * hydration waiters are woken up if dropping it brings the count to zero.
+ */
+static void do_hydration(struct clone *clone)
+{
+       unsigned int current_volume;
+       unsigned long offset, nr_regions = clone->nr_regions;
+
+       struct batch_info batch = {
+               .head = NULL,
+               .nr_batched_regions = 0,
+       };
+
+       if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY))
+               return;
+
+       if (dm_clone_is_hydration_done(clone->cmd))
+               return;
+
+       /*
+        * Avoid race with device suspension.
+        */
+       atomic_inc(&clone->hydrations_in_flight);
+
+       /*
+        * Make sure atomic_inc() is ordered before test_bit(), otherwise we
+        * might race with clone_postsuspend() and start a region hydration
+        * after the target has been suspended.
+        *
+        * This is paired with the smp_mb__after_atomic() in
+        * clone_postsuspend().
+        */
+       smp_mb__after_atomic();
+
+       offset = clone->hydration_offset;
+       while (likely(!test_bit(DM_CLONE_HYDRATION_SUSPENDED, &clone->flags)) &&
+              !atomic_read(&clone->ios_in_flight) &&
+              test_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags) &&
+              offset < nr_regions) {
+               current_volume = atomic_read(&clone->hydrations_in_flight);
+               current_volume += batch.nr_batched_regions;
+
+               if (current_volume > READ_ONCE(clone->hydration_threshold))
+                       break;
+
+               offset = __start_next_hydration(clone, offset, &batch);
+       }
+
+       if (batch.head)
+               hydration_copy(batch.head, batch.nr_batched_regions);
+
+       if (offset >= nr_regions)
+               offset = 0;
+
+       clone->hydration_offset = offset;
+
+       if (atomic_dec_and_test(&clone->hydrations_in_flight))
+               wakeup_hydration_waiters(clone);
+}
+
+/*---------------------------------------------------------------------------*/
+
+/*
+ * Return true when jiffies is outside the COMMIT_PERIOD window that starts at
+ * the time of the last commit.
+ */
+static bool need_commit_due_to_time(struct clone *clone)
+{
+       unsigned long window_start = clone->last_commit_jiffies;
+       unsigned long window_end = window_start + COMMIT_PERIOD;
+
+       return !time_in_range(jiffies, window_start, window_end);
+}
+
+/*
+ * Commit the metadata, if it changed during the current transaction.
+ *
+ * The commit is serialized by clone->commit_lock. When the commit succeeds
+ * and the hydration is done, a dm table event is raised.
+ *
+ * Returns 0 on success or when there is nothing to commit, -EPERM in
+ * read-only or fail mode, or the error returned by
+ * dm_clone_metadata_commit(), which is also reported through
+ * __metadata_operation_failed().
+ */
+static int commit_metadata(struct clone *clone)
+{
+       int r = 0;
+
+       mutex_lock(&clone->commit_lock);
+
+       if (!dm_clone_changed_this_transaction(clone->cmd))
+               goto out;
+
+       if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) {
+               r = -EPERM;
+               goto out;
+       }
+
+       r = dm_clone_metadata_commit(clone->cmd);
+
+       if (unlikely(r)) {
+               __metadata_operation_failed(clone, "dm_clone_metadata_commit", r);
+               goto out;
+       }
+
+       if (dm_clone_is_hydration_done(clone->cmd))
+               dm_table_event(clone->ti->table);
+out:
+       mutex_unlock(&clone->commit_lock);
+
+       return r;
+}
+
+/*
+ * Process the discards queued on clone->deferred_discard_bios.
+ *
+ * The metadata is updated for the range of regions each discard covers and
+ * then every discard is completed. The discards are completed as failed if
+ * the clone is in read-only or fail mode or if a metadata update fails.
+ */
+static void process_deferred_discards(struct clone *clone)
+{
+       int r = -EPERM;
+       struct bio *bio;
+       struct blk_plug plug;
+       unsigned long first, end, flags;
+       struct bio_list pending = BIO_EMPTY_LIST;
+
+       /* Take over the queued discards */
+       spin_lock_irqsave(&clone->lock, flags);
+       bio_list_merge(&pending, &clone->deferred_discard_bios);
+       bio_list_init(&clone->deferred_discard_bios);
+       spin_unlock_irqrestore(&clone->lock, flags);
+
+       if (bio_list_empty(&pending))
+               return;
+
+       if (likely(get_clone_mode(clone) < CM_READ_ONLY)) {
+               /* Update the metadata */
+               bio_list_for_each(bio, &pending) {
+                       bio_region_range(clone, bio, &first, &end);
+                       /*
+                        * A discard request might cover regions that have
+                        * been already hydrated. There is no need to update
+                        * the metadata for these regions.
+                        */
+                       r = dm_clone_cond_set_range(clone->cmd, first, end - first);
+
+                       if (unlikely(r))
+                               break;
+               }
+       }
+
+       blk_start_plug(&plug);
+       while ((bio = bio_list_pop(&pending)))
+               complete_discard_bio(clone, bio, r == 0);
+       blk_finish_plug(&plug);
+}
+
+/* Take the bios queued on clone->deferred_bios and submit them. */
+static void process_deferred_bios(struct clone *clone)
+{
+       unsigned long flags;
+       struct bio_list pending = BIO_EMPTY_LIST;
+
+       spin_lock_irqsave(&clone->lock, flags);
+       bio_list_merge(&pending, &clone->deferred_bios);
+       bio_list_init(&clone->deferred_bios);
+       spin_unlock_irqrestore(&clone->lock, flags);
+
+       if (!bio_list_empty(&pending))
+               submit_bios(&pending);
+}
+
+/*
+ * Commit the metadata and then process the deferred flush bios and the
+ * deferred flush completions.
+ *
+ * Nothing is done when both lists are empty, unless the metadata changed
+ * during this transaction and the commit period has elapsed. If the commit
+ * fails, every bio of both lists is failed.
+ */
+static void process_deferred_flush_bios(struct clone *clone)
+{
+       struct bio *bio;
+       unsigned long flags;
+       bool nothing_queued;
+       struct bio_list flushes = BIO_EMPTY_LIST;
+       struct bio_list completions = BIO_EMPTY_LIST;
+
+       /*
+        * If there are any deferred flush bios, we must commit the metadata
+        * before issuing them or signaling their completion.
+        */
+       spin_lock_irqsave(&clone->lock, flags);
+       bio_list_merge(&flushes, &clone->deferred_flush_bios);
+       bio_list_init(&clone->deferred_flush_bios);
+
+       bio_list_merge(&completions, &clone->deferred_flush_completions);
+       bio_list_init(&clone->deferred_flush_completions);
+       spin_unlock_irqrestore(&clone->lock, flags);
+
+       nothing_queued = bio_list_empty(&flushes) && bio_list_empty(&completions);
+
+       if (nothing_queued &&
+           !(dm_clone_changed_this_transaction(clone->cmd) && need_commit_due_to_time(clone)))
+               return;
+
+       if (commit_metadata(clone)) {
+               /* The commit failed: fail everything that depended on it */
+               bio_list_merge(&flushes, &completions);
+
+               while ((bio = bio_list_pop(&flushes)))
+                       bio_io_error(bio);
+
+               return;
+       }
+
+       clone->last_commit_jiffies = jiffies;
+
+       while ((bio = bio_list_pop(&completions)))
+               bio_endio(bio);
+
+       while ((bio = bio_list_pop(&flushes)))
+               generic_make_request(bio);
+}
+
+/*
+ * Worker: process the deferred bios and discards, commit the metadata along
+ * with the deferred flushes, and then continue the background hydration.
+ */
+static void do_worker(struct work_struct *work)
+{
+       struct clone *clone = container_of(work, struct clone, worker);
+
+       process_deferred_bios(clone);
+       process_deferred_discards(clone);
+
+       /*
+        * process_deferred_flush_bios() does the following:
+        *
+        *   - Commits the metadata
+        *
+        *   - Processes deferred REQ_FUA completions
+        *
+        *   - Processes deferred REQ_PREFLUSH bios
+        */
+       process_deferred_flush_bios(clone);
+
+       /* Background hydration */
+       do_hydration(clone);
+}
+
+/*
+ * Periodic waker: wake up the worker, so that not too much unwritten data
+ * builds up and so that background hydration restarts if it has been stopped
+ * by in-flight I/O, then re-arm itself to run again after COMMIT_PERIOD.
+ */
+static void do_waker(struct work_struct *work)
+{
+       struct delayed_work *dwork = to_delayed_work(work);
+       struct clone *clone = container_of(dwork, struct clone, waker);
+
+       wake_worker(clone);
+       queue_delayed_work(clone->wq, &clone->waker, COMMIT_PERIOD);
+}
+
+/*---------------------------------------------------------------------------*/
+
+/*
+ * Target methods
+ *
+ * clone_map() maps a bio submitted to the target. It returns DM_MAPIO_KILL
+ * in fail mode, DM_MAPIO_REMAPPED for reads of regions that are not hydrated
+ * (the bio is remapped to the source device) and DM_MAPIO_SUBMITTED in every
+ * other case, where the bio is issued, deferred or completed by dm-clone.
+ *
+ * NOTE(review): ios_in_flight is incremented before the CM_FAIL check;
+ * confirm that the counter is dropped again for bios that are killed with
+ * DM_MAPIO_KILL.
+ */
+static int clone_map(struct dm_target *ti, struct bio *bio)
+{
+       struct clone *clone = ti->private;
+       unsigned long region_nr;
+
+       atomic_inc(&clone->ios_in_flight);
+
+       if (unlikely(get_clone_mode(clone) == CM_FAIL))
+               return DM_MAPIO_KILL;
+
+       /*
+        * REQ_PREFLUSH bios carry no data:
+        *
+        * - Commit metadata, if changed
+        *
+        * - Pass down to destination device
+        */
+       if (bio->bi_opf & REQ_PREFLUSH) {
+               remap_and_issue(clone, bio);
+               return DM_MAPIO_SUBMITTED;
+       }
+
+       bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
+
+       /*
+        * dm-clone interprets discards and performs a fast hydration of the
+        * discarded regions, i.e., we skip the copy from the source device and
+        * just mark the regions as hydrated.
+        */
+       if (bio_op(bio) == REQ_OP_DISCARD) {
+               process_discard_bio(clone, bio);
+               return DM_MAPIO_SUBMITTED;
+       }
+
+       /*
+        * If the bio's region is hydrated, redirect it to the destination
+        * device.
+        *
+        * If the region is not hydrated and the bio is a READ, redirect it to
+        * the source device.
+        *
+        * Else, defer WRITE bio until after its region has been hydrated and
+        * start the region's hydration immediately.
+        */
+       region_nr = bio_to_region(clone, bio);
+       if (dm_clone_is_region_hydrated(clone->cmd, region_nr)) {
+               remap_and_issue(clone, bio);
+               return DM_MAPIO_SUBMITTED;
+       } else if (bio_data_dir(bio) == READ) {
+               remap_to_source(clone, bio);
+               return DM_MAPIO_REMAPPED;
+       }
+
+       remap_to_dest(clone, bio);
+       hydrate_bio_region(clone, bio);
+
+       return DM_MAPIO_SUBMITTED;
+}
+
+/* End-io method: drop the in-flight I/O count taken in clone_map(). */
+static int clone_endio(struct dm_target *ti, struct bio *bio, blk_status_t *error)
+{
+       struct clone *clone = ti->private;
+       atomic_t *in_flight = &clone->ios_in_flight;
+
+       atomic_dec(in_flight);
+
+       return DM_ENDIO_DONE;
+}
+
+/*
+ * Emit the number of feature arguments followed by the feature arguments
+ * themselves. A feature argument is emitted for every feature flag that is
+ * cleared. *sz_ptr is the current output offset and is updated on return.
+ */
+static void emit_flags(struct clone *clone, char *result, unsigned int maxlen,
+                      ssize_t *sz_ptr)
+{
+       /* DMEMIT() appends to result at offset sz */
+       ssize_t sz = *sz_ptr;
+       unsigned int count = 0;
+
+       if (!test_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags))
+               count++;
+
+       if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags))
+               count++;
+
+       DMEMIT("%u ", count);
+
+       if (!test_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags))
+               DMEMIT("no_hydration ");
+
+       if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags))
+               DMEMIT("no_discard_passdown ");
+
+       *sz_ptr = sz;
+}
+
+/*
+ * Emit the number of core arguments followed by the hydration_threshold and
+ * hydration_batch_size key/value pairs. *sz_ptr is the current output offset
+ * and is updated on return.
+ */
+static void emit_core_args(struct clone *clone, char *result,
+                          unsigned int maxlen, ssize_t *sz_ptr)
+{
+       /* DMEMIT() appends to result at offset sz */
+       ssize_t sz = *sz_ptr;
+       /* Two key/value pairs */
+       unsigned int count = 4;
+       unsigned int threshold = READ_ONCE(clone->hydration_threshold);
+       unsigned int batch_size = READ_ONCE(clone->hydration_batch_size);
+
+       DMEMIT("%u hydration_threshold %u hydration_batch_size %u ", count,
+              threshold, batch_size);
+
+       *sz_ptr = sz;
+}
+
+/*
+ * Status format:
+ *
+ * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
+ * <clone region size> <#hydrated regions>/<#total regions> <#hydrating regions>
+ * <#features> <features>* <#core args> <core args>* <clone metadata mode>
+ *
+ * For STATUSTYPE_INFO the line above is emitted. In fail mode only "Fail" is
+ * emitted, and "Error" is emitted if the metadata block counts cannot be
+ * read. Unless DM_STATUS_NOFLUSH_FLAG is set or the target is suspended, the
+ * metadata is committed first.
+ *
+ * For STATUSTYPE_TABLE the metadata, destination and source devices are
+ * emitted, followed by the saved constructor arguments.
+ */
+static void clone_status(struct dm_target *ti, status_type_t type,
+                        unsigned int status_flags, char *result,
+                        unsigned int maxlen)
+{
+       int r;
+       unsigned int i;
+       ssize_t sz = 0;
+       dm_block_t nr_free_metadata_blocks = 0;
+       dm_block_t nr_metadata_blocks = 0;
+       char buf[BDEVNAME_SIZE];
+       struct clone *clone = ti->private;
+
+       switch (type) {
+       case STATUSTYPE_INFO:
+               if (get_clone_mode(clone) == CM_FAIL) {
+                       DMEMIT("Fail");
+                       break;
+               }
+
+               /* Commit to ensure statistics aren't out-of-date */
+               if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
+                       (void) commit_metadata(clone);
+
+               r = dm_clone_get_free_metadata_block_count(clone->cmd, &nr_free_metadata_blocks);
+
+               if (r) {
+                       DMERR("%s: dm_clone_get_free_metadata_block_count returned %d",
+                             clone_device_name(clone), r);
+                       goto error;
+               }
+
+               r = dm_clone_get_metadata_dev_size(clone->cmd, &nr_metadata_blocks);
+
+               if (r) {
+                       DMERR("%s: dm_clone_get_metadata_dev_size returned %d",
+                             clone_device_name(clone), r);
+                       goto error;
+               }
+
+               DMEMIT("%u %llu/%llu %llu %lu/%lu %u ",
+                      DM_CLONE_METADATA_BLOCK_SIZE,
+                      (unsigned long long)(nr_metadata_blocks - nr_free_metadata_blocks),
+                      (unsigned long long)nr_metadata_blocks,
+                      (unsigned long long)clone->region_size,
+                      dm_clone_nr_of_hydrated_regions(clone->cmd),
+                      clone->nr_regions,
+                      atomic_read(&clone->hydrations_in_flight));
+
+               emit_flags(clone, result, maxlen, &sz);
+               emit_core_args(clone, result, maxlen, &sz);
+
+               switch (get_clone_mode(clone)) {
+               case CM_WRITE:
+                       DMEMIT("rw");
+                       break;
+               case CM_READ_ONLY:
+                       DMEMIT("ro");
+                       break;
+               case CM_FAIL:
+                       DMEMIT("Fail");
+               }
+
+               break;
+
+       case STATUSTYPE_TABLE:
+               format_dev_t(buf, clone->metadata_dev->bdev->bd_dev);
+               DMEMIT("%s ", buf);
+
+               format_dev_t(buf, clone->dest_dev->bdev->bd_dev);
+               DMEMIT("%s ", buf);
+
+               format_dev_t(buf, clone->source_dev->bdev->bd_dev);
+               DMEMIT("%s", buf);
+
+               for (i = 0; i < clone->nr_ctr_args; i++)
+                       DMEMIT(" %s", clone->ctr_args[i]);
+       }
+
+       return;
+
+error:
+       DMEMIT("Error");
+}
+
+/*
+ * Congestion callback: report the combined congestion state of the
+ * destination and source devices for the requested @bdi_bits.
+ */
+static int clone_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
+{
+       struct clone *clone = container_of(cb, struct clone, callbacks);
+       struct request_queue *source_q = bdev_get_queue(clone->source_dev->bdev);
+       struct request_queue *dest_q = bdev_get_queue(clone->dest_dev->bdev);
+       int dest_state = bdi_congested(dest_q->backing_dev_info, bdi_bits);
+       int source_state = bdi_congested(source_q->backing_dev_info, bdi_bits);
+
+       return dest_state | source_state;
+}
+
+/* Return the size of @dev's block device in sectors. */
+static sector_t get_dev_size(struct dm_dev *dev)
+{
+       loff_t nr_bytes = i_size_read(dev->bdev->bd_inode);
+
+       return nr_bytes >> SECTOR_SHIFT;
+}
+
+/*---------------------------------------------------------------------------*/
+
+/*
+ * Construct a clone device mapping:
+ *
+ * clone <metadata dev> <destination dev> <source dev> <region size>
+ *     [<#feature args> [<feature arg>]* [<#core args> [key value]*]]
+ *
+ * metadata dev: Fast device holding the persistent metadata
+ * destination dev: The destination device, which will become a clone of the
+ *                  source device
+ * source dev: The read-only source device that gets cloned
+ * region size: dm-clone unit size in sectors
+ *
+ * #feature args: Number of feature arguments passed
+ * feature args: E.g. no_hydration, no_discard_passdown
+ *
+ * #core arguments: An even number of core arguments
+ * core arguments: Key/value pairs for tuning the core
+ *                E.g. 'hydration_threshold 256'
+ */
+/*
+ * Parse the optional feature argument group and clear the feature flag that
+ * corresponds to each argument.
+ *
+ * Returns 0 on success. On failure, ti->error is set and the error of
+ * dm_read_arg_group() or -EINVAL, for an unknown argument, is returned.
+ */
+static int parse_feature_args(struct dm_arg_set *as, struct clone *clone)
+{
+       int r;
+       unsigned int argc;
+       const char *arg_name;
+       struct dm_target *ti = clone->ti;
+
+       const struct dm_arg args = {
+               .min = 0,
+               .max = 2,
+               .error = "Invalid number of feature arguments"
+       };
+
+       /* No feature arguments supplied */
+       if (!as->argc)
+               return 0;
+
+       r = dm_read_arg_group(&args, as, &argc, &ti->error);
+       if (r)
+               return r;
+
+       for (; argc; argc--) {
+               arg_name = dm_shift_arg(as);
+
+               if (!strcasecmp(arg_name, "no_hydration")) {
+                       __clear_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags);
+                       continue;
+               }
+
+               if (!strcasecmp(arg_name, "no_discard_passdown")) {
+                       __clear_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags);
+                       continue;
+               }
+
+               ti->error = "Invalid feature argument";
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Parse the optional core argument group, a list of key/value pairs. The
+ * core arguments are set to their defaults before anything is parsed.
+ *
+ * Returns 0 on success. On failure, ti->error is set and the error of
+ * dm_read_arg_group() or -EINVAL is returned.
+ */
+static int parse_core_args(struct dm_arg_set *as, struct clone *clone)
+{
+       int r;
+       unsigned int argc;
+       unsigned int value;
+       const char *key;
+       struct dm_target *ti = clone->ti;
+
+       const struct dm_arg args = {
+               .min = 0,
+               .max = 4,
+               .error = "Invalid number of core arguments"
+       };
+
+       /* Start from the defaults */
+       clone->hydration_threshold = DEFAULT_HYDRATION_THRESHOLD;
+       clone->hydration_batch_size = DEFAULT_HYDRATION_BATCH_SIZE;
+
+       /* No core arguments supplied */
+       if (!as->argc)
+               return 0;
+
+       r = dm_read_arg_group(&args, as, &argc, &ti->error);
+       if (r)
+               return r;
+
+       if (argc & 1) {
+               ti->error = "Number of core arguments must be even";
+               return -EINVAL;
+       }
+
+       /* Each iteration consumes one key and its value */
+       for (; argc; argc -= 2) {
+               key = dm_shift_arg(as);
+
+               if (!strcasecmp(key, "hydration_threshold")) {
+                       if (kstrtouint(dm_shift_arg(as), 10, &value)) {
+                               ti->error = "Invalid value for argument `hydration_threshold'";
+                               return -EINVAL;
+                       }
+                       clone->hydration_threshold = value;
+               } else if (!strcasecmp(key, "hydration_batch_size")) {
+                       if (kstrtouint(dm_shift_arg(as), 10, &value)) {
+                               ti->error = "Invalid value for argument `hydration_batch_size'";
+                               return -EINVAL;
+                       }
+                       clone->hydration_batch_size = value;
+               } else {
+                       ti->error = "Invalid core argument";
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Read the region size from the table line and validate it.
+ *
+ * The value must lie in [MIN_REGION_SIZE, MAX_REGION_SIZE], be a power of two
+ * and be a multiple of the logical block size (converted from bytes to
+ * 512-byte sectors) of both the source and the destination device. Both
+ * devices must therefore already be open when this is called.
+ *
+ * On success clone->region_size is set and 0 is returned; otherwise *error is
+ * set and a negative errno is returned.
+ */
+static int parse_region_size(struct clone *clone, struct dm_arg_set *as, char **error)
+{
+       struct dm_arg bounds = {
+               .min = MIN_REGION_SIZE,
+               .max = MAX_REGION_SIZE,
+               .error = "Invalid region size",
+       };
+       unsigned int src_lbs_sectors, dest_lbs_sectors;
+       unsigned int nr_sectors;
+       int r;
+
+       r = dm_read_arg(&bounds, as, &nr_sectors, error);
+       if (r)
+               return r;
+
+       if (!is_power_of_2(nr_sectors)) {
+               *error = "Region size is not a power of 2";
+               return -EINVAL;
+       }
+
+       /* Logical block sizes are reported in bytes; compare in sectors */
+       src_lbs_sectors = bdev_logical_block_size(clone->source_dev->bdev) >> SECTOR_SHIFT;
+       dest_lbs_sectors = bdev_logical_block_size(clone->dest_dev->bdev) >> SECTOR_SHIFT;
+
+       if (nr_sectors % src_lbs_sectors || nr_sectors % dest_lbs_sectors) {
+               *error = "Region size is not a multiple of device logical block size";
+               return -EINVAL;
+       }
+
+       clone->region_size = nr_sectors;
+
+       return 0;
+}
+
+/*
+ * Reject region counts above 2^31.
+ *
+ * dm_bitset restricts us to 2^32 regions and test_bit & co. restrict us
+ * further to 2^31 regions. Returns 0 if @n is acceptable, otherwise sets
+ * *error and returns -EINVAL.
+ */
+static int validate_nr_regions(unsigned long n, char **error)
+{
+       const unsigned long max_regions = 1UL << 31;
+
+       if (n <= max_regions)
+               return 0;
+
+       *error = "Too many regions. Consider increasing the region size";
+       return -EINVAL;
+}
+
+/*
+ * Open the metadata device named by the next table argument for reading and
+ * writing and store it in clone->metadata_dev.
+ *
+ * A device larger than DM_CLONE_METADATA_MAX_SECTORS_WARNING only triggers a
+ * warning; it is not an error. Returns 0 on success, or the error from
+ * dm_get_device() with *error set.
+ */
+static int parse_metadata_dev(struct clone *clone, struct dm_arg_set *as, char **error)
+{
+       char name[BDEVNAME_SIZE];
+       sector_t nr_sectors;
+       int r;
+
+       r = dm_get_device(clone->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
+                         &clone->metadata_dev);
+       if (r) {
+               *error = "Error opening metadata device";
+               return r;
+       }
+
+       nr_sectors = get_dev_size(clone->metadata_dev);
+       if (nr_sectors <= DM_CLONE_METADATA_MAX_SECTORS_WARNING)
+               return 0;
+
+       DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
+              bdevname(clone->metadata_dev->bdev, name), DM_CLONE_METADATA_MAX_SECTORS);
+
+       return 0;
+}
+
+/*
+ * Open the destination device named by the next table argument for reading
+ * and writing and store it in clone->dest_dev.
+ *
+ * The device must be at least as large as the target (ti->len). If it is
+ * smaller, the reference taken here is dropped again before returning, so the
+ * caller must not put the destination device on that failure.
+ *
+ * Returns 0 on success or a negative errno with *error set.
+ */
+static int parse_dest_dev(struct clone *clone, struct dm_arg_set *as, char **error)
+{
+       struct dm_target *ti = clone->ti;
+       int r;
+
+       r = dm_get_device(ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
+                         &clone->dest_dev);
+       if (r) {
+               *error = "Error opening destination device";
+               return r;
+       }
+
+       if (get_dev_size(clone->dest_dev) >= ti->len)
+               return 0;
+
+       /* Too small to hold the whole target: undo the open */
+       dm_put_device(ti, clone->dest_dev);
+       *error = "Device size larger than destination device";
+       return -EINVAL;
+}
+
+/*
+ * Open the source device named by the next table argument read-only and
+ * store it in clone->source_dev.
+ *
+ * The device must be at least as large as the target (ti->len). If it is
+ * smaller, the reference taken here is dropped again before returning, so the
+ * caller must not put the source device on that failure.
+ *
+ * Returns 0 on success or a negative errno with *error set.
+ */
+static int parse_source_dev(struct clone *clone, struct dm_arg_set *as, char **error)
+{
+       struct dm_target *ti = clone->ti;
+       int r;
+
+       r = dm_get_device(ti, dm_shift_arg(as), FMODE_READ, &clone->source_dev);
+       if (r) {
+               *error = "Error opening source device";
+               return r;
+       }
+
+       if (get_dev_size(clone->source_dev) >= ti->len)
+               return 0;
+
+       /* Too small to back the whole target: undo the open */
+       dm_put_device(ti, clone->source_dev);
+       *error = "Device size larger than source device";
+       return -EINVAL;
+}
+
+/*
+ * Keep a private, heap-allocated copy of @argc constructor arguments.
+ *
+ * On success the copy is stored in clone->ctr_args / clone->nr_ctr_args and
+ * 0 is returned. If any allocation fails, everything duplicated so far is
+ * freed, *error is set and -ENOMEM is returned; @clone is left untouched.
+ */
+static int copy_ctr_args(struct clone *clone, int argc, const char **argv, char **error)
+{
+       const char **dup;
+       unsigned int n;
+
+       dup = kcalloc(argc, sizeof(*dup), GFP_KERNEL);
+       if (!dup)
+               goto nomem;
+
+       for (n = 0; n < argc; n++) {
+               dup[n] = kstrdup(argv[n], GFP_KERNEL);
+               if (!dup[n])
+                       goto free_dup;
+       }
+
+       clone->ctr_args = dup;
+       clone->nr_ctr_args = argc;
+       return 0;
+
+free_dup:
+       /* Entries [0, n) were duplicated successfully; release them */
+       while (n--)
+               kfree(dup[n]);
+       kfree(dup);
+nomem:
+       *error = "Failed to allocate memory for table line";
+       return -ENOMEM;
+}
+
+/*
+ * Construct a clone target from its table line.
+ *
+ * Arguments are consumed in order: metadata device, destination device,
+ * source device, region size, then the optional feature and core argument
+ * groups. At least four arguments are required.
+ *
+ * On success ti->private points to the new struct clone and 0 is returned.
+ * On failure every resource acquired so far is released in reverse order of
+ * acquisition, ti->error describes the problem and a negative errno is
+ * returned.
+ */
+static int clone_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+       int r;
+       struct clone *clone;
+       struct dm_arg_set as;
+
+       if (argc < 4) {
+               ti->error = "Invalid number of arguments";
+               return -EINVAL;
+       }
+
+       as.argc = argc;
+       as.argv = argv;
+
+       clone = kzalloc(sizeof(*clone), GFP_KERNEL);
+       if (!clone) {
+               ti->error = "Failed to allocate clone structure";
+               return -ENOMEM;
+       }
+
+       clone->ti = ti;
+
+       /*
+        * Initialize dm-clone flags. Hydration and discard passdown are on by
+        * default (feature arguments may clear them); hydration starts out
+        * suspended until clone_resume() clears the flag.
+        */
+       __set_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags);
+       __set_bit(DM_CLONE_HYDRATION_SUSPENDED, &clone->flags);
+       __set_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags);
+
+       r = parse_metadata_dev(clone, &as, &ti->error);
+       if (r)
+               goto out_with_clone;
+
+       /*
+        * parse_dest_dev() and parse_source_dev() drop their own device
+        * reference on failure, hence the jump to the previous device's label.
+        */
+       r = parse_dest_dev(clone, &as, &ti->error);
+       if (r)
+               goto out_with_meta_dev;
+
+       r = parse_source_dev(clone, &as, &ti->error);
+       if (r)
+               goto out_with_dest_dev;
+
+       r = parse_region_size(clone, &as, &ti->error);
+       if (r)
+               goto out_with_source_dev;
+
+       /* region_size is a power of two, so __ffs() yields log2(region_size) */
+       clone->region_shift = __ffs(clone->region_size);
+       clone->nr_regions = dm_sector_div_up(ti->len, clone->region_size);
+
+       r = validate_nr_regions(clone->nr_regions, &ti->error);
+       if (r)
+               goto out_with_source_dev;
+
+       r = dm_set_target_max_io_len(ti, clone->region_size);
+       if (r) {
+               ti->error = "Failed to set max io len";
+               goto out_with_source_dev;
+       }
+
+       r = parse_feature_args(&as, clone);
+       if (r)
+               goto out_with_source_dev;
+
+       r = parse_core_args(&as, clone);
+       if (r)
+               goto out_with_source_dev;
+
+       /* Load metadata */
+       clone->cmd = dm_clone_metadata_open(clone->metadata_dev->bdev, ti->len,
+                                           clone->region_size);
+       if (IS_ERR(clone->cmd)) {
+               ti->error = "Failed to load metadata";
+               r = PTR_ERR(clone->cmd);
+               goto out_with_source_dev;
+       }
+
+       __set_clone_mode(clone, CM_WRITE);
+
+       if (get_clone_mode(clone) != CM_WRITE) {
+               ti->error = "Unable to get write access to metadata, please check/repair metadata";
+               r = -EPERM;
+               goto out_with_metadata;
+       }
+
+       clone->last_commit_jiffies = jiffies;
+
+       /* Allocate hydration hash table */
+       r = hash_table_init(clone);
+       if (r) {
+               ti->error = "Failed to allocate hydration hash table";
+               goto out_with_metadata;
+       }
+
+       atomic_set(&clone->ios_in_flight, 0);
+       init_waitqueue_head(&clone->hydration_stopped);
+       spin_lock_init(&clone->lock);
+       bio_list_init(&clone->deferred_bios);
+       bio_list_init(&clone->deferred_discard_bios);
+       bio_list_init(&clone->deferred_flush_bios);
+       bio_list_init(&clone->deferred_flush_completions);
+       clone->hydration_offset = 0;
+       atomic_set(&clone->hydrations_in_flight, 0);
+
+       clone->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0);
+       if (!clone->wq) {
+               ti->error = "Failed to allocate workqueue";
+               r = -ENOMEM;
+               goto out_with_ht;
+       }
+
+       INIT_WORK(&clone->worker, do_worker);
+       INIT_DELAYED_WORK(&clone->waker, do_waker);
+
+       clone->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
+       if (IS_ERR(clone->kcopyd_client)) {
+               /* Report a reason, like every other failure path here */
+               ti->error = "Failed to create kcopyd client";
+               r = PTR_ERR(clone->kcopyd_client);
+               goto out_with_wq;
+       }
+
+       r = mempool_init_slab_pool(&clone->hydration_pool, MIN_HYDRATIONS,
+                                  _hydration_cache);
+       if (r) {
+               ti->error = "Failed to create dm_clone_region_hydration memory pool";
+               goto out_with_kcopyd;
+       }
+
+       /*
+        * Save a copy of the table line, skipping the three device arguments
+        * (i.e. starting at the region size).
+        */
+       r = copy_ctr_args(clone, argc - 3, (const char **)argv + 3, &ti->error);
+       if (r)
+               goto out_with_mempool;
+
+       mutex_init(&clone->commit_lock);
+       clone->callbacks.congested_fn = clone_is_congested;
+       dm_table_add_target_callbacks(ti->table, &clone->callbacks);
+
+       /* Enable flushes */
+       ti->num_flush_bios = 1;
+       ti->flush_supported = true;
+
+       /* Enable discards */
+       ti->discards_supported = true;
+       ti->num_discard_bios = 1;
+
+       ti->private = clone;
+
+       return 0;
+
+       /* Unwind in reverse order of acquisition */
+out_with_mempool:
+       mempool_exit(&clone->hydration_pool);
+out_with_kcopyd:
+       dm_kcopyd_client_destroy(clone->kcopyd_client);
+out_with_wq:
+       destroy_workqueue(clone->wq);
+out_with_ht:
+       hash_table_exit(clone);
+out_with_metadata:
+       dm_clone_metadata_close(clone->cmd);
+out_with_source_dev:
+       dm_put_device(ti, clone->source_dev);
+out_with_dest_dev:
+       dm_put_device(ti, clone->dest_dev);
+out_with_meta_dev:
+       dm_put_device(ti, clone->metadata_dev);
+out_with_clone:
+       kfree(clone);
+
+       return r;
+}
+
+/*
+ * Destroy a clone target: release everything clone_ctr() set up, in the
+ * reverse order of its acquisition, and finally free the clone itself.
+ */
+static void clone_dtr(struct dm_target *ti)
+{
+       struct clone *clone = ti->private;
+       unsigned int n = 0;
+
+       mutex_destroy(&clone->commit_lock);
+
+       /* Saved copy of the table line */
+       while (n < clone->nr_ctr_args) {
+               kfree(clone->ctr_args[n]);
+               n++;
+       }
+       kfree(clone->ctr_args);
+
+       mempool_exit(&clone->hydration_pool);
+       dm_kcopyd_client_destroy(clone->kcopyd_client);
+       destroy_workqueue(clone->wq);
+       hash_table_exit(clone);
+       dm_clone_metadata_close(clone->cmd);
+
+       /* Devices are put in the reverse order they were opened */
+       dm_put_device(ti, clone->source_dev);
+       dm_put_device(ti, clone->dest_dev);
+       dm_put_device(ti, clone->metadata_dev);
+
+       kfree(clone);
+}
+
+/*---------------------------------------------------------------------------*/
+
+static void clone_postsuspend(struct dm_target *ti)
+{
+       struct clone *clone = ti->private;
+
+       /*
+        * Postsuspend hook: quiesce all background activity of the target.
+        *
+        * To successfully suspend the device, in this exact order:
+        *
+        *      - We cancel the delayed work for periodic commits and wait for
+        *        it to finish (cancel_delayed_work_sync() on the waker).
+        *
+        *      - We stop the background hydration, i.e. we prevent new region
+        *        hydrations from starting, by setting
+        *        DM_CLONE_HYDRATION_SUSPENDED.
+        *
+        *      - We wait for any in-flight hydrations to finish, i.e. until
+        *        hydrations_in_flight reads as zero.
+        *
+        *      - We flush the workqueue.
+        *
+        *      - We commit the metadata. The return value of
+        *        commit_metadata() is deliberately discarded: this hook
+        *        returns void and has no way to report a failure.
+        */
+       cancel_delayed_work_sync(&clone->waker);
+
+       set_bit(DM_CLONE_HYDRATION_SUSPENDED, &clone->flags);
+
+       /*
+        * Make sure set_bit() is ordered before atomic_read(), otherwise we
+        * might race with do_hydration() and miss some started region
+        * hydrations: without the barrier the counter could be read as zero
+        * before the flag is visible to a hydration that is about to start.
+        *
+        * This is paired with smp_mb__after_atomic() in do_hydration().
+        */
+       smp_mb__after_atomic();
+
+       wait_event(clone->hydration_stopped, !atomic_read(&clone->hydrations_in_flight));
+       flush_workqueue(clone->wq);
+
+       (void) commit_metadata(clone);
+}
+
+/*
+ * Resume hook: undo the hydration suspension applied at construction time
+ * and in clone_postsuspend(), then invoke the waker's work function directly.
+ */
+static void clone_resume(struct dm_target *ti)
+{
+       struct clone *c = ti->private;
+
+       /* Background hydration may start new regions again */
+       clear_bit(DM_CLONE_HYDRATION_SUSPENDED, &c->flags);
+
+       /* Run the waker synchronously instead of waiting for its timer */
+       do_waker(&c->waker.work);
+}
+
+/*
+ * Return true if @bdev has a request queue and that queue has discard
+ * support flagged; false otherwise.
+ */
+static bool bdev_supports_discards(struct block_device *bdev)
+{
+       struct request_queue *queue = bdev_get_queue(bdev);
+
+       if (!queue)
+               return false;
+
+       return blk_queue_discard(queue);
+}
+
+/*
+ * If discard passdown is enabled, check that the destination device can
+ * actually take our discards: it must support discards at all and its
+ * max_discard_sectors must cover at least one region. Otherwise log a
+ * warning naming the reason and clear DM_CLONE_DISCARD_PASSDOWN.
+ */
+static void disable_passdown_if_not_supported(struct clone *clone)
+{
+       struct block_device *bdev = clone->dest_dev->bdev;
+       char name[BDEVNAME_SIZE];
+       const char *why;
+
+       /* Passdown already off: nothing to verify */
+       if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags))
+               return;
+
+       if (!bdev_supports_discards(bdev))
+               why = "discard unsupported";
+       else if (bdev_get_queue(bdev)->limits.max_discard_sectors < clone->region_size)
+               why = "max discard sectors smaller than a region";
+       else
+               return;
+
+       DMWARN("Destination device (%s) %s: Disabling discard passdown.",
+              bdevname(bdev, name), why);
+       clear_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags);
+}
+
+/*
+ * Fill in the discard-related fields of @limits.
+ *
+ * With discard passdown enabled the destination device's discard limits are
+ * copied verbatim. Without it, dm-clone advertises its own limits: a
+ * granularity of one region (in bytes) and the largest sector count below
+ * UINT_MAX bytes that is a whole number of regions.
+ */
+static void set_discard_limits(struct clone *clone, struct queue_limits *limits)
+{
+       if (test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags)) {
+               struct queue_limits *dest =
+                       &bdev_get_queue(clone->dest_dev->bdev)->limits;
+
+               /*
+                * clone_iterate_devices() is stacking both the source and
+                * destination device limits but discards aren't passed to the
+                * source device, so inherit destination's limits.
+                */
+               limits->max_discard_sectors = dest->max_discard_sectors;
+               limits->max_hw_discard_sectors = dest->max_hw_discard_sectors;
+               limits->discard_granularity = dest->discard_granularity;
+               limits->discard_alignment = dest->discard_alignment;
+               limits->discard_misaligned = dest->discard_misaligned;
+               limits->max_discard_segments = dest->max_discard_segments;
+               return;
+       }
+
+       /* No passdown is done so we set our own virtual limits */
+       limits->discard_granularity = clone->region_size << SECTOR_SHIFT;
+       limits->max_discard_sectors = round_down(UINT_MAX >> SECTOR_SHIFT, clone->region_size);
+}
+
+/*
+ * io_hints hook: adjust the stacked queue limits for this target.
+ *
+ * The stacked io_opt is kept when it is a non-zero whole multiple of the
+ * region size; otherwise both io_min and io_opt are set to one region (in
+ * bytes). Discard passdown is then re-validated and the discard limits are
+ * filled in.
+ */
+static void clone_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+       struct clone *clone = ti->private;
+       u64 io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
+       bool override_hints = true;
+
+       /*
+        * do_div() leaves the quotient in io_opt_sectors and returns the
+        * remainder; only the remainder matters here.
+        */
+       if (io_opt_sectors >= clone->region_size &&
+           !do_div(io_opt_sectors, clone->region_size))
+               override_hints = false;
+
+       if (override_hints) {
+               blk_limits_io_min(limits, clone->region_size << SECTOR_SHIFT);
+               blk_limits_io_opt(limits, clone->region_size << SECTOR_SHIFT);
+       }
+
+       disable_passdown_if_not_supported(clone);
+       set_discard_limits(clone, limits);
+}
+
+/*
+ * iterate_devices hook: call @fn for the source device and then, only if
+ * that returned 0, for the destination device. Both calls cover the range
+ * [0, ti->len). The metadata device is not iterated.
+ *
+ * Returns the first non-zero value returned by @fn, or 0.
+ */
+static int clone_iterate_devices(struct dm_target *ti,
+                                iterate_devices_callout_fn fn, void *data)
+{
+       struct clone *clone = ti->private;
+       int ret;
+
+       ret = fn(ti, clone->source_dev, 0, ti->len, data);
+       if (ret)
+               return ret;
+
+       return fn(ti, clone->dest_dev, 0, ti->len, data);
+}
+
+/*
+ * dm-clone message functions.
+ */
+/*
+ * Store a new hydration threshold and wake the worker.
+ *
+ * User space may have stopped the hydration by setting the threshold to
+ * zero; when the threshold is raised again later, the hydration process has
+ * to be restarted, which is why the worker is woken on every update.
+ */
+static void set_hydration_threshold(struct clone *clone, unsigned int nr_regions)
+{
+       WRITE_ONCE(clone->hydration_threshold, nr_regions);
+       wake_worker(clone);
+}
+
+/* Store a new hydration batch size; unlike the threshold, no wake-up is done. */
+static void set_hydration_batch_size(struct clone *clone, unsigned int nr_regions)
+{
+       WRITE_ONCE(clone->hydration_batch_size, nr_regions);
+}
+
+/*
+ * Turn background hydration on. The worker is woken only on an actual
+ * off -> on transition of DM_CLONE_HYDRATION_ENABLED.
+ */
+static void enable_hydration(struct clone *clone)
+{
+       /* Already enabled: nothing to kick */
+       if (test_and_set_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags))
+               return;
+
+       wake_worker(clone);
+}
+
+/* Turn background hydration off by clearing DM_CLONE_HYDRATION_ENABLED. */
+static void disable_hydration(struct clone *clone)
+{
+       clear_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags);
+}
+
+/*
+ * message hook. Message names are matched without regard to case:
+ *
+ *      enable_hydration
+ *      disable_hydration
+ *      hydration_threshold <unsigned decimal>
+ *      hydration_batch_size <unsigned decimal>
+ *
+ * The first two ignore any extra arguments; the last two require exactly one
+ * value. @result and @maxlen are unused. Returns 0 on success and -EINVAL
+ * for a missing, malformed or unsupported message.
+ */
+static int clone_message(struct dm_target *ti, unsigned int argc, char **argv,
+                        char *result, unsigned int maxlen)
+{
+       struct clone *clone = ti->private;
+       const char *msg;
+       unsigned int value;
+       bool is_threshold;
+
+       if (argc == 0)
+               return -EINVAL;
+
+       msg = argv[0];
+
+       if (!strcasecmp(msg, "enable_hydration")) {
+               enable_hydration(clone);
+               return 0;
+       }
+
+       if (!strcasecmp(msg, "disable_hydration")) {
+               disable_hydration(clone);
+               return 0;
+       }
+
+       /* Everything else is "<name> <value>" */
+       if (argc != 2)
+               return -EINVAL;
+
+       is_threshold = !strcasecmp(msg, "hydration_threshold");
+       if (!is_threshold && strcasecmp(msg, "hydration_batch_size")) {
+               DMERR("%s: Unsupported message `%s'", clone_device_name(clone), argv[0]);
+               return -EINVAL;
+       }
+
+       if (kstrtouint(argv[1], 10, &value))
+               return -EINVAL;
+
+       if (is_threshold)
+               set_hydration_threshold(clone, value);
+       else
+               set_hydration_batch_size(clone, value);
+
+       return 0;
+}
+
+/* Target type registered with device-mapper under the name "clone". */
+static struct target_type clone_target = {
+       .name            = "clone",
+       .version         = {1, 0, 0},
+       .module          = THIS_MODULE,
+       .ctr             = clone_ctr,
+       .dtr             = clone_dtr,
+       .map             = clone_map,
+       .end_io          = clone_endio,
+       .postsuspend     = clone_postsuspend,
+       .resume          = clone_resume,
+       .status          = clone_status,
+       .message         = clone_message,
+       .io_hints        = clone_io_hints,
+       .iterate_devices = clone_iterate_devices,
+};
+
+/*---------------------------------------------------------------------------*/
+
+/* Module functions */
+/*
+ * Module init: create the slab cache for struct dm_clone_region_hydration
+ * and register the clone target.
+ *
+ * Returns 0 on success, -ENOMEM if the cache cannot be created, or the error
+ * returned by dm_register_target(). The cache is destroyed again when
+ * registration fails, so a failed load leaves nothing behind.
+ */
+static int __init dm_clone_init(void)
+{
+       int r;
+
+       _hydration_cache = KMEM_CACHE(dm_clone_region_hydration, 0);
+       if (!_hydration_cache)
+               return -ENOMEM;
+
+       r = dm_register_target(&clone_target);
+       if (r < 0) {
+               DMERR("Failed to register clone target");
+               /* dm_clone_exit() never runs after a failed init */
+               kmem_cache_destroy(_hydration_cache);
+               _hydration_cache = NULL;
+               return r;
+       }
+
+       return 0;
+}
+
+/*
+ * Module exit: unregister the target first, then destroy the hydration slab
+ * cache and clear the stale pointer.
+ */
+static void __exit dm_clone_exit(void)
+{
+       dm_unregister_target(&clone_target);
+
+       /* The mempools backed by this cache are torn down in clone_dtr() */
+       kmem_cache_destroy(_hydration_cache);
+       _hydration_cache = NULL;
+}
+
+/* Module hooks */
+module_init(dm_clone_init);
+module_exit(dm_clone_exit);
+
+MODULE_DESCRIPTION(DM_NAME " clone target");
+MODULE_AUTHOR("Nikos Tsironis <ntsironis@arrikto.com>");
+MODULE_LICENSE("GPL");
index d5216bc..f87f649 100644 (file)
@@ -98,11 +98,6 @@ struct crypt_iv_operations {
                    struct dm_crypt_request *dmreq);
 };
 
-struct iv_essiv_private {
-       struct crypto_shash *hash_tfm;
-       u8 *salt;
-};
-
 struct iv_benbi_private {
        int shift;
 };
@@ -120,10 +115,6 @@ struct iv_tcw_private {
        u8 *whitening;
 };
 
-struct iv_eboiv_private {
-       struct crypto_cipher *tfm;
-};
-
 /*
  * Crypt: maps a linear range of a block device
  * and encrypts / decrypts at the same time.
@@ -152,26 +143,21 @@ struct crypt_config {
        struct task_struct *write_thread;
        struct rb_root write_tree;
 
-       char *cipher;
        char *cipher_string;
        char *cipher_auth;
        char *key_string;
 
        const struct crypt_iv_operations *iv_gen_ops;
        union {
-               struct iv_essiv_private essiv;
                struct iv_benbi_private benbi;
                struct iv_lmk_private lmk;
                struct iv_tcw_private tcw;
-               struct iv_eboiv_private eboiv;
        } iv_gen_private;
        u64 iv_offset;
        unsigned int iv_size;
        unsigned short int sector_size;
        unsigned char sector_shift;
 
-       /* ESSIV: struct crypto_cipher *essiv_tfm */
-       void *iv_private;
        union {
                struct crypto_skcipher **tfms;
                struct crypto_aead **tfms_aead;
@@ -329,157 +315,15 @@ static int crypt_iv_plain64be_gen(struct crypt_config *cc, u8 *iv,
        return 0;
 }
 
-/* Initialise ESSIV - compute salt but no local memory allocations */
-static int crypt_iv_essiv_init(struct crypt_config *cc)
-{
-       struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
-       SHASH_DESC_ON_STACK(desc, essiv->hash_tfm);
-       struct crypto_cipher *essiv_tfm;
-       int err;
-
-       desc->tfm = essiv->hash_tfm;
-
-       err = crypto_shash_digest(desc, cc->key, cc->key_size, essiv->salt);
-       shash_desc_zero(desc);
-       if (err)
-               return err;
-
-       essiv_tfm = cc->iv_private;
-
-       err = crypto_cipher_setkey(essiv_tfm, essiv->salt,
-                           crypto_shash_digestsize(essiv->hash_tfm));
-       if (err)
-               return err;
-
-       return 0;
-}
-
-/* Wipe salt and reset key derived from volume key */
-static int crypt_iv_essiv_wipe(struct crypt_config *cc)
-{
-       struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
-       unsigned salt_size = crypto_shash_digestsize(essiv->hash_tfm);
-       struct crypto_cipher *essiv_tfm;
-       int r, err = 0;
-
-       memset(essiv->salt, 0, salt_size);
-
-       essiv_tfm = cc->iv_private;
-       r = crypto_cipher_setkey(essiv_tfm, essiv->salt, salt_size);
-       if (r)
-               err = r;
-
-       return err;
-}
-
-/* Allocate the cipher for ESSIV */
-static struct crypto_cipher *alloc_essiv_cipher(struct crypt_config *cc,
-                                               struct dm_target *ti,
-                                               const u8 *salt,
-                                               unsigned int saltsize)
-{
-       struct crypto_cipher *essiv_tfm;
-       int err;
-
-       /* Setup the essiv_tfm with the given salt */
-       essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, 0);
-       if (IS_ERR(essiv_tfm)) {
-               ti->error = "Error allocating crypto tfm for ESSIV";
-               return essiv_tfm;
-       }
-
-       if (crypto_cipher_blocksize(essiv_tfm) != cc->iv_size) {
-               ti->error = "Block size of ESSIV cipher does "
-                           "not match IV size of block cipher";
-               crypto_free_cipher(essiv_tfm);
-               return ERR_PTR(-EINVAL);
-       }
-
-       err = crypto_cipher_setkey(essiv_tfm, salt, saltsize);
-       if (err) {
-               ti->error = "Failed to set key for ESSIV cipher";
-               crypto_free_cipher(essiv_tfm);
-               return ERR_PTR(err);
-       }
-
-       return essiv_tfm;
-}
-
-static void crypt_iv_essiv_dtr(struct crypt_config *cc)
-{
-       struct crypto_cipher *essiv_tfm;
-       struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
-
-       crypto_free_shash(essiv->hash_tfm);
-       essiv->hash_tfm = NULL;
-
-       kzfree(essiv->salt);
-       essiv->salt = NULL;
-
-       essiv_tfm = cc->iv_private;
-
-       if (essiv_tfm)
-               crypto_free_cipher(essiv_tfm);
-
-       cc->iv_private = NULL;
-}
-
-static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
-                             const char *opts)
-{
-       struct crypto_cipher *essiv_tfm = NULL;
-       struct crypto_shash *hash_tfm = NULL;
-       u8 *salt = NULL;
-       int err;
-
-       if (!opts) {
-               ti->error = "Digest algorithm missing for ESSIV mode";
-               return -EINVAL;
-       }
-
-       /* Allocate hash algorithm */
-       hash_tfm = crypto_alloc_shash(opts, 0, 0);
-       if (IS_ERR(hash_tfm)) {
-               ti->error = "Error initializing ESSIV hash";
-               err = PTR_ERR(hash_tfm);
-               goto bad;
-       }
-
-       salt = kzalloc(crypto_shash_digestsize(hash_tfm), GFP_KERNEL);
-       if (!salt) {
-               ti->error = "Error kmallocing salt storage in ESSIV";
-               err = -ENOMEM;
-               goto bad;
-       }
-
-       cc->iv_gen_private.essiv.salt = salt;
-       cc->iv_gen_private.essiv.hash_tfm = hash_tfm;
-
-       essiv_tfm = alloc_essiv_cipher(cc, ti, salt,
-                                      crypto_shash_digestsize(hash_tfm));
-       if (IS_ERR(essiv_tfm)) {
-               crypt_iv_essiv_dtr(cc);
-               return PTR_ERR(essiv_tfm);
-       }
-       cc->iv_private = essiv_tfm;
-
-       return 0;
-
-bad:
-       if (hash_tfm && !IS_ERR(hash_tfm))
-               crypto_free_shash(hash_tfm);
-       kfree(salt);
-       return err;
-}
-
 static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv,
                              struct dm_crypt_request *dmreq)
 {
-       struct crypto_cipher *essiv_tfm = cc->iv_private;
-
+       /*
+        * ESSIV encryption of the IV is now handled by the crypto API,
+        * so just pass the plain sector number here.
+        */
        memset(iv, 0, cc->iv_size);
        *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector);
-       crypto_cipher_encrypt_one(essiv_tfm, iv, iv);
 
        return 0;
 }
@@ -847,65 +691,47 @@ static int crypt_iv_random_gen(struct crypt_config *cc, u8 *iv,
        return 0;
 }
 
-static void crypt_iv_eboiv_dtr(struct crypt_config *cc)
-{
-       struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv;
-
-       crypto_free_cipher(eboiv->tfm);
-       eboiv->tfm = NULL;
-}
-
 static int crypt_iv_eboiv_ctr(struct crypt_config *cc, struct dm_target *ti,
                            const char *opts)
 {
-       struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv;
-       struct crypto_cipher *tfm;
-
-       tfm = crypto_alloc_cipher(cc->cipher, 0, 0);
-       if (IS_ERR(tfm)) {
-               ti->error = "Error allocating crypto tfm for EBOIV";
-               return PTR_ERR(tfm);
+       if (test_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags)) {
+               ti->error = "AEAD transforms not supported for EBOIV";
+               return -EINVAL;
        }
 
-       if (crypto_cipher_blocksize(tfm) != cc->iv_size) {
+       if (crypto_skcipher_blocksize(any_tfm(cc)) != cc->iv_size) {
                ti->error = "Block size of EBOIV cipher does "
                            "not match IV size of block cipher";
-               crypto_free_cipher(tfm);
                return -EINVAL;
        }
 
-       eboiv->tfm = tfm;
        return 0;
 }
 
-static int crypt_iv_eboiv_init(struct crypt_config *cc)
+static int crypt_iv_eboiv_gen(struct crypt_config *cc, u8 *iv,
+                           struct dm_crypt_request *dmreq)
 {
-       struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv;
+       u8 buf[MAX_CIPHER_BLOCKSIZE] __aligned(__alignof__(__le64));
+       struct skcipher_request *req;
+       struct scatterlist src, dst;
+       DECLARE_CRYPTO_WAIT(wait);      /* completion must be initialised */
        int err;
 
-       err = crypto_cipher_setkey(eboiv->tfm, cc->key, cc->key_size);
-       if (err)
-               return err;
+       req = skcipher_request_alloc(any_tfm(cc), GFP_NOIO);
+       if (!req)       /* GFP_NOIO: reclaim here must not issue new I/O */
+               return -ENOMEM;
 
-       return 0;
-}
+       memset(buf, 0, cc->iv_size);
+       *(__le64 *)buf = cpu_to_le64(dmreq->iv_sector * cc->sector_size);
 
-static int crypt_iv_eboiv_wipe(struct crypt_config *cc)
-{
-       /* Called after cc->key is set to random key in crypt_wipe() */
-       return crypt_iv_eboiv_init(cc);
-}
+       sg_init_one(&src, page_address(ZERO_PAGE(0)), cc->iv_size);
+       sg_init_one(&dst, iv, cc->iv_size);
+       skcipher_request_set_crypt(req, &src, &dst, cc->iv_size, buf);
+       skcipher_request_set_callback(req, 0, crypto_req_done, &wait);
+       err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
+       skcipher_request_free(req);
 
-static int crypt_iv_eboiv_gen(struct crypt_config *cc, u8 *iv,
-                           struct dm_crypt_request *dmreq)
-{
-       struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv;
-
-       memset(iv, 0, cc->iv_size);
-       *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector * cc->sector_size);
-       crypto_cipher_encrypt_one(eboiv->tfm, iv, iv);
-
-       return 0;
+       return err;
 }
 
 static const struct crypt_iv_operations crypt_iv_plain_ops = {
@@ -921,10 +747,6 @@ static const struct crypt_iv_operations crypt_iv_plain64be_ops = {
 };
 
 static const struct crypt_iv_operations crypt_iv_essiv_ops = {
-       .ctr       = crypt_iv_essiv_ctr,
-       .dtr       = crypt_iv_essiv_dtr,
-       .init      = crypt_iv_essiv_init,
-       .wipe      = crypt_iv_essiv_wipe,
        .generator = crypt_iv_essiv_gen
 };
 
@@ -962,9 +784,6 @@ static struct crypt_iv_operations crypt_iv_random_ops = {
 
 static struct crypt_iv_operations crypt_iv_eboiv_ops = {
        .ctr       = crypt_iv_eboiv_ctr,
-       .dtr       = crypt_iv_eboiv_dtr,
-       .init      = crypt_iv_eboiv_init,
-       .wipe      = crypt_iv_eboiv_wipe,
        .generator = crypt_iv_eboiv_gen
 };
 
@@ -2320,7 +2139,6 @@ static void crypt_dtr(struct dm_target *ti)
        if (cc->dev)
                dm_put_device(ti, cc->dev);
 
-       kzfree(cc->cipher);
        kzfree(cc->cipher_string);
        kzfree(cc->key_string);
        kzfree(cc->cipher_auth);
@@ -2402,52 +2220,6 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
 }
 
 /*
- * Workaround to parse cipher algorithm from crypto API spec.
- * The cc->cipher is currently used only in ESSIV.
- * This should be probably done by crypto-api calls (once available...)
- */
-static int crypt_ctr_blkdev_cipher(struct crypt_config *cc)
-{
-       const char *alg_name = NULL;
-       char *start, *end;
-
-       if (crypt_integrity_aead(cc)) {
-               alg_name = crypto_tfm_alg_name(crypto_aead_tfm(any_tfm_aead(cc)));
-               if (!alg_name)
-                       return -EINVAL;
-               if (crypt_integrity_hmac(cc)) {
-                       alg_name = strchr(alg_name, ',');
-                       if (!alg_name)
-                               return -EINVAL;
-               }
-               alg_name++;
-       } else {
-               alg_name = crypto_tfm_alg_name(crypto_skcipher_tfm(any_tfm(cc)));
-               if (!alg_name)
-                       return -EINVAL;
-       }
-
-       start = strchr(alg_name, '(');
-       end = strchr(alg_name, ')');
-
-       if (!start && !end) {
-               cc->cipher = kstrdup(alg_name, GFP_KERNEL);
-               return cc->cipher ? 0 : -ENOMEM;
-       }
-
-       if (!start || !end || ++start >= end)
-               return -EINVAL;
-
-       cc->cipher = kzalloc(end - start + 1, GFP_KERNEL);
-       if (!cc->cipher)
-               return -ENOMEM;
-
-       strncpy(cc->cipher, start, end - start);
-
-       return 0;
-}
-
-/*
  * Workaround to parse HMAC algorithm from AEAD crypto API spec.
  * The HMAC is needed to calculate tag size (HMAC digest size).
  * This should be probably done by crypto-api calls (once available...)
@@ -2490,7 +2262,7 @@ static int crypt_ctr_cipher_new(struct dm_target *ti, char *cipher_in, char *key
                                char **ivmode, char **ivopts)
 {
        struct crypt_config *cc = ti->private;
-       char *tmp, *cipher_api;
+       char *tmp, *cipher_api, buf[CRYPTO_MAX_ALG_NAME];
        int ret = -EINVAL;
 
        cc->tfms_count = 1;
@@ -2516,9 +2288,32 @@ static int crypt_ctr_cipher_new(struct dm_target *ti, char *cipher_in, char *key
        /* The rest is crypto API spec */
        cipher_api = tmp;
 
+       /* Alloc AEAD, can be used only in new format. */
+       if (crypt_integrity_aead(cc)) {
+               ret = crypt_ctr_auth_cipher(cc, cipher_api);
+               if (ret < 0) {
+                       ti->error = "Invalid AEAD cipher spec";
+                       return -ENOMEM;
+               }
+       }
+
        if (*ivmode && !strcmp(*ivmode, "lmk"))
                cc->tfms_count = 64;
 
+       if (*ivmode && !strcmp(*ivmode, "essiv")) {
+               if (!*ivopts) {
+                       ti->error = "Digest algorithm missing for ESSIV mode";
+                       return -EINVAL;
+               }
+               ret = snprintf(buf, CRYPTO_MAX_ALG_NAME, "essiv(%s,%s)",
+                              cipher_api, *ivopts);
+               if (ret < 0 || ret >= CRYPTO_MAX_ALG_NAME) {
+                       ti->error = "Cannot allocate cipher string";
+                       return -ENOMEM;
+               }
+               cipher_api = buf;
+       }
+
        cc->key_parts = cc->tfms_count;
 
        /* Allocate cipher */
@@ -2528,23 +2323,11 @@ static int crypt_ctr_cipher_new(struct dm_target *ti, char *cipher_in, char *key
                return ret;
        }
 
-       /* Alloc AEAD, can be used only in new format. */
-       if (crypt_integrity_aead(cc)) {
-               ret = crypt_ctr_auth_cipher(cc, cipher_api);
-               if (ret < 0) {
-                       ti->error = "Invalid AEAD cipher spec";
-                       return -ENOMEM;
-               }
+       if (crypt_integrity_aead(cc))
                cc->iv_size = crypto_aead_ivsize(any_tfm_aead(cc));
-       else
+       else
                cc->iv_size = crypto_skcipher_ivsize(any_tfm(cc));
 
-       ret = crypt_ctr_blkdev_cipher(cc);
-       if (ret < 0) {
-               ti->error = "Cannot allocate cipher string";
-               return -ENOMEM;
-       }
-
        return 0;
 }
 
@@ -2579,10 +2362,6 @@ static int crypt_ctr_cipher_old(struct dm_target *ti, char *cipher_in, char *key
        }
        cc->key_parts = cc->tfms_count;
 
-       cc->cipher = kstrdup(cipher, GFP_KERNEL);
-       if (!cc->cipher)
-               goto bad_mem;
-
        chainmode = strsep(&tmp, "-");
        *ivmode = strsep(&tmp, ":");
        *ivopts = tmp;
@@ -2605,9 +2384,19 @@ static int crypt_ctr_cipher_old(struct dm_target *ti, char *cipher_in, char *key
        if (!cipher_api)
                goto bad_mem;
 
-       ret = snprintf(cipher_api, CRYPTO_MAX_ALG_NAME,
-                      "%s(%s)", chainmode, cipher);
-       if (ret < 0) {
+       if (*ivmode && !strcmp(*ivmode, "essiv")) {
+               if (!*ivopts) {
+                       ti->error = "Digest algorithm missing for ESSIV mode";
+                       kfree(cipher_api);
+                       return -EINVAL;
+               }
+               ret = snprintf(cipher_api, CRYPTO_MAX_ALG_NAME,
+                              "essiv(%s(%s),%s)", chainmode, cipher, *ivopts);
+       } else {
+               ret = snprintf(cipher_api, CRYPTO_MAX_ALG_NAME,
+                              "%s(%s)", chainmode, cipher);
+       }
+       if (ret < 0 || ret >= CRYPTO_MAX_ALG_NAME) {
                kfree(cipher_api);
                goto bad_mem;
        }
index 9118ab8..dab4446 100644 (file)
@@ -345,6 +345,14 @@ static void __DEBUG_bytes(__u8 *bytes, size_t len, const char *msg, ...)
 #define DEBUG_bytes(bytes, len, msg, ...)      do { } while (0)
 #endif
 
+static void dm_integrity_prepare(struct request *rq)
+{
+}
+
+static void dm_integrity_complete(struct request *rq, unsigned int nr_bytes)
+{
+}
+
 /*
  * DM Integrity profile, protection is performed layer above (dm-crypt)
  */
@@ -352,6 +360,8 @@ static const struct blk_integrity_profile dm_integrity_profile = {
        .name                   = "DM-DIF-EXT-TAG",
        .generate_fn            = NULL,
        .verify_fn              = NULL,
+       .prepare_fn             = dm_integrity_prepare,
+       .complete_fn            = dm_integrity_complete,
 };
 
 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map);
index 1e03bc8..ac83f50 100644 (file)
@@ -601,17 +601,27 @@ static void list_version_get_info(struct target_type *tt, void *param)
     info->vers = align_ptr(((void *) ++info->vers) + strlen(tt->name) + 1);
 }
 
-static int list_versions(struct file *filp, struct dm_ioctl *param, size_t param_size)
+static int __list_versions(struct dm_ioctl *param, size_t param_size, const char *name)
 {
        size_t len, needed = 0;
        struct dm_target_versions *vers;
        struct vers_iter iter_info;
+       struct target_type *tt = NULL;
+
+       if (name) {
+               tt = dm_get_target_type(name);
+               if (!tt)
+                       return -EINVAL;
+       }
 
        /*
         * Loop through all the devices working out how much
         * space we need.
         */
-       dm_target_iterate(list_version_get_needed, &needed);
+       if (!tt)
+               dm_target_iterate(list_version_get_needed, &needed);
+       else
+               list_version_get_needed(tt, &needed);
 
        /*
         * Grab our output buffer.
@@ -632,13 +642,28 @@ static int list_versions(struct file *filp, struct dm_ioctl *param, size_t param
        /*
         * Now loop through filling out the names & versions.
         */
-       dm_target_iterate(list_version_get_info, &iter_info);
+       if (!tt)
+               dm_target_iterate(list_version_get_info, &iter_info);
+       else
+               list_version_get_info(tt, &iter_info);
        param->flags |= iter_info.flags;
 
  out:
+       if (tt)
+               dm_put_target_type(tt);
        return 0;
 }
 
+static int list_versions(struct file *filp, struct dm_ioctl *param, size_t param_size)
+{
+       return __list_versions(param, param_size, NULL);
+}
+
+static int get_target_version(struct file *filp, struct dm_ioctl *param, size_t param_size)
+{
+       return __list_versions(param, param_size, param->name);
+}
+
 static int check_name(const char *name)
 {
        if (strchr(name, '/')) {
@@ -1592,7 +1617,7 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para
        }
 
        ti = dm_table_find_target(table, tmsg->sector);
-       if (!dm_target_is_valid(ti)) {
+       if (!ti) {
                DMWARN("Target message sector outside device.");
                r = -EINVAL;
        } else if (ti->type->message)
@@ -1664,6 +1689,7 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags)
                {DM_TARGET_MSG_CMD, 0, target_message},
                {DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry},
                {DM_DEV_ARM_POLL, IOCTL_FLAGS_NO_PARAMS, dev_arm_poll},
+               {DM_GET_TARGET_VERSION, 0, get_target_version},
        };
 
        if (unlikely(cmd >= ARRAY_SIZE(_ioctls)))
index 1f933dd..b0aa595 100644 (file)
@@ -3738,18 +3738,18 @@ static int raid_iterate_devices(struct dm_target *ti,
 static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
        struct raid_set *rs = ti->private;
-       unsigned int chunk_size = to_bytes(rs->md.chunk_sectors);
+       unsigned int chunk_size_bytes = to_bytes(rs->md.chunk_sectors);
 
-       blk_limits_io_min(limits, chunk_size);
-       blk_limits_io_opt(limits, chunk_size * mddev_data_stripes(rs));
+       blk_limits_io_min(limits, chunk_size_bytes);
+       blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
 
        /*
         * RAID1 and RAID10 personalities require bio splitting,
         * RAID0/4/5/6 don't and process large discard bios properly.
         */
        if (rs_is_raid1(rs) || rs_is_raid10(rs)) {
-               limits->discard_granularity = chunk_size;
-               limits->max_discard_sectors = chunk_size;
+               limits->discard_granularity = chunk_size_bytes;
+               limits->max_discard_sectors = rs->md.chunk_sectors;
        }
 }
 
index 5a51151..089aed5 100644 (file)
@@ -878,12 +878,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
                                        struct dm_target *ti,
                                        struct dm_dirty_log *dl)
 {
-       size_t len;
-       struct mirror_set *ms = NULL;
-
-       len = sizeof(*ms) + (sizeof(ms->mirror[0]) * nr_mirrors);
+       struct mirror_set *ms =
+               kzalloc(struct_size(ms, mirror, nr_mirrors), GFP_KERNEL);
 
-       ms = kzalloc(len, GFP_KERNEL);
        if (!ms) {
                ti->error = "Cannot allocate mirror context";
                return NULL;
index 45b92a3..7141704 100644 (file)
@@ -262,7 +262,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
        if (n_entries != (size_t)n_entries || !(size_t)(n_entries + 1))
                return -EOVERFLOW;
 
-       shared_alloc_size = sizeof(struct dm_stat) + (size_t)n_entries * sizeof(struct dm_stat_shared);
+       shared_alloc_size = struct_size(s, stat_shared, n_entries);
        if ((shared_alloc_size - sizeof(struct dm_stat)) / sizeof(struct dm_stat_shared) != n_entries)
                return -EOVERFLOW;
 
index 8820931..52e0495 100644 (file)
@@ -163,10 +163,8 @@ static int alloc_targets(struct dm_table *t, unsigned int num)
 
        /*
         * Allocate both the target array and offset array at once.
-        * Append an empty entry to catch sectors beyond the end of
-        * the device.
         */
-       n_highs = (sector_t *) dm_vcalloc(num + 1, sizeof(struct dm_target) +
+       n_highs = (sector_t *) dm_vcalloc(num, sizeof(struct dm_target) +
                                          sizeof(sector_t));
        if (!n_highs)
                return -ENOMEM;
@@ -1359,7 +1357,7 @@ struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index)
 /*
  * Search the btree for the correct target.
  *
- * Caller should check returned pointer with dm_target_is_valid()
+ * Caller should check returned pointer for NULL
  * to trap I/O beyond end of device.
  */
 struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
@@ -1368,7 +1366,7 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
        sector_t *node;
 
        if (unlikely(sector >= dm_table_get_size(t)))
-               return &t->targets[t->num_targets];
+               return NULL;
 
        for (l = 0; l < t->depth; l++) {
                n = get_child(n, k);
index ea24ff0..4fb33e7 100644 (file)
@@ -15,7 +15,7 @@
 
 #include "dm-verity.h"
 #include "dm-verity-fec.h"
-
+#include "dm-verity-verify-sig.h"
 #include <linux/module.h>
 #include <linux/reboot.h>
 
@@ -33,7 +33,8 @@
 #define DM_VERITY_OPT_IGN_ZEROES       "ignore_zero_blocks"
 #define DM_VERITY_OPT_AT_MOST_ONCE     "check_at_most_once"
 
-#define DM_VERITY_OPTS_MAX             (2 + DM_VERITY_OPTS_FEC)
+#define DM_VERITY_OPTS_MAX             (2 + DM_VERITY_OPTS_FEC + \
+                                        DM_VERITY_ROOT_HASH_VERIFICATION_OPTS)
 
 static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
 
@@ -713,6 +714,8 @@ static void verity_status(struct dm_target *ti, status_type_t type,
                        args++;
                if (v->validated_blocks)
                        args++;
+               if (v->signature_key_desc)
+                       args += DM_VERITY_ROOT_HASH_VERIFICATION_OPTS;
                if (!args)
                        return;
                DMEMIT(" %u", args);
@@ -734,6 +737,9 @@ static void verity_status(struct dm_target *ti, status_type_t type,
                if (v->validated_blocks)
                        DMEMIT(" " DM_VERITY_OPT_AT_MOST_ONCE);
                sz = verity_fec_status_table(v, sz, result, maxlen);
+               if (v->signature_key_desc)
+                       DMEMIT(" " DM_VERITY_ROOT_HASH_VERIFICATION_OPT_SIG_KEY
+                               " %s", v->signature_key_desc);
                break;
        }
 }
@@ -799,6 +805,8 @@ static void verity_dtr(struct dm_target *ti)
 
        verity_fec_dtr(v);
 
+       kfree(v->signature_key_desc);
+
        kfree(v);
 }
 
@@ -854,7 +862,8 @@ out:
        return r;
 }
 
-static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v)
+static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
+                                struct dm_verity_sig_opts *verify_args)
 {
        int r;
        unsigned argc;
@@ -903,6 +912,14 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v)
                        if (r)
                                return r;
                        continue;
+               } else if (verity_verify_is_sig_opt_arg(arg_name)) {
+                       r = verity_verify_sig_parse_opt_args(as, v,
+                                                            verify_args,
+                                                            &argc, arg_name);
+                       if (r)
+                               return r;
+                       continue;
+
                }
 
                ti->error = "Unrecognized verity feature request";
@@ -929,6 +946,7 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v)
 static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 {
        struct dm_verity *v;
+       struct dm_verity_sig_opts verify_args = {0};
        struct dm_arg_set as;
        unsigned int num;
        unsigned long long num_ll;
@@ -936,6 +954,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
        int i;
        sector_t hash_position;
        char dummy;
+       char *root_hash_digest_to_validate;
 
        v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL);
        if (!v) {
@@ -1069,6 +1088,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
                r = -EINVAL;
                goto bad;
        }
+       root_hash_digest_to_validate = argv[8];
 
        if (strcmp(argv[9], "-")) {
                v->salt_size = strlen(argv[9]) / 2;
@@ -1094,11 +1114,20 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
                as.argc = argc;
                as.argv = argv;
 
-               r = verity_parse_opt_args(&as, v);
+               r = verity_parse_opt_args(&as, v, &verify_args);
                if (r < 0)
                        goto bad;
        }
 
+       /* Root hash signature is an optional parameter */
+       r = verity_verify_root_hash(root_hash_digest_to_validate,
+                                   strlen(root_hash_digest_to_validate),
+                                   verify_args.sig,
+                                   verify_args.sig_size);
+       if (r < 0) {
+               ti->error = "Root hash verification failed";
+               goto bad;
+       }
        v->hash_per_block_bits =
                __fls((1 << v->hash_dev_block_bits) / v->digest_size);
 
@@ -1164,9 +1193,13 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
        ti->per_io_data_size = roundup(ti->per_io_data_size,
                                       __alignof__(struct dm_verity_io));
 
+       verity_verify_sig_opts_cleanup(&verify_args);
+
        return 0;
 
 bad:
+
+       verity_verify_sig_opts_cleanup(&verify_args);
        verity_dtr(ti);
 
        return r;
@@ -1174,7 +1207,7 @@ bad:
 
 static struct target_type verity_target = {
        .name           = "verity",
-       .version        = {1, 4, 0},
+       .version        = {1, 5, 0},
        .module         = THIS_MODULE,
        .ctr            = verity_ctr,
        .dtr            = verity_dtr,
diff --git a/drivers/md/dm-verity-verify-sig.c b/drivers/md/dm-verity-verify-sig.c
new file mode 100644 (file)
index 0000000..614e43d
--- /dev/null
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Microsoft Corporation.
+ *
+ * Author:  Jaskaran Singh Khurana <jaskarankhurana@linux.microsoft.com>
+ *
+ */
+#include <linux/device-mapper.h>
+#include <linux/verification.h>
+#include <keys/user-type.h>
+#include <linux/module.h>
+#include "dm-verity.h"
+#include "dm-verity-verify-sig.h"
+
+#define DM_VERITY_VERIFY_ERR(s) DM_VERITY_ROOT_HASH_VERIFICATION " " s
+
+/*
+ * When set, a table without a root hash signature is rejected.  The last
+ * module_param() argument is the sysfs permission mask, not an initial
+ * value: 0444 makes the setting visible read-only in sysfs, so it can be
+ * inspected but not changed at runtime.
+ */
+static bool require_signatures;
+module_param(require_signatures, bool, 0444);
+MODULE_PARM_DESC(require_signatures,
+               "Verify the roothash of dm-verity hash tree");
+
+#define DM_VERITY_IS_SIG_FORCE_ENABLED() \
+       (require_signatures != false)
+
+/* Return true if @arg_name names the root hash signature key option. */
+bool verity_verify_is_sig_opt_arg(const char *arg_name)
+{
+       return (!strcasecmp(arg_name,
+                           DM_VERITY_ROOT_HASH_VERIFICATION_OPT_SIG_KEY));
+}
+
+/*
+ * Copy the payload of the "user" key described by @key_desc into @sig_opts.
+ * On success sig_opts->sig is a fresh allocation that is released by
+ * verity_verify_sig_opts_cleanup().
+ *
+ * Returns 0, the request_key() error, -EKEYREVOKED if the key has no
+ * payload, or -ENOMEM.
+ */
+static int verity_verify_get_sig_from_key(const char *key_desc,
+                                       struct dm_verity_sig_opts *sig_opts)
+{
+       struct key *key;
+       const struct user_key_payload *ukp;
+       int ret = 0;
+
+       key = request_key(&key_type_user,
+                       key_desc, NULL);
+       if (IS_ERR(key))
+               return PTR_ERR(key);
+
+       down_read(&key->sem);
+
+       ukp = user_key_payload_locked(key);
+       if (!ukp) {
+               ret = -EKEYREVOKED;
+               goto end;
+       }
+
+       sig_opts->sig = kmemdup(ukp->data, ukp->datalen, GFP_KERNEL);
+       if (!sig_opts->sig) {
+               ret = -ENOMEM;
+               goto end;
+       }
+       sig_opts->sig_size = ukp->datalen;
+
+end:
+       up_read(&key->sem);
+       key_put(key);
+
+       return ret;
+}
+
+/*
+ * Handle the root hash signature key option: consume the key description
+ * that follows it in @as, load the signature from that key into @sig_opts
+ * and remember the description in v->signature_key_desc.
+ *
+ * Returns 0 on success, -EINVAL if the description is missing, the key
+ * lookup error, or -ENOMEM.
+ */
+int verity_verify_sig_parse_opt_args(struct dm_arg_set *as,
+                                    struct dm_verity *v,
+                                    struct dm_verity_sig_opts *sig_opts,
+                                    unsigned int *argc,
+                                    const char *arg_name)
+{
+       struct dm_target *ti = v->ti;
+       const char *sig_key;
+       int ret;
+
+       if (!*argc) {
+               ti->error = DM_VERITY_VERIFY_ERR("Signature key not specified");
+               return -EINVAL;
+       }
+
+       sig_key = dm_shift_arg(as);
+       (*argc)--;
+
+       /* A repeated option replaces, rather than leaks, the earlier values. */
+       verity_verify_sig_opts_cleanup(sig_opts);
+       kfree(v->signature_key_desc);
+       v->signature_key_desc = NULL;
+
+       ret = verity_verify_get_sig_from_key(sig_key, sig_opts);
+       if (ret < 0) {
+               ti->error = DM_VERITY_VERIFY_ERR("Invalid key specified");
+               return ret;
+       }
+
+       v->signature_key_desc = kstrdup(sig_key, GFP_KERNEL);
+       if (!v->signature_key_desc)
+               return -ENOMEM;
+
+       return 0;
+}
+
+/*
+ * verity_verify_root_hash - Verify the root hash of the verity hash device
+ *                           using builtin trusted keys.
+ *
+ * @root_hash: For verity, the roothash/data to be verified.
+ * @root_hash_len: Size of the roothash/data to be verified.
+ * @sig_data: The trusted signature that verifies the roothash/data.
+ * @sig_len: Size of the signature.
+ *
+ * Returns 0 when the signature verifies, or when none was supplied and
+ * require_signatures is off; -EINVAL for an empty root hash; -ENOKEY when a
+ * signature is required but missing; otherwise the
+ * verify_pkcs7_signature() error.
+ */
+int verity_verify_root_hash(const void *root_hash, size_t root_hash_len,
+                           const void *sig_data, size_t sig_len)
+{
+       if (!root_hash || root_hash_len == 0)
+               return -EINVAL;
+
+       if (!sig_data  || sig_len == 0) {
+               if (DM_VERITY_IS_SIG_FORCE_ENABLED())
+                       return -ENOKEY;
+               else
+                       return 0;
+       }
+
+       return verify_pkcs7_signature(root_hash, root_hash_len, sig_data,
+                               sig_len, NULL, VERIFYING_UNSPECIFIED_SIGNATURE,
+                               NULL, NULL);
+}
+
+/* Free the signature held in @sig_opts and reset it to the empty state. */
+void verity_verify_sig_opts_cleanup(struct dm_verity_sig_opts *sig_opts)
+{
+       kfree(sig_opts->sig);
+       sig_opts->sig = NULL;
+       sig_opts->sig_size = 0;
+}
diff --git a/drivers/md/dm-verity-verify-sig.h b/drivers/md/dm-verity-verify-sig.h
new file mode 100644 (file)
index 0000000..19b1547
--- /dev/null
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Microsoft Corporation.
+ *
+ * Author:  Jaskaran Singh Khurana <jaskarankhurana@linux.microsoft.com>
+ *
+ */
+#ifndef DM_VERITY_SIG_VERIFICATION_H
+#define DM_VERITY_SIG_VERIFICATION_H
+
+#define DM_VERITY_ROOT_HASH_VERIFICATION "DM Verity Sig Verification"
+#define DM_VERITY_ROOT_HASH_VERIFICATION_OPT_SIG_KEY "root_hash_sig_key_desc"
+
+/* Signature blob and its size in bytes. */
+struct dm_verity_sig_opts {
+       unsigned int sig_size;
+       u8 *sig;
+};
+
+#ifdef CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG
+
+/* Number of table arguments: the option name plus its key description. */
+#define DM_VERITY_ROOT_HASH_VERIFICATION_OPTS 2
+
+int verity_verify_root_hash(const void *data, size_t data_len,
+                           const void *sig_data, size_t sig_len);
+bool verity_verify_is_sig_opt_arg(const char *arg_name);
+
+int verity_verify_sig_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
+                                   struct dm_verity_sig_opts *sig_opts,
+                                   unsigned int *argc, const char *arg_name);
+
+void verity_verify_sig_opts_cleanup(struct dm_verity_sig_opts *sig_opts);
+
+#else
+
+/*
+ * Signature verification is compiled out.  The stubs below are static inline
+ * so that including this header never emits external function definitions.
+ */
+#define DM_VERITY_ROOT_HASH_VERIFICATION_OPTS 0
+
+static inline int verity_verify_root_hash(const void *data, size_t data_len,
+                                          const void *sig_data, size_t sig_len)
+{
+       return 0;
+}
+
+static inline bool verity_verify_is_sig_opt_arg(const char *arg_name)
+{
+       return false;
+}
+
+static inline int
+verity_verify_sig_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
+                                 struct dm_verity_sig_opts *sig_opts,
+                                 unsigned int *argc, const char *arg_name)
+{
+       return -EINVAL;
+}
+
+static inline void
+verity_verify_sig_opts_cleanup(struct dm_verity_sig_opts *sig_opts)
+{
+}
+
+#endif /* CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG */
+#endif /* DM_VERITY_SIG_VERIFICATION_H */
index eeaf940..641b9e3 100644 (file)
@@ -63,6 +63,8 @@ struct dm_verity {
 
        struct dm_verity_fec *fec;      /* forward error correction */
        unsigned long *validated_blocks; /* bitset blocks validated */
+
+       char *signature_key_desc; /* signature keyring reference */
 };
 
 struct dm_verity_io {
index 1cb137f..d06b8aa 100644 (file)
@@ -190,7 +190,6 @@ struct writeback_struct {
        struct dm_writecache *wc;
        struct wc_entry **wc_list;
        unsigned wc_list_n;
-       struct page *page;
        struct wc_entry *wc_list_inline[WB_LIST_INLINE];
        struct bio bio;
 };
@@ -727,7 +726,8 @@ static void writecache_flush(struct dm_writecache *wc)
        }
        writecache_commit_flushed(wc);
 
-       writecache_wait_for_ios(wc, WRITE);
+       if (!WC_MODE_PMEM(wc))
+               writecache_wait_for_ios(wc, WRITE);
 
        wc->seq_count++;
        pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count));
@@ -1561,7 +1561,7 @@ static void writecache_writeback(struct work_struct *work)
 {
        struct dm_writecache *wc = container_of(work, struct dm_writecache, writeback_work);
        struct blk_plug plug;
-       struct wc_entry *e, *f, *g;
+       struct wc_entry *f, *g, *e = NULL;
        struct rb_node *node, *next_node;
        struct list_head skipped;
        struct writeback_list wbl;
@@ -1598,7 +1598,14 @@ restart:
                        break;
                }
 
-               e = container_of(wc->lru.prev, struct wc_entry, lru);
+               if (unlikely(wc->writeback_all)) {
+                       if (unlikely(!e)) {
+                               writecache_flush(wc);
+                               e = container_of(rb_first(&wc->tree), struct wc_entry, rb_node);
+                       } else
+                               e = g;
+               } else
+                       e = container_of(wc->lru.prev, struct wc_entry, lru);
                BUG_ON(e->write_in_progress);
                if (unlikely(!writecache_entry_is_committed(wc, e))) {
                        writecache_flush(wc);
@@ -1629,8 +1636,8 @@ restart:
                        if (unlikely(!next_node))
                                break;
                        g = container_of(next_node, struct wc_entry, rb_node);
-                       if (read_original_sector(wc, g) ==
-                           read_original_sector(wc, f)) {
+                       if (unlikely(read_original_sector(wc, g) ==
+                           read_original_sector(wc, f))) {
                                f = g;
                                continue;
                        }
@@ -1659,8 +1666,14 @@ restart:
                        g->wc_list_contiguous = BIO_MAX_PAGES;
                        f = g;
                        e->wc_list_contiguous++;
-                       if (unlikely(e->wc_list_contiguous == BIO_MAX_PAGES))
+                       if (unlikely(e->wc_list_contiguous == BIO_MAX_PAGES)) {
+                               if (unlikely(wc->writeback_all)) {
+                                       next_node = rb_next(&f->rb_node);
+                                       if (likely(next_node))
+                                               g = container_of(next_node, struct wc_entry, rb_node);
+                               }
                                break;
+                       }
                }
                cond_resched();
        }
index 31478fe..d3bcc41 100644 (file)
@@ -134,8 +134,6 @@ static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
 
        refcount_inc(&bioctx->ref);
        generic_make_request(clone);
-       if (clone->bi_status == BLK_STS_IOERR)
-               return -EIO;
 
        if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
                zone->wp_block += nr_blocks;
index d0beef0..1a5e328 100644 (file)
@@ -457,7 +457,7 @@ static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
                return -EIO;
 
        tgt = dm_table_find_target(map, sector);
-       if (!dm_target_is_valid(tgt)) {
+       if (!tgt) {
                ret = -EIO;
                goto out;
        }
@@ -1072,7 +1072,7 @@ static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
                return NULL;
 
        ti = dm_table_find_target(map, sector);
-       if (!dm_target_is_valid(ti))
+       if (!ti)
                return NULL;
 
        return ti;
@@ -1572,7 +1572,7 @@ static int __split_and_process_non_flush(struct clone_info *ci)
        int r;
 
        ti = dm_table_find_target(ci->map, ci->sector);
-       if (!dm_target_is_valid(ti))
+       if (!ti)
                return -EIO;
 
        if (__process_abnormal_io(ci, ti, &r))
@@ -1748,7 +1748,7 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
 
        if (!ti) {
                ti = dm_table_find_target(map, bio->bi_iter.bi_sector);
-               if (unlikely(!ti || !dm_target_is_valid(ti))) {
+               if (unlikely(!ti)) {
                        bio_io_error(bio);
                        return ret;
                }
index 0475673..d7c4f66 100644 (file)
@@ -86,11 +86,6 @@ struct target_type *dm_get_immutable_target_type(struct mapped_device *md);
 int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
 
 /*
- * To check the return value from dm_table_find_target().
- */
-#define dm_target_is_valid(t) ((t)->table)
-
-/*
  * To check whether the target type is bio-based or not (request-based).
  */
 #define dm_target_bio_based(t) ((t)->type->map != NULL)
index b8a6218..bd68f6f 100644 (file)
@@ -369,10 +369,6 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
                         */
                        dm_tm_unlock(ll->tm, blk);
                        continue;
-
-               } else if (r < 0) {
-                       dm_tm_unlock(ll->tm, blk);
-                       return r;
                }
 
                dm_tm_unlock(ll->tm, blk);
index 0a47d47..23e02ff 100644 (file)
@@ -183,14 +183,14 @@ static int adv748x_initialise_clients(struct adv748x_state *state)
        int ret;
 
        for (i = ADV748X_PAGE_DPLL; i < ADV748X_PAGE_MAX; ++i) {
-               state->i2c_clients[i] = i2c_new_secondary_device(
+               state->i2c_clients[i] = i2c_new_ancillary_device(
                                state->client,
                                adv748x_default_addresses[i].name,
                                adv748x_default_addresses[i].default_addr);
 
-               if (state->i2c_clients[i] == NULL) {
+               if (IS_ERR(state->i2c_clients[i])) {
                        adv_err(state, "failed to create i2c client %u\n", i);
-                       return -ENOMEM;
+                       return PTR_ERR(state->i2c_clients[i]);
                }
 
                ret = adv748x_configure_regmap(state, i);
index 28a84bf..2dedd6e 100644 (file)
@@ -2862,10 +2862,8 @@ static void adv76xx_unregister_clients(struct adv76xx_state *state)
 {
        unsigned int i;
 
-       for (i = 1; i < ARRAY_SIZE(state->i2c_clients); ++i) {
-               if (state->i2c_clients[i])
-                       i2c_unregister_device(state->i2c_clients[i]);
-       }
+       for (i = 1; i < ARRAY_SIZE(state->i2c_clients); ++i)
+               i2c_unregister_device(state->i2c_clients[i]);
 }
 
 static struct i2c_client *adv76xx_dummy_client(struct v4l2_subdev *sd,
@@ -2878,14 +2876,14 @@ static struct i2c_client *adv76xx_dummy_client(struct v4l2_subdev *sd,
        struct i2c_client *new_client;
 
        if (pdata && pdata->i2c_addresses[page])
-               new_client = i2c_new_dummy(client->adapter,
+               new_client = i2c_new_dummy_device(client->adapter,
                                           pdata->i2c_addresses[page]);
        else
-               new_client = i2c_new_secondary_device(client,
+               new_client = i2c_new_ancillary_device(client,
                                adv76xx_default_addresses[page].name,
                                adv76xx_default_addresses[page].default_addr);
 
-       if (new_client)
+       if (!IS_ERR(new_client))
                io_write(sd, io_reg, new_client->addr << 1);
 
        return new_client;
@@ -3516,15 +3514,19 @@ static int adv76xx_probe(struct i2c_client *client,
        }
 
        for (i = 1; i < ADV76XX_PAGE_MAX; ++i) {
+               struct i2c_client *dummy_client;
+
                if (!(BIT(i) & state->info->page_mask))
                        continue;
 
-               state->i2c_clients[i] = adv76xx_dummy_client(sd, i);
-               if (!state->i2c_clients[i]) {
-                       err = -EINVAL;
+               dummy_client = adv76xx_dummy_client(sd, i);
+               if (IS_ERR(dummy_client)) {
+                       err = PTR_ERR(dummy_client);
                        v4l2_err(sd, "failed to create i2c client %u\n", i);
                        goto err_i2c;
                }
+
+               state->i2c_clients[i] = dummy_client;
        }
 
        INIT_DELAYED_WORK(&state->delayed_work_enable_hotplug,
index 76b4ac7..aeb2f49 100644 (file)
@@ -157,6 +157,7 @@ static void videobuf_dma_contig_user_put(struct videobuf_dma_contig_memory *mem)
 static int videobuf_dma_contig_user_get(struct videobuf_dma_contig_memory *mem,
                                        struct videobuf_buffer *vb)
 {
+       unsigned long untagged_baddr = untagged_addr(vb->baddr);
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long prev_pfn, this_pfn;
@@ -164,22 +165,22 @@ static int videobuf_dma_contig_user_get(struct videobuf_dma_contig_memory *mem,
        unsigned int offset;
        int ret;
 
-       offset = vb->baddr & ~PAGE_MASK;
+       offset = untagged_baddr & ~PAGE_MASK;
        mem->size = PAGE_ALIGN(vb->size + offset);
        ret = -EINVAL;
 
        down_read(&mm->mmap_sem);
 
-       vma = find_vma(mm, vb->baddr);
+       vma = find_vma(mm, untagged_baddr);
        if (!vma)
                goto out_up;
 
-       if ((vb->baddr + mem->size) > vma->vm_end)
+       if ((untagged_baddr + mem->size) > vma->vm_end)
                goto out_up;
 
        pages_done = 0;
        prev_pfn = 0; /* kill warning */
-       user_address = vb->baddr;
+       user_address = untagged_baddr;
 
        while (pages_done < (mem->size >> PAGE_SHIFT)) {
                ret = follow_pfn(vma, user_address, &this_pfn);
index f2d9fb4..4e8d0d6 100644 (file)
@@ -425,10 +425,10 @@ static int pm800_pages_init(struct pm80x_chip *chip)
                return -ENODEV;
 
        /* PM800 block power page */
-       subchip->power_page = i2c_new_dummy(client->adapter,
+       subchip->power_page = i2c_new_dummy_device(client->adapter,
                                            subchip->power_page_addr);
-       if (subchip->power_page == NULL) {
-               ret = -ENODEV;
+       if (IS_ERR(subchip->power_page)) {
+               ret = PTR_ERR(subchip->power_page);
                goto out;
        }
 
@@ -444,10 +444,10 @@ static int pm800_pages_init(struct pm80x_chip *chip)
        i2c_set_clientdata(subchip->power_page, chip);
 
        /* PM800 block GPADC */
-       subchip->gpadc_page = i2c_new_dummy(client->adapter,
+       subchip->gpadc_page = i2c_new_dummy_device(client->adapter,
                                            subchip->gpadc_page_addr);
-       if (subchip->gpadc_page == NULL) {
-               ret = -ENODEV;
+       if (IS_ERR(subchip->gpadc_page)) {
+               ret = PTR_ERR(subchip->gpadc_page);
                goto out;
        }
 
index 9e0bd13..c9bae71 100644 (file)
@@ -1178,12 +1178,12 @@ static int pm860x_probe(struct i2c_client *client)
         */
        if (pdata->companion_addr && (pdata->companion_addr != client->addr)) {
                chip->companion_addr = pdata->companion_addr;
-               chip->companion = i2c_new_dummy(chip->client->adapter,
+               chip->companion = i2c_new_dummy_device(chip->client->adapter,
                                                chip->companion_addr);
-               if (!chip->companion) {
+               if (IS_ERR(chip->companion)) {
                        dev_err(&client->dev,
                                "Failed to allocate I2C companion device\n");
-                       return -ENODEV;
+                       return PTR_ERR(chip->companion);
                }
                chip->regmap_companion = regmap_init_i2c(chip->companion,
                                                        &pm860x_regmap_config);
index c9c49da..ae24d3e 100644 (file)
@@ -589,6 +589,17 @@ config INTEL_SOC_PMIC_CHTDC_TI
          Select this option for supporting Dollar Cove (TI version) PMIC
          device that is found on some Intel Cherry Trail systems.
 
+config INTEL_SOC_PMIC_MRFLD
+       tristate "Support for Intel Merrifield Basin Cove PMIC"
+       depends on GPIOLIB
+       depends on ACPI
+       depends on INTEL_SCU_IPC
+       select MFD_CORE
+       select REGMAP_IRQ
+       help
+         Select this option for supporting Basin Cove PMIC device
+         that is found on Intel Merrifield systems.
+
 config MFD_INTEL_LPSS
        tristate
        select COMMON_CLK
@@ -641,15 +652,6 @@ config MFD_JANZ_CMODIO
          host many different types of MODULbus daughterboards, including
          CAN and GPIO controllers.
 
-config MFD_JZ4740_ADC
-       bool "Janz JZ4740 ADC core"
-       select MFD_CORE
-       select GENERIC_IRQ_CHIP
-       depends on MACH_JZ4740
-       help
-         Say yes here if you want support for the ADC unit in the JZ4740 SoC.
-         This driver is necessary for jz4740-battery and jz4740-hwmon driver.
-
 config MFD_KEMPLD
        tristate "Kontron module PLD device"
        select MFD_CORE
index 0c0a848..c1067ea 100644 (file)
@@ -189,7 +189,6 @@ obj-$(CONFIG_LPC_SCH)               += lpc_sch.o
 obj-$(CONFIG_LPC_ICH)          += lpc_ich.o
 obj-$(CONFIG_MFD_RDC321X)      += rdc321x-southbridge.o
 obj-$(CONFIG_MFD_JANZ_CMODIO)  += janz-cmodio.o
-obj-$(CONFIG_MFD_JZ4740_ADC)   += jz4740-adc.o
 obj-$(CONFIG_MFD_TPS6586X)     += tps6586x.o
 obj-$(CONFIG_MFD_VX855)                += vx855.o
 obj-$(CONFIG_MFD_WL1273_CORE)  += wl1273-core.o
@@ -239,7 +238,9 @@ obj-$(CONFIG_INTEL_SOC_PMIC)        += intel-soc-pmic.o
 obj-$(CONFIG_INTEL_SOC_PMIC_BXTWC)     += intel_soc_pmic_bxtwc.o
 obj-$(CONFIG_INTEL_SOC_PMIC_CHTWC)     += intel_soc_pmic_chtwc.o
 obj-$(CONFIG_INTEL_SOC_PMIC_CHTDC_TI)  += intel_soc_pmic_chtdc_ti.o
-obj-$(CONFIG_MFD_MT6397)       += mt6397-core.o
+mt6397-objs    := mt6397-core.o mt6397-irq.o
+obj-$(CONFIG_MFD_MT6397)       += mt6397.o
+obj-$(CONFIG_INTEL_SOC_PMIC_MRFLD)     += intel_soc_pmic_mrfld.o
 
 obj-$(CONFIG_MFD_ALTERA_A10SR) += altera-a10sr.o
 obj-$(CONFIG_MFD_ALTERA_SYSMGR) += altera-sysmgr.o
index 9f3dbc3..57723f1 100644 (file)
@@ -865,10 +865,10 @@ static int ab3100_probe(struct i2c_client *client,
                 &ab3100->chip_name[0]);
 
        /* Attach a second dummy i2c_client to the test register address */
-       ab3100->testreg_client = i2c_new_dummy(client->adapter,
+       ab3100->testreg_client = i2c_new_dummy_device(client->adapter,
                                               client->addr + 1);
-       if (!ab3100->testreg_client) {
-               err = -ENOMEM;
+       if (IS_ERR(ab3100->testreg_client)) {
+               err = PTR_ERR(ab3100->testreg_client);
                goto exit_no_testreg_client;
        }
 
index 567a34b..f4e26b6 100644 (file)
@@ -2680,16 +2680,12 @@ static int ab8500_debug_probe(struct platform_device *plf)
        irq_ab8500 = res->start;
 
        irq_first = platform_get_irq_byname(plf, "IRQ_FIRST");
-       if (irq_first < 0) {
-               dev_err(&plf->dev, "First irq not found, err %d\n", irq_first);
+       if (irq_first < 0)
                return irq_first;
-       }
 
        irq_last = platform_get_irq_byname(plf, "IRQ_LAST");
-       if (irq_last < 0) {
-               dev_err(&plf->dev, "Last irq not found, err %d\n", irq_last);
+       if (irq_last < 0)
                return irq_last;
-       }
 
        ab8500_dir = debugfs_create_dir(AB8500_NAME_STRING, NULL);
 
index 83b18c9..a6bd213 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/delay.h>
 #include <linux/irq.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
 #include <linux/export.h>
 #include <linux/io.h>
 #include <linux/slab.h>
index 1aeb5e4..bfac5dc 100644 (file)
@@ -61,11 +61,11 @@ static int bcm590xx_i2c_probe(struct i2c_client *i2c_pri,
        }
 
        /* Secondary I2C slave address is the base address with A(2) asserted */
-       bcm590xx->i2c_sec = i2c_new_dummy(i2c_pri->adapter,
+       bcm590xx->i2c_sec = i2c_new_dummy_device(i2c_pri->adapter,
                                          i2c_pri->addr | BIT(2));
-       if (!bcm590xx->i2c_sec) {
+       if (IS_ERR(bcm590xx->i2c_sec)) {
                dev_err(&i2c_pri->dev, "failed to add secondary I2C device\n");
-               return -ENODEV;
+               return PTR_ERR(bcm590xx->i2c_sec);
        }
        i2c_set_clientdata(bcm590xx->i2c_sec, bcm590xx);
 
index 1303306..7f0aa1e 100644 (file)
@@ -420,10 +420,10 @@ static int da9150_probe(struct i2c_client *client,
        qif_addr = da9150_reg_read(da9150, DA9150_CORE2WIRE_CTRL_A);
        qif_addr = (qif_addr & DA9150_CORE_BASE_ADDR_MASK) >> 1;
        qif_addr |= DA9150_QIF_I2C_ADDR_LSB;
-       da9150->core_qif = i2c_new_dummy(client->adapter, qif_addr);
-       if (!da9150->core_qif) {
+       da9150->core_qif = i2c_new_dummy_device(client->adapter, qif_addr);
+       if (IS_ERR(da9150->core_qif)) {
                dev_err(da9150->dev, "Failed to attach QIF client\n");
-               return -ENODEV;
+               return PTR_ERR(da9150->core_qif);
        }
 
        i2c_set_clientdata(da9150->core_qif, da9150);
index 13ca720..e5c8bc9 100644 (file)
@@ -19,7 +19,6 @@
 #include <sound/pcm.h>
 
 #include <linux/mfd/davinci_voicecodec.h>
-#include <mach/hardware.h>
 
 static const struct regmap_config davinci_vc_regmap = {
        .reg_bits = 32,
@@ -31,6 +30,7 @@ static int __init davinci_vc_probe(struct platform_device *pdev)
        struct davinci_vc *davinci_vc;
        struct resource *res;
        struct mfd_cell *cell = NULL;
+       dma_addr_t fifo_base;
        int ret;
 
        davinci_vc = devm_kzalloc(&pdev->dev,
@@ -48,6 +48,7 @@ static int __init davinci_vc_probe(struct platform_device *pdev)
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
+       fifo_base = (dma_addr_t)res->start;
        davinci_vc->base = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(davinci_vc->base)) {
                ret = PTR_ERR(davinci_vc->base);
@@ -70,8 +71,7 @@ static int __init davinci_vc_probe(struct platform_device *pdev)
        }
 
        davinci_vc->davinci_vcif.dma_tx_channel = res->start;
-       davinci_vc->davinci_vcif.dma_tx_addr =
-               (dma_addr_t)(io_v2p(davinci_vc->base) + DAVINCI_VC_WFIFO);
+       davinci_vc->davinci_vcif.dma_tx_addr = fifo_base + DAVINCI_VC_WFIFO;
 
        res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
        if (!res) {
@@ -81,8 +81,7 @@ static int __init davinci_vc_probe(struct platform_device *pdev)
        }
 
        davinci_vc->davinci_vcif.dma_rx_channel = res->start;
-       davinci_vc->davinci_vcif.dma_rx_addr =
-               (dma_addr_t)(io_v2p(davinci_vc->base) + DAVINCI_VC_RFIFO);
+       davinci_vc->davinci_vcif.dma_rx_addr = fifo_base + DAVINCI_VC_RFIFO;
 
        davinci_vc->dev = &pdev->dev;
        davinci_vc->pdev = pdev;
index 90e0f21..0e019cc 100644 (file)
@@ -1695,21 +1695,41 @@ static long round_clock_rate(u8 clock, unsigned long rate)
        return rounded_rate;
 }
 
-static const unsigned long armss_freqs[] = {
+static const unsigned long db8500_armss_freqs[] = {
        200000000,
        400000000,
        800000000,
        998400000
 };
 
+/* The DB8520 has slightly higher ARMSS max frequency */
+static const unsigned long db8520_armss_freqs[] = {
+       200000000,
+       400000000,
+       800000000,
+       1152000000
+};
+
+
+
 static long round_armss_rate(unsigned long rate)
 {
        unsigned long freq = 0;
+       const unsigned long *freqs;
+       int nfreqs;
        int i;
 
+       if (fw_info.version.project == PRCMU_FW_PROJECT_U8520) {
+               freqs = db8520_armss_freqs;
+               nfreqs = ARRAY_SIZE(db8520_armss_freqs);
+       } else {
+               freqs = db8500_armss_freqs;
+               nfreqs = ARRAY_SIZE(db8500_armss_freqs);
+       }
+
        /* Find the corresponding arm opp from the cpufreq table. */
-       for (i = 0; i < ARRAY_SIZE(armss_freqs); i++) {
-               freq = armss_freqs[i];
+       for (i = 0; i < nfreqs; i++) {
+               freq = freqs[i];
                if (rate <= freq)
                        break;
        }
@@ -1854,11 +1874,21 @@ static int set_armss_rate(unsigned long rate)
 {
        unsigned long freq;
        u8 opps[] = { ARM_EXTCLK, ARM_50_OPP, ARM_100_OPP, ARM_MAX_OPP };
+       const unsigned long *freqs;
+       int nfreqs;
        int i;
 
+       if (fw_info.version.project == PRCMU_FW_PROJECT_U8520) {
+               freqs = db8520_armss_freqs;
+               nfreqs = ARRAY_SIZE(db8520_armss_freqs);
+       } else {
+               freqs = db8500_armss_freqs;
+               nfreqs = ARRAY_SIZE(db8500_armss_freqs);
+       }
+
        /* Find the corresponding arm opp from the cpufreq table. */
-       for (i = 0; i < ARRAY_SIZE(armss_freqs); i++) {
-               freq = armss_freqs[i];
+       for (i = 0; i < nfreqs; i++) {
+               freq = freqs[i];
                if (rate == freq)
                        break;
        }
@@ -3130,10 +3160,8 @@ static int db8500_prcmu_probe(struct platform_device *pdev)
        writel(ALL_MBOX_BITS, PRCM_ARM_IT1_CLR);
 
        irq = platform_get_irq(pdev, 0);
-       if (irq <= 0) {
-               dev_err(&pdev->dev, "no prcmu irq provided\n");
+       if (irq <= 0)
                return irq;
-       }
 
        err = request_threaded_irq(irq, prcmu_irq_handler,
                prcmu_irq_thread_fn, IRQF_NO_SUSPEND, "prcmu", NULL);
index f505e3e..70fa18b 100644 (file)
@@ -35,7 +35,7 @@ struct pcap_chip {
 
        /* IO */
        u32 buf;
-       struct mutex io_mutex;
+       spinlock_t io_lock;
 
        /* IRQ */
        unsigned int irq_base;
@@ -48,7 +48,7 @@ struct pcap_chip {
        struct pcap_adc_request *adc_queue[PCAP_ADC_MAXQ];
        u8 adc_head;
        u8 adc_tail;
-       struct mutex adc_mutex;
+       spinlock_t adc_lock;
 };
 
 /* IO */
@@ -76,14 +76,15 @@ static int ezx_pcap_putget(struct pcap_chip *pcap, u32 *data)
 
 int ezx_pcap_write(struct pcap_chip *pcap, u8 reg_num, u32 value)
 {
+       unsigned long flags;
        int ret;
 
-       mutex_lock(&pcap->io_mutex);
+       spin_lock_irqsave(&pcap->io_lock, flags);
        value &= PCAP_REGISTER_VALUE_MASK;
        value |= PCAP_REGISTER_WRITE_OP_BIT
                | (reg_num << PCAP_REGISTER_ADDRESS_SHIFT);
        ret = ezx_pcap_putget(pcap, &value);
-       mutex_unlock(&pcap->io_mutex);
+       spin_unlock_irqrestore(&pcap->io_lock, flags);
 
        return ret;
 }
@@ -91,14 +92,15 @@ EXPORT_SYMBOL_GPL(ezx_pcap_write);
 
 int ezx_pcap_read(struct pcap_chip *pcap, u8 reg_num, u32 *value)
 {
+       unsigned long flags;
        int ret;
 
-       mutex_lock(&pcap->io_mutex);
+       spin_lock_irqsave(&pcap->io_lock, flags);
        *value = PCAP_REGISTER_READ_OP_BIT
                | (reg_num << PCAP_REGISTER_ADDRESS_SHIFT);
 
        ret = ezx_pcap_putget(pcap, value);
-       mutex_unlock(&pcap->io_mutex);
+       spin_unlock_irqrestore(&pcap->io_lock, flags);
 
        return ret;
 }
@@ -106,11 +108,12 @@ EXPORT_SYMBOL_GPL(ezx_pcap_read);
 
 int ezx_pcap_set_bits(struct pcap_chip *pcap, u8 reg_num, u32 mask, u32 val)
 {
+       unsigned long flags;
        int ret;
        u32 tmp = PCAP_REGISTER_READ_OP_BIT |
                (reg_num << PCAP_REGISTER_ADDRESS_SHIFT);
 
-       mutex_lock(&pcap->io_mutex);
+       spin_lock_irqsave(&pcap->io_lock, flags);
        ret = ezx_pcap_putget(pcap, &tmp);
        if (ret)
                goto out_unlock;
@@ -121,7 +124,7 @@ int ezx_pcap_set_bits(struct pcap_chip *pcap, u8 reg_num, u32 mask, u32 val)
 
        ret = ezx_pcap_putget(pcap, &tmp);
 out_unlock:
-       mutex_unlock(&pcap->io_mutex);
+       spin_unlock_irqrestore(&pcap->io_lock, flags);
 
        return ret;
 }
@@ -212,14 +215,15 @@ static void pcap_irq_handler(struct irq_desc *desc)
 /* ADC */
 void pcap_set_ts_bits(struct pcap_chip *pcap, u32 bits)
 {
+       unsigned long flags;
        u32 tmp;
 
-       mutex_lock(&pcap->adc_mutex);
+       spin_lock_irqsave(&pcap->adc_lock, flags);
        ezx_pcap_read(pcap, PCAP_REG_ADC, &tmp);
        tmp &= ~(PCAP_ADC_TS_M_MASK | PCAP_ADC_TS_REF_LOWPWR);
        tmp |= bits & (PCAP_ADC_TS_M_MASK | PCAP_ADC_TS_REF_LOWPWR);
        ezx_pcap_write(pcap, PCAP_REG_ADC, tmp);
-       mutex_unlock(&pcap->adc_mutex);
+       spin_unlock_irqrestore(&pcap->adc_lock, flags);
 }
 EXPORT_SYMBOL_GPL(pcap_set_ts_bits);
 
@@ -234,15 +238,16 @@ static void pcap_disable_adc(struct pcap_chip *pcap)
 
 static void pcap_adc_trigger(struct pcap_chip *pcap)
 {
+       unsigned long flags;
        u32 tmp;
        u8 head;
 
-       mutex_lock(&pcap->adc_mutex);
+       spin_lock_irqsave(&pcap->adc_lock, flags);
        head = pcap->adc_head;
        if (!pcap->adc_queue[head]) {
                /* queue is empty, save power */
                pcap_disable_adc(pcap);
-               mutex_unlock(&pcap->adc_mutex);
+               spin_unlock_irqrestore(&pcap->adc_lock, flags);
                return;
        }
        /* start conversion on requested bank, save TS_M bits */
@@ -254,7 +259,7 @@ static void pcap_adc_trigger(struct pcap_chip *pcap)
                tmp |= PCAP_ADC_AD_SEL1;
 
        ezx_pcap_write(pcap, PCAP_REG_ADC, tmp);
-       mutex_unlock(&pcap->adc_mutex);
+       spin_unlock_irqrestore(&pcap->adc_lock, flags);
        ezx_pcap_write(pcap, PCAP_REG_ADR, PCAP_ADR_ASC);
 }
 
@@ -265,11 +270,11 @@ static irqreturn_t pcap_adc_irq(int irq, void *_pcap)
        u16 res[2];
        u32 tmp;
 
-       mutex_lock(&pcap->adc_mutex);
+       spin_lock(&pcap->adc_lock);
        req = pcap->adc_queue[pcap->adc_head];
 
        if (WARN(!req, "adc irq without pending request\n")) {
-               mutex_unlock(&pcap->adc_mutex);
+               spin_unlock(&pcap->adc_lock);
                return IRQ_HANDLED;
        }
 
@@ -285,7 +290,7 @@ static irqreturn_t pcap_adc_irq(int irq, void *_pcap)
 
        pcap->adc_queue[pcap->adc_head] = NULL;
        pcap->adc_head = (pcap->adc_head + 1) & (PCAP_ADC_MAXQ - 1);
-       mutex_unlock(&pcap->adc_mutex);
+       spin_unlock(&pcap->adc_lock);
 
        /* pass the results and release memory */
        req->callback(req->data, res);
@@ -301,6 +306,7 @@ int pcap_adc_async(struct pcap_chip *pcap, u8 bank, u32 flags, u8 ch[],
                                                void *callback, void *data)
 {
        struct pcap_adc_request *req;
+       unsigned long irq_flags;
 
        /* This will be freed after we have a result */
        req = kmalloc(sizeof(struct pcap_adc_request), GFP_KERNEL);
@@ -314,15 +320,15 @@ int pcap_adc_async(struct pcap_chip *pcap, u8 bank, u32 flags, u8 ch[],
        req->callback = callback;
        req->data = data;
 
-       mutex_lock(&pcap->adc_mutex);
+       spin_lock_irqsave(&pcap->adc_lock, irq_flags);
        if (pcap->adc_queue[pcap->adc_tail]) {
-               mutex_unlock(&pcap->adc_mutex);
+               spin_unlock_irqrestore(&pcap->adc_lock, irq_flags);
                kfree(req);
                return -EBUSY;
        }
        pcap->adc_queue[pcap->adc_tail] = req;
        pcap->adc_tail = (pcap->adc_tail + 1) & (PCAP_ADC_MAXQ - 1);
-       mutex_unlock(&pcap->adc_mutex);
+       spin_unlock_irqrestore(&pcap->adc_lock, irq_flags);
 
        /* start conversion */
        pcap_adc_trigger(pcap);
@@ -389,16 +395,17 @@ static int pcap_add_subdev(struct pcap_chip *pcap,
 static int ezx_pcap_remove(struct spi_device *spi)
 {
        struct pcap_chip *pcap = spi_get_drvdata(spi);
+       unsigned long flags;
        int i;
 
        /* remove all registered subdevs */
        device_for_each_child(&spi->dev, NULL, pcap_remove_subdev);
 
        /* cleanup ADC */
-       mutex_lock(&pcap->adc_mutex);
+       spin_lock_irqsave(&pcap->adc_lock, flags);
        for (i = 0; i < PCAP_ADC_MAXQ; i++)
                kfree(pcap->adc_queue[i]);
-       mutex_unlock(&pcap->adc_mutex);
+       spin_unlock_irqrestore(&pcap->adc_lock, flags);
 
        /* cleanup irqchip */
        for (i = pcap->irq_base; i < (pcap->irq_base + PCAP_NIRQS); i++)
@@ -426,8 +433,8 @@ static int ezx_pcap_probe(struct spi_device *spi)
                goto ret;
        }
 
-       mutex_init(&pcap->io_mutex);
-       mutex_init(&pcap->adc_mutex);
+       spin_lock_init(&pcap->io_lock);
+       spin_lock_init(&pcap->adc_lock);
        INIT_WORK(&pcap->isr_work, pcap_isr_work);
        INIT_WORK(&pcap->msr_work, pcap_msr_work);
        spi_set_drvdata(spi, pcap);
index 20791ca..a016b39 100644 (file)
@@ -69,10 +69,8 @@ static int mx25_tsadc_setup_irq(struct platform_device *pdev,
        int irq;
 
        irq = platform_get_irq(pdev, 0);
-       if (irq <= 0) {
-               dev_err(dev, "Failed to get irq\n");
+       if (irq <= 0)
                return irq;
-       }
 
        tsadc->domain = irq_domain_add_simple(np, 2, 0, &mx25_tsadc_domain_ops,
                                              tsadc);
index 370519a..8ad6768 100644 (file)
@@ -385,8 +385,7 @@ static void htcpld_unregister_chip_i2c(
        htcpld = platform_get_drvdata(pdev);
        chip = &htcpld->chip[chip_index];
 
-       if (chip->client)
-               i2c_unregister_device(chip->client);
+       i2c_unregister_device(chip->client);
 }
 
 static int htcpld_register_chip_gpio(
index 61ffb8b..c8fe334 100644 (file)
 
 #include "intel-lpss.h"
 
+static const struct intel_lpss_platform_info spt_info = {
+       .clk_rate = 120000000,
+};
+
 static struct property_entry spt_i2c_properties[] = {
        PROPERTY_ENTRY_U32("i2c-sda-hold-time-ns", 230),
        { },
@@ -28,6 +32,19 @@ static const struct intel_lpss_platform_info spt_i2c_info = {
        .properties = spt_i2c_properties,
 };
 
+static struct property_entry uart_properties[] = {
+       PROPERTY_ENTRY_U32("reg-io-width", 4),
+       PROPERTY_ENTRY_U32("reg-shift", 2),
+       PROPERTY_ENTRY_BOOL("snps,uart-16550-compatible"),
+       { },
+};
+
+static const struct intel_lpss_platform_info spt_uart_info = {
+       .clk_rate = 120000000,
+       .clk_con_id = "baudclk",
+       .properties = uart_properties,
+};
+
 static const struct intel_lpss_platform_info bxt_info = {
        .clk_rate = 100000000,
 };
@@ -58,8 +75,17 @@ static const struct intel_lpss_platform_info apl_i2c_info = {
 
 static const struct acpi_device_id intel_lpss_acpi_ids[] = {
        /* SPT */
+       { "INT3440", (kernel_ulong_t)&spt_info },
+       { "INT3441", (kernel_ulong_t)&spt_info },
+       { "INT3442", (kernel_ulong_t)&spt_i2c_info },
+       { "INT3443", (kernel_ulong_t)&spt_i2c_info },
+       { "INT3444", (kernel_ulong_t)&spt_i2c_info },
+       { "INT3445", (kernel_ulong_t)&spt_i2c_info },
        { "INT3446", (kernel_ulong_t)&spt_i2c_info },
        { "INT3447", (kernel_ulong_t)&spt_i2c_info },
+       { "INT3448", (kernel_ulong_t)&spt_uart_info },
+       { "INT3449", (kernel_ulong_t)&spt_uart_info },
+       { "INT344A", (kernel_ulong_t)&spt_uart_info },
        /* BXT */
        { "80860AAC", (kernel_ulong_t)&bxt_i2c_info },
        { "80860ABC", (kernel_ulong_t)&bxt_info },
index ade6e1c..9355db2 100644 (file)
@@ -35,6 +35,8 @@ static int intel_lpss_pci_probe(struct pci_dev *pdev,
        info->mem = &pdev->resource[0];
        info->irq = pdev->irq;
 
+       pdev->d3cold_delay = 0;
+
        /* Probably it is enough to set this for iDMA capable devices only */
        pci_set_master(pdev);
        pci_try_set_mwi(pdev);
@@ -256,6 +258,29 @@ static const struct pci_device_id intel_lpss_pci_ids[] = {
        { PCI_VDEVICE(INTEL, 0x9dea), (kernel_ulong_t)&cnl_i2c_info },
        { PCI_VDEVICE(INTEL, 0x9deb), (kernel_ulong_t)&cnl_i2c_info },
        { PCI_VDEVICE(INTEL, 0x9dfb), (kernel_ulong_t)&spt_info },
+       /* TGL-LP */
+       { PCI_VDEVICE(INTEL, 0xa0a8), (kernel_ulong_t)&bxt_uart_info },
+       { PCI_VDEVICE(INTEL, 0xa0a9), (kernel_ulong_t)&bxt_uart_info },
+       { PCI_VDEVICE(INTEL, 0xa0aa), (kernel_ulong_t)&spt_info },
+       { PCI_VDEVICE(INTEL, 0xa0ab), (kernel_ulong_t)&spt_info },
+       { PCI_VDEVICE(INTEL, 0xa0c5), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa0c6), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa0c7), (kernel_ulong_t)&bxt_uart_info },
+       { PCI_VDEVICE(INTEL, 0xa0d8), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa0d9), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa0da), (kernel_ulong_t)&bxt_uart_info },
+       { PCI_VDEVICE(INTEL, 0xa0db), (kernel_ulong_t)&bxt_uart_info },
+       { PCI_VDEVICE(INTEL, 0xa0dc), (kernel_ulong_t)&bxt_uart_info },
+       { PCI_VDEVICE(INTEL, 0xa0dd), (kernel_ulong_t)&bxt_uart_info },
+       { PCI_VDEVICE(INTEL, 0xa0de), (kernel_ulong_t)&spt_info },
+       { PCI_VDEVICE(INTEL, 0xa0df), (kernel_ulong_t)&spt_info },
+       { PCI_VDEVICE(INTEL, 0xa0e8), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa0e9), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa0ea), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa0eb), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa0fb), (kernel_ulong_t)&spt_info },
+       { PCI_VDEVICE(INTEL, 0xa0fd), (kernel_ulong_t)&spt_info },
+       { PCI_VDEVICE(INTEL, 0xa0fe), (kernel_ulong_t)&spt_info },
        /* SPT-H */
        { PCI_VDEVICE(INTEL, 0xa127), (kernel_ulong_t)&spt_uart_info },
        { PCI_VDEVICE(INTEL, 0xa128), (kernel_ulong_t)&spt_uart_info },
index 277f48f..bfe4ff3 100644 (file)
 #define LPSS_PRIV_IDLELTR              0x14
 
 #define LPSS_PRIV_LTR_REQ              BIT(15)
-#define LPSS_PRIV_LTR_SCALE_MASK       0xc00
-#define LPSS_PRIV_LTR_SCALE_1US                0x800
-#define LPSS_PRIV_LTR_SCALE_32US       0xc00
-#define LPSS_PRIV_LTR_VALUE_MASK       0x3ff
+#define LPSS_PRIV_LTR_SCALE_MASK       GENMASK(11, 10)
+#define LPSS_PRIV_LTR_SCALE_1US                (2 << 10)
+#define LPSS_PRIV_LTR_SCALE_32US       (3 << 10)
+#define LPSS_PRIV_LTR_VALUE_MASK       GENMASK(9, 0)
 
 #define LPSS_PRIV_SSP_REG              0x20
 #define LPSS_PRIV_SSP_REG_DIS_DMA_FIN  BIT(0)
@@ -59,8 +59,8 @@
 
 #define LPSS_PRIV_CAPS                 0xfc
 #define LPSS_PRIV_CAPS_NO_IDMA         BIT(8)
+#define LPSS_PRIV_CAPS_TYPE_MASK       GENMASK(7, 4)
 #define LPSS_PRIV_CAPS_TYPE_SHIFT      4
-#define LPSS_PRIV_CAPS_TYPE_MASK       (0xf << LPSS_PRIV_CAPS_TYPE_SHIFT)
 
 /* This matches the type field in CAPS register */
 enum intel_lpss_dev_type {
@@ -128,17 +128,6 @@ static const struct mfd_cell intel_lpss_spi_cell = {
 static DEFINE_IDA(intel_lpss_devid_ida);
 static struct dentry *intel_lpss_debugfs;
 
-static int intel_lpss_request_dma_module(const char *name)
-{
-       static bool intel_lpss_dma_requested;
-
-       if (intel_lpss_dma_requested)
-               return 0;
-
-       intel_lpss_dma_requested = true;
-       return request_module("%s", name);
-}
-
 static void intel_lpss_cache_ltr(struct intel_lpss *lpss)
 {
        lpss->active_ltr = readl(lpss->priv + LPSS_PRIV_ACTIVELTR);
@@ -429,16 +418,6 @@ int intel_lpss_probe(struct device *dev,
                dev_warn(dev, "Failed to create debugfs entries\n");
 
        if (intel_lpss_has_idma(lpss)) {
-               /*
-                * Ensure the DMA driver is loaded before the host
-                * controller device appears, so that the host controller
-                * driver can request its DMA channels as early as
-                * possible.
-                *
-                * If the DMA module is not there that's OK as well.
-                */
-               intel_lpss_request_dma_module(LPSS_IDMA64_DRIVER_NAME);
-
                ret = mfd_add_devices(dev, lpss->devid, &intel_lpss_idma64_cell,
                                      1, info->mem, info->irq, NULL);
                if (ret)
@@ -554,3 +533,11 @@ MODULE_AUTHOR("Heikki Krogerus <heikki.krogerus@linux.intel.com>");
 MODULE_AUTHOR("Jarkko Nikula <jarkko.nikula@linux.intel.com>");
 MODULE_DESCRIPTION("Intel LPSS core driver");
 MODULE_LICENSE("GPL v2");
+/*
+ * Ensure the DMA driver is loaded before the host controller device appears,
+ * so that the host controller driver can request its DMA channels as early
+ * as possible.
+ *
+ * If the DMA module is not there that's OK as well.
+ */
+MODULE_SOFTDEP("pre: platform:" LPSS_IDMA64_DRIVER_NAME);
index 6310c3b..739cfb5 100644 (file)
@@ -450,10 +450,8 @@ static int bxtwc_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        ret = platform_get_irq(pdev, 0);
-       if (ret < 0) {
-               dev_err(&pdev->dev, "Invalid IRQ\n");
+       if (ret < 0)
                return ret;
-       }
        pmic->irq = ret;
 
        dev_set_drvdata(&pdev->dev, pmic);
diff --git a/drivers/mfd/intel_soc_pmic_mrfld.c b/drivers/mfd/intel_soc_pmic_mrfld.c
new file mode 100644 (file)
index 0000000..26a1551
--- /dev/null
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device access for Basin Cove PMIC
+ *
+ * Copyright (c) 2019, Intel Corporation.
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ */
+
+#include <linux/acpi.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/intel_soc_pmic.h>
+#include <linux/mfd/intel_soc_pmic_mrfld.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include <asm/intel_scu_ipc.h>
+
+/*
+ * Level 2 IRQs
+ *
+ * Firmware on the systems with Basin Cove PMIC services Level 1 IRQs
+ * without assistance. Thus, each of the Level 1 IRQs is represented
+ * as a separate RTE in IOAPIC.
+ */
+static struct resource irq_level2_resources[] = {
+       DEFINE_RES_IRQ(0), /* power button */
+       DEFINE_RES_IRQ(0), /* TMU */
+       DEFINE_RES_IRQ(0), /* thermal */
+       DEFINE_RES_IRQ(0), /* BCU */
+       DEFINE_RES_IRQ(0), /* ADC */
+       DEFINE_RES_IRQ(0), /* charger */
+       DEFINE_RES_IRQ(0), /* GPIO */
+};
+
+static const struct mfd_cell bcove_dev[] = {
+       {
+               .name = "mrfld_bcove_pwrbtn",
+               .num_resources = 1,
+               .resources = &irq_level2_resources[0],
+       }, {
+               .name = "mrfld_bcove_tmu",
+               .num_resources = 1,
+               .resources = &irq_level2_resources[1],
+       }, {
+               .name = "mrfld_bcove_thermal",
+               .num_resources = 1,
+               .resources = &irq_level2_resources[2],
+       }, {
+               .name = "mrfld_bcove_bcu",
+               .num_resources = 1,
+               .resources = &irq_level2_resources[3],
+       }, {
+               .name = "mrfld_bcove_adc",
+               .num_resources = 1,
+               .resources = &irq_level2_resources[4],
+       }, {
+               .name = "mrfld_bcove_charger",
+               .num_resources = 1,
+               .resources = &irq_level2_resources[5],
+       }, {
+               .name = "mrfld_bcove_pwrsrc",
+               .num_resources = 1,
+               .resources = &irq_level2_resources[5],
+       }, {
+               .name = "mrfld_bcove_gpio",
+               .num_resources = 1,
+               .resources = &irq_level2_resources[6],
+       },
+       {       .name = "mrfld_bcove_region", },
+};
+
+static int bcove_ipc_byte_reg_read(void *context, unsigned int reg,
+                                   unsigned int *val)
+{
+       u8 ipc_out;
+       int ret;
+
+       ret = intel_scu_ipc_ioread8(reg, &ipc_out);
+       if (ret)
+               return ret;
+
+       *val = ipc_out;
+       return 0;
+}
+
+static int bcove_ipc_byte_reg_write(void *context, unsigned int reg,
+                                    unsigned int val)
+{
+       u8 ipc_in = val;
+       int ret;
+
+       ret = intel_scu_ipc_iowrite8(reg, ipc_in);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static const struct regmap_config bcove_regmap_config = {
+       .reg_bits = 16,
+       .val_bits = 8,
+       .max_register = 0xff,
+       .reg_write = bcove_ipc_byte_reg_write,
+       .reg_read = bcove_ipc_byte_reg_read,
+};
+
+static int bcove_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct intel_soc_pmic *pmic;
+       unsigned int i;
+       int ret;
+
+       pmic = devm_kzalloc(dev, sizeof(*pmic), GFP_KERNEL);
+       if (!pmic)
+               return -ENOMEM;
+
+       platform_set_drvdata(pdev, pmic);
+       pmic->dev = &pdev->dev;
+
+       pmic->regmap = devm_regmap_init(dev, NULL, pmic, &bcove_regmap_config);
+       if (IS_ERR(pmic->regmap))
+               return PTR_ERR(pmic->regmap);
+
+       for (i = 0; i < ARRAY_SIZE(irq_level2_resources); i++) {
+               ret = platform_get_irq(pdev, i);
+               if (ret < 0)
+                       return ret;
+
+               irq_level2_resources[i].start = ret;
+               irq_level2_resources[i].end = ret;
+       }
+
+       return devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE,
+                                   bcove_dev, ARRAY_SIZE(bcove_dev),
+                                   NULL, 0, NULL);
+}
+
+static const struct acpi_device_id bcove_acpi_ids[] = {
+       { "INTC100E" },
+       {}
+};
+MODULE_DEVICE_TABLE(acpi, bcove_acpi_ids);
+
+static struct platform_driver bcove_driver = {
+       .driver = {
+               .name = "intel_soc_pmic_mrfld",
+               .acpi_match_table = bcove_acpi_ids,
+       },
+       .probe = bcove_probe,
+};
+module_platform_driver(bcove_driver);
+
+MODULE_DESCRIPTION("IPC driver for Intel SoC Basin Cove PMIC");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mfd/jz4740-adc.c b/drivers/mfd/jz4740-adc.c
deleted file mode 100644 (file)
index 082f169..0000000
+++ /dev/null
@@ -1,324 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
- * JZ4740 SoC ADC driver
- *
- * This driver synchronizes access to the JZ4740 ADC core between the
- * JZ4740 battery and hwmon drivers.
- */
-
-#include <linux/err.h>
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-
-#include <linux/clk.h>
-#include <linux/mfd/core.h>
-
-#include <linux/jz4740-adc.h>
-
-
-#define JZ_REG_ADC_ENABLE      0x00
-#define JZ_REG_ADC_CFG         0x04
-#define JZ_REG_ADC_CTRL                0x08
-#define JZ_REG_ADC_STATUS      0x0c
-
-#define JZ_REG_ADC_TOUCHSCREEN_BASE    0x10
-#define JZ_REG_ADC_BATTERY_BASE        0x1c
-#define JZ_REG_ADC_HWMON_BASE  0x20
-
-#define JZ_ADC_ENABLE_TOUCH    BIT(2)
-#define JZ_ADC_ENABLE_BATTERY  BIT(1)
-#define JZ_ADC_ENABLE_ADCIN    BIT(0)
-
-enum {
-       JZ_ADC_IRQ_ADCIN = 0,
-       JZ_ADC_IRQ_BATTERY,
-       JZ_ADC_IRQ_TOUCH,
-       JZ_ADC_IRQ_PENUP,
-       JZ_ADC_IRQ_PENDOWN,
-};
-
-struct jz4740_adc {
-       struct resource *mem;
-       void __iomem *base;
-
-       int irq;
-       struct irq_chip_generic *gc;
-
-       struct clk *clk;
-       atomic_t clk_ref;
-
-       spinlock_t lock;
-};
-
-static void jz4740_adc_irq_demux(struct irq_desc *desc)
-{
-       struct irq_chip_generic *gc = irq_desc_get_handler_data(desc);
-       uint8_t status;
-       unsigned int i;
-
-       status = readb(gc->reg_base + JZ_REG_ADC_STATUS);
-
-       for (i = 0; i < 5; ++i) {
-               if (status & BIT(i))
-                       generic_handle_irq(gc->irq_base + i);
-       }
-}
-
-
-/* Refcounting for the ADC clock is done in here instead of in the clock
- * framework, because it is the only clock which is shared between multiple
- * devices and thus is the only clock which needs refcounting */
-static inline void jz4740_adc_clk_enable(struct jz4740_adc *adc)
-{
-       if (atomic_inc_return(&adc->clk_ref) == 1)
-               clk_prepare_enable(adc->clk);
-}
-
-static inline void jz4740_adc_clk_disable(struct jz4740_adc *adc)
-{
-       if (atomic_dec_return(&adc->clk_ref) == 0)
-               clk_disable_unprepare(adc->clk);
-}
-
-static inline void jz4740_adc_set_enabled(struct jz4740_adc *adc, int engine,
-       bool enabled)
-{
-       unsigned long flags;
-       uint8_t val;
-
-       spin_lock_irqsave(&adc->lock, flags);
-
-       val = readb(adc->base + JZ_REG_ADC_ENABLE);
-       if (enabled)
-               val |= BIT(engine);
-       else
-               val &= ~BIT(engine);
-       writeb(val, adc->base + JZ_REG_ADC_ENABLE);
-
-       spin_unlock_irqrestore(&adc->lock, flags);
-}
-
-static int jz4740_adc_cell_enable(struct platform_device *pdev)
-{
-       struct jz4740_adc *adc = dev_get_drvdata(pdev->dev.parent);
-
-       jz4740_adc_clk_enable(adc);
-       jz4740_adc_set_enabled(adc, pdev->id, true);
-
-       return 0;
-}
-
-static int jz4740_adc_cell_disable(struct platform_device *pdev)
-{
-       struct jz4740_adc *adc = dev_get_drvdata(pdev->dev.parent);
-
-       jz4740_adc_set_enabled(adc, pdev->id, false);
-       jz4740_adc_clk_disable(adc);
-
-       return 0;
-}
-
-int jz4740_adc_set_config(struct device *dev, uint32_t mask, uint32_t val)
-{
-       struct jz4740_adc *adc = dev_get_drvdata(dev);
-       unsigned long flags;
-       uint32_t cfg;
-
-       if (!adc)
-               return -ENODEV;
-
-       spin_lock_irqsave(&adc->lock, flags);
-
-       cfg = readl(adc->base + JZ_REG_ADC_CFG);
-
-       cfg &= ~mask;
-       cfg |= val;
-
-       writel(cfg, adc->base + JZ_REG_ADC_CFG);
-
-       spin_unlock_irqrestore(&adc->lock, flags);
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(jz4740_adc_set_config);
-
-static struct resource jz4740_hwmon_resources[] = {
-       {
-               .start = JZ_ADC_IRQ_ADCIN,
-               .flags = IORESOURCE_IRQ,
-       },
-       {
-               .start  = JZ_REG_ADC_HWMON_BASE,
-               .end    = JZ_REG_ADC_HWMON_BASE + 3,
-               .flags  = IORESOURCE_MEM,
-       },
-};
-
-static struct resource jz4740_battery_resources[] = {
-       {
-               .start = JZ_ADC_IRQ_BATTERY,
-               .flags = IORESOURCE_IRQ,
-       },
-       {
-               .start  = JZ_REG_ADC_BATTERY_BASE,
-               .end    = JZ_REG_ADC_BATTERY_BASE + 3,
-               .flags  = IORESOURCE_MEM,
-       },
-};
-
-static const struct mfd_cell jz4740_adc_cells[] = {
-       {
-               .id = 0,
-               .name = "jz4740-hwmon",
-               .num_resources = ARRAY_SIZE(jz4740_hwmon_resources),
-               .resources = jz4740_hwmon_resources,
-
-               .enable = jz4740_adc_cell_enable,
-               .disable = jz4740_adc_cell_disable,
-       },
-       {
-               .id = 1,
-               .name = "jz4740-battery",
-               .num_resources = ARRAY_SIZE(jz4740_battery_resources),
-               .resources = jz4740_battery_resources,
-
-               .enable = jz4740_adc_cell_enable,
-               .disable = jz4740_adc_cell_disable,
-       },
-};
-
-static int jz4740_adc_probe(struct platform_device *pdev)
-{
-       struct irq_chip_generic *gc;
-       struct irq_chip_type *ct;
-       struct jz4740_adc *adc;
-       struct resource *mem_base;
-       int ret;
-       int irq_base;
-
-       adc = devm_kzalloc(&pdev->dev, sizeof(*adc), GFP_KERNEL);
-       if (!adc)
-               return -ENOMEM;
-
-       adc->irq = platform_get_irq(pdev, 0);
-       if (adc->irq < 0) {
-               ret = adc->irq;
-               dev_err(&pdev->dev, "Failed to get platform irq: %d\n", ret);
-               return ret;
-       }
-
-       irq_base = platform_get_irq(pdev, 1);
-       if (irq_base < 0) {
-               dev_err(&pdev->dev, "Failed to get irq base: %d\n", irq_base);
-               return irq_base;
-       }
-
-       mem_base = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!mem_base) {
-               dev_err(&pdev->dev, "Failed to get platform mmio resource\n");
-               return -ENOENT;
-       }
-
-       /* Only request the shared registers for the MFD driver */
-       adc->mem = request_mem_region(mem_base->start, JZ_REG_ADC_STATUS,
-                                       pdev->name);
-       if (!adc->mem) {
-               dev_err(&pdev->dev, "Failed to request mmio memory region\n");
-               return -EBUSY;
-       }
-
-       adc->base = ioremap_nocache(adc->mem->start, resource_size(adc->mem));
-       if (!adc->base) {
-               ret = -EBUSY;
-               dev_err(&pdev->dev, "Failed to ioremap mmio memory\n");
-               goto err_release_mem_region;
-       }
-
-       adc->clk = clk_get(&pdev->dev, "adc");
-       if (IS_ERR(adc->clk)) {
-               ret = PTR_ERR(adc->clk);
-               dev_err(&pdev->dev, "Failed to get clock: %d\n", ret);
-               goto err_iounmap;
-       }
-
-       spin_lock_init(&adc->lock);
-       atomic_set(&adc->clk_ref, 0);
-
-       platform_set_drvdata(pdev, adc);
-
-       gc = irq_alloc_generic_chip("INTC", 1, irq_base, adc->base,
-               handle_level_irq);
-
-       ct = gc->chip_types;
-       ct->regs.mask = JZ_REG_ADC_CTRL;
-       ct->regs.ack = JZ_REG_ADC_STATUS;
-       ct->chip.irq_mask = irq_gc_mask_set_bit;
-       ct->chip.irq_unmask = irq_gc_mask_clr_bit;
-       ct->chip.irq_ack = irq_gc_ack_set_bit;
-
-       irq_setup_generic_chip(gc, IRQ_MSK(5), IRQ_GC_INIT_MASK_CACHE, 0,
-                               IRQ_NOPROBE | IRQ_LEVEL);
-
-       adc->gc = gc;
-
-       irq_set_chained_handler_and_data(adc->irq, jz4740_adc_irq_demux, gc);
-
-       writeb(0x00, adc->base + JZ_REG_ADC_ENABLE);
-       writeb(0xff, adc->base + JZ_REG_ADC_CTRL);
-
-       ret = mfd_add_devices(&pdev->dev, 0, jz4740_adc_cells,
-                             ARRAY_SIZE(jz4740_adc_cells), mem_base,
-                             irq_base, NULL);
-       if (ret < 0)
-               goto err_clk_put;
-
-       return 0;
-
-err_clk_put:
-       clk_put(adc->clk);
-err_iounmap:
-       iounmap(adc->base);
-err_release_mem_region:
-       release_mem_region(adc->mem->start, resource_size(adc->mem));
-       return ret;
-}
-
-static int jz4740_adc_remove(struct platform_device *pdev)
-{
-       struct jz4740_adc *adc = platform_get_drvdata(pdev);
-
-       mfd_remove_devices(&pdev->dev);
-
-       irq_remove_generic_chip(adc->gc, IRQ_MSK(5), IRQ_NOPROBE | IRQ_LEVEL, 0);
-       kfree(adc->gc);
-       irq_set_chained_handler_and_data(adc->irq, NULL, NULL);
-
-       iounmap(adc->base);
-       release_mem_region(adc->mem->start, resource_size(adc->mem));
-
-       clk_put(adc->clk);
-
-       return 0;
-}
-
-static struct platform_driver jz4740_adc_driver = {
-       .probe  = jz4740_adc_probe,
-       .remove = jz4740_adc_remove,
-       .driver = {
-               .name = "jz4740-adc",
-       },
-};
-
-module_platform_driver(jz4740_adc_driver);
-
-MODULE_DESCRIPTION("JZ4740 SoC ADC driver");
-MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:jz4740-adc");
index ebb13d5..fd8864c 100644 (file)
@@ -297,11 +297,11 @@ static int max77836_init(struct max14577 *max14577)
        int ret;
        u8 intsrc_mask;
 
-       max14577->i2c_pmic = i2c_new_dummy(max14577->i2c->adapter,
+       max14577->i2c_pmic = i2c_new_dummy_device(max14577->i2c->adapter,
                        I2C_ADDR_PMIC);
-       if (!max14577->i2c_pmic) {
+       if (IS_ERR(max14577->i2c_pmic)) {
                dev_err(max14577->dev, "Failed to register PMIC I2C device\n");
-               return -ENODEV;
+               return PTR_ERR(max14577->i2c_pmic);
        }
        i2c_set_clientdata(max14577->i2c_pmic, max14577);
 
index 0c28965..a851ff4 100644 (file)
@@ -416,8 +416,10 @@ static int max77620_initialise_fps(struct max77620_chip *chip)
 
        for_each_child_of_node(fps_np, fps_child) {
                ret = max77620_config_fps(chip, fps_child);
-               if (ret < 0)
+               if (ret < 0) {
+                       of_node_put(fps_child);
                        return ret;
+               }
        }
 
        config = chip->enable_global_lpm ? MAX77620_ONOFFCNFG2_SLP_LPM_MSK : 0;
index 901d99d..596ed85 100644 (file)
@@ -183,17 +183,17 @@ static int max77693_i2c_probe(struct i2c_client *i2c,
        } else
                dev_info(max77693->dev, "device ID: 0x%x\n", reg_data);
 
-       max77693->i2c_muic = i2c_new_dummy(i2c->adapter, I2C_ADDR_MUIC);
-       if (!max77693->i2c_muic) {
+       max77693->i2c_muic = i2c_new_dummy_device(i2c->adapter, I2C_ADDR_MUIC);
+       if (IS_ERR(max77693->i2c_muic)) {
                dev_err(max77693->dev, "Failed to allocate I2C device for MUIC\n");
-               return -ENODEV;
+               return PTR_ERR(max77693->i2c_muic);
        }
        i2c_set_clientdata(max77693->i2c_muic, max77693);
 
-       max77693->i2c_haptic = i2c_new_dummy(i2c->adapter, I2C_ADDR_HAPTIC);
-       if (!max77693->i2c_haptic) {
+       max77693->i2c_haptic = i2c_new_dummy_device(i2c->adapter, I2C_ADDR_HAPTIC);
+       if (IS_ERR(max77693->i2c_haptic)) {
                dev_err(max77693->dev, "Failed to allocate I2C device for Haptic\n");
-               ret = -ENODEV;
+               ret = PTR_ERR(max77693->i2c_haptic);
                goto err_i2c_haptic;
        }
        i2c_set_clientdata(max77693->i2c_haptic, max77693);
index 25cbb22..209ee24 100644 (file)
@@ -70,11 +70,11 @@ static int max77843_chg_init(struct max77693_dev *max77843)
 {
        int ret;
 
-       max77843->i2c_chg = i2c_new_dummy(max77843->i2c->adapter, I2C_ADDR_CHG);
-       if (!max77843->i2c_chg) {
+       max77843->i2c_chg = i2c_new_dummy_device(max77843->i2c->adapter, I2C_ADDR_CHG);
+       if (IS_ERR(max77843->i2c_chg)) {
                dev_err(&max77843->i2c->dev,
                                "Cannot allocate I2C device for Charger\n");
-               return -ENODEV;
+               return PTR_ERR(max77843->i2c_chg);
        }
        i2c_set_clientdata(max77843->i2c_chg, max77843);
 
index cc01f70..d44baaf 100644 (file)
@@ -214,9 +214,9 @@ static int max8907_i2c_probe(struct i2c_client *i2c,
                goto err_regmap_gen;
        }
 
-       max8907->i2c_rtc = i2c_new_dummy(i2c->adapter, MAX8907_RTC_I2C_ADDR);
-       if (!max8907->i2c_rtc) {
-               ret = -ENOMEM;
+       max8907->i2c_rtc = i2c_new_dummy_device(i2c->adapter, MAX8907_RTC_I2C_ADDR);
+       if (IS_ERR(max8907->i2c_rtc)) {
+               ret = PTR_ERR(max8907->i2c_rtc);
                goto err_dummy_rtc;
        }
        i2c_set_clientdata(max8907->i2c_rtc, max8907);
index 20bb19b..114e905 100644 (file)
@@ -176,18 +176,18 @@ static int max8925_probe(struct i2c_client *client,
        dev_set_drvdata(chip->dev, chip);
        mutex_init(&chip->io_lock);
 
-       chip->rtc = i2c_new_dummy(chip->i2c->adapter, RTC_I2C_ADDR);
-       if (!chip->rtc) {
+       chip->rtc = i2c_new_dummy_device(chip->i2c->adapter, RTC_I2C_ADDR);
+       if (IS_ERR(chip->rtc)) {
                dev_err(chip->dev, "Failed to allocate I2C device for RTC\n");
-               return -ENODEV;
+               return PTR_ERR(chip->rtc);
        }
        i2c_set_clientdata(chip->rtc, chip);
 
-       chip->adc = i2c_new_dummy(chip->i2c->adapter, ADC_I2C_ADDR);
-       if (!chip->adc) {
+       chip->adc = i2c_new_dummy_device(chip->i2c->adapter, ADC_I2C_ADDR);
+       if (IS_ERR(chip->adc)) {
                dev_err(chip->dev, "Failed to allocate I2C device for ADC\n");
                i2c_unregister_device(chip->rtc);
-               return -ENODEV;
+               return PTR_ERR(chip->adc);
        }
        i2c_set_clientdata(chip->adc, chip);
 
index 8c06c09..68d8f2b 100644 (file)
@@ -185,25 +185,25 @@ static int max8997_i2c_probe(struct i2c_client *i2c,
 
        mutex_init(&max8997->iolock);
 
-       max8997->rtc = i2c_new_dummy(i2c->adapter, I2C_ADDR_RTC);
-       if (!max8997->rtc) {
+       max8997->rtc = i2c_new_dummy_device(i2c->adapter, I2C_ADDR_RTC);
+       if (IS_ERR(max8997->rtc)) {
                dev_err(max8997->dev, "Failed to allocate I2C device for RTC\n");
-               return -ENODEV;
+               return PTR_ERR(max8997->rtc);
        }
        i2c_set_clientdata(max8997->rtc, max8997);
 
-       max8997->haptic = i2c_new_dummy(i2c->adapter, I2C_ADDR_HAPTIC);
-       if (!max8997->haptic) {
+       max8997->haptic = i2c_new_dummy_device(i2c->adapter, I2C_ADDR_HAPTIC);
+       if (IS_ERR(max8997->haptic)) {
                dev_err(max8997->dev, "Failed to allocate I2C device for Haptic\n");
-               ret = -ENODEV;
+               ret = PTR_ERR(max8997->haptic);
                goto err_i2c_haptic;
        }
        i2c_set_clientdata(max8997->haptic, max8997);
 
-       max8997->muic = i2c_new_dummy(i2c->adapter, I2C_ADDR_MUIC);
-       if (!max8997->muic) {
+       max8997->muic = i2c_new_dummy_device(i2c->adapter, I2C_ADDR_MUIC);
+       if (IS_ERR(max8997->muic)) {
                dev_err(max8997->dev, "Failed to allocate I2C device for MUIC\n");
-               ret = -ENODEV;
+               ret = PTR_ERR(max8997->muic);
                goto err_i2c_muic;
        }
        i2c_set_clientdata(max8997->muic, max8997);
index 56409df..785f8e9 100644 (file)
@@ -195,10 +195,10 @@ static int max8998_i2c_probe(struct i2c_client *i2c,
        }
        mutex_init(&max8998->iolock);
 
-       max8998->rtc = i2c_new_dummy(i2c->adapter, RTC_I2C_ADDR);
-       if (!max8998->rtc) {
+       max8998->rtc = i2c_new_dummy_device(i2c->adapter, RTC_I2C_ADDR);
+       if (IS_ERR(max8998->rtc)) {
                dev_err(&i2c->dev, "Failed to allocate I2C device for RTC\n");
-               return -ENODEV;
+               return PTR_ERR(max8998->rtc);
        }
        i2c_set_clientdata(max8998->rtc, max8998);
 
index 337bccc..310dae2 100644 (file)
@@ -5,34 +5,34 @@
  */
 
 #include <linux/interrupt.h>
+#include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/regmap.h>
 #include <linux/mfd/core.h>
-#include <linux/mfd/mt6397/core.h>
 #include <linux/mfd/mt6323/core.h>
-#include <linux/mfd/mt6397/registers.h>
+#include <linux/mfd/mt6397/core.h>
 #include <linux/mfd/mt6323/registers.h>
+#include <linux/mfd/mt6397/registers.h>
+
+#define MT6323_RTC_BASE                0x8000
+#define MT6323_RTC_SIZE                0x40
 
 #define MT6397_RTC_BASE                0xe000
 #define MT6397_RTC_SIZE                0x3e
 
-#define MT6323_CID_CODE                0x23
-#define MT6391_CID_CODE                0x91
-#define MT6397_CID_CODE                0x97
+#define MT6323_PWRC_BASE       0x8000
+#define MT6323_PWRC_SIZE       0x40
+
+static const struct resource mt6323_rtc_resources[] = {
+       DEFINE_RES_MEM(MT6323_RTC_BASE, MT6323_RTC_SIZE),
+       DEFINE_RES_IRQ(MT6323_IRQ_STATUS_RTC),
+};
 
 static const struct resource mt6397_rtc_resources[] = {
-       {
-               .start = MT6397_RTC_BASE,
-               .end   = MT6397_RTC_BASE + MT6397_RTC_SIZE,
-               .flags = IORESOURCE_MEM,
-       },
-       {
-               .start = MT6397_IRQ_RTC,
-               .end   = MT6397_IRQ_RTC,
-               .flags = IORESOURCE_IRQ,
-       },
+       DEFINE_RES_MEM(MT6397_RTC_BASE, MT6397_RTC_SIZE),
+       DEFINE_RES_IRQ(MT6397_IRQ_RTC),
 };
 
 static const struct resource mt6323_keys_resources[] = {
@@ -45,8 +45,17 @@ static const struct resource mt6397_keys_resources[] = {
        DEFINE_RES_IRQ(MT6397_IRQ_HOMEKEY),
 };
 
+static const struct resource mt6323_pwrc_resources[] = {
+       DEFINE_RES_MEM(MT6323_PWRC_BASE, MT6323_PWRC_SIZE),
+};
+
 static const struct mfd_cell mt6323_devs[] = {
        {
+               .name = "mt6323-rtc",
+               .num_resources = ARRAY_SIZE(mt6323_rtc_resources),
+               .resources = mt6323_rtc_resources,
+               .of_compatible = "mediatek,mt6323-rtc",
+       }, {
                .name = "mt6323-regulator",
                .of_compatible = "mediatek,mt6323-regulator"
        }, {
@@ -57,6 +66,11 @@ static const struct mfd_cell mt6323_devs[] = {
                .num_resources = ARRAY_SIZE(mt6323_keys_resources),
                .resources = mt6323_keys_resources,
                .of_compatible = "mediatek,mt6323-keys"
+       }, {
+               .name = "mt6323-pwrc",
+               .num_resources = ARRAY_SIZE(mt6323_pwrc_resources),
+               .resources = mt6323_pwrc_resources,
+               .of_compatible = "mediatek,mt6323-pwrc"
        },
 };
 
@@ -86,148 +100,6 @@ static const struct mfd_cell mt6397_devs[] = {
        }
 };
 
-static void mt6397_irq_lock(struct irq_data *data)
-{
-       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
-
-       mutex_lock(&mt6397->irqlock);
-}
-
-static void mt6397_irq_sync_unlock(struct irq_data *data)
-{
-       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
-
-       regmap_write(mt6397->regmap, mt6397->int_con[0],
-                    mt6397->irq_masks_cur[0]);
-       regmap_write(mt6397->regmap, mt6397->int_con[1],
-                    mt6397->irq_masks_cur[1]);
-
-       mutex_unlock(&mt6397->irqlock);
-}
-
-static void mt6397_irq_disable(struct irq_data *data)
-{
-       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
-       int shift = data->hwirq & 0xf;
-       int reg = data->hwirq >> 4;
-
-       mt6397->irq_masks_cur[reg] &= ~BIT(shift);
-}
-
-static void mt6397_irq_enable(struct irq_data *data)
-{
-       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
-       int shift = data->hwirq & 0xf;
-       int reg = data->hwirq >> 4;
-
-       mt6397->irq_masks_cur[reg] |= BIT(shift);
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int mt6397_irq_set_wake(struct irq_data *irq_data, unsigned int on)
-{
-       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(irq_data);
-       int shift = irq_data->hwirq & 0xf;
-       int reg = irq_data->hwirq >> 4;
-
-       if (on)
-               mt6397->wake_mask[reg] |= BIT(shift);
-       else
-               mt6397->wake_mask[reg] &= ~BIT(shift);
-
-       return 0;
-}
-#else
-#define mt6397_irq_set_wake NULL
-#endif
-
-static struct irq_chip mt6397_irq_chip = {
-       .name = "mt6397-irq",
-       .irq_bus_lock = mt6397_irq_lock,
-       .irq_bus_sync_unlock = mt6397_irq_sync_unlock,
-       .irq_enable = mt6397_irq_enable,
-       .irq_disable = mt6397_irq_disable,
-       .irq_set_wake = mt6397_irq_set_wake,
-};
-
-static void mt6397_irq_handle_reg(struct mt6397_chip *mt6397, int reg,
-               int irqbase)
-{
-       unsigned int status;
-       int i, irq, ret;
-
-       ret = regmap_read(mt6397->regmap, reg, &status);
-       if (ret) {
-               dev_err(mt6397->dev, "Failed to read irq status: %d\n", ret);
-               return;
-       }
-
-       for (i = 0; i < 16; i++) {
-               if (status & BIT(i)) {
-                       irq = irq_find_mapping(mt6397->irq_domain, irqbase + i);
-                       if (irq)
-                               handle_nested_irq(irq);
-               }
-       }
-
-       regmap_write(mt6397->regmap, reg, status);
-}
-
-static irqreturn_t mt6397_irq_thread(int irq, void *data)
-{
-       struct mt6397_chip *mt6397 = data;
-
-       mt6397_irq_handle_reg(mt6397, mt6397->int_status[0], 0);
-       mt6397_irq_handle_reg(mt6397, mt6397->int_status[1], 16);
-
-       return IRQ_HANDLED;
-}
-
-static int mt6397_irq_domain_map(struct irq_domain *d, unsigned int irq,
-                                       irq_hw_number_t hw)
-{
-       struct mt6397_chip *mt6397 = d->host_data;
-
-       irq_set_chip_data(irq, mt6397);
-       irq_set_chip_and_handler(irq, &mt6397_irq_chip, handle_level_irq);
-       irq_set_nested_thread(irq, 1);
-       irq_set_noprobe(irq);
-
-       return 0;
-}
-
-static const struct irq_domain_ops mt6397_irq_domain_ops = {
-       .map = mt6397_irq_domain_map,
-};
-
-static int mt6397_irq_init(struct mt6397_chip *mt6397)
-{
-       int ret;
-
-       mutex_init(&mt6397->irqlock);
-
-       /* Mask all interrupt sources */
-       regmap_write(mt6397->regmap, mt6397->int_con[0], 0x0);
-       regmap_write(mt6397->regmap, mt6397->int_con[1], 0x0);
-
-       mt6397->irq_domain = irq_domain_add_linear(mt6397->dev->of_node,
-               MT6397_IRQ_NR, &mt6397_irq_domain_ops, mt6397);
-       if (!mt6397->irq_domain) {
-               dev_err(mt6397->dev, "could not create irq domain\n");
-               return -ENOMEM;
-       }
-
-       ret = devm_request_threaded_irq(mt6397->dev, mt6397->irq, NULL,
-               mt6397_irq_thread, IRQF_ONESHOT, "mt6397-pmic", mt6397);
-       if (ret) {
-               dev_err(mt6397->dev, "failed to register irq=%d; err: %d\n",
-                       mt6397->irq, ret);
-               return ret;
-       }
-
-       return 0;
-}
-
 #ifdef CONFIG_PM_SLEEP
 static int mt6397_irq_suspend(struct device *dev)
 {
@@ -290,7 +162,7 @@ static int mt6397_probe(struct platform_device *pdev)
                return pmic->irq;
 
        switch (id & 0xff) {
-       case MT6323_CID_CODE:
+       case MT6323_CHIP_ID:
                pmic->int_con[0] = MT6323_INT_CON0;
                pmic->int_con[1] = MT6323_INT_CON1;
                pmic->int_status[0] = MT6323_INT_STATUS0;
@@ -304,8 +176,8 @@ static int mt6397_probe(struct platform_device *pdev)
                                           0, pmic->irq_domain);
                break;
 
-       case MT6397_CID_CODE:
-       case MT6391_CID_CODE:
+       case MT6391_CHIP_ID:
+       case MT6397_CHIP_ID:
                pmic->int_con[0] = MT6397_INT_CON0;
                pmic->int_con[1] = MT6397_INT_CON1;
                pmic->int_status[0] = MT6397_INT_STATUS0;
diff --git a/drivers/mfd/mt6397-irq.c b/drivers/mfd/mt6397-irq.c
new file mode 100644 (file)
index 0000000..b2d3ce1
--- /dev/null
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (c) 2019 MediaTek Inc.
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/mfd/mt6323/core.h>
+#include <linux/mfd/mt6323/registers.h>
+#include <linux/mfd/mt6397/core.h>
+#include <linux/mfd/mt6397/registers.h>
+
+static void mt6397_irq_lock(struct irq_data *data)
+{
+       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
+
+       mutex_lock(&mt6397->irqlock);
+}
+
+static void mt6397_irq_sync_unlock(struct irq_data *data)
+{
+       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
+
+       regmap_write(mt6397->regmap, mt6397->int_con[0],
+                    mt6397->irq_masks_cur[0]);
+       regmap_write(mt6397->regmap, mt6397->int_con[1],
+                    mt6397->irq_masks_cur[1]);
+
+       mutex_unlock(&mt6397->irqlock);
+}
+
+static void mt6397_irq_disable(struct irq_data *data)
+{
+       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
+       int shift = data->hwirq & 0xf;
+       int reg = data->hwirq >> 4;
+
+       mt6397->irq_masks_cur[reg] &= ~BIT(shift);
+}
+
+static void mt6397_irq_enable(struct irq_data *data)
+{
+       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
+       int shift = data->hwirq & 0xf;
+       int reg = data->hwirq >> 4;
+
+       mt6397->irq_masks_cur[reg] |= BIT(shift);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int mt6397_irq_set_wake(struct irq_data *irq_data, unsigned int on)
+{
+       struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(irq_data);
+       int shift = irq_data->hwirq & 0xf;
+       int reg = irq_data->hwirq >> 4;
+
+       if (on)
+               mt6397->wake_mask[reg] |= BIT(shift);
+       else
+               mt6397->wake_mask[reg] &= ~BIT(shift);
+
+       return 0;
+}
+#else
+#define mt6397_irq_set_wake NULL
+#endif
+
+static struct irq_chip mt6397_irq_chip = {
+       .name = "mt6397-irq",
+       .irq_bus_lock = mt6397_irq_lock,
+       .irq_bus_sync_unlock = mt6397_irq_sync_unlock,
+       .irq_enable = mt6397_irq_enable,
+       .irq_disable = mt6397_irq_disable,
+       .irq_set_wake = mt6397_irq_set_wake,
+};
+
+static void mt6397_irq_handle_reg(struct mt6397_chip *mt6397, int reg,
+                                 int irqbase)
+{
+       unsigned int status;
+       int i, irq, ret;
+
+       ret = regmap_read(mt6397->regmap, reg, &status);
+       if (ret) {
+               dev_err(mt6397->dev, "Failed to read irq status: %d\n", ret);
+               return;
+       }
+
+       for (i = 0; i < 16; i++) {
+               if (status & BIT(i)) {
+                       irq = irq_find_mapping(mt6397->irq_domain, irqbase + i);
+                       if (irq)
+                               handle_nested_irq(irq);
+               }
+       }
+
+       regmap_write(mt6397->regmap, reg, status);
+}
+
+static irqreturn_t mt6397_irq_thread(int irq, void *data)
+{
+       struct mt6397_chip *mt6397 = data;
+
+       mt6397_irq_handle_reg(mt6397, mt6397->int_status[0], 0);
+       mt6397_irq_handle_reg(mt6397, mt6397->int_status[1], 16);
+
+       return IRQ_HANDLED;
+}
+
+static int mt6397_irq_domain_map(struct irq_domain *d, unsigned int irq,
+                                irq_hw_number_t hw)
+{
+       struct mt6397_chip *mt6397 = d->host_data;
+
+       irq_set_chip_data(irq, mt6397);
+       irq_set_chip_and_handler(irq, &mt6397_irq_chip, handle_level_irq);
+       irq_set_nested_thread(irq, 1);
+       irq_set_noprobe(irq);
+
+       return 0;
+}
+
+static const struct irq_domain_ops mt6397_irq_domain_ops = {
+       .map = mt6397_irq_domain_map,
+};
+
+int mt6397_irq_init(struct mt6397_chip *chip)
+{
+       int ret;
+
+       mutex_init(&chip->irqlock);
+
+       switch (chip->chip_id) {
+       case MT6323_CHIP_ID:
+               chip->int_con[0] = MT6323_INT_CON0;
+               chip->int_con[1] = MT6323_INT_CON1;
+               chip->int_status[0] = MT6323_INT_STATUS0;
+               chip->int_status[1] = MT6323_INT_STATUS1;
+               break;
+
+       case MT6391_CHIP_ID:
+       case MT6397_CHIP_ID:
+               chip->int_con[0] = MT6397_INT_CON0;
+               chip->int_con[1] = MT6397_INT_CON1;
+               chip->int_status[0] = MT6397_INT_STATUS0;
+               chip->int_status[1] = MT6397_INT_STATUS1;
+               break;
+
+       default:
+               dev_err(chip->dev, "unsupported chip: 0x%x\n", chip->chip_id);
+               return -ENODEV;
+       }
+
+       /* Mask all interrupt sources */
+       regmap_write(chip->regmap, chip->int_con[0], 0x0);
+       regmap_write(chip->regmap, chip->int_con[1], 0x0);
+
+       chip->irq_domain = irq_domain_add_linear(chip->dev->of_node,
+                                                MT6397_IRQ_NR,
+                                                &mt6397_irq_domain_ops,
+                                                chip);
+       if (!chip->irq_domain) {
+               dev_err(chip->dev, "could not create irq domain\n");
+               return -ENOMEM;
+       }
+
+       ret = devm_request_threaded_irq(chip->dev, chip->irq, NULL,
+                                       mt6397_irq_thread, IRQF_ONESHOT,
+                                       "mt6397-pmic", chip);
+       if (ret) {
+               dev_err(chip->dev, "failed to register irq=%d; err: %d\n",
+                       chip->irq, ret);
+               return ret;
+       }
+
+       return 0;
+}
index 6818ff3..f5b3fa9 100644 (file)
@@ -549,12 +549,12 @@ static int palmas_i2c_probe(struct i2c_client *i2c,
                        palmas->i2c_clients[i] = i2c;
                else {
                        palmas->i2c_clients[i] =
-                                       i2c_new_dummy(i2c->adapter,
+                                       i2c_new_dummy_device(i2c->adapter,
                                                        i2c->addr + i);
-                       if (!palmas->i2c_clients[i]) {
+                       if (IS_ERR(palmas->i2c_clients[i])) {
                                dev_err(palmas->dev,
                                        "can't attach client %d\n", i);
-                               ret = -ENOMEM;
+                               ret = PTR_ERR(palmas->i2c_clients[i]);
                                goto err_i2c;
                        }
                        palmas->i2c_clients[i]->dev.of_node = of_node_get(node);
index 4d7e900..71bc34b 100644 (file)
@@ -561,22 +561,16 @@ static int qcom_rpm_probe(struct platform_device *pdev)
        clk_prepare_enable(rpm->ramclk); /* Accepts NULL */
 
        irq_ack = platform_get_irq_byname(pdev, "ack");
-       if (irq_ack < 0) {
-               dev_err(&pdev->dev, "required ack interrupt missing\n");
+       if (irq_ack < 0)
                return irq_ack;
-       }
 
        irq_err = platform_get_irq_byname(pdev, "err");
-       if (irq_err < 0) {
-               dev_err(&pdev->dev, "required err interrupt missing\n");
+       if (irq_err < 0)
                return irq_err;
-       }
 
        irq_wakeup = platform_get_irq_byname(pdev, "wakeup");
-       if (irq_wakeup < 0) {
-               dev_err(&pdev->dev, "required wakeup interrupt missing\n");
+       if (irq_wakeup < 0)
                return irq_wakeup;
-       }
 
        match = of_match_device(qcom_rpm_of_match, &pdev->dev);
        if (!match)
index 9b9b06d..154270f 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/platform_device.h>
 #include <linux/pci.h>
 #include <linux/platform_data/i2c-gpio.h>
+#include <linux/gpio/driver.h>
 #include <linux/gpio/machine.h>
 #include <linux/slab.h>
 
@@ -1394,10 +1395,8 @@ static int sm501_plat_probe(struct platform_device *dev)
        sm->platdata = dev_get_platdata(&dev->dev);
 
        ret = platform_get_irq(dev, 0);
-       if (ret < 0) {
-               dev_err(&dev->dev, "failed to get irq resource\n");
+       if (ret < 0)
                goto err_res;
-       }
        sm->irq = ret;
 
        sm->io_res = platform_get_resource(dev, IORESOURCE_MEM, 1);
index b65e585..6607232 100644 (file)
@@ -40,7 +40,7 @@ static const struct regmap_config syscon_regmap_config = {
        .reg_stride = 4,
 };
 
-static struct syscon *of_syscon_register(struct device_node *np)
+static struct syscon *of_syscon_register(struct device_node *np, bool check_clk)
 {
        struct clk *clk;
        struct syscon *syscon;
@@ -51,9 +51,6 @@ static struct syscon *of_syscon_register(struct device_node *np)
        struct regmap_config syscon_config = syscon_regmap_config;
        struct resource res;
 
-       if (!of_device_is_compatible(np, "syscon"))
-               return ERR_PTR(-EINVAL);
-
        syscon = kzalloc(sizeof(*syscon), GFP_KERNEL);
        if (!syscon)
                return ERR_PTR(-ENOMEM);
@@ -117,16 +114,18 @@ static struct syscon *of_syscon_register(struct device_node *np)
                goto err_regmap;
        }
 
-       clk = of_clk_get(np, 0);
-       if (IS_ERR(clk)) {
-               ret = PTR_ERR(clk);
-               /* clock is optional */
-               if (ret != -ENOENT)
-                       goto err_clk;
-       } else {
-               ret = regmap_mmio_attach_clk(regmap, clk);
-               if (ret)
-                       goto err_attach;
+       if (check_clk) {
+               clk = of_clk_get(np, 0);
+               if (IS_ERR(clk)) {
+                       ret = PTR_ERR(clk);
+                       /* clock is optional */
+                       if (ret != -ENOENT)
+                               goto err_clk;
+               } else {
+                       ret = regmap_mmio_attach_clk(regmap, clk);
+                       if (ret)
+                               goto err_attach;
+               }
        }
 
        syscon->regmap = regmap;
@@ -150,7 +149,8 @@ err_map:
        return ERR_PTR(ret);
 }
 
-struct regmap *syscon_node_to_regmap(struct device_node *np)
+static struct regmap *device_node_get_regmap(struct device_node *np,
+                                            bool check_clk)
 {
        struct syscon *entry, *syscon = NULL;
 
@@ -165,13 +165,27 @@ struct regmap *syscon_node_to_regmap(struct device_node *np)
        spin_unlock(&syscon_list_slock);
 
        if (!syscon)
-               syscon = of_syscon_register(np);
+               syscon = of_syscon_register(np, check_clk);
 
        if (IS_ERR(syscon))
                return ERR_CAST(syscon);
 
        return syscon->regmap;
 }
+
+struct regmap *device_node_to_regmap(struct device_node *np)
+{
+       return device_node_get_regmap(np, false);
+}
+EXPORT_SYMBOL_GPL(device_node_to_regmap);
+
+struct regmap *syscon_node_to_regmap(struct device_node *np)
+{
+       if (!of_device_is_compatible(np, "syscon"))
+               return ERR_PTR(-EINVAL);
+
+       return device_node_get_regmap(np, true);
+}
 EXPORT_SYMBOL_GPL(syscon_node_to_regmap);
 
 struct regmap *syscon_regmap_lookup_by_compatible(const char *s)
index 60c122e..faecbca 100644 (file)
@@ -626,8 +626,7 @@ static const struct mfd_cell timberdale_cells_bar2[] = {
 static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
        char *buf)
 {
-       struct pci_dev *pdev = to_pci_dev(dev);
-       struct timberdale_device *priv = pci_get_drvdata(pdev);
+       struct timberdale_device *priv = dev_get_drvdata(dev);
 
        return sprintf(buf, "%d.%d.%d\n", priv->fw.major, priv->fw.minor,
                priv->fw.config);
index 865257a..907452b 100644 (file)
@@ -437,12 +437,11 @@ static int tps80031_probe(struct i2c_client *client,
                if (tps80031_slave_address[i] == client->addr)
                        tps80031->clients[i] = client;
                else
-                       tps80031->clients[i] = i2c_new_dummy(client->adapter,
-                                               tps80031_slave_address[i]);
-               if (!tps80031->clients[i]) {
+                       tps80031->clients[i] = devm_i2c_new_dummy_device(&client->dev,
+                                               client->adapter, tps80031_slave_address[i]);
+               if (IS_ERR(tps80031->clients[i])) {
                        dev_err(&client->dev, "can't attach client %d\n", i);
-                       ret = -ENOMEM;
-                       goto fail_client_reg;
+                       return PTR_ERR(tps80031->clients[i]);
                }
 
                i2c_set_clientdata(tps80031->clients[i], tps80031);
@@ -452,7 +451,7 @@ static int tps80031_probe(struct i2c_client *client,
                        ret = PTR_ERR(tps80031->regmap[i]);
                        dev_err(&client->dev,
                                "regmap %d init failed, err %d\n", i, ret);
-                       goto fail_client_reg;
+                       return ret;
                }
        }
 
@@ -461,7 +460,7 @@ static int tps80031_probe(struct i2c_client *client,
        if (ret < 0) {
                dev_err(&client->dev,
                        "Silicon version number read failed: %d\n", ret);
-               goto fail_client_reg;
+               return ret;
        }
 
        ret = tps80031_read(&client->dev, TPS80031_SLAVE_ID3,
@@ -469,7 +468,7 @@ static int tps80031_probe(struct i2c_client *client,
        if (ret < 0) {
                dev_err(&client->dev,
                        "Silicon eeprom version read failed: %d\n", ret);
-               goto fail_client_reg;
+               return ret;
        }
 
        dev_info(&client->dev, "ES version 0x%02x and EPROM version 0x%02x\n",
@@ -482,7 +481,7 @@ static int tps80031_probe(struct i2c_client *client,
        ret = tps80031_irq_init(tps80031, client->irq, pdata->irq_base);
        if (ret) {
                dev_err(&client->dev, "IRQ init failed: %d\n", ret);
-               goto fail_client_reg;
+               return ret;
        }
 
        tps80031_pupd_init(tps80031, pdata);
@@ -506,12 +505,6 @@ static int tps80031_probe(struct i2c_client *client,
 
 fail_mfd_add:
        regmap_del_irq_chip(client->irq, tps80031->irq_data);
-
-fail_client_reg:
-       for (i = 0; i < TPS80031_NUM_SLAVES; i++) {
-               if (tps80031->clients[i]  && (tps80031->clients[i] != client))
-                       i2c_unregister_device(tps80031->clients[i]);
-       }
        return ret;
 }
 
index 448d939..20cf8cf 100644 (file)
@@ -1141,12 +1141,12 @@ twl_probe(struct i2c_client *client, const struct i2c_device_id *id)
                if (i == 0) {
                        twl->client = client;
                } else {
-                       twl->client = i2c_new_dummy(client->adapter,
+                       twl->client = i2c_new_dummy_device(client->adapter,
                                                    client->addr + i);
-                       if (!twl->client) {
+                       if (IS_ERR(twl->client)) {
                                dev_err(&client->dev,
                                        "can't attach client %d\n", i);
-                               status = -ENOMEM;
+                               status = PTR_ERR(twl->client);
                                goto fail;
                        }
                }
index 518945b..2cccd82 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/delay.h>
 #include <linux/mutex.h>
 #include <linux/mod_devicetable.h>
-#include <linux/log2.h>
 #include <linux/bitops.h>
 #include <linux/jiffies.h>
 #include <linux/property.h>
index a2a142a..9d04231 100644 (file)
@@ -573,6 +573,7 @@ static void __exit gru_exit(void)
        gru_free_tables();
        misc_deregister(&gru_miscdev);
        gru_proc_exit();
+       mmu_notifier_synchronize();
 }
 
 static const struct file_operations gru_fops = {
index 438191c..a7e44b2 100644 (file)
@@ -307,10 +307,8 @@ struct gru_mm_tracker {                            /* pack to reduce size */
 
 struct gru_mm_struct {
        struct mmu_notifier     ms_notifier;
-       atomic_t                ms_refcnt;
        spinlock_t              ms_asid_lock;   /* protects ASID assignment */
        atomic_t                ms_range_active;/* num range_invals active */
-       char                    ms_released;
        wait_queue_head_t       ms_wait_queue;
        DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS);
        struct gru_mm_tracker   ms_asids[GRU_MAX_GRUS];
index 59ba0ad..10921cd 100644 (file)
@@ -235,83 +235,47 @@ static void gru_invalidate_range_end(struct mmu_notifier *mn,
                gms, range->start, range->end);
 }
 
-static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
+static struct mmu_notifier *gru_alloc_notifier(struct mm_struct *mm)
 {
-       struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
-                                                ms_notifier);
+       struct gru_mm_struct *gms;
+
+       gms = kzalloc(sizeof(*gms), GFP_KERNEL);
+       if (!gms)
+               return ERR_PTR(-ENOMEM);
+       STAT(gms_alloc);
+       spin_lock_init(&gms->ms_asid_lock);
+       init_waitqueue_head(&gms->ms_wait_queue);
 
-       gms->ms_released = 1;
-       gru_dbg(grudev, "gms %p\n", gms);
+       return &gms->ms_notifier;
 }
 
+static void gru_free_notifier(struct mmu_notifier *mn)
+{
+       kfree(container_of(mn, struct gru_mm_struct, ms_notifier));
+       STAT(gms_free);
+}
 
 static const struct mmu_notifier_ops gru_mmuops = {
        .invalidate_range_start = gru_invalidate_range_start,
        .invalidate_range_end   = gru_invalidate_range_end,
-       .release                = gru_release,
+       .alloc_notifier         = gru_alloc_notifier,
+       .free_notifier          = gru_free_notifier,
 };
 
-/* Move this to the basic mmu_notifier file. But for now... */
-static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
-                       const struct mmu_notifier_ops *ops)
-{
-       struct mmu_notifier *mn, *gru_mn = NULL;
-
-       if (mm->mmu_notifier_mm) {
-               rcu_read_lock();
-               hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list,
-                                        hlist)
-                   if (mn->ops == ops) {
-                       gru_mn = mn;
-                       break;
-               }
-               rcu_read_unlock();
-       }
-       return gru_mn;
-}
-
 struct gru_mm_struct *gru_register_mmu_notifier(void)
 {
-       struct gru_mm_struct *gms;
        struct mmu_notifier *mn;
-       int err;
-
-       mn = mmu_find_ops(current->mm, &gru_mmuops);
-       if (mn) {
-               gms = container_of(mn, struct gru_mm_struct, ms_notifier);
-               atomic_inc(&gms->ms_refcnt);
-       } else {
-               gms = kzalloc(sizeof(*gms), GFP_KERNEL);
-               if (!gms)
-                       return ERR_PTR(-ENOMEM);
-               STAT(gms_alloc);
-               spin_lock_init(&gms->ms_asid_lock);
-               gms->ms_notifier.ops = &gru_mmuops;
-               atomic_set(&gms->ms_refcnt, 1);
-               init_waitqueue_head(&gms->ms_wait_queue);
-               err = __mmu_notifier_register(&gms->ms_notifier, current->mm);
-               if (err)
-                       goto error;
-       }
-       if (gms)
-               gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
-                       atomic_read(&gms->ms_refcnt));
-       return gms;
-error:
-       kfree(gms);
-       return ERR_PTR(err);
+
+       mn = mmu_notifier_get_locked(&gru_mmuops, current->mm);
+       if (IS_ERR(mn))
+               return ERR_CAST(mn);
+
+       return container_of(mn, struct gru_mm_struct, ms_notifier);
 }
 
 void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
 {
-       gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
-               atomic_read(&gms->ms_refcnt), gms->ms_released);
-       if (atomic_dec_return(&gms->ms_refcnt) == 0) {
-               if (!gms->ms_released)
-                       mmu_notifier_unregister(&gms->ms_notifier, current->mm);
-               kfree(gms);
-               STAT(gms_free);
-       }
+       mmu_notifier_put(&gms->ms_notifier);
 }
 
 /*
index 80a6e2d..42d401e 100644 (file)
@@ -23,73 +23,6 @@ config MTD_TESTS
          WARNING: some of the tests will ERASE entire MTD device which they
          test. Do not use these tests unless you really know what you do.
 
-config MTD_CMDLINE_PARTS
-       tristate "Command line partition table parsing"
-       depends on MTD
-       help
-         Allow generic configuration of the MTD partition tables via the kernel
-         command line. Multiple flash resources are supported for hardware where
-         different kinds of flash memory are available.
-
-         You will still need the parsing functions to be called by the driver
-         for your particular device. It won't happen automatically. The
-         SA1100 map driver (CONFIG_MTD_SA1100) has an option for this, for
-         example.
-
-         The format for the command line is as follows:
-
-         mtdparts=<mtddef>[;<mtddef]
-         <mtddef>  := <mtd-id>:<partdef>[,<partdef>]
-         <partdef> := <size>[@offset][<name>][ro]
-         <mtd-id>  := unique id used in mapping driver/device
-         <size>    := standard linux memsize OR "-" to denote all
-         remaining space
-         <name>    := (NAME)
-
-         Due to the way Linux handles the command line, no spaces are
-         allowed in the partition definition, including mtd id's and partition
-         names.
-
-         Examples:
-
-         1 flash resource (mtd-id "sa1100"), with 1 single writable partition:
-         mtdparts=sa1100:-
-
-         Same flash, but 2 named partitions, the first one being read-only:
-         mtdparts=sa1100:256k(ARMboot)ro,-(root)
-
-         If unsure, say 'N'.
-
-config MTD_OF_PARTS
-       tristate "OpenFirmware partitioning information support"
-       default y
-       depends on OF
-       help
-         This provides a partition parsing function which derives
-         the partition map from the children of the flash node,
-         as described in Documentation/devicetree/bindings/mtd/partition.txt.
-
-config MTD_AR7_PARTS
-       tristate "TI AR7 partitioning support"
-       help
-         TI AR7 partitioning support
-
-config MTD_BCM63XX_PARTS
-       tristate "BCM63XX CFE partitioning support"
-       depends on BCM63XX || BMIPS_GENERIC || COMPILE_TEST
-       select CRC32
-       select MTD_PARSER_IMAGETAG
-       help
-         This provides partition parsing for BCM63xx devices with CFE
-         bootloaders.
-
-config MTD_BCM47XX_PARTS
-       tristate "BCM47XX partitioning support"
-       depends on BCM47XX || ARCH_BCM_5301X
-       help
-         This provides partitions parser for devices based on BCM47xx
-         boards.
-
 menu "Partition parsers"
 source "drivers/mtd/parsers/Kconfig"
 endmenu
index 62d649a..56cc60c 100644 (file)
@@ -7,11 +7,6 @@
 obj-$(CONFIG_MTD)              += mtd.o
 mtd-y                          := mtdcore.o mtdsuper.o mtdconcat.o mtdpart.o mtdchar.o
 
-obj-$(CONFIG_MTD_OF_PARTS)     += ofpart.o
-obj-$(CONFIG_MTD_CMDLINE_PARTS) += cmdlinepart.o
-obj-$(CONFIG_MTD_AR7_PARTS)    += ar7part.o
-obj-$(CONFIG_MTD_BCM63XX_PARTS)        += bcm63xxpart.o
-obj-$(CONFIG_MTD_BCM47XX_PARTS)        += bcm47xxpart.o
 obj-y                          += parsers/
 
 # 'Users' - code which presents functionality to userspace.
index f4da7bd..cf8c8be 100644 (file)
@@ -61,7 +61,9 @@
 
 static int cfi_amdstd_read (struct mtd_info *, loff_t, size_t, size_t *, u_char *);
 static int cfi_amdstd_write_words(struct mtd_info *, loff_t, size_t, size_t *, const u_char *);
+#if !FORCE_WORD_WRITE
 static int cfi_amdstd_write_buffers(struct mtd_info *, loff_t, size_t, size_t *, const u_char *);
+#endif
 static int cfi_amdstd_erase_chip(struct mtd_info *, struct erase_info *);
 static int cfi_amdstd_erase_varsize(struct mtd_info *, struct erase_info *);
 static void cfi_amdstd_sync (struct mtd_info *);
@@ -256,6 +258,7 @@ static void fixup_amd_bootblock(struct mtd_info *mtd)
 }
 #endif
 
+#if !FORCE_WORD_WRITE
 static void fixup_use_write_buffers(struct mtd_info *mtd)
 {
        struct map_info *map = mtd->priv;
@@ -265,6 +268,7 @@ static void fixup_use_write_buffers(struct mtd_info *mtd)
                mtd->_write = cfi_amdstd_write_buffers;
        }
 }
+#endif /* !FORCE_WORD_WRITE */
 
 /* Atmel chips don't use the same PRI format as AMD chips */
 static void fixup_convert_atmel_pri(struct mtd_info *mtd)
@@ -1637,11 +1641,11 @@ static int cfi_amdstd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
                                   do_otp_lock, 1);
 }
 
-static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
-                                    unsigned long adr, map_word datum,
-                                    int mode)
+static int __xipram do_write_oneword_once(struct map_info *map,
+                                         struct flchip *chip,
+                                         unsigned long adr, map_word datum,
+                                         int mode, struct cfi_private *cfi)
 {
-       struct cfi_private *cfi = map->fldrv_priv;
        unsigned long timeo = jiffies + HZ;
        /*
         * We use a 1ms + 1 jiffies generic timeout for writes (most devices
@@ -1654,42 +1658,7 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
         */
        unsigned long uWriteTimeout = (HZ / 1000) + 1;
        int ret = 0;
-       map_word oldd;
-       int retry_cnt = 0;
-
-       adr += chip->start;
-
-       mutex_lock(&chip->mutex);
-       ret = get_chip(map, chip, adr, mode);
-       if (ret) {
-               mutex_unlock(&chip->mutex);
-               return ret;
-       }
-
-       pr_debug("MTD %s(): WRITE 0x%.8lx(0x%.8lx)\n",
-                __func__, adr, datum.x[0]);
-
-       if (mode == FL_OTP_WRITE)
-               otp_enter(map, chip, adr, map_bankwidth(map));
-
-       /*
-        * Check for a NOP for the case when the datum to write is already
-        * present - it saves time and works around buggy chips that corrupt
-        * data at other locations when 0xff is written to a location that
-        * already contains 0xff.
-        */
-       oldd = map_read(map, adr);
-       if (map_word_equal(map, oldd, datum)) {
-               pr_debug("MTD %s(): NOP\n",
-                      __func__);
-               goto op_done;
-       }
 
-       XIP_INVAL_CACHED_RANGE(map, adr, map_bankwidth(map));
-       ENABLE_VPP(map);
-       xip_disable(map, chip, adr);
-
- retry:
        cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
        cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
        cfi_send_gen_cmd(0xA0, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
@@ -1717,40 +1686,125 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
                        continue;
                }
 
+               /*
+                * We check "time_after" and "!chip_good" before checking
+                * "chip_good" to avoid the failure due to scheduling.
+                */
                if (time_after(jiffies, timeo) &&
-                   !chip_ready(map, chip, adr)) {
+                   !chip_good(map, chip, adr, datum)) {
                        xip_enable(map, chip, adr);
                        printk(KERN_WARNING "MTD %s(): software timeout\n", __func__);
                        xip_disable(map, chip, adr);
+                       ret = -EIO;
                        break;
                }
 
-               if (chip_ready(map, chip, adr))
+               if (chip_good(map, chip, adr, datum))
                        break;
 
                /* Latency issues. Drop the lock, wait a while and retry */
                UDELAY(map, chip, adr, 1);
        }
-       /* Did we succeed? */
-       if (!chip_good(map, chip, adr, datum)) {
-               /* reset on all failures. */
-               cfi_check_err_status(map, chip, adr);
-               map_write(map, CMD(0xF0), chip->start);
-               /* FIXME - should have reset delay before continuing */
 
-               if (++retry_cnt <= MAX_RETRIES)
-                       goto retry;
+       return ret;
+}
 
-               ret = -EIO;
+static int __xipram do_write_oneword_start(struct map_info *map,
+                                          struct flchip *chip,
+                                          unsigned long adr, int mode)
+{
+       int ret = 0;
+
+       mutex_lock(&chip->mutex);
+
+       ret = get_chip(map, chip, adr, mode);
+       if (ret) {
+               mutex_unlock(&chip->mutex);
+               return ret;
        }
-       xip_enable(map, chip, adr);
- op_done:
+
+       if (mode == FL_OTP_WRITE)
+               otp_enter(map, chip, adr, map_bankwidth(map));
+
+       return ret;
+}
+
+static void __xipram do_write_oneword_done(struct map_info *map,
+                                          struct flchip *chip,
+                                          unsigned long adr, int mode)
+{
        if (mode == FL_OTP_WRITE)
                otp_exit(map, chip, adr, map_bankwidth(map));
+
        chip->state = FL_READY;
        DISABLE_VPP(map);
        put_chip(map, chip, adr);
+
        mutex_unlock(&chip->mutex);
+}
+
+static int __xipram do_write_oneword_retry(struct map_info *map,
+                                          struct flchip *chip,
+                                          unsigned long adr, map_word datum,
+                                          int mode)
+{
+       struct cfi_private *cfi = map->fldrv_priv;
+       int ret = 0;
+       map_word oldd;
+       int retry_cnt = 0;
+
+       /*
+        * Check for a NOP for the case when the datum to write is already
+        * present - it saves time and works around buggy chips that corrupt
+        * data at other locations when 0xff is written to a location that
+        * already contains 0xff.
+        */
+       oldd = map_read(map, adr);
+       if (map_word_equal(map, oldd, datum)) {
+               pr_debug("MTD %s(): NOP\n", __func__);
+               return ret;
+       }
+
+       XIP_INVAL_CACHED_RANGE(map, adr, map_bankwidth(map));
+       ENABLE_VPP(map);
+       xip_disable(map, chip, adr);
+
+ retry:
+       ret = do_write_oneword_once(map, chip, adr, datum, mode, cfi);
+       if (ret) {
+               /* reset on all failures. */
+               cfi_check_err_status(map, chip, adr);
+               map_write(map, CMD(0xF0), chip->start);
+               /* FIXME - should have reset delay before continuing */
+
+               if (++retry_cnt <= MAX_RETRIES) {
+                       ret = 0;
+                       goto retry;
+               }
+       }
+       xip_enable(map, chip, adr);
+
+       return ret;
+}
+
+static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
+                                    unsigned long adr, map_word datum,
+                                    int mode)
+{
+       int ret = 0;
+
+       adr += chip->start;
+
+       pr_debug("MTD %s(): WRITE 0x%.8lx(0x%.8lx)\n", __func__, adr,
+                datum.x[0]);
+
+       ret = do_write_oneword_start(map, chip, adr, mode);
+       if (ret)
+               return ret;
+
+       ret = do_write_oneword_retry(map, chip, adr, datum, mode);
+
+       do_write_oneword_done(map, chip, adr, mode);
 
        return ret;
 }
@@ -1879,6 +1933,78 @@ static int cfi_amdstd_write_words(struct mtd_info *mtd, loff_t to, size_t len,
        return 0;
 }
 
+#if !FORCE_WORD_WRITE
+static int __xipram do_write_buffer_wait(struct map_info *map,
+                                        struct flchip *chip, unsigned long adr,
+                                        map_word datum)
+{
+       unsigned long timeo;
+       unsigned long u_write_timeout;
+       int ret = 0;
+
+       /*
+        * Timeout is calculated according to CFI data, if available.
+        * See more comments in cfi_cmdset_0002().
+        */
+       u_write_timeout = usecs_to_jiffies(chip->buffer_write_time_max);
+       timeo = jiffies + u_write_timeout;
+
+       for (;;) {
+               if (chip->state != FL_WRITING) {
+                       /* Someone's suspended the write. Sleep */
+                       DECLARE_WAITQUEUE(wait, current);
+
+                       set_current_state(TASK_UNINTERRUPTIBLE);
+                       add_wait_queue(&chip->wq, &wait);
+                       mutex_unlock(&chip->mutex);
+                       schedule();
+                       remove_wait_queue(&chip->wq, &wait);
+                       timeo = jiffies + (HZ / 2); /* FIXME */
+                       mutex_lock(&chip->mutex);
+                       continue;
+               }
+
+               /*
+                * We check "time_after" and "!chip_good" before checking
+                * "chip_good" to avoid the failure due to scheduling.
+                */
+               if (time_after(jiffies, timeo) &&
+                   !chip_good(map, chip, adr, datum)) {
+                       ret = -EIO;
+                       break;
+               }
+
+               if (chip_good(map, chip, adr, datum))
+                       break;
+
+               /* Latency issues. Drop the lock, wait a while and retry */
+               UDELAY(map, chip, adr, 1);
+       }
+
+       return ret;
+}
+
+static void __xipram do_write_buffer_reset(struct map_info *map,
+                                          struct flchip *chip,
+                                          struct cfi_private *cfi)
+{
+       /*
+        * Recovery from write-buffer programming failures requires
+        * the write-to-buffer-reset sequence.  Since the last part
+        * of the sequence also works as a normal reset, we can run
+        * the same commands regardless of why we are here.
+        * See e.g.
+        * http://www.spansion.com/Support/Application%20Notes/MirrorBit_Write_Buffer_Prog_Page_Buffer_Read_AN.pdf
+        */
+       cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi,
+                        cfi->device_type, NULL);
+       cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi,
+                        cfi->device_type, NULL);
+       cfi_send_gen_cmd(0xF0, cfi->addr_unlock1, chip->start, map, cfi,
+                        cfi->device_type, NULL);
+
+       /* FIXME - should have reset delay before continuing */
+}
 
 /*
  * FIXME: interleaved mode not tested, and probably not supported!
@@ -1888,13 +2014,6 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
                                    int len)
 {
        struct cfi_private *cfi = map->fldrv_priv;
-       unsigned long timeo = jiffies + HZ;
-       /*
-        * Timeout is calculated according to CFI data, if available.
-        * See more comments in cfi_cmdset_0002().
-        */
-       unsigned long uWriteTimeout =
-                               usecs_to_jiffies(chip->buffer_write_time_max);
        int ret = -EIO;
        unsigned long cmd_adr;
        int z, words;
@@ -1951,63 +2070,16 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
                                adr, map_bankwidth(map),
                                chip->word_write_time);
 
-       timeo = jiffies + uWriteTimeout;
-
-       for (;;) {
-               if (chip->state != FL_WRITING) {
-                       /* Someone's suspended the write. Sleep */
-                       DECLARE_WAITQUEUE(wait, current);
-
-                       set_current_state(TASK_UNINTERRUPTIBLE);
-                       add_wait_queue(&chip->wq, &wait);
-                       mutex_unlock(&chip->mutex);
-                       schedule();
-                       remove_wait_queue(&chip->wq, &wait);
-                       timeo = jiffies + (HZ / 2); /* FIXME */
-                       mutex_lock(&chip->mutex);
-                       continue;
-               }
-
-               /*
-                * We check "time_after" and "!chip_good" before checking "chip_good" to avoid
-                * the failure due to scheduling.
-                */
-               if (time_after(jiffies, timeo) &&
-                   !chip_good(map, chip, adr, datum))
-                       break;
-
-               if (chip_good(map, chip, adr, datum)) {
-                       xip_enable(map, chip, adr);
-                       goto op_done;
-               }
-
-               /* Latency issues. Drop the lock, wait a while and retry */
-               UDELAY(map, chip, adr, 1);
+       ret = do_write_buffer_wait(map, chip, adr, datum);
+       if (ret) {
+               cfi_check_err_status(map, chip, adr);
+               do_write_buffer_reset(map, chip, cfi);
+               pr_err("MTD %s(): software timeout, address:0x%.8lx.\n",
+                      __func__, adr);
        }
 
-       /*
-        * Recovery from write-buffer programming failures requires
-        * the write-to-buffer-reset sequence.  Since the last part
-        * of the sequence also works as a normal reset, we can run
-        * the same commands regardless of why we are here.
-        * See e.g.
-        * http://www.spansion.com/Support/Application%20Notes/MirrorBit_Write_Buffer_Prog_Page_Buffer_Read_AN.pdf
-        */
-       cfi_check_err_status(map, chip, adr);
-       cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi,
-                        cfi->device_type, NULL);
-       cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi,
-                        cfi->device_type, NULL);
-       cfi_send_gen_cmd(0xF0, cfi->addr_unlock1, chip->start, map, cfi,
-                        cfi->device_type, NULL);
        xip_enable(map, chip, adr);
-       /* FIXME - should have reset delay before continuing */
 
-       printk(KERN_WARNING "MTD %s(): software timeout, address:0x%.8lx.\n",
-              __func__, adr);
-
-       ret = -EIO;
- op_done:
        chip->state = FL_READY;
        DISABLE_VPP(map);
        put_chip(map, chip, adr);
@@ -2091,6 +2163,7 @@ static int cfi_amdstd_write_buffers(struct mtd_info *mtd, loff_t to, size_t len,
 
        return 0;
 }
+#endif /* !FORCE_WORD_WRITE */
 
 /*
  * Wait for the flash chip to become ready to write data
@@ -2344,7 +2417,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
        adr = cfi->addr_unlock1;
 
        mutex_lock(&chip->mutex);
-       ret = get_chip(map, chip, adr, FL_WRITING);
+       ret = get_chip(map, chip, adr, FL_ERASING);
        if (ret) {
                mutex_unlock(&chip->mutex);
                return ret;
index 839ed40..e5bd3c2 100644 (file)
@@ -20,7 +20,7 @@ static int genprobe_new_chip(struct map_info *map, struct chip_probe *cp,
 
 struct mtd_info *mtd_do_chip_probe(struct map_info *map, struct chip_probe *cp)
 {
-       struct mtd_info *mtd = NULL;
+       struct mtd_info *mtd;
        struct cfi_private *cfi;
 
        /* First probe the map to see if we have CFI stuff there. */
index 49abbc5..f96287c 100644 (file)
@@ -79,24 +79,6 @@ config MTD_DATAFLASH_OTP
          other key product data.  The second half is programmed with a
          unique-to-each-chip bit pattern at the factory.
 
-config MTD_M25P80
-       tristate "Support most SPI Flash chips (AT26DF, M25P, W25X, ...)"
-       depends on SPI_MASTER && MTD_SPI_NOR
-       select SPI_MEM
-       help
-         This enables access to most modern SPI flash chips, used for
-         program and data storage.   Series supported include Atmel AT26DF,
-         Spansion S25SL, SST 25VF, ST M25P, and Winbond W25X.  Other chips
-         are supported as well.  See the driver source for the current list,
-         or to add other chips.
-
-         Note that the original DataFlash chips (AT45 series, not AT26DF),
-         need an entirely different driver.
-
-         Set up your spi devices with the right board-specific platform data,
-         if you want to specify device partitioning or to use a device which
-         doesn't support the JEDEC ID instruction.
-
 config MTD_MCHP23K256
        tristate "Microchip 23K256 SRAM"
        depends on SPI_MASTER
index 94895ea..991c8d1 100644 (file)
@@ -12,7 +12,6 @@ obj-$(CONFIG_MTD_MTDRAM)      += mtdram.o
 obj-$(CONFIG_MTD_LART)         += lart.o
 obj-$(CONFIG_MTD_BLOCK2MTD)    += block2mtd.o
 obj-$(CONFIG_MTD_DATAFLASH)    += mtd_dataflash.o
-obj-$(CONFIG_MTD_M25P80)       += m25p80.o
 obj-$(CONFIG_MTD_MCHP23K256)   += mchp23k256.o
 obj-$(CONFIG_MTD_SPEAR_SMI)    += spear_smi.o
 obj-$(CONFIG_MTD_SST25L)       += sst25l.o
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
deleted file mode 100644 (file)
index c508886..0000000
+++ /dev/null
@@ -1,347 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * MTD SPI driver for ST M25Pxx (and similar) serial flash chips
- *
- * Author: Mike Lavender, mike@steroidmicros.com
- *
- * Copyright (c) 2005, Intec Automation Inc.
- *
- * Some parts are based on lart.c by Abraham Van Der Merwe
- *
- * Cleaned up and generalized based on mtd_dataflash.c
- */
-
-#include <linux/err.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/device.h>
-
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/partitions.h>
-
-#include <linux/spi/spi.h>
-#include <linux/spi/spi-mem.h>
-#include <linux/spi/flash.h>
-#include <linux/mtd/spi-nor.h>
-
-struct m25p {
-       struct spi_mem          *spimem;
-       struct spi_nor          spi_nor;
-};
-
-static int m25p80_read_reg(struct spi_nor *nor, u8 code, u8 *val, int len)
-{
-       struct m25p *flash = nor->priv;
-       struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(code, 1),
-                                         SPI_MEM_OP_NO_ADDR,
-                                         SPI_MEM_OP_NO_DUMMY,
-                                         SPI_MEM_OP_DATA_IN(len, NULL, 1));
-       void *scratchbuf;
-       int ret;
-
-       scratchbuf = kmalloc(len, GFP_KERNEL);
-       if (!scratchbuf)
-               return -ENOMEM;
-
-       op.data.buf.in = scratchbuf;
-       ret = spi_mem_exec_op(flash->spimem, &op);
-       if (ret < 0)
-               dev_err(&flash->spimem->spi->dev, "error %d reading %x\n", ret,
-                       code);
-       else
-               memcpy(val, scratchbuf, len);
-
-       kfree(scratchbuf);
-
-       return ret;
-}
-
-static int m25p80_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
-{
-       struct m25p *flash = nor->priv;
-       struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(opcode, 1),
-                                         SPI_MEM_OP_NO_ADDR,
-                                         SPI_MEM_OP_NO_DUMMY,
-                                         SPI_MEM_OP_DATA_OUT(len, NULL, 1));
-       void *scratchbuf;
-       int ret;
-
-       scratchbuf = kmemdup(buf, len, GFP_KERNEL);
-       if (!scratchbuf)
-               return -ENOMEM;
-
-       op.data.buf.out = scratchbuf;
-       ret = spi_mem_exec_op(flash->spimem, &op);
-       kfree(scratchbuf);
-
-       return ret;
-}
-
-static ssize_t m25p80_write(struct spi_nor *nor, loff_t to, size_t len,
-                           const u_char *buf)
-{
-       struct m25p *flash = nor->priv;
-       struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(nor->program_opcode, 1),
-                                  SPI_MEM_OP_ADDR(nor->addr_width, to, 1),
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_DATA_OUT(len, buf, 1));
-       int ret;
-
-       /* get transfer protocols. */
-       op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(nor->write_proto);
-       op.addr.buswidth = spi_nor_get_protocol_addr_nbits(nor->write_proto);
-       op.data.buswidth = spi_nor_get_protocol_data_nbits(nor->write_proto);
-
-       if (nor->program_opcode == SPINOR_OP_AAI_WP && nor->sst_write_second)
-               op.addr.nbytes = 0;
-
-       ret = spi_mem_adjust_op_size(flash->spimem, &op);
-       if (ret)
-               return ret;
-       op.data.nbytes = len < op.data.nbytes ? len : op.data.nbytes;
-
-       ret = spi_mem_exec_op(flash->spimem, &op);
-       if (ret)
-               return ret;
-
-       return op.data.nbytes;
-}
-
-/*
- * Read an address range from the nor chip.  The address range
- * may be any size provided it is within the physical boundaries.
- */
-static ssize_t m25p80_read(struct spi_nor *nor, loff_t from, size_t len,
-                          u_char *buf)
-{
-       struct m25p *flash = nor->priv;
-       struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(nor->read_opcode, 1),
-                                  SPI_MEM_OP_ADDR(nor->addr_width, from, 1),
-                                  SPI_MEM_OP_DUMMY(nor->read_dummy, 1),
-                                  SPI_MEM_OP_DATA_IN(len, buf, 1));
-       size_t remaining = len;
-       int ret;
-
-       /* get transfer protocols. */
-       op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(nor->read_proto);
-       op.addr.buswidth = spi_nor_get_protocol_addr_nbits(nor->read_proto);
-       op.dummy.buswidth = op.addr.buswidth;
-       op.data.buswidth = spi_nor_get_protocol_data_nbits(nor->read_proto);
-
-       /* convert the dummy cycles to the number of bytes */
-       op.dummy.nbytes = (nor->read_dummy * op.dummy.buswidth) / 8;
-
-       while (remaining) {
-               op.data.nbytes = remaining < UINT_MAX ? remaining : UINT_MAX;
-               ret = spi_mem_adjust_op_size(flash->spimem, &op);
-               if (ret)
-                       return ret;
-
-               ret = spi_mem_exec_op(flash->spimem, &op);
-               if (ret)
-                       return ret;
-
-               op.addr.val += op.data.nbytes;
-               remaining -= op.data.nbytes;
-               op.data.buf.in += op.data.nbytes;
-       }
-
-       return len;
-}
-
-/*
- * board specific setup should have ensured the SPI clock used here
- * matches what the READ command supports, at least until this driver
- * understands FAST_READ (for clocks over 25 MHz).
- */
-static int m25p_probe(struct spi_mem *spimem)
-{
-       struct spi_device *spi = spimem->spi;
-       struct flash_platform_data      *data;
-       struct m25p *flash;
-       struct spi_nor *nor;
-       struct spi_nor_hwcaps hwcaps = {
-               .mask = SNOR_HWCAPS_READ |
-                       SNOR_HWCAPS_READ_FAST |
-                       SNOR_HWCAPS_PP,
-       };
-       char *flash_name;
-       int ret;
-
-       data = dev_get_platdata(&spimem->spi->dev);
-
-       flash = devm_kzalloc(&spimem->spi->dev, sizeof(*flash), GFP_KERNEL);
-       if (!flash)
-               return -ENOMEM;
-
-       nor = &flash->spi_nor;
-
-       /* install the hooks */
-       nor->read = m25p80_read;
-       nor->write = m25p80_write;
-       nor->write_reg = m25p80_write_reg;
-       nor->read_reg = m25p80_read_reg;
-
-       nor->dev = &spimem->spi->dev;
-       spi_nor_set_flash_node(nor, spi->dev.of_node);
-       nor->priv = flash;
-
-       spi_mem_set_drvdata(spimem, flash);
-       flash->spimem = spimem;
-
-       if (spi->mode & SPI_RX_OCTAL) {
-               hwcaps.mask |= SNOR_HWCAPS_READ_1_1_8;
-
-               if (spi->mode & SPI_TX_OCTAL)
-                       hwcaps.mask |= (SNOR_HWCAPS_READ_1_8_8 |
-                                       SNOR_HWCAPS_PP_1_1_8 |
-                                       SNOR_HWCAPS_PP_1_8_8);
-       } else if (spi->mode & SPI_RX_QUAD) {
-               hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
-
-               if (spi->mode & SPI_TX_QUAD)
-                       hwcaps.mask |= (SNOR_HWCAPS_READ_1_4_4 |
-                                       SNOR_HWCAPS_PP_1_1_4 |
-                                       SNOR_HWCAPS_PP_1_4_4);
-       } else if (spi->mode & SPI_RX_DUAL) {
-               hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
-
-               if (spi->mode & SPI_TX_DUAL)
-                       hwcaps.mask |= SNOR_HWCAPS_READ_1_2_2;
-       }
-
-       if (data && data->name)
-               nor->mtd.name = data->name;
-
-       if (!nor->mtd.name)
-               nor->mtd.name = spi_mem_get_name(spimem);
-
-       /* For some (historical?) reason many platforms provide two different
-        * names in flash_platform_data: "name" and "type". Quite often name is
-        * set to "m25p80" and then "type" provides a real chip name.
-        * If that's the case, respect "type" and ignore a "name".
-        */
-       if (data && data->type)
-               flash_name = data->type;
-       else if (!strcmp(spi->modalias, "spi-nor"))
-               flash_name = NULL; /* auto-detect */
-       else
-               flash_name = spi->modalias;
-
-       ret = spi_nor_scan(nor, flash_name, &hwcaps);
-       if (ret)
-               return ret;
-
-       return mtd_device_register(&nor->mtd, data ? data->parts : NULL,
-                                  data ? data->nr_parts : 0);
-}
-
-
-static int m25p_remove(struct spi_mem *spimem)
-{
-       struct m25p     *flash = spi_mem_get_drvdata(spimem);
-
-       spi_nor_restore(&flash->spi_nor);
-
-       /* Clean up MTD stuff. */
-       return mtd_device_unregister(&flash->spi_nor.mtd);
-}
-
-static void m25p_shutdown(struct spi_mem *spimem)
-{
-       struct m25p *flash = spi_mem_get_drvdata(spimem);
-
-       spi_nor_restore(&flash->spi_nor);
-}
-/*
- * Do NOT add to this array without reading the following:
- *
- * Historically, many flash devices are bound to this driver by their name. But
- * since most of these flash are compatible to some extent, and their
- * differences can often be differentiated by the JEDEC read-ID command, we
- * encourage new users to add support to the spi-nor library, and simply bind
- * against a generic string here (e.g., "jedec,spi-nor").
- *
- * Many flash names are kept here in this list (as well as in spi-nor.c) to
- * keep them available as module aliases for existing platforms.
- */
-static const struct spi_device_id m25p_ids[] = {
-       /*
-        * Allow non-DT platform devices to bind to the "spi-nor" modalias, and
-        * hack around the fact that the SPI core does not provide uevent
-        * matching for .of_match_table
-        */
-       {"spi-nor"},
-
-       /*
-        * Entries not used in DTs that should be safe to drop after replacing
-        * them with "spi-nor" in platform data.
-        */
-       {"s25sl064a"},  {"w25x16"},     {"m25p10"},     {"m25px64"},
-
-       /*
-        * Entries that were used in DTs without "jedec,spi-nor" fallback and
-        * should be kept for backward compatibility.
-        */
-       {"at25df321a"}, {"at25df641"},  {"at26df081a"},
-       {"mx25l4005a"}, {"mx25l1606e"}, {"mx25l6405d"}, {"mx25l12805d"},
-       {"mx25l25635e"},{"mx66l51235l"},
-       {"n25q064"},    {"n25q128a11"}, {"n25q128a13"}, {"n25q512a"},
-       {"s25fl256s1"}, {"s25fl512s"},  {"s25sl12801"}, {"s25fl008k"},
-       {"s25fl064k"},
-       {"sst25vf040b"},{"sst25vf016b"},{"sst25vf032b"},{"sst25wf040"},
-       {"m25p40"},     {"m25p80"},     {"m25p16"},     {"m25p32"},
-       {"m25p64"},     {"m25p128"},
-       {"w25x80"},     {"w25x32"},     {"w25q32"},     {"w25q32dw"},
-       {"w25q80bl"},   {"w25q128"},    {"w25q256"},
-
-       /* Flashes that can't be detected using JEDEC */
-       {"m25p05-nonjedec"},    {"m25p10-nonjedec"},    {"m25p20-nonjedec"},
-       {"m25p40-nonjedec"},    {"m25p80-nonjedec"},    {"m25p16-nonjedec"},
-       {"m25p32-nonjedec"},    {"m25p64-nonjedec"},    {"m25p128-nonjedec"},
-
-       /* Everspin MRAMs (non-JEDEC) */
-       { "mr25h128" }, /* 128 Kib, 40 MHz */
-       { "mr25h256" }, /* 256 Kib, 40 MHz */
-       { "mr25h10" },  /*   1 Mib, 40 MHz */
-       { "mr25h40" },  /*   4 Mib, 40 MHz */
-
-       { },
-};
-MODULE_DEVICE_TABLE(spi, m25p_ids);
-
-static const struct of_device_id m25p_of_table[] = {
-       /*
-        * Generic compatibility for SPI NOR that can be identified by the
-        * JEDEC READ ID opcode (0x9F). Use this, if possible.
-        */
-       { .compatible = "jedec,spi-nor" },
-       {}
-};
-MODULE_DEVICE_TABLE(of, m25p_of_table);
-
-static struct spi_mem_driver m25p80_driver = {
-       .spidrv = {
-               .driver = {
-                       .name   = "m25p80",
-                       .of_match_table = m25p_of_table,
-               },
-               .id_table       = m25p_ids,
-       },
-       .probe  = m25p_probe,
-       .remove = m25p_remove,
-       .shutdown       = m25p_shutdown,
-
-       /* REVISIT: many of these chips have deep power-down modes, which
-        * should clearly be entered on suspend() to minimize power use.
-        * And also when they're otherwise idle...
-        */
-};
-
-module_spi_mem_driver(m25p80_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mike Lavender");
-MODULE_DESCRIPTION("MTD SPI driver for ST M25Pxx flash chips");
index c467286..931e5c2 100644 (file)
@@ -294,7 +294,7 @@ static int phram_param_call(const char *val, const struct kernel_param *kp)
 #endif
 }
 
-module_param_call(phram, phram_param_call, NULL, NULL, 000);
+module_param_call(phram, phram_param_call, NULL, NULL, 0200);
 MODULE_PARM_DESC(phram, "Memory region to map. \"phram=<name>,<start>,<length>\"");
 
 
index 3b89ab2..6597fc2 100644 (file)
@@ -135,7 +135,7 @@ static int pmc551_point(struct mtd_info *mtd, loff_t from, size_t len,
 static int pmc551_erase(struct mtd_info *mtd, struct erase_info *instr)
 {
        struct mypriv *priv = mtd->priv;
-       u32 soff_hi, soff_lo;   /* start address offset hi/lo */
+       u32 soff_hi;            /* start address offset hi */
        u32 eoff_hi, eoff_lo;   /* end address offset hi/lo */
        unsigned long end;
        u_char *ptr;
@@ -150,7 +150,6 @@ static int pmc551_erase(struct mtd_info *mtd, struct erase_info *instr)
        eoff_hi = end & ~(priv->asize - 1);
        soff_hi = instr->addr & ~(priv->asize - 1);
        eoff_lo = end & (priv->asize - 1);
-       soff_lo = instr->addr & (priv->asize - 1);
 
        pmc551_point(mtd, instr->addr, instr->len, &retlen,
                     (void **)&ptr, NULL);
@@ -225,7 +224,7 @@ static int pmc551_read(struct mtd_info *mtd, loff_t from, size_t len,
                        size_t * retlen, u_char * buf)
 {
        struct mypriv *priv = mtd->priv;
-       u32 soff_hi, soff_lo;   /* start address offset hi/lo */
+       u32 soff_hi;            /* start address offset hi */
        u32 eoff_hi, eoff_lo;   /* end address offset hi/lo */
        unsigned long end;
        u_char *ptr;
@@ -239,7 +238,6 @@ static int pmc551_read(struct mtd_info *mtd, loff_t from, size_t len,
        end = from + len - 1;
        soff_hi = from & ~(priv->asize - 1);
        eoff_hi = end & ~(priv->asize - 1);
-       soff_lo = from & (priv->asize - 1);
        eoff_lo = end & (priv->asize - 1);
 
        pmc551_point(mtd, from, len, retlen, (void **)&ptr, NULL);
@@ -282,7 +280,7 @@ static int pmc551_write(struct mtd_info *mtd, loff_t to, size_t len,
                        size_t * retlen, const u_char * buf)
 {
        struct mypriv *priv = mtd->priv;
-       u32 soff_hi, soff_lo;   /* start address offset hi/lo */
+       u32 soff_hi;            /* start address offset hi */
        u32 eoff_hi, eoff_lo;   /* end address offset hi/lo */
        unsigned long end;
        u_char *ptr;
@@ -296,7 +294,6 @@ static int pmc551_write(struct mtd_info *mtd, loff_t to, size_t len,
        end = to + len - 1;
        soff_hi = to & ~(priv->asize - 1);
        eoff_hi = end & ~(priv->asize - 1);
-       soff_lo = to & (priv->asize - 1);
        eoff_lo = end & (priv->asize - 1);
 
        pmc551_point(mtd, to, len, retlen, (void **)&ptr, NULL);
index 788d499..946ba80 100644 (file)
@@ -211,13 +211,12 @@ static int pismo_remove(struct i2c_client *client)
 static int pismo_probe(struct i2c_client *client,
                       const struct i2c_device_id *id)
 {
-       struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
        struct pismo_pdata *pdata = client->dev.platform_data;
        struct pismo_eeprom eeprom;
        struct pismo_data *pismo;
        int ret, i;
 
-       if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) {
+       if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
                dev_err(&client->dev, "functionality mismatch\n");
                return -EIO;
        }
index cebb346..7d96758 100644 (file)
@@ -68,8 +68,7 @@ static int pxa2xx_flash_probe(struct platform_device *pdev)
                       info->map.name);
                return -ENOMEM;
        }
-       info->map.cached =
-               ioremap_cached(info->map.phys, info->map.size);
+       info->map.cached = ioremap_cache(info->map.phys, info->map.size);
        if (!info->map.cached)
                printk(KERN_WARNING "Failed to ioremap cached %s\n",
                       info->map.name);
index 408615f..6cc7ecb 100644 (file)
@@ -335,6 +335,82 @@ static const struct device_type mtd_devtype = {
        .release        = mtd_release,
 };
 
+static int mtd_partid_show(struct seq_file *s, void *p)
+{
+       struct mtd_info *mtd = s->private;
+
+       seq_printf(s, "%s\n", mtd->dbg.partid);
+
+       return 0;
+}
+
+static int mtd_partid_debugfs_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, mtd_partid_show, inode->i_private);
+}
+
+static const struct file_operations mtd_partid_debug_fops = {
+       .open           = mtd_partid_debugfs_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static int mtd_partname_show(struct seq_file *s, void *p)
+{
+       struct mtd_info *mtd = s->private;
+
+       seq_printf(s, "%s\n", mtd->dbg.partname);
+
+       return 0;
+}
+
+static int mtd_partname_debugfs_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, mtd_partname_show, inode->i_private);
+}
+
+static const struct file_operations mtd_partname_debug_fops = {
+       .open           = mtd_partname_debugfs_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static struct dentry *dfs_dir_mtd;
+
+static void mtd_debugfs_populate(struct mtd_info *mtd)
+{
+       struct device *dev = &mtd->dev;
+       struct dentry *root, *dent;
+
+       if (IS_ERR_OR_NULL(dfs_dir_mtd))
+               return;
+
+       root = debugfs_create_dir(dev_name(dev), dfs_dir_mtd);
+       if (IS_ERR_OR_NULL(root)) {
+               dev_dbg(dev, "won't show data in debugfs\n");
+               return;
+       }
+
+       mtd->dbg.dfs_dir = root;
+
+       if (mtd->dbg.partid) {
+               dent = debugfs_create_file("partid", 0400, root, mtd,
+                                          &mtd_partid_debug_fops);
+               if (IS_ERR_OR_NULL(dent))
+                       dev_err(dev, "can't create debugfs entry for partid\n");
+       }
+
+       if (mtd->dbg.partname) {
+               dent = debugfs_create_file("partname", 0400, root, mtd,
+                                          &mtd_partname_debug_fops);
+               if (IS_ERR_OR_NULL(dent))
+                       dev_err(dev,
+                               "can't create debugfs entry for partname\n");
+       }
+}
+
 #ifndef CONFIG_MMU
 unsigned mtd_mmap_capabilities(struct mtd_info *mtd)
 {
@@ -512,8 +588,6 @@ static int mtd_nvmem_add(struct mtd_info *mtd)
        return 0;
 }
 
-static struct dentry *dfs_dir_mtd;
-
 /**
  *     add_mtd_device - register an MTD device
  *     @mtd: pointer to new MTD device info structure
@@ -607,13 +681,7 @@ int add_mtd_device(struct mtd_info *mtd)
        if (error)
                goto fail_nvmem_add;
 
-       if (!IS_ERR_OR_NULL(dfs_dir_mtd)) {
-               mtd->dbg.dfs_dir = debugfs_create_dir(dev_name(&mtd->dev), dfs_dir_mtd);
-               if (IS_ERR_OR_NULL(mtd->dbg.dfs_dir)) {
-                       pr_debug("mtd device %s won't show data in debugfs\n",
-                                dev_name(&mtd->dev));
-               }
-       }
+       mtd_debugfs_populate(mtd);
 
        device_create(&mtd_class, mtd->dev.parent, MTD_DEVT(i) + 1, NULL,
                      "mtd%dro", i);
index e082d63..77bd32a 100644 (file)
@@ -3880,6 +3880,9 @@ int onenand_scan(struct mtd_info *mtd, int maxchips)
                if (!this->oob_buf) {
                        if (this->options & ONENAND_PAGEBUF_ALLOC) {
                                this->options &= ~ONENAND_PAGEBUF_ALLOC;
+#ifdef CONFIG_MTD_ONENAND_VERIFY_WRITE
+                               kfree(this->verify_buf);
+#endif
                                kfree(this->page_buf);
                        }
                        return -ENOMEM;
index 5a711d8..e59de3f 100644 (file)
@@ -351,14 +351,6 @@ config MTD_NAND_SOCRATES
        help
          Enables support for NAND Flash chips wired onto Socrates board.
 
-config MTD_NAND_NUC900
-       tristate "Nuvoton NUC9xx/w90p910 NAND controller"
-       depends on ARCH_W90X900 || COMPILE_TEST
-       depends on HAS_IOMEM
-       help
-         This enables the driver for the NAND Flash on evaluation board based
-         on w90p910 / NUC9xx.
-
 source "drivers/mtd/nand/raw/ingenic/Kconfig"
 
 config MTD_NAND_FSMC
@@ -407,6 +399,12 @@ config MTD_NAND_MTK
          Enables support for NAND controller on MTK SoCs.
          This controller is found on mt27xx, mt81xx, mt65xx SoCs.
 
+config MTD_NAND_MXIC
+       tristate "Macronix raw NAND controller"
+       depends on HAS_IOMEM || COMPILE_TEST
+       help
+         This selects the Macronix raw NAND controller driver.
+
 config MTD_NAND_TEGRA
        tristate "NVIDIA Tegra NAND controller"
        depends on ARCH_TEGRA || COMPILE_TEST
index efaf5cd..a987219 100644 (file)
@@ -41,7 +41,6 @@ obj-$(CONFIG_MTD_NAND_SH_FLCTL)               += sh_flctl.o
 obj-$(CONFIG_MTD_NAND_MXC)             += mxc_nand.o
 obj-$(CONFIG_MTD_NAND_SOCRATES)                += socrates_nand.o
 obj-$(CONFIG_MTD_NAND_TXX9NDFMC)       += txx9ndfmc.o
-obj-$(CONFIG_MTD_NAND_NUC900)          += nuc900_nand.o
 obj-$(CONFIG_MTD_NAND_MPC5121_NFC)     += mpc5121_nfc.o
 obj-$(CONFIG_MTD_NAND_VF610_NFC)       += vf610_nfc.o
 obj-$(CONFIG_MTD_NAND_RICOH)           += r852.o
@@ -54,6 +53,7 @@ obj-$(CONFIG_MTD_NAND_HISI504)                += hisi504_nand.o
 obj-$(CONFIG_MTD_NAND_BRCMNAND)                += brcmnand/
 obj-$(CONFIG_MTD_NAND_QCOM)            += qcom_nandc.o
 obj-$(CONFIG_MTD_NAND_MTK)             += mtk_ecc.o mtk_nand.o
+obj-$(CONFIG_MTD_NAND_MXIC)            += mxic_nand.o
 obj-$(CONFIG_MTD_NAND_TEGRA)           += tegra_nand.o
 obj-$(CONFIG_MTD_NAND_STM32_FMC2)      += stm32_fmc2_nand.o
 obj-$(CONFIG_MTD_NAND_MESON)           += meson_nand.o
index 33310b8..15ef30b 100644 (file)
@@ -1792,6 +1792,7 @@ static int brcmstb_nand_verify_erased_page(struct mtd_info *mtd,
        int bitflips = 0;
        int page = addr >> chip->page_shift;
        int ret;
+       void *ecc_chunk;
 
        if (!buf)
                buf = nand_get_data_buf(chip);
@@ -1804,7 +1805,9 @@ static int brcmstb_nand_verify_erased_page(struct mtd_info *mtd,
                return ret;
 
        for (i = 0; i < chip->ecc.steps; i++, oob += sas) {
-               ret = nand_check_erased_ecc_chunk(buf, chip->ecc.size,
+               ecc_chunk = buf + chip->ecc.size * i;
+               ret = nand_check_erased_ecc_chunk(ecc_chunk,
+                                                 chip->ecc.size,
                                                  oob, sas, NULL, 0,
                                                  chip->ecc.strength);
                if (ret < 0)
index 66b7cff..e30feb5 100644 (file)
@@ -1,11 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-config MTD_NAND_JZ4740
-       tristate "JZ4740 NAND controller"
-       depends on MACH_JZ4740 || COMPILE_TEST
-       depends on HAS_IOMEM
-       help
-         Enables support for NAND Flash on JZ4740 SoC based boards.
-
 config MTD_NAND_JZ4780
        tristate "JZ4780 NAND controller"
        depends on JZ4780_NEMC
index b63d368..4c53f5e 100644 (file)
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_MTD_NAND_JZ4740) += jz4740_nand.o
 obj-$(CONFIG_MTD_NAND_JZ4780) += ingenic_nand.o
 
 ingenic_nand-y += ingenic_nand_drv.o
index d7b7c0f..49afebe 100644 (file)
@@ -310,7 +310,6 @@ static int ingenic_nand_init_chip(struct platform_device *pdev,
        struct device *dev = &pdev->dev;
        struct ingenic_nand *nand;
        struct ingenic_nand_cs *cs;
-       struct resource *res;
        struct nand_chip *chip;
        struct mtd_info *mtd;
        const __be32 *reg;
@@ -326,8 +325,7 @@ static int ingenic_nand_init_chip(struct platform_device *pdev,
 
        jz4780_nemc_set_type(nfc->dev, cs->bank, JZ4780_NEMC_BANK_NAND);
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, chipnr);
-       cs->base = devm_ioremap_resource(dev, res);
+       cs->base = devm_platform_ioremap_resource(pdev, chipnr);
        if (IS_ERR(cs->base))
                return PTR_ERR(cs->base);
 
@@ -418,6 +416,7 @@ static int ingenic_nand_init_chips(struct ingenic_nfc *nfc,
                ret = ingenic_nand_init_chip(pdev, nfc, np, i);
                if (ret) {
                        ingenic_nand_cleanup_chips(nfc);
+                       of_node_put(np);
                        return ret;
                }
 
diff --git a/drivers/mtd/nand/raw/ingenic/jz4740_nand.c b/drivers/mtd/nand/raw/ingenic/jz4740_nand.c
deleted file mode 100644 (file)
index acdf674..0000000
+++ /dev/null
@@ -1,536 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *  Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
- *  JZ4740 SoC NAND controller driver
- */
-
-#include <linux/io.h>
-#include <linux/ioport.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
-
-#include <linux/gpio/consumer.h>
-
-#include <linux/platform_data/jz4740/jz4740_nand.h>
-
-#define JZ_REG_NAND_CTRL       0x50
-#define JZ_REG_NAND_ECC_CTRL   0x100
-#define JZ_REG_NAND_DATA       0x104
-#define JZ_REG_NAND_PAR0       0x108
-#define JZ_REG_NAND_PAR1       0x10C
-#define JZ_REG_NAND_PAR2       0x110
-#define JZ_REG_NAND_IRQ_STAT   0x114
-#define JZ_REG_NAND_IRQ_CTRL   0x118
-#define JZ_REG_NAND_ERR(x)     (0x11C + ((x) << 2))
-
-#define JZ_NAND_ECC_CTRL_PAR_READY     BIT(4)
-#define JZ_NAND_ECC_CTRL_ENCODING      BIT(3)
-#define JZ_NAND_ECC_CTRL_RS            BIT(2)
-#define JZ_NAND_ECC_CTRL_RESET         BIT(1)
-#define JZ_NAND_ECC_CTRL_ENABLE                BIT(0)
-
-#define JZ_NAND_STATUS_ERR_COUNT       (BIT(31) | BIT(30) | BIT(29))
-#define JZ_NAND_STATUS_PAD_FINISH      BIT(4)
-#define JZ_NAND_STATUS_DEC_FINISH      BIT(3)
-#define JZ_NAND_STATUS_ENC_FINISH      BIT(2)
-#define JZ_NAND_STATUS_UNCOR_ERROR     BIT(1)
-#define JZ_NAND_STATUS_ERROR           BIT(0)
-
-#define JZ_NAND_CTRL_ENABLE_CHIP(x) BIT((x) << 1)
-#define JZ_NAND_CTRL_ASSERT_CHIP(x) BIT(((x) << 1) + 1)
-#define JZ_NAND_CTRL_ASSERT_CHIP_MASK 0xaa
-
-#define JZ_NAND_MEM_CMD_OFFSET 0x08000
-#define JZ_NAND_MEM_ADDR_OFFSET 0x10000
-
-struct jz_nand {
-       struct nand_chip chip;
-       void __iomem *base;
-       struct resource *mem;
-
-       unsigned char banks[JZ_NAND_NUM_BANKS];
-       void __iomem *bank_base[JZ_NAND_NUM_BANKS];
-       struct resource *bank_mem[JZ_NAND_NUM_BANKS];
-
-       int selected_bank;
-
-       struct gpio_desc *busy_gpio;
-       bool is_reading;
-};
-
-static inline struct jz_nand *mtd_to_jz_nand(struct mtd_info *mtd)
-{
-       return container_of(mtd_to_nand(mtd), struct jz_nand, chip);
-}
-
-static void jz_nand_select_chip(struct nand_chip *chip, int chipnr)
-{
-       struct jz_nand *nand = mtd_to_jz_nand(nand_to_mtd(chip));
-       uint32_t ctrl;
-       int banknr;
-
-       ctrl = readl(nand->base + JZ_REG_NAND_CTRL);
-       ctrl &= ~JZ_NAND_CTRL_ASSERT_CHIP_MASK;
-
-       if (chipnr == -1) {
-               banknr = -1;
-       } else {
-               banknr = nand->banks[chipnr] - 1;
-               chip->legacy.IO_ADDR_R = nand->bank_base[banknr];
-               chip->legacy.IO_ADDR_W = nand->bank_base[banknr];
-       }
-       writel(ctrl, nand->base + JZ_REG_NAND_CTRL);
-
-       nand->selected_bank = banknr;
-}
-
-static void jz_nand_cmd_ctrl(struct nand_chip *chip, int dat,
-                            unsigned int ctrl)
-{
-       struct jz_nand *nand = mtd_to_jz_nand(nand_to_mtd(chip));
-       uint32_t reg;
-       void __iomem *bank_base = nand->bank_base[nand->selected_bank];
-
-       BUG_ON(nand->selected_bank < 0);
-
-       if (ctrl & NAND_CTRL_CHANGE) {
-               BUG_ON((ctrl & NAND_ALE) && (ctrl & NAND_CLE));
-               if (ctrl & NAND_ALE)
-                       bank_base += JZ_NAND_MEM_ADDR_OFFSET;
-               else if (ctrl & NAND_CLE)
-                       bank_base += JZ_NAND_MEM_CMD_OFFSET;
-               chip->legacy.IO_ADDR_W = bank_base;
-
-               reg = readl(nand->base + JZ_REG_NAND_CTRL);
-               if (ctrl & NAND_NCE)
-                       reg |= JZ_NAND_CTRL_ASSERT_CHIP(nand->selected_bank);
-               else
-                       reg &= ~JZ_NAND_CTRL_ASSERT_CHIP(nand->selected_bank);
-               writel(reg, nand->base + JZ_REG_NAND_CTRL);
-       }
-       if (dat != NAND_CMD_NONE)
-               writeb(dat, chip->legacy.IO_ADDR_W);
-}
-
-static int jz_nand_dev_ready(struct nand_chip *chip)
-{
-       struct jz_nand *nand = mtd_to_jz_nand(nand_to_mtd(chip));
-       return gpiod_get_value_cansleep(nand->busy_gpio);
-}
-
-static void jz_nand_hwctl(struct nand_chip *chip, int mode)
-{
-       struct jz_nand *nand = mtd_to_jz_nand(nand_to_mtd(chip));
-       uint32_t reg;
-
-       writel(0, nand->base + JZ_REG_NAND_IRQ_STAT);
-       reg = readl(nand->base + JZ_REG_NAND_ECC_CTRL);
-
-       reg |= JZ_NAND_ECC_CTRL_RESET;
-       reg |= JZ_NAND_ECC_CTRL_ENABLE;
-       reg |= JZ_NAND_ECC_CTRL_RS;
-
-       switch (mode) {
-       case NAND_ECC_READ:
-               reg &= ~JZ_NAND_ECC_CTRL_ENCODING;
-               nand->is_reading = true;
-               break;
-       case NAND_ECC_WRITE:
-               reg |= JZ_NAND_ECC_CTRL_ENCODING;
-               nand->is_reading = false;
-               break;
-       default:
-               break;
-       }
-
-       writel(reg, nand->base + JZ_REG_NAND_ECC_CTRL);
-}
-
-static int jz_nand_calculate_ecc_rs(struct nand_chip *chip, const uint8_t *dat,
-                                   uint8_t *ecc_code)
-{
-       struct jz_nand *nand = mtd_to_jz_nand(nand_to_mtd(chip));
-       uint32_t reg, status;
-       int i;
-       unsigned int timeout = 1000;
-       static uint8_t empty_block_ecc[] = {0xcd, 0x9d, 0x90, 0x58, 0xf4,
-                                               0x8b, 0xff, 0xb7, 0x6f};
-
-       if (nand->is_reading)
-               return 0;
-
-       do {
-               status = readl(nand->base + JZ_REG_NAND_IRQ_STAT);
-       } while (!(status & JZ_NAND_STATUS_ENC_FINISH) && --timeout);
-
-       if (timeout == 0)
-           return -1;
-
-       reg = readl(nand->base + JZ_REG_NAND_ECC_CTRL);
-       reg &= ~JZ_NAND_ECC_CTRL_ENABLE;
-       writel(reg, nand->base + JZ_REG_NAND_ECC_CTRL);
-
-       for (i = 0; i < 9; ++i)
-               ecc_code[i] = readb(nand->base + JZ_REG_NAND_PAR0 + i);
-
-       /* If the written data is completly 0xff, we also want to write 0xff as
-        * ecc, otherwise we will get in trouble when doing subpage writes. */
-       if (memcmp(ecc_code, empty_block_ecc, 9) == 0)
-               memset(ecc_code, 0xff, 9);
-
-       return 0;
-}
-
-static void jz_nand_correct_data(uint8_t *dat, int index, int mask)
-{
-       int offset = index & 0x7;
-       uint16_t data;
-
-       index += (index >> 3);
-
-       data = dat[index];
-       data |= dat[index+1] << 8;
-
-       mask ^= (data >> offset) & 0x1ff;
-       data &= ~(0x1ff << offset);
-       data |= (mask << offset);
-
-       dat[index] = data & 0xff;
-       dat[index+1] = (data >> 8) & 0xff;
-}
-
-static int jz_nand_correct_ecc_rs(struct nand_chip *chip, uint8_t *dat,
-                                 uint8_t *read_ecc, uint8_t *calc_ecc)
-{
-       struct jz_nand *nand = mtd_to_jz_nand(nand_to_mtd(chip));
-       int i, error_count, index;
-       uint32_t reg, status, error;
-       unsigned int timeout = 1000;
-
-       for (i = 0; i < 9; ++i)
-               writeb(read_ecc[i], nand->base + JZ_REG_NAND_PAR0 + i);
-
-       reg = readl(nand->base + JZ_REG_NAND_ECC_CTRL);
-       reg |= JZ_NAND_ECC_CTRL_PAR_READY;
-       writel(reg, nand->base + JZ_REG_NAND_ECC_CTRL);
-
-       do {
-               status = readl(nand->base + JZ_REG_NAND_IRQ_STAT);
-       } while (!(status & JZ_NAND_STATUS_DEC_FINISH) && --timeout);
-
-       if (timeout == 0)
-               return -ETIMEDOUT;
-
-       reg = readl(nand->base + JZ_REG_NAND_ECC_CTRL);
-       reg &= ~JZ_NAND_ECC_CTRL_ENABLE;
-       writel(reg, nand->base + JZ_REG_NAND_ECC_CTRL);
-
-       if (status & JZ_NAND_STATUS_ERROR) {
-               if (status & JZ_NAND_STATUS_UNCOR_ERROR)
-                       return -EBADMSG;
-
-               error_count = (status & JZ_NAND_STATUS_ERR_COUNT) >> 29;
-
-               for (i = 0; i < error_count; ++i) {
-                       error = readl(nand->base + JZ_REG_NAND_ERR(i));
-                       index = ((error >> 16) & 0x1ff) - 1;
-                       if (index >= 0 && index < 512)
-                               jz_nand_correct_data(dat, index, error & 0x1ff);
-               }
-
-               return error_count;
-       }
-
-       return 0;
-}
-
-static int jz_nand_ioremap_resource(struct platform_device *pdev,
-       const char *name, struct resource **res, void __iomem **base)
-{
-       int ret;
-
-       *res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name);
-       if (!*res) {
-               dev_err(&pdev->dev, "Failed to get platform %s memory\n", name);
-               ret = -ENXIO;
-               goto err;
-       }
-
-       *res = request_mem_region((*res)->start, resource_size(*res),
-                               pdev->name);
-       if (!*res) {
-               dev_err(&pdev->dev, "Failed to request %s memory region\n", name);
-               ret = -EBUSY;
-               goto err;
-       }
-
-       *base = ioremap((*res)->start, resource_size(*res));
-       if (!*base) {
-               dev_err(&pdev->dev, "Failed to ioremap %s memory region\n", name);
-               ret = -EBUSY;
-               goto err_release_mem;
-       }
-
-       return 0;
-
-err_release_mem:
-       release_mem_region((*res)->start, resource_size(*res));
-err:
-       *res = NULL;
-       *base = NULL;
-       return ret;
-}
-
-static inline void jz_nand_iounmap_resource(struct resource *res,
-                                           void __iomem *base)
-{
-       iounmap(base);
-       release_mem_region(res->start, resource_size(res));
-}
-
-static int jz_nand_detect_bank(struct platform_device *pdev,
-                              struct jz_nand *nand, unsigned char bank,
-                              size_t chipnr, uint8_t *nand_maf_id,
-                              uint8_t *nand_dev_id)
-{
-       int ret;
-       char res_name[6];
-       uint32_t ctrl;
-       struct nand_chip *chip = &nand->chip;
-       struct mtd_info *mtd = nand_to_mtd(chip);
-       struct nand_memory_organization *memorg;
-       u8 id[2];
-
-       memorg = nanddev_get_memorg(&chip->base);
-
-       /* Request I/O resource. */
-       sprintf(res_name, "bank%d", bank);
-       ret = jz_nand_ioremap_resource(pdev, res_name,
-                                       &nand->bank_mem[bank - 1],
-                                       &nand->bank_base[bank - 1]);
-       if (ret)
-               return ret;
-
-       /* Enable chip in bank. */
-       ctrl = readl(nand->base + JZ_REG_NAND_CTRL);
-       ctrl |= JZ_NAND_CTRL_ENABLE_CHIP(bank - 1);
-       writel(ctrl, nand->base + JZ_REG_NAND_CTRL);
-
-       if (chipnr == 0) {
-               /* Detect first chip. */
-               ret = nand_scan(chip, 1);
-               if (ret)
-                       goto notfound_id;
-
-               /* Retrieve the IDs from the first chip. */
-               nand_select_target(chip, 0);
-               nand_reset_op(chip);
-               nand_readid_op(chip, 0, id, sizeof(id));
-               *nand_maf_id = id[0];
-               *nand_dev_id = id[1];
-       } else {
-               /* Detect additional chip. */
-               nand_select_target(chip, chipnr);
-               nand_reset_op(chip);
-               nand_readid_op(chip, 0, id, sizeof(id));
-               if (*nand_maf_id != id[0] || *nand_dev_id != id[1]) {
-                       ret = -ENODEV;
-                       goto notfound_id;
-               }
-
-               /* Update size of the MTD. */
-               memorg->ntargets++;
-               mtd->size += nanddev_target_size(&chip->base);
-       }
-
-       dev_info(&pdev->dev, "Found chip %zu on bank %i\n", chipnr, bank);
-       return 0;
-
-notfound_id:
-       dev_info(&pdev->dev, "No chip found on bank %i\n", bank);
-       ctrl &= ~(JZ_NAND_CTRL_ENABLE_CHIP(bank - 1));
-       writel(ctrl, nand->base + JZ_REG_NAND_CTRL);
-       jz_nand_iounmap_resource(nand->bank_mem[bank - 1],
-                                nand->bank_base[bank - 1]);
-       return ret;
-}
-
-static int jz_nand_attach_chip(struct nand_chip *chip)
-{
-       struct mtd_info *mtd = nand_to_mtd(chip);
-       struct device *dev = mtd->dev.parent;
-       struct jz_nand_platform_data *pdata = dev_get_platdata(dev);
-       struct platform_device *pdev = to_platform_device(dev);
-
-       if (pdata && pdata->ident_callback)
-               pdata->ident_callback(pdev, mtd, &pdata->partitions,
-                                     &pdata->num_partitions);
-
-       return 0;
-}
-
-static const struct nand_controller_ops jz_nand_controller_ops = {
-       .attach_chip = jz_nand_attach_chip,
-};
-
-static int jz_nand_probe(struct platform_device *pdev)
-{
-       int ret;
-       struct jz_nand *nand;
-       struct nand_chip *chip;
-       struct mtd_info *mtd;
-       struct jz_nand_platform_data *pdata = dev_get_platdata(&pdev->dev);
-       size_t chipnr, bank_idx;
-       uint8_t nand_maf_id = 0, nand_dev_id = 0;
-
-       nand = kzalloc(sizeof(*nand), GFP_KERNEL);
-       if (!nand)
-               return -ENOMEM;
-
-       ret = jz_nand_ioremap_resource(pdev, "mmio", &nand->mem, &nand->base);
-       if (ret)
-               goto err_free;
-
-       nand->busy_gpio = devm_gpiod_get_optional(&pdev->dev, "busy", GPIOD_IN);
-       if (IS_ERR(nand->busy_gpio)) {
-               ret = PTR_ERR(nand->busy_gpio);
-               dev_err(&pdev->dev, "Failed to request busy gpio %d\n",
-                   ret);
-               goto err_iounmap_mmio;
-       }
-
-       chip            = &nand->chip;
-       mtd             = nand_to_mtd(chip);
-       mtd->dev.parent = &pdev->dev;
-       mtd->name       = "jz4740-nand";
-
-       chip->ecc.hwctl         = jz_nand_hwctl;
-       chip->ecc.calculate     = jz_nand_calculate_ecc_rs;
-       chip->ecc.correct       = jz_nand_correct_ecc_rs;
-       chip->ecc.mode          = NAND_ECC_HW_OOB_FIRST;
-       chip->ecc.size          = 512;
-       chip->ecc.bytes         = 9;
-       chip->ecc.strength      = 4;
-       chip->ecc.options       = NAND_ECC_GENERIC_ERASED_CHECK;
-
-       chip->legacy.chip_delay = 50;
-       chip->legacy.cmd_ctrl = jz_nand_cmd_ctrl;
-       chip->legacy.select_chip = jz_nand_select_chip;
-       chip->legacy.dummy_controller.ops = &jz_nand_controller_ops;
-
-       if (nand->busy_gpio)
-               chip->legacy.dev_ready = jz_nand_dev_ready;
-
-       platform_set_drvdata(pdev, nand);
-
-       /* We are going to autodetect NAND chips in the banks specified in the
-        * platform data. Although nand_scan_ident() can detect multiple chips,
-        * it requires those chips to be numbered consecuitively, which is not
-        * always the case for external memory banks. And a fixed chip-to-bank
-        * mapping is not practical either, since for example Dingoo units
-        * produced at different times have NAND chips in different banks.
-        */
-       chipnr = 0;
-       for (bank_idx = 0; bank_idx < JZ_NAND_NUM_BANKS; bank_idx++) {
-               unsigned char bank;
-
-               /* If there is no platform data, look for NAND in bank 1,
-                * which is the most likely bank since it is the only one
-                * that can be booted from.
-                */
-               bank = pdata ? pdata->banks[bank_idx] : bank_idx ^ 1;
-               if (bank == 0)
-                       break;
-               if (bank > JZ_NAND_NUM_BANKS) {
-                       dev_warn(&pdev->dev,
-                               "Skipping non-existing bank: %d\n", bank);
-                       continue;
-               }
-               /* The detection routine will directly or indirectly call
-                * jz_nand_select_chip(), so nand->banks has to contain the
-                * bank we're checking.
-                */
-               nand->banks[chipnr] = bank;
-               if (jz_nand_detect_bank(pdev, nand, bank, chipnr,
-                                       &nand_maf_id, &nand_dev_id) == 0)
-                       chipnr++;
-               else
-                       nand->banks[chipnr] = 0;
-       }
-       if (chipnr == 0) {
-               dev_err(&pdev->dev, "No NAND chips found\n");
-               goto err_iounmap_mmio;
-       }
-
-       ret = mtd_device_register(mtd, pdata ? pdata->partitions : NULL,
-                                 pdata ? pdata->num_partitions : 0);
-
-       if (ret) {
-               dev_err(&pdev->dev, "Failed to add mtd device\n");
-               goto err_cleanup_nand;
-       }
-
-       dev_info(&pdev->dev, "Successfully registered JZ4740 NAND driver\n");
-
-       return 0;
-
-err_cleanup_nand:
-       nand_cleanup(chip);
-       while (chipnr--) {
-               unsigned char bank = nand->banks[chipnr];
-               jz_nand_iounmap_resource(nand->bank_mem[bank - 1],
-                                        nand->bank_base[bank - 1]);
-       }
-       writel(0, nand->base + JZ_REG_NAND_CTRL);
-err_iounmap_mmio:
-       jz_nand_iounmap_resource(nand->mem, nand->base);
-err_free:
-       kfree(nand);
-       return ret;
-}
-
-static int jz_nand_remove(struct platform_device *pdev)
-{
-       struct jz_nand *nand = platform_get_drvdata(pdev);
-       size_t i;
-
-       nand_release(&nand->chip);
-
-       /* Deassert and disable all chips */
-       writel(0, nand->base + JZ_REG_NAND_CTRL);
-
-       for (i = 0; i < JZ_NAND_NUM_BANKS; ++i) {
-               unsigned char bank = nand->banks[i];
-               if (bank != 0) {
-                       jz_nand_iounmap_resource(nand->bank_mem[bank - 1],
-                                                nand->bank_base[bank - 1]);
-               }
-       }
-
-       jz_nand_iounmap_resource(nand->mem, nand->base);
-
-       kfree(nand);
-
-       return 0;
-}
-
-static struct platform_driver jz_nand_driver = {
-       .probe = jz_nand_probe,
-       .remove = jz_nand_remove,
-       .driver = {
-               .name = "jz4740-nand",
-       },
-};
-
-module_platform_driver(jz_nand_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
-MODULE_DESCRIPTION("NAND controller driver for JZ4740 SoC");
-MODULE_ALIAS("platform:jz4740-nand");
index ea57ddc..1b82b68 100644 (file)
@@ -1320,6 +1320,7 @@ static int meson_nfc_nand_chips_init(struct device *dev,
                ret = meson_nfc_nand_chip_init(dev, nfc, nand_np);
                if (ret) {
                        meson_nfc_nand_chip_cleanup(nfc);
+                       of_node_put(nand_np);
                        return ret;
                }
        }
diff --git a/drivers/mtd/nand/raw/mxic_nand.c b/drivers/mtd/nand/raw/mxic_nand.c
new file mode 100644 (file)
index 0000000..9d49e6c
--- /dev/null
@@ -0,0 +1,582 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Macronix International Co., Ltd.
+ *
+ * Author:
+ *     Mason Yang <masonccyang@mxic.com.tw>
+ */
+
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/rawnand.h>
+#include <linux/mtd/nand_ecc.h>
+#include <linux/platform_device.h>
+
+#include "internals.h"
+
+#define HC_CFG                 0x0
+#define HC_CFG_IF_CFG(x)       ((x) << 27)
+#define HC_CFG_DUAL_SLAVE      BIT(31)
+#define HC_CFG_INDIVIDUAL      BIT(30)
+#define HC_CFG_NIO(x)          (((x) / 4) << 27)
+#define HC_CFG_TYPE(s, t)      ((t) << (23 + ((s) * 2)))
+#define HC_CFG_TYPE_SPI_NOR    0
+#define HC_CFG_TYPE_SPI_NAND   1
+#define HC_CFG_TYPE_SPI_RAM    2
+#define HC_CFG_TYPE_RAW_NAND   3
+#define HC_CFG_SLV_ACT(x)      ((x) << 21)
+#define HC_CFG_CLK_PH_EN       BIT(20)
+#define HC_CFG_CLK_POL_INV     BIT(19)
+#define HC_CFG_BIG_ENDIAN      BIT(18)
+#define HC_CFG_DATA_PASS       BIT(17)
+#define HC_CFG_IDLE_SIO_LVL(x) ((x) << 16)
+#define HC_CFG_MAN_START_EN    BIT(3)
+#define HC_CFG_MAN_START       BIT(2)
+#define HC_CFG_MAN_CS_EN       BIT(1)
+#define HC_CFG_MAN_CS_ASSERT   BIT(0)
+
+#define INT_STS                        0x4
+#define INT_STS_EN             0x8
+#define INT_SIG_EN             0xc
+#define INT_STS_ALL            GENMASK(31, 0)
+#define INT_RDY_PIN            BIT(26)
+#define INT_RDY_SR             BIT(25)
+#define INT_LNR_SUSP           BIT(24)
+#define INT_ECC_ERR            BIT(17)
+#define INT_CRC_ERR            BIT(16)
+#define INT_LWR_DIS            BIT(12)
+#define INT_LRD_DIS            BIT(11)
+#define INT_SDMA_INT           BIT(10)
+#define INT_DMA_FINISH         BIT(9)
+#define INT_RX_NOT_FULL                BIT(3)
+#define INT_RX_NOT_EMPTY       BIT(2)
+#define INT_TX_NOT_FULL                BIT(1)
+#define INT_TX_EMPTY           BIT(0)
+
+#define HC_EN                  0x10
+#define HC_EN_BIT              BIT(0)
+
+#define TXD(x)                 (0x14 + ((x) * 4))
+#define RXD                    0x24
+
+#define SS_CTRL(s)             (0x30 + ((s) * 4))
+#define LRD_CFG                        0x44
+#define LWR_CFG                        0x80
+#define RWW_CFG                        0x70
+#define OP_READ                        BIT(23)
+#define OP_DUMMY_CYC(x)                ((x) << 17)
+#define OP_ADDR_BYTES(x)       ((x) << 14)
+#define OP_CMD_BYTES(x)                (((x) - 1) << 13)
+#define OP_OCTA_CRC_EN         BIT(12)
+#define OP_DQS_EN              BIT(11)
+#define OP_ENHC_EN             BIT(10)
+#define OP_PREAMBLE_EN         BIT(9)
+#define OP_DATA_DDR            BIT(8)
+#define OP_DATA_BUSW(x)                ((x) << 6)
+#define OP_ADDR_DDR            BIT(5)
+#define OP_ADDR_BUSW(x)                ((x) << 3)
+#define OP_CMD_DDR             BIT(2)
+#define OP_CMD_BUSW(x)         (x)
+#define OP_BUSW_1              0
+#define OP_BUSW_2              1
+#define OP_BUSW_4              2
+#define OP_BUSW_8              3
+
+#define OCTA_CRC               0x38
+#define OCTA_CRC_IN_EN(s)      BIT(3 + ((s) * 16))
+#define OCTA_CRC_CHUNK(s, x)   ((fls((x) / 32)) << (1 + ((s) * 16)))
+#define OCTA_CRC_OUT_EN(s)     BIT(0 + ((s) * 16))
+
+#define ONFI_DIN_CNT(s)                (0x3c + (s))
+
+#define LRD_CTRL               0x48
+#define RWW_CTRL               0x74
+#define LWR_CTRL               0x84
+#define LMODE_EN               BIT(31)
+#define LMODE_SLV_ACT(x)       ((x) << 21)
+#define LMODE_CMD1(x)          ((x) << 8)
+#define LMODE_CMD0(x)          (x)
+
+#define LRD_ADDR               0x4c
+#define LWR_ADDR               0x88
+#define LRD_RANGE              0x50
+#define LWR_RANGE              0x8c
+
+#define AXI_SLV_ADDR           0x54
+
+#define DMAC_RD_CFG            0x58
+#define DMAC_WR_CFG            0x94
+#define DMAC_CFG_PERIPH_EN     BIT(31)
+#define DMAC_CFG_ALLFLUSH_EN   BIT(30)
+#define DMAC_CFG_LASTFLUSH_EN  BIT(29)
+#define DMAC_CFG_QE(x)         (((x) + 1) << 16)
+#define DMAC_CFG_BURST_LEN(x)  (((x) + 1) << 12)
+#define DMAC_CFG_BURST_SZ(x)   ((x) << 8)
+#define DMAC_CFG_DIR_READ      BIT(1)
+#define DMAC_CFG_START         BIT(0)
+
+#define DMAC_RD_CNT            0x5c
+#define DMAC_WR_CNT            0x98
+
+#define SDMA_ADDR              0x60
+
+#define DMAM_CFG               0x64
+#define DMAM_CFG_START         BIT(31)
+#define DMAM_CFG_CONT          BIT(30)
+#define DMAM_CFG_SDMA_GAP(x)   (fls((x) / 8192) << 2)
+#define DMAM_CFG_DIR_READ      BIT(1)
+#define DMAM_CFG_EN            BIT(0)
+
+#define DMAM_CNT               0x68
+
+#define LNR_TIMER_TH           0x6c
+
+#define RDM_CFG0               0x78
+#define RDM_CFG0_POLY(x)       (x)
+
+#define RDM_CFG1               0x7c
+#define RDM_CFG1_RDM_EN                BIT(31)
+#define RDM_CFG1_SEED(x)       (x)
+
+#define LWR_SUSP_CTRL          0x90
+#define LWR_SUSP_CTRL_EN       BIT(31)
+
+#define DMAS_CTRL              0x9c
+#define DMAS_CTRL_EN           BIT(31)
+#define DMAS_CTRL_DIR_READ     BIT(30)
+
+#define DATA_STROB             0xa0
+#define DATA_STROB_EDO_EN      BIT(2)
+#define DATA_STROB_INV_POL     BIT(1)
+#define DATA_STROB_DELAY_2CYC  BIT(0)
+
+#define IDLY_CODE(x)           (0xa4 + ((x) * 4))
+#define IDLY_CODE_VAL(x, v)    ((v) << (((x) % 4) * 8))
+
+#define GPIO                   0xc4
+#define GPIO_PT(x)             BIT(3 + ((x) * 16))
+#define GPIO_RESET(x)          BIT(2 + ((x) * 16))
+#define GPIO_HOLDB(x)          BIT(1 + ((x) * 16))
+#define GPIO_WPB(x)            BIT((x) * 16)
+
+#define HC_VER                 0xd0
+
+#define HW_TEST(x)             (0xe0 + ((x) * 4))
+
+#define MXIC_NFC_MAX_CLK_HZ    50000000
+#define IRQ_TIMEOUT            1000
+
+struct mxic_nand_ctlr {
+       struct clk *ps_clk; /* first clock enabled by mxic_nfc_clk_enable() */
+       struct clk *send_clk; /* rate is set in mxic_nfc_clk_setup() */
+       struct clk *send_dly_clk; /* rate and phase are set in mxic_nfc_clk_setup() */
+       struct completion complete; /* completed by mxic_nfc_isr() on INT_RDY_PIN */
+       void __iomem *regs; /* base address for all readl()/writel() register accesses */
+       struct nand_controller controller;
+       struct device *dev; /* device used for dev_err()/dev_warn() messages */
+       struct nand_chip chip;
+};
+
+/*
+ * Prepare and enable the controller clocks in the order ps_clk, send_clk,
+ * send_dly_clk.
+ *
+ * If one of them fails, the clocks that were already enabled are released
+ * again in reverse order and the error code is returned.  Returns 0 once
+ * all three clocks are enabled.
+ */
+static int mxic_nfc_clk_enable(struct mxic_nand_ctlr *nfc)
+{
+       int err;
+
+       err = clk_prepare_enable(nfc->ps_clk);
+       if (err)
+               return err;
+
+       err = clk_prepare_enable(nfc->send_clk);
+       if (err)
+               goto disable_ps_clk;
+
+       err = clk_prepare_enable(nfc->send_dly_clk);
+       if (err)
+               goto disable_send_clk;
+
+       return 0;
+
+disable_send_clk:
+       clk_disable_unprepare(nfc->send_clk);
+disable_ps_clk:
+       clk_disable_unprepare(nfc->ps_clk);
+
+       return err;
+}
+
+/*
+ * Disable and unprepare the three controller clocks, in the order
+ * send_clk, send_dly_clk, ps_clk.
+ */
+static void mxic_nfc_clk_disable(struct mxic_nand_ctlr *nfc)
+{
+       struct clk *const clks[] = {
+               nfc->send_clk,
+               nfc->send_dly_clk,
+               nfc->ps_clk,
+       };
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(clks); i++)
+               clk_disable_unprepare(clks[i]);
+}
+
+/*
+ * Write the same input delay code into all eight delay fields: fields 0-3
+ * are packed into the IDLY_CODE(0) register and fields 4-7 into
+ * IDLY_CODE(1).
+ */
+static void mxic_nfc_set_input_delay(struct mxic_nand_ctlr *nfc, u8 idly_code)
+{
+       u32 idly_lo, idly_hi;
+
+       idly_lo = IDLY_CODE_VAL(0, idly_code) | IDLY_CODE_VAL(1, idly_code) |
+                 IDLY_CODE_VAL(2, idly_code) | IDLY_CODE_VAL(3, idly_code);
+       idly_hi = IDLY_CODE_VAL(4, idly_code) | IDLY_CODE_VAL(5, idly_code) |
+                 IDLY_CODE_VAL(6, idly_code) | IDLY_CODE_VAL(7, idly_code);
+
+       writel(idly_lo, nfc->regs + IDLY_CODE(0));
+       writel(idly_hi, nfc->regs + IDLY_CODE(1));
+}
+
+/*
+ * Configure send_clk and send_dly_clk for @freq, program the input delay
+ * and set the phase of send_dly_clk.
+ *
+ * Returns 0 on success or the error reported by the clock framework.  A
+ * failing clk_set_rate() returns before the input delay is written.
+ */
+static int mxic_nfc_clk_setup(struct mxic_nand_ctlr *nfc, unsigned long freq)
+{
+       int err;
+
+       err = clk_set_rate(nfc->send_clk, freq);
+       if (err)
+               return err;
+
+       err = clk_set_rate(nfc->send_dly_clk, freq);
+       if (err)
+               return err;
+
+       /*
+        * The input delay is a constant in the range 0x0 ~ 0x1F, in units
+        * of 78 ps, which gives a maximum input delay of 2.418 ns.
+        */
+       mxic_nfc_set_input_delay(nfc, 0xf);
+
+       /*
+        * Phase degree = 360 * freq * output-delay, where output-delay is
+        * a constant 1 ns in the FPGA:
+        *
+        *   360 * freq * 1 ns = 360 * freq / 1000000000
+        *                     = 9 * freq / 25000000
+        */
+       return clk_set_phase(nfc->send_dly_clk, 9 * freq / 25000000);
+}
+
+/*
+ * Switch the controller clocks to @freq, capped at MXIC_NFC_MAX_CLK_HZ.
+ *
+ * The clocks are disabled, reconfigured and then enabled again.  Returns 0
+ * on success or a negative error code; when mxic_nfc_clk_setup() fails the
+ * function returns with the clocks still disabled.
+ */
+static int mxic_nfc_set_freq(struct mxic_nand_ctlr *nfc, unsigned long freq)
+{
+       unsigned long rate = freq;
+       int err;
+
+       if (rate > MXIC_NFC_MAX_CLK_HZ)
+               rate = MXIC_NFC_MAX_CLK_HZ;
+
+       mxic_nfc_clk_disable(nfc);
+
+       err = mxic_nfc_clk_setup(nfc, rate);
+       if (err)
+               return err;
+
+       return mxic_nfc_clk_enable(nfc);
+}
+
+/*
+ * Interrupt handler: signals nfc->complete when INT_RDY_PIN is set in
+ * INT_STS, and reports IRQ_NONE for any other status.
+ */
+static irqreturn_t mxic_nfc_isr(int irq, void *dev_id)
+{
+       struct mxic_nand_ctlr *nfc = dev_id;
+       u32 status = readl(nfc->regs + INT_STS);
+
+       if (!(status & INT_RDY_PIN))
+               return IRQ_NONE;
+
+       complete(&nfc->complete);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Put the host controller into its initial state: write the HC_CFG
+ * configuration word, enable all interrupt status bits, route only
+ * INT_RDY_PIN to the interrupt signal, and clear ONFI_DIN_CNT(0), LRD_CFG,
+ * LRD_CTRL and HC_EN.
+ */
+static void mxic_nfc_hw_init(struct mxic_nand_ctlr *nfc)
+{
+       void __iomem *base = nfc->regs;
+       u32 hc_cfg;
+
+       hc_cfg = HC_CFG_NIO(8) | HC_CFG_TYPE(1, HC_CFG_TYPE_RAW_NAND) |
+                HC_CFG_SLV_ACT(0) | HC_CFG_MAN_CS_EN |
+                HC_CFG_IDLE_SIO_LVL(1);
+
+       writel(hc_cfg, base + HC_CFG);
+       writel(INT_STS_ALL, base + INT_STS_EN);
+       writel(INT_RDY_PIN, base + INT_SIG_EN);
+       writel(0x0, base + ONFI_DIN_CNT(0));
+       writel(0, base + LRD_CFG);
+       writel(0, base + LRD_CTRL);
+       writel(0x0, base + HC_EN);
+}
+
+/*
+ * Assert the chip select manually: first set HC_CFG_MAN_CS_EN, then, in a
+ * second read-modify-write of HC_CFG, set HC_CFG_MAN_CS_ASSERT.
+ */
+static void mxic_nfc_cs_enable(struct mxic_nand_ctlr *nfc)
+{
+       void __iomem *hc_cfg_reg = nfc->regs + HC_CFG;
+       u32 hc_cfg;
+
+       hc_cfg = readl(hc_cfg_reg);
+       hc_cfg |= HC_CFG_MAN_CS_EN;
+       writel(hc_cfg, hc_cfg_reg);
+
+       hc_cfg = readl(hc_cfg_reg);
+       hc_cfg |= HC_CFG_MAN_CS_ASSERT;
+       writel(hc_cfg, hc_cfg_reg);
+}
+
+/* Clear HC_CFG_MAN_CS_ASSERT in HC_CFG, leaving the other bits untouched. */
+static void mxic_nfc_cs_disable(struct mxic_nand_ctlr *nfc)
+{
+       void __iomem *hc_cfg_reg = nfc->regs + HC_CFG;
+       u32 hc_cfg = readl(hc_cfg_reg);
+
+       hc_cfg &= ~HC_CFG_MAN_CS_ASSERT;
+       writel(hc_cfg, hc_cfg_reg);
+}
+
+/*
+ * Wait up to IRQ_TIMEOUT milliseconds for nfc->complete to be signalled.
+ *
+ * Returns 0 when the completion fired in time, otherwise logs an error
+ * and returns -ETIMEDOUT.
+ */
+static int mxic_nfc_wait_ready(struct nand_chip *chip)
+{
+       struct mxic_nand_ctlr *nfc = nand_get_controller_data(chip);
+       unsigned long timeout = msecs_to_jiffies(IRQ_TIMEOUT);
+
+       if (wait_for_completion_timeout(&nfc->complete, timeout))
+               return 0;
+
+       dev_err(nfc->dev, "nand device timeout\n");
+
+       return -ETIMEDOUT;
+}
+
+static int mxic_nfc_data_xfer(struct mxic_nand_ctlr *nfc, const void *txbuf,
+                             void *rxbuf, unsigned int len)
+{ /* Moves len bytes through TXD/RXD in chunks of up to 4; returns 0 or the first poll error. */
+       unsigned int pos = 0;
+
+       while (pos < len) {
+               unsigned int nbytes = len - pos;
+               u32 data = 0xffffffff; /* all-ones is written when txbuf is NULL */
+               u32 sts;
+               int ret;
+
+               if (nbytes > 4)
+                       nbytes = 4; /* one 32-bit register access per iteration */
+
+               if (txbuf)
+                       memcpy(&data, txbuf + pos, nbytes);
+
+               ret = readl_poll_timeout(nfc->regs + INT_STS, sts,
+                                        sts & INT_TX_EMPTY, 0, USEC_PER_SEC); /* wait for INT_TX_EMPTY before writing */
+               if (ret)
+                       return ret;
+
+               writel(data, nfc->regs + TXD(nbytes % 4)); /* TXD index is the chunk size modulo 4 */
+
+               ret = readl_poll_timeout(nfc->regs + INT_STS, sts,
+                                        sts & INT_TX_EMPTY, 0, USEC_PER_SEC); /* wait for INT_TX_EMPTY again after the write */
+               if (ret)
+                       return ret;
+
+               ret = readl_poll_timeout(nfc->regs + INT_STS, sts,
+                                        sts & INT_RX_NOT_EMPTY, 0,
+                                        USEC_PER_SEC); /* wait until INT_RX_NOT_EMPTY is set */
+               if (ret)
+                       return ret;
+
+               data = readl(nfc->regs + RXD); /* RXD is read even when rxbuf is NULL */
+               if (rxbuf) {
+                       data >>= (8 * (4 - nbytes)); /* drop the low-order (4 - nbytes) bytes before copying */
+                       memcpy(rxbuf + pos, &data, nbytes);
+               }
+               if (readl(nfc->regs + INT_STS) & INT_RX_NOT_EMPTY)
+                       dev_warn(nfc->dev, "RX FIFO not empty\n"); /* warning only; the transfer continues */
+
+               pos += nbytes;
+       }
+
+       return 0;
+}
+
+/*
+ * Execute a NAND operation by issuing its instructions one after another
+ * with the chip select asserted.
+ *
+ * @chip:       NAND chip the operation is addressed to
+ * @op:         operation to execute; op->instrs holds op->ninstrs entries
+ * @check_only: when true, nothing is sent to the hardware and 0 is
+ *              returned, since every instruction type is handled below
+ *
+ * Returns 0 on success.  Execution stops at the first failing instruction
+ * and its error code is returned, so that a later successful instruction
+ * cannot hide the failure.  The chip select is released in either case.
+ */
+static int mxic_nfc_exec_op(struct nand_chip *chip,
+                           const struct nand_operation *op, bool check_only)
+{
+       struct mxic_nand_ctlr *nfc = nand_get_controller_data(chip);
+       const struct nand_op_instr *instr = NULL;
+       int ret = 0;
+       unsigned int op_id;
+
+       /* A check-only request must not drive the NAND bus. */
+       if (check_only)
+               return 0;
+
+       mxic_nfc_cs_enable(nfc);
+       init_completion(&nfc->complete);
+       for (op_id = 0; op_id < op->ninstrs; op_id++) {
+               instr = &op->instrs[op_id];
+
+               switch (instr->type) {
+               case NAND_OP_CMD_INSTR:
+                       /* Toggle HC_EN off and on before sending the opcode. */
+                       writel(0, nfc->regs + HC_EN);
+                       writel(HC_EN_BIT, nfc->regs + HC_EN);
+                       writel(OP_CMD_BUSW(OP_BUSW_8) |  OP_DUMMY_CYC(0x3F) |
+                              OP_CMD_BYTES(0), nfc->regs + SS_CTRL(0));
+
+                       ret = mxic_nfc_data_xfer(nfc,
+                                                &instr->ctx.cmd.opcode,
+                                                NULL, 1);
+                       break;
+
+               case NAND_OP_ADDR_INSTR:
+                       writel(OP_ADDR_BUSW(OP_BUSW_8) | OP_DUMMY_CYC(0x3F) |
+                              OP_ADDR_BYTES(instr->ctx.addr.naddrs),
+                              nfc->regs + SS_CTRL(0));
+                       ret = mxic_nfc_data_xfer(nfc,
+                                                instr->ctx.addr.addrs, NULL,
+                                                instr->ctx.addr.naddrs);
+                       break;
+
+               case NAND_OP_DATA_IN_INSTR:
+                       writel(0x0, nfc->regs + ONFI_DIN_CNT(0));
+                       writel(OP_DATA_BUSW(OP_BUSW_8) | OP_DUMMY_CYC(0x3F) |
+                              OP_READ, nfc->regs + SS_CTRL(0));
+                       ret = mxic_nfc_data_xfer(nfc, NULL,
+                                                instr->ctx.data.buf.in,
+                                                instr->ctx.data.len);
+                       break;
+
+               case NAND_OP_DATA_OUT_INSTR:
+                       writel(instr->ctx.data.len,
+                              nfc->regs + ONFI_DIN_CNT(0));
+                       writel(OP_DATA_BUSW(OP_BUSW_8) | OP_DUMMY_CYC(0x3F),
+                              nfc->regs + SS_CTRL(0));
+                       ret = mxic_nfc_data_xfer(nfc,
+                                                instr->ctx.data.buf.out, NULL,
+                                                instr->ctx.data.len);
+                       break;
+
+               case NAND_OP_WAITRDY_INSTR:
+                       ret = mxic_nfc_wait_ready(chip);
+                       break;
+               }
+
+               /* Stop here so a later instruction cannot overwrite the error. */
+               if (ret)
+                       break;
+       }
+       mxic_nfc_cs_disable(nfc);
+
+       return ret;
+}
+
+/*
+ * Apply the SDR timings described by @conf to the controller.
+ *
+ * @chip:   NAND chip the timings apply to
+ * @chipnr: chip select, or NAND_DATA_IFACE_CHECK_ONLY to only validate @conf
+ * @conf:   data interface configuration to validate/apply
+ *
+ * Returns the error from nand_get_sdr_timings() when @conf does not carry
+ * SDR timings, 0 otherwise. A failure to change the clock rate is logged
+ * but not propagated to the caller.
+ */
+static int mxic_nfc_setup_data_interface(struct nand_chip *chip, int chipnr,
+                                        const struct nand_data_interface *conf)
+{
+       struct mxic_nand_ctlr *nfc = nand_get_controller_data(chip);
+       const struct nand_sdr_timings *sdr;
+       unsigned long freq;
+       int ret;
+
+       sdr = nand_get_sdr_timings(conf);
+       if (IS_ERR(sdr))
+               return PTR_ERR(sdr);
+
+       /* Validation only: the timings are usable, nothing to program. */
+       if (chipnr == NAND_DATA_IFACE_CHECK_ONLY)
+               return 0;
+
+       /*
+        * Derive the bus frequency from the minimum read cycle time.
+        * NOTE(review): assumes tRC_min is in picoseconds (so tRC_min / 1000
+        * is nanoseconds) and is never below 1000 - confirm against
+        * struct nand_sdr_timings.
+        */
+       freq = NSEC_PER_SEC / (sdr->tRC_min / 1000);
+
+       ret = mxic_nfc_set_freq(nfc, freq);
+       if (ret)
+               dev_err(nfc->dev, "set freq:%lu failed\n", freq);
+
+       /* Cycle times below 30000 additionally enable EDO data strobing. */
+       if (sdr->tRC_min < 30000)
+               writel(DATA_STROB_EDO_EN, nfc->regs + DATA_STROB);
+
+       return 0;
+}
+
+/*
+ * Controller hooks handed to the raw NAND core: generic operation execution
+ * and data interface (timing) setup.
+ */
+static const struct nand_controller_ops mxic_nand_controller_ops = {
+       .exec_op = mxic_nfc_exec_op,
+       .setup_data_interface = mxic_nfc_setup_data_interface,
+};
+
+/*
+ * Probe the Macronix NAND controller platform device.
+ *
+ * Allocates the controller state, looks up the "ps", "send" and "send_dly"
+ * clocks, maps the register window, wires the NAND chip to the controller,
+ * requests the interrupt, scans for one NAND chip and registers the
+ * resulting MTD device.
+ *
+ * Returns 0 on success or a negative error code. When MTD registration
+ * fails after a successful scan, the scan's resources are released with
+ * nand_cleanup() before the clocks are disabled.
+ */
+static int mxic_nfc_probe(struct platform_device *pdev)
+{
+       struct device_node *nand_np, *np = pdev->dev.of_node;
+       struct mtd_info *mtd;
+       struct mxic_nand_ctlr *nfc;
+       struct nand_chip *nand_chip;
+       int err;
+       int irq;
+
+       nfc = devm_kzalloc(&pdev->dev, sizeof(*nfc), GFP_KERNEL);
+       if (!nfc)
+               return -ENOMEM;
+
+       nfc->ps_clk = devm_clk_get(&pdev->dev, "ps");
+       if (IS_ERR(nfc->ps_clk))
+               return PTR_ERR(nfc->ps_clk);
+
+       nfc->send_clk = devm_clk_get(&pdev->dev, "send");
+       if (IS_ERR(nfc->send_clk))
+               return PTR_ERR(nfc->send_clk);
+
+       nfc->send_dly_clk = devm_clk_get(&pdev->dev, "send_dly");
+       if (IS_ERR(nfc->send_dly_clk))
+               return PTR_ERR(nfc->send_dly_clk);
+
+       nfc->regs = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(nfc->regs))
+               return PTR_ERR(nfc->regs);
+
+       nand_chip = &nfc->chip;
+       mtd = nand_to_mtd(nand_chip);
+       mtd->dev.parent = &pdev->dev;
+
+       /* The last child node visited ends up as the chip's flash node. */
+       for_each_child_of_node(np, nand_np)
+               nand_set_flash_node(nand_chip, nand_np);
+
+       nand_chip->priv = nfc;
+       nfc->dev = &pdev->dev;
+       nfc->controller.ops = &mxic_nand_controller_ops;
+       nand_controller_init(&nfc->controller);
+       nand_chip->controller = &nfc->controller;
+
+       irq = platform_get_irq(pdev, 0);
+       if (irq < 0) {
+               dev_err(&pdev->dev, "failed to retrieve irq\n");
+               return irq;
+       }
+
+       mxic_nfc_hw_init(nfc);
+
+       err = devm_request_irq(&pdev->dev, irq, mxic_nfc_isr,
+                              0, "mxic-nfc", nfc);
+       if (err)
+               goto fail;
+
+       err = nand_scan(nand_chip, 1);
+       if (err)
+               goto fail;
+
+       err = mtd_device_register(mtd, NULL, 0);
+       if (err)
+               goto cleanup_nand;
+
+       platform_set_drvdata(pdev, nfc);
+       return 0;
+
+cleanup_nand:
+       /* Undo nand_scan(); the MTD device was never registered. */
+       nand_cleanup(nand_chip);
+fail:
+       mxic_nfc_clk_disable(nfc);
+       return err;
+}
+
+/*
+ * Tear down the device: release the NAND chip, then disable the controller
+ * clocks. Always returns 0.
+ */
+static int mxic_nfc_remove(struct platform_device *pdev)
+{
+       struct mxic_nand_ctlr *ctlr;
+
+       ctlr = platform_get_drvdata(pdev);
+
+       nand_release(&ctlr->chip);
+       mxic_nfc_clk_disable(ctlr);
+
+       return 0;
+}
+
+/* Device tree compatible strings handled by this driver. */
+static const struct of_device_id mxic_nfc_of_ids[] = {
+       { .compatible = "mxic,multi-itfc-v009-nand-controller", },
+       {},
+};
+MODULE_DEVICE_TABLE(of, mxic_nfc_of_ids);
+
+/* Platform driver glue: probe/remove callbacks and the OF match table. */
+static struct platform_driver mxic_nfc_driver = {
+       .probe = mxic_nfc_probe,
+       .remove = mxic_nfc_remove,
+       .driver = {
+               .name = "mxic-nfc",
+               .of_match_table = mxic_nfc_of_ids,
+       },
+};
+module_platform_driver(mxic_nfc_driver);
+
+MODULE_AUTHOR("Mason Yang <masonccyang@mxic.com.tw>");
+MODULE_DESCRIPTION("Macronix raw NAND controller driver");
+MODULE_LICENSE("GPL v2");
index 91f046d..5c2c30a 100644 (file)
@@ -4112,7 +4112,7 @@ static int nand_write_oob(struct mtd_info *mtd, loff_t to,
                          struct mtd_oob_ops *ops)
 {
        struct nand_chip *chip = mtd_to_nand(mtd);
-       int ret = -ENOTSUPP;
+       int ret;
 
        ops->retlen = 0;
 
index 2ef15ef..96045d6 100644 (file)
@@ -1232,7 +1232,7 @@ static int nand_scan_bbt(struct nand_chip *this, struct nand_bbt_descr *bd)
        if (!td) {
                if ((res = nand_memory_bbt(this, bd))) {
                        pr_err("nand_bbt: can't scan flash and build the RAM-based BBT\n");
-                       goto err;
+                       goto err_free_bbt;
                }
                return 0;
        }
@@ -1245,7 +1245,7 @@ static int nand_scan_bbt(struct nand_chip *this, struct nand_bbt_descr *bd)
        buf = vmalloc(len);
        if (!buf) {
                res = -ENOMEM;
-               goto err;
+               goto err_free_bbt;
        }
 
        /* Is the bbt at a given page? */
@@ -1258,7 +1258,7 @@ static int nand_scan_bbt(struct nand_chip *this, struct nand_bbt_descr *bd)
 
        res = check_create(this, buf, bd);
        if (res)
-               goto err;
+               goto err_free_buf;
 
        /* Prevent the bbt regions from erasing / writing */
        mark_bbt_region(this, td);
@@ -1268,7 +1268,9 @@ static int nand_scan_bbt(struct nand_chip *this, struct nand_bbt_descr *bd)
        vfree(buf);
        return 0;
 
-err:
+err_free_buf:
+       vfree(buf);
+err_free_bbt:
        kfree(this->bbt);
        this->bbt = NULL;
        return res;
diff --git a/drivers/mtd/nand/raw/nuc900_nand.c b/drivers/mtd/nand/raw/nuc900_nand.c
deleted file mode 100644 (file)
index 13bf7b2..0000000
+++ /dev/null
@@ -1,304 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright © 2009 Nuvoton technology corporation.
- *
- * Wan ZongShun <mcuos.com@gmail.com>
- */
-
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/platform_device.h>
-#include <linux/delay.h>
-#include <linux/clk.h>
-#include <linux/err.h>
-
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
-
-#define REG_FMICSR     0x00
-#define REG_SMCSR      0xa0
-#define REG_SMISR      0xac
-#define REG_SMCMD      0xb0
-#define REG_SMADDR     0xb4
-#define REG_SMDATA     0xb8
-
-#define RESET_FMI      0x01
-#define NAND_EN                0x08
-#define READYBUSY      (0x01 << 18)
-
-#define SWRST          0x01
-#define PSIZE          (0x01 << 3)
-#define DMARWEN                (0x03 << 1)
-#define BUSWID         (0x01 << 4)
-#define ECC4EN         (0x01 << 5)
-#define WP             (0x01 << 24)
-#define NANDCS         (0x01 << 25)
-#define ENDADDR                (0x01 << 31)
-
-#define read_data_reg(dev)             \
-       __raw_readl((dev)->reg + REG_SMDATA)
-
-#define write_data_reg(dev, val)       \
-       __raw_writel((val), (dev)->reg + REG_SMDATA)
-
-#define write_cmd_reg(dev, val)                \
-       __raw_writel((val), (dev)->reg + REG_SMCMD)
-
-#define write_addr_reg(dev, val)       \
-       __raw_writel((val), (dev)->reg + REG_SMADDR)
-
-struct nuc900_nand {
-       struct nand_chip chip;
-       void __iomem *reg;
-       struct clk *clk;
-       spinlock_t lock;
-};
-
-static inline struct nuc900_nand *mtd_to_nuc900(struct mtd_info *mtd)
-{
-       return container_of(mtd_to_nand(mtd), struct nuc900_nand, chip);
-}
-
-static const struct mtd_partition partitions[] = {
-       {
-        .name = "NAND FS 0",
-        .offset = 0,
-        .size = 8 * 1024 * 1024
-       },
-       {
-        .name = "NAND FS 1",
-        .offset = MTDPART_OFS_APPEND,
-        .size = MTDPART_SIZ_FULL
-       }
-};
-
-static unsigned char nuc900_nand_read_byte(struct nand_chip *chip)
-{
-       unsigned char ret;
-       struct nuc900_nand *nand = mtd_to_nuc900(nand_to_mtd(chip));
-
-       ret = (unsigned char)read_data_reg(nand);
-
-       return ret;
-}
-
-static void nuc900_nand_read_buf(struct nand_chip *chip,
-                                unsigned char *buf, int len)
-{
-       int i;
-       struct nuc900_nand *nand = mtd_to_nuc900(nand_to_mtd(chip));
-
-       for (i = 0; i < len; i++)
-               buf[i] = (unsigned char)read_data_reg(nand);
-}
-
-static void nuc900_nand_write_buf(struct nand_chip *chip,
-                                 const unsigned char *buf, int len)
-{
-       int i;
-       struct nuc900_nand *nand = mtd_to_nuc900(nand_to_mtd(chip));
-
-       for (i = 0; i < len; i++)
-               write_data_reg(nand, buf[i]);
-}
-
-static int nuc900_check_rb(struct nuc900_nand *nand)
-{
-       unsigned int val;
-       spin_lock(&nand->lock);
-       val = __raw_readl(nand->reg + REG_SMISR);
-       val &= READYBUSY;
-       spin_unlock(&nand->lock);
-
-       return val;
-}
-
-static int nuc900_nand_devready(struct nand_chip *chip)
-{
-       struct nuc900_nand *nand = mtd_to_nuc900(nand_to_mtd(chip));
-       int ready;
-
-       ready = (nuc900_check_rb(nand)) ? 1 : 0;
-       return ready;
-}
-
-static void nuc900_nand_command_lp(struct nand_chip *chip,
-                                  unsigned int command,
-                                  int column, int page_addr)
-{
-       struct mtd_info *mtd = nand_to_mtd(chip);
-       struct nuc900_nand *nand = mtd_to_nuc900(mtd);
-
-       if (command == NAND_CMD_READOOB) {
-               column += mtd->writesize;
-               command = NAND_CMD_READ0;
-       }
-
-       write_cmd_reg(nand, command & 0xff);
-
-       if (column != -1 || page_addr != -1) {
-
-               if (column != -1) {
-                       if (chip->options & NAND_BUSWIDTH_16 &&
-                                       !nand_opcode_8bits(command))
-                               column >>= 1;
-                       write_addr_reg(nand, column);
-                       write_addr_reg(nand, column >> 8 | ENDADDR);
-               }
-               if (page_addr != -1) {
-                       write_addr_reg(nand, page_addr);
-
-                       if (chip->options & NAND_ROW_ADDR_3) {
-                               write_addr_reg(nand, page_addr >> 8);
-                               write_addr_reg(nand, page_addr >> 16 | ENDADDR);
-                       } else {
-                               write_addr_reg(nand, page_addr >> 8 | ENDADDR);
-                       }
-               }
-       }
-
-       switch (command) {
-       case NAND_CMD_CACHEDPROG:
-       case NAND_CMD_PAGEPROG:
-       case NAND_CMD_ERASE1:
-       case NAND_CMD_ERASE2:
-       case NAND_CMD_SEQIN:
-       case NAND_CMD_RNDIN:
-       case NAND_CMD_STATUS:
-               return;
-
-       case NAND_CMD_RESET:
-               if (chip->legacy.dev_ready)
-                       break;
-               udelay(chip->legacy.chip_delay);
-
-               write_cmd_reg(nand, NAND_CMD_STATUS);
-               write_cmd_reg(nand, command);
-
-               while (!nuc900_check_rb(nand))
-                       ;
-
-               return;
-
-       case NAND_CMD_RNDOUT:
-               write_cmd_reg(nand, NAND_CMD_RNDOUTSTART);
-               return;
-
-       case NAND_CMD_READ0:
-               write_cmd_reg(nand, NAND_CMD_READSTART);
-               /* fall through */
-
-       default:
-
-               if (!chip->legacy.dev_ready) {
-                       udelay(chip->legacy.chip_delay);
-                       return;
-               }
-       }
-
-       /* Apply this short delay always to ensure that we do wait tWB in
-        * any case on any machine. */
-       ndelay(100);
-
-       while (!chip->legacy.dev_ready(chip))
-               ;
-}
-
-
-static void nuc900_nand_enable(struct nuc900_nand *nand)
-{
-       unsigned int val;
-       spin_lock(&nand->lock);
-       __raw_writel(RESET_FMI, (nand->reg + REG_FMICSR));
-
-       val = __raw_readl(nand->reg + REG_FMICSR);
-
-       if (!(val & NAND_EN))
-               __raw_writel(val | NAND_EN, nand->reg + REG_FMICSR);
-
-       val = __raw_readl(nand->reg + REG_SMCSR);
-
-       val &= ~(SWRST|PSIZE|DMARWEN|BUSWID|ECC4EN|NANDCS);
-       val |= WP;
-
-       __raw_writel(val, nand->reg + REG_SMCSR);
-
-       spin_unlock(&nand->lock);
-}
-
-static int nuc900_nand_probe(struct platform_device *pdev)
-{
-       struct nuc900_nand *nuc900_nand;
-       struct nand_chip *chip;
-       struct mtd_info *mtd;
-       struct resource *res;
-
-       nuc900_nand = devm_kzalloc(&pdev->dev, sizeof(struct nuc900_nand),
-                                  GFP_KERNEL);
-       if (!nuc900_nand)
-               return -ENOMEM;
-       chip = &(nuc900_nand->chip);
-       mtd = nand_to_mtd(chip);
-
-       mtd->dev.parent         = &pdev->dev;
-       spin_lock_init(&nuc900_nand->lock);
-
-       nuc900_nand->clk = devm_clk_get(&pdev->dev, NULL);
-       if (IS_ERR(nuc900_nand->clk))
-               return -ENOENT;
-       clk_enable(nuc900_nand->clk);
-
-       chip->legacy.cmdfunc    = nuc900_nand_command_lp;
-       chip->legacy.dev_ready  = nuc900_nand_devready;
-       chip->legacy.read_byte  = nuc900_nand_read_byte;
-       chip->legacy.write_buf  = nuc900_nand_write_buf;
-       chip->legacy.read_buf   = nuc900_nand_read_buf;
-       chip->legacy.chip_delay = 50;
-       chip->options           = 0;
-       chip->ecc.mode          = NAND_ECC_SOFT;
-       chip->ecc.algo          = NAND_ECC_HAMMING;
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       nuc900_nand->reg = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(nuc900_nand->reg))
-               return PTR_ERR(nuc900_nand->reg);
-
-       nuc900_nand_enable(nuc900_nand);
-
-       if (nand_scan(chip, 1))
-               return -ENXIO;
-
-       mtd_device_register(mtd, partitions, ARRAY_SIZE(partitions));
-
-       platform_set_drvdata(pdev, nuc900_nand);
-
-       return 0;
-}
-
-static int nuc900_nand_remove(struct platform_device *pdev)
-{
-       struct nuc900_nand *nuc900_nand = platform_get_drvdata(pdev);
-
-       nand_release(&nuc900_nand->chip);
-       clk_disable(nuc900_nand->clk);
-
-       return 0;
-}
-
-static struct platform_driver nuc900_nand_driver = {
-       .probe          = nuc900_nand_probe,
-       .remove         = nuc900_nand_remove,
-       .driver         = {
-               .name   = "nuc900-fmi",
-       },
-};
-
-module_platform_driver(nuc900_nand_driver);
-
-MODULE_AUTHOR("Wan ZongShun <mcuos.com@gmail.com>");
-MODULE_DESCRIPTION("w90p910/NUC9xx nand driver!");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:nuc900-fmi");
index 8d881a2..6ec65f4 100644 (file)
@@ -1501,7 +1501,7 @@ static int omap_elm_correct_data(struct nand_chip *chip, u_char *data,
                }
 
                /* Update number of correctable errors */
-               stat += err_vec[i].error_count;
+               stat = max_t(unsigned int, stat, err_vec[i].error_count);
 
                /* Update page data with sector size */
                data += ecc->size;
index 30c51f7..c43cb4d 100644 (file)
@@ -116,7 +116,7 @@ static int oxnas_nand_probe(struct platform_device *pdev)
                                    GFP_KERNEL);
                if (!chip) {
                        err = -ENOMEM;
-                       goto err_clk_unprepare;
+                       goto err_release_child;
                }
 
                chip->controller = &oxnas->base;
@@ -137,12 +137,12 @@ static int oxnas_nand_probe(struct platform_device *pdev)
                /* Scan to find existence of the device */
                err = nand_scan(chip, 1);
                if (err)
-                       goto err_clk_unprepare;
+                       goto err_release_child;
 
                err = mtd_device_register(mtd, NULL, 0);
                if (err) {
                        nand_release(chip);
-                       goto err_clk_unprepare;
+                       goto err_release_child;
                }
 
                oxnas->chips[nchips] = chip;
@@ -159,6 +159,8 @@ static int oxnas_nand_probe(struct platform_device *pdev)
 
        return 0;
 
+err_release_child:
+       of_node_put(nand_np);
 err_clk_unprepare:
        clk_disable_unprepare(oxnas->clk);
        return err;
index dae0d23..7777425 100644 (file)
@@ -998,7 +998,7 @@ static void r852_shutdown(struct pci_dev *pci_dev)
 #ifdef CONFIG_PM_SLEEP
 static int r852_suspend(struct device *device)
 {
-       struct r852_device *dev = pci_get_drvdata(to_pci_dev(device));
+       struct r852_device *dev = dev_get_drvdata(device);
 
        if (dev->ctlreg & R852_CTL_CARDENABLE)
                return -EBUSY;
@@ -1019,7 +1019,7 @@ static int r852_suspend(struct device *device)
 
 static int r852_resume(struct device *device)
 {
-       struct r852_device *dev = pci_get_drvdata(to_pci_dev(device));
+       struct r852_device *dev = dev_get_drvdata(device);
 
        r852_disable_irqs(dev);
        r852_card_update_present(dev);
index e63acc0..8cc852d 100644 (file)
@@ -1427,21 +1427,16 @@ static void stm32_fmc2_calc_timings(struct nand_chip *chip,
        struct stm32_fmc2_timings *tims = &nand->timings;
        unsigned long hclk = clk_get_rate(fmc2->clk);
        unsigned long hclkp = NSEC_PER_SEC / (hclk / 1000);
-       int tar, tclr, thiz, twait, tset_mem, tset_att, thold_mem, thold_att;
-
-       tar = hclkp;
-       if (tar < sdrt->tAR_min)
-               tar = sdrt->tAR_min;
-       tims->tar = DIV_ROUND_UP(tar, hclkp) - 1;
-       if (tims->tar > FMC2_PCR_TIMING_MASK)
-               tims->tar = FMC2_PCR_TIMING_MASK;
-
-       tclr = hclkp;
-       if (tclr < sdrt->tCLR_min)
-               tclr = sdrt->tCLR_min;
-       tims->tclr = DIV_ROUND_UP(tclr, hclkp) - 1;
-       if (tims->tclr > FMC2_PCR_TIMING_MASK)
-               tims->tclr = FMC2_PCR_TIMING_MASK;
+       unsigned long timing, tar, tclr, thiz, twait;
+       unsigned long tset_mem, tset_att, thold_mem, thold_att;
+
+       tar = max_t(unsigned long, hclkp, sdrt->tAR_min);
+       timing = DIV_ROUND_UP(tar, hclkp) - 1;
+       tims->tar = min_t(unsigned long, timing, FMC2_PCR_TIMING_MASK);
+
+       tclr = max_t(unsigned long, hclkp, sdrt->tCLR_min);
+       timing = DIV_ROUND_UP(tclr, hclkp) - 1;
+       tims->tclr = min_t(unsigned long, timing, FMC2_PCR_TIMING_MASK);
 
        tims->thiz = FMC2_THIZ;
        thiz = (tims->thiz + 1) * hclkp;
@@ -1451,18 +1446,11 @@ static void stm32_fmc2_calc_timings(struct nand_chip *chip,
         * tWAIT > tWP
         * tWAIT > tREA + tIO
         */
-       twait = hclkp;
-       if (twait < sdrt->tRP_min)
-               twait = sdrt->tRP_min;
-       if (twait < sdrt->tWP_min)
-               twait = sdrt->tWP_min;
-       if (twait < sdrt->tREA_max + FMC2_TIO)
-               twait = sdrt->tREA_max + FMC2_TIO;
-       tims->twait = DIV_ROUND_UP(twait, hclkp);
-       if (tims->twait == 0)
-               tims->twait = 1;
-       else if (tims->twait > FMC2_PMEM_PATT_TIMING_MASK)
-               tims->twait = FMC2_PMEM_PATT_TIMING_MASK;
+       twait = max_t(unsigned long, hclkp, sdrt->tRP_min);
+       twait = max_t(unsigned long, twait, sdrt->tWP_min);
+       twait = max_t(unsigned long, twait, sdrt->tREA_max + FMC2_TIO);
+       timing = DIV_ROUND_UP(twait, hclkp);
+       tims->twait = clamp_val(timing, 1, FMC2_PMEM_PATT_TIMING_MASK);
 
        /*
         * tSETUP_MEM > tCS - tWAIT
@@ -1477,20 +1465,15 @@ static void stm32_fmc2_calc_timings(struct nand_chip *chip,
        if (twait > thiz && (sdrt->tDS_min > twait - thiz) &&
            (tset_mem < sdrt->tDS_min - (twait - thiz)))
                tset_mem = sdrt->tDS_min - (twait - thiz);
-       tims->tset_mem = DIV_ROUND_UP(tset_mem, hclkp);
-       if (tims->tset_mem == 0)
-               tims->tset_mem = 1;
-       else if (tims->tset_mem > FMC2_PMEM_PATT_TIMING_MASK)
-               tims->tset_mem = FMC2_PMEM_PATT_TIMING_MASK;
+       timing = DIV_ROUND_UP(tset_mem, hclkp);
+       tims->tset_mem = clamp_val(timing, 1, FMC2_PMEM_PATT_TIMING_MASK);
 
        /*
         * tHOLD_MEM > tCH
         * tHOLD_MEM > tREH - tSETUP_MEM
         * tHOLD_MEM > max(tRC, tWC) - (tSETUP_MEM + tWAIT)
         */
-       thold_mem = hclkp;
-       if (thold_mem < sdrt->tCH_min)
-               thold_mem = sdrt->tCH_min;
+       thold_mem = max_t(unsigned long, hclkp, sdrt->tCH_min);
        if (sdrt->tREH_min > tset_mem &&
            (thold_mem < sdrt->tREH_min - tset_mem))
                thold_mem = sdrt->tREH_min - tset_mem;
@@ -1500,11 +1483,8 @@ static void stm32_fmc2_calc_timings(struct nand_chip *chip,
        if ((sdrt->tWC_min > tset_mem + twait) &&
            (thold_mem < sdrt->tWC_min - (tset_mem + twait)))
                thold_mem = sdrt->tWC_min - (tset_mem + twait);
-       tims->thold_mem = DIV_ROUND_UP(thold_mem, hclkp);
-       if (tims->thold_mem == 0)
-               tims->thold_mem = 1;
-       else if (tims->thold_mem > FMC2_PMEM_PATT_TIMING_MASK)
-               tims->thold_mem = FMC2_PMEM_PATT_TIMING_MASK;
+       timing = DIV_ROUND_UP(thold_mem, hclkp);
+       tims->thold_mem = clamp_val(timing, 1, FMC2_PMEM_PATT_TIMING_MASK);
 
        /*
         * tSETUP_ATT > tCS - tWAIT
@@ -1526,11 +1506,8 @@ static void stm32_fmc2_calc_timings(struct nand_chip *chip,
        if (twait > thiz && (sdrt->tDS_min > twait - thiz) &&
            (tset_att < sdrt->tDS_min - (twait - thiz)))
                tset_att = sdrt->tDS_min - (twait - thiz);
-       tims->tset_att = DIV_ROUND_UP(tset_att, hclkp);
-       if (tims->tset_att == 0)
-               tims->tset_att = 1;
-       else if (tims->tset_att > FMC2_PMEM_PATT_TIMING_MASK)
-               tims->tset_att = FMC2_PMEM_PATT_TIMING_MASK;
+       timing = DIV_ROUND_UP(tset_att, hclkp);
+       tims->tset_att = clamp_val(timing, 1, FMC2_PMEM_PATT_TIMING_MASK);
 
        /*
         * tHOLD_ATT > tALH
@@ -1545,17 +1522,11 @@ static void stm32_fmc2_calc_timings(struct nand_chip *chip,
         * tHOLD_ATT > tRC - (tSETUP_ATT + tWAIT)
         * tHOLD_ATT > tWC - (tSETUP_ATT + tWAIT)
         */
-       thold_att = hclkp;
-       if (thold_att < sdrt->tALH_min)
-               thold_att = sdrt->tALH_min;
-       if (thold_att < sdrt->tCH_min)
-               thold_att = sdrt->tCH_min;
-       if (thold_att < sdrt->tCLH_min)
-               thold_att = sdrt->tCLH_min;
-       if (thold_att < sdrt->tCOH_min)
-               thold_att = sdrt->tCOH_min;
-       if (thold_att < sdrt->tDH_min)
-               thold_att = sdrt->tDH_min;
+       thold_att = max_t(unsigned long, hclkp, sdrt->tALH_min);
+       thold_att = max_t(unsigned long, thold_att, sdrt->tCH_min);
+       thold_att = max_t(unsigned long, thold_att, sdrt->tCLH_min);
+       thold_att = max_t(unsigned long, thold_att, sdrt->tCOH_min);
+       thold_att = max_t(unsigned long, thold_att, sdrt->tDH_min);
        if ((sdrt->tWB_max + FMC2_TIO + FMC2_TSYNC > tset_mem) &&
            (thold_att < sdrt->tWB_max + FMC2_TIO + FMC2_TSYNC - tset_mem))
                thold_att = sdrt->tWB_max + FMC2_TIO + FMC2_TSYNC - tset_mem;
@@ -1574,11 +1545,8 @@ static void stm32_fmc2_calc_timings(struct nand_chip *chip,
        if ((sdrt->tWC_min > tset_att + twait) &&
            (thold_att < sdrt->tWC_min - (tset_att + twait)))
                thold_att = sdrt->tWC_min - (tset_att + twait);
-       tims->thold_att = DIV_ROUND_UP(thold_att, hclkp);
-       if (tims->thold_att == 0)
-               tims->thold_att = 1;
-       else if (tims->thold_att > FMC2_PMEM_PATT_TIMING_MASK)
-               tims->thold_att = FMC2_PMEM_PATT_TIMING_MASK;
+       timing = DIV_ROUND_UP(thold_att, hclkp);
+       tims->thold_att = clamp_val(timing, 1, FMC2_PMEM_PATT_TIMING_MASK);
 }
 
 static int stm32_fmc2_setup_interface(struct nand_chip *chip, int chipnr,
index b3f2cab..9acf2de 100644 (file)
@@ -659,6 +659,7 @@ static int tango_nand_probe(struct platform_device *pdev)
                err = chip_init(&pdev->dev, np);
                if (err) {
                        tango_nand_remove(pdev);
+                       of_node_put(np);
                        return err;
                }
        }
index e4fe8c4..6b399a7 100644 (file)
@@ -862,6 +862,7 @@ static int vf610_nfc_probe(struct platform_device *pdev)
                                dev_err(nfc->dev,
                                        "Only one NAND chip supported!\n");
                                err = -EINVAL;
+                               of_node_put(child);
                                goto err_disable_clk;
                        }
 
index 176b75a..f98363c 100644 (file)
@@ -1,4 +1,72 @@
 # SPDX-License-Identifier: GPL-2.0-only
+config MTD_AR7_PARTS
+       tristate "TI AR7 partitioning parser"
+       help
+         TI AR7 partitioning parser support
+
+config MTD_BCM47XX_PARTS
+       tristate "BCM47XX partitioning parser"
+       depends on BCM47XX || ARCH_BCM_5301X
+       help
+         This provides a partition parser for devices based on BCM47xx
+         boards.
+
+config MTD_BCM63XX_PARTS
+       tristate "BCM63XX CFE partitioning parser"
+       depends on BCM63XX || BMIPS_GENERIC || COMPILE_TEST
+       select CRC32
+       select MTD_PARSER_IMAGETAG
+       help
+         This provides partition parsing for BCM63xx devices with CFE
+         bootloaders.
+
+config MTD_CMDLINE_PARTS
+       tristate "Command line partition table parsing"
+       depends on MTD
+       help
+         Allow generic configuration of the MTD partition tables via the kernel
+         command line. Multiple flash resources are supported for hardware where
+         different kinds of flash memory are available.
+
+         You will still need the parsing functions to be called by the driver
+         for your particular device. It won't happen automatically. The
+         SA1100 map driver (CONFIG_MTD_SA1100) has an option for this, for
+         example.
+
+         The format for the command line is as follows:
+
+         mtdparts=<mtddef>[;<mtddef>]
+         <mtddef>  := <mtd-id>:<partdef>[,<partdef>]
+         <partdef> := <size>[@offset][<name>][ro]
+         <mtd-id>  := unique id used in mapping driver/device
+         <size>    := standard linux memsize OR "-" to denote all
+         remaining space
+         <name>    := (NAME)
+
+         Due to the way Linux handles the command line, no spaces are
+         allowed in the partition definition, including mtd id's and partition
+         names.
+
+         Examples:
+
+         1 flash resource (mtd-id "sa1100"), with 1 single writable partition:
+         mtdparts=sa1100:-
+
+         Same flash, but 2 named partitions, the first one being read-only:
+         mtdparts=sa1100:256k(ARMboot)ro,-(root)
+
+         If unsure, say 'N'.
+
+config MTD_OF_PARTS
+       tristate "OpenFirmware (device tree) partitioning parser"
+       default y
+       depends on OF
+       help
+         This provides an Open Firmware device tree partition parser
+         which derives the partition map from the children of the
+         flash memory node, as described in
+         Documentation/devicetree/bindings/mtd/partition.txt.
+
 config MTD_PARSER_IMAGETAG
        tristate "Parser for BCM963XX Image Tag format partitions"
        depends on BCM63XX || BMIPS_GENERIC || COMPILE_TEST
index dd566bd..b0c5f62 100644 (file)
@@ -1,4 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_MTD_AR7_PARTS)            += ar7part.o
+obj-$(CONFIG_MTD_BCM47XX_PARTS)                += bcm47xxpart.o
+obj-$(CONFIG_MTD_BCM63XX_PARTS)                += bcm63xxpart.o
+obj-$(CONFIG_MTD_CMDLINE_PARTS)                += cmdlinepart.o
+obj-$(CONFIG_MTD_OF_PARTS)             += ofpart.o
 obj-$(CONFIG_MTD_PARSER_IMAGETAG)      += parser_imagetag.o
 obj-$(CONFIG_MTD_AFS_PARTS)            += afs.o
 obj-$(CONFIG_MTD_PARSER_TRX)           += parser_trx.o
index dfc47a4..4744bf9 100644 (file)
@@ -774,8 +774,11 @@ static int sm_init_zone(struct sm_ftl *ftl, int zone_num)
                        continue;
 
                /* Read the oob of first sector */
-               if (sm_read_sector(ftl, zone_num, block, 0, NULL, &oob))
+               if (sm_read_sector(ftl, zone_num, block, 0, NULL, &oob)) {
+                       kfifo_free(&zone->free_sectors);
+                       kfree(zone->lba_to_phys_table);
                        return -EIO;
+               }
 
                /* Test to see if block is erased. It is enough to test
                        first sector, because erase happens in one shot */
index 6de8327..f237fcd 100644 (file)
@@ -2,6 +2,8 @@
 menuconfig MTD_SPI_NOR
        tristate "SPI-NOR device support"
        depends on MTD
+       depends on MTD && SPI_MASTER
+       select SPI_MEM
        help
          This is the framework for the SPI NOR which can be used by the SPI
          device drivers and the SPI-NOR device driver.
index 19b8757..009c1da 100644 (file)
@@ -836,8 +836,10 @@ static int aspeed_smc_setup_flash(struct aspeed_smc_controller *controller,
                controller->chips[cs] = chip;
        }
 
-       if (ret)
+       if (ret) {
+               of_node_put(child);
                aspeed_smc_unregister(controller);
+       }
 
        return ret;
 }
index 67f15a1..7bef639 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -241,23 +242,13 @@ struct cqspi_driver_platdata {
 
 #define CQSPI_IRQ_STATUS_MASK          0x1FFFF
 
-static int cqspi_wait_for_bit(void __iomem *reg, const u32 mask, bool clear)
+static int cqspi_wait_for_bit(void __iomem *reg, const u32 mask, bool clr)
 {
-       unsigned long end = jiffies + msecs_to_jiffies(CQSPI_TIMEOUT_MS);
        u32 val;
 
-       while (1) {
-               val = readl(reg);
-               if (clear)
-                       val = ~val;
-               val &= mask;
-
-               if (val == mask)
-                       return 0;
-
-               if (time_after(jiffies, end))
-                       return -ETIMEDOUT;
-       }
+       return readl_relaxed_poll_timeout(reg, val,
+                                         (((clr ? ~val : val) & mask) == mask),
+                                         10, CQSPI_TIMEOUT_MS * 1000);
 }
 
 static bool cqspi_is_idle(struct cqspi_st *cqspi)
index dea43ea..6dac9dd 100644 (file)
@@ -401,6 +401,7 @@ static int hisi_spi_nor_register_all(struct hifmc_host *host)
 
                if (host->num_chip == HIFMC_MAX_CHIP_NUM) {
                        dev_warn(dev, "Flash device number exceeds the maximum chipselect number\n");
+                       of_node_put(np);
                        break;
                }
        }
index b83c4ab..3cda8e7 100644 (file)
@@ -65,6 +65,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = {
        { PCI_VDEVICE(INTEL, 0x19e0), (unsigned long)&bxt_info },
        { PCI_VDEVICE(INTEL, 0x34a4), (unsigned long)&bxt_info },
        { PCI_VDEVICE(INTEL, 0x4b24), (unsigned long)&bxt_info },
+       { PCI_VDEVICE(INTEL, 0xa0a4), (unsigned long)&bxt_info },
        { PCI_VDEVICE(INTEL, 0xa1a4), (unsigned long)&bxt_info },
        { PCI_VDEVICE(INTEL, 0xa224), (unsigned long)&bxt_info },
        { },
index 1ccf23f..43e55a2 100644 (file)
@@ -621,6 +621,8 @@ static ssize_t intel_spi_read(struct spi_nor *nor, loff_t from, size_t len,
        switch (nor->read_opcode) {
        case SPINOR_OP_READ:
        case SPINOR_OP_READ_FAST:
+       case SPINOR_OP_READ_4B:
+       case SPINOR_OP_READ_FAST_4B:
                break;
        default:
                return -EINVAL;
index 654bdc4..1d8621d 100644 (file)
@@ -19,6 +19,7 @@
 
 #include <linux/mtd/mtd.h>
 #include <linux/of_platform.h>
+#include <linux/sched/task_stack.h>
 #include <linux/spi/flash.h>
 #include <linux/mtd/spi-nor.h>
 
 #define SPI_NOR_MAX_ID_LEN     6
 #define SPI_NOR_MAX_ADDR_WIDTH 4
 
-struct spi_nor_read_command {
-       u8                      num_mode_clocks;
-       u8                      num_wait_states;
-       u8                      opcode;
-       enum spi_nor_protocol   proto;
-};
-
-struct spi_nor_pp_command {
-       u8                      opcode;
-       enum spi_nor_protocol   proto;
-};
-
-enum spi_nor_read_command_index {
-       SNOR_CMD_READ,
-       SNOR_CMD_READ_FAST,
-       SNOR_CMD_READ_1_1_1_DTR,
-
-       /* Dual SPI */
-       SNOR_CMD_READ_1_1_2,
-       SNOR_CMD_READ_1_2_2,
-       SNOR_CMD_READ_2_2_2,
-       SNOR_CMD_READ_1_2_2_DTR,
-
-       /* Quad SPI */
-       SNOR_CMD_READ_1_1_4,
-       SNOR_CMD_READ_1_4_4,
-       SNOR_CMD_READ_4_4_4,
-       SNOR_CMD_READ_1_4_4_DTR,
-
-       /* Octal SPI */
-       SNOR_CMD_READ_1_1_8,
-       SNOR_CMD_READ_1_8_8,
-       SNOR_CMD_READ_8_8_8,
-       SNOR_CMD_READ_1_8_8_DTR,
-
-       SNOR_CMD_READ_MAX
-};
-
-enum spi_nor_pp_command_index {
-       SNOR_CMD_PP,
-
-       /* Quad SPI */
-       SNOR_CMD_PP_1_1_4,
-       SNOR_CMD_PP_1_4_4,
-       SNOR_CMD_PP_4_4_4,
-
-       /* Octal SPI */
-       SNOR_CMD_PP_1_1_8,
-       SNOR_CMD_PP_1_8_8,
-       SNOR_CMD_PP_8_8_8,
-
-       SNOR_CMD_PP_MAX
-};
-
-struct spi_nor_flash_parameter {
-       u64                             size;
-       u32                             page_size;
-
-       struct spi_nor_hwcaps           hwcaps;
-       struct spi_nor_read_command     reads[SNOR_CMD_READ_MAX];
-       struct spi_nor_pp_command       page_programs[SNOR_CMD_PP_MAX];
-
-       int (*quad_enable)(struct spi_nor *nor);
-};
-
 struct sfdp_parameter_header {
        u8              id_lsb;
        u8              minor;
@@ -218,16 +154,26 @@ struct sfdp_bfpt {
 
 /**
  * struct spi_nor_fixups - SPI NOR fixup hooks
+ * @default_init: called after default flash parameters init. Used to tweak
+ *                flash parameters when information provided by the flash_info
+ *                table is incomplete or wrong.
  * @post_bfpt: called after the BFPT table has been parsed
+ * @post_sfdp: called after SFDP has been parsed (is also called for SPI NORs
+ *             that do not support RDSFDP). Typically used to tweak various
+ *             parameters that could not be extracted by other means (i.e.
+ *             when information provided by the SFDP/flash_info tables is
+ *             incomplete or wrong).
  *
  * Those hooks can be used to tweak the SPI NOR configuration when the SFDP
  * table is broken or not available.
  */
 struct spi_nor_fixups {
+       void (*default_init)(struct spi_nor *nor);
        int (*post_bfpt)(struct spi_nor *nor,
                         const struct sfdp_parameter_header *bfpt_header,
                         const struct sfdp_bfpt *bfpt,
                         struct spi_nor_flash_parameter *params);
+       void (*post_sfdp)(struct spi_nor *nor);
 };
 
 struct flash_info {
@@ -265,6 +211,14 @@ struct flash_info {
                                         * bit. Must be used with
                                         * SPI_NOR_HAS_LOCK.
                                         */
+#define SPI_NOR_XSR_RDY                BIT(10) /*
+                                        * S3AN flashes have specific opcode to
+                                        * read the status register.
+                                        * Flags SPI_NOR_XSR_RDY and SPI_S3AN
+                                        * use the same bit as one implies the
+                                        * other, but we will get rid of
+                                        * SPI_S3AN soon.
+                                        */
 #define        SPI_S3AN                BIT(10) /*
                                         * Xilinx Spartan 3AN In-System Flash
                                         * (MFR cannot be used for probing
@@ -282,12 +236,158 @@ struct flash_info {
 
        /* Part specific fixup hooks. */
        const struct spi_nor_fixups *fixups;
-
-       int     (*quad_enable)(struct spi_nor *nor);
 };
 
 #define JEDEC_MFR(info)        ((info)->id[0])
 
+/**
+ * spi_nor_spimem_xfer_data() - read/write data to flash's memory region
+ * @nor:        pointer to 'struct spi_nor'
+ * @op:         pointer to 'struct spi_mem_op' template for transfer; its data
+ *              buffer and length may be rewritten (bounce buffer, size limits)
+ *
+ * Return: number of bytes transferred (may be < requested), -errno otherwise
+ */
+static ssize_t spi_nor_spimem_xfer_data(struct spi_nor *nor,
+                                       struct spi_mem_op *op)
+{
+       bool usebouncebuf = false;
+       void *rdbuf = NULL;
+       const void *buf;
+       int ret;
+
+       if (op->data.dir == SPI_MEM_DATA_IN)
+               buf = op->data.buf.in;
+       else
+               buf = op->data.buf.out;
+
+       if (object_is_on_stack(buf) || !virt_addr_valid(buf))
+               usebouncebuf = true;
+
+       if (usebouncebuf) {
+               if (op->data.nbytes > nor->bouncebuf_size)
+                       op->data.nbytes = nor->bouncebuf_size;
+
+               if (op->data.dir == SPI_MEM_DATA_IN) {
+                       rdbuf = op->data.buf.in; /* caller's destination */
+                       op->data.buf.in = nor->bouncebuf;
+               } else {
+                       op->data.buf.out = nor->bouncebuf;
+                       memcpy(nor->bouncebuf, buf,
+                              op->data.nbytes);
+               }
+       }
+
+       ret = spi_mem_adjust_op_size(nor->spimem, op);
+       if (ret)
+               return ret;
+
+       ret = spi_mem_exec_op(nor->spimem, op);
+       if (ret)
+               return ret;
+
+       if (usebouncebuf && op->data.dir == SPI_MEM_DATA_IN)
+               memcpy(rdbuf, nor->bouncebuf, op->data.nbytes);
+
+       return op->data.nbytes;
+}
+
+/**
+ * spi_nor_spimem_read_data() - read data from flash's memory region via
+ *                              spi-mem
+ * @nor:        pointer to 'struct spi_nor'
+ * @from:       offset to read from
+ * @len:        number of bytes to read
+ * @buf:        pointer to dst buffer
+ *
+ * Return: number of bytes read (may be less than @len), -errno otherwise
+ */
+static ssize_t spi_nor_spimem_read_data(struct spi_nor *nor, loff_t from,
+                                       size_t len, u8 *buf)
+{
+       struct spi_mem_op op =
+               SPI_MEM_OP(SPI_MEM_OP_CMD(nor->read_opcode, 1),
+                          SPI_MEM_OP_ADDR(nor->addr_width, from, 1),
+                          SPI_MEM_OP_DUMMY(nor->read_dummy, 1),
+                          SPI_MEM_OP_DATA_IN(len, buf, 1));
+
+       /* Per-phase bus widths from read_proto; dummy reuses addr width. */
+       op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(nor->read_proto);
+       op.addr.buswidth = spi_nor_get_protocol_addr_nbits(nor->read_proto);
+       op.dummy.buswidth = op.addr.buswidth;
+       op.data.buswidth = spi_nor_get_protocol_data_nbits(nor->read_proto);
+
+       /* read_dummy is in clock cycles; convert it to bytes on the dummy bus */
+       op.dummy.nbytes = (nor->read_dummy * op.dummy.buswidth) / 8;
+
+       return spi_nor_spimem_xfer_data(nor, &op);
+}
+
+/**
+ * spi_nor_read_data() - read data from flash memory
+ * @nor:        pointer to 'struct spi_nor'
+ * @from:       offset to read from
+ * @len:        number of bytes to read
+ * @buf:        pointer to dst buffer
+ *
+ * Return: number of bytes read successfully, -errno otherwise
+ */
+static ssize_t spi_nor_read_data(struct spi_nor *nor, loff_t from, size_t len,
+                                u8 *buf)
+{
+       if (nor->spimem)
+               return spi_nor_spimem_read_data(nor, from, len, buf);
+
+       return nor->read(nor, from, len, buf);
+}
+
+/**
+ * spi_nor_spimem_write_data() - write data to flash memory via spi-mem
+ * @nor:        pointer to 'struct spi_nor'
+ * @to:         offset to write to
+ * @len:        number of bytes to write
+ * @buf:        pointer to src buffer
+ *
+ * No address bytes are sent for SPINOR_OP_AAI_WP when nor->sst_write_second.
+ * Return: number of bytes written (may be less than @len), -errno otherwise
+ */
+static ssize_t spi_nor_spimem_write_data(struct spi_nor *nor, loff_t to,
+                                        size_t len, const u8 *buf)
+{
+       struct spi_mem_op op =
+               SPI_MEM_OP(SPI_MEM_OP_CMD(nor->program_opcode, 1),
+                          SPI_MEM_OP_ADDR(nor->addr_width, to, 1),
+                          SPI_MEM_OP_NO_DUMMY,
+                          SPI_MEM_OP_DATA_OUT(len, buf, 1));
+
+       op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(nor->write_proto);
+       op.addr.buswidth = spi_nor_get_protocol_addr_nbits(nor->write_proto);
+       op.data.buswidth = spi_nor_get_protocol_data_nbits(nor->write_proto);
+
+       if (nor->program_opcode == SPINOR_OP_AAI_WP && nor->sst_write_second)
+               op.addr.nbytes = 0;     /* drop the address phase */
+
+       return spi_nor_spimem_xfer_data(nor, &op);
+}
+
+/**
+ * spi_nor_write_data() - write data to flash memory
+ * @nor:        pointer to 'struct spi_nor'
+ * @to:         offset to write to
+ * @len:        number of bytes to write
+ * @buf:        pointer to src buffer
+ *
+ * Return: number of bytes written successfully, -errno otherwise
+ */
+static ssize_t spi_nor_write_data(struct spi_nor *nor, loff_t to, size_t len,
+                                 const u8 *buf)
+{
+       if (nor->spimem)
+               return spi_nor_spimem_write_data(nor, to, len, buf);
+
+       return nor->write(nor, to, len, buf);
+}
+
 /*
  * Read the status register, returning its value in the location
  * Return the status register value.
@@ -296,15 +396,25 @@ struct flash_info {
 static int read_sr(struct spi_nor *nor)
 {
        int ret;
-       u8 val;
 
-       ret = nor->read_reg(nor, SPINOR_OP_RDSR, &val, 1);
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDSR, 1),
+                                  SPI_MEM_OP_NO_ADDR,
+                                  SPI_MEM_OP_NO_DUMMY,
+                                  SPI_MEM_OP_DATA_IN(1, nor->bouncebuf, 1));
+
+               ret = spi_mem_exec_op(nor->spimem, &op);
+       } else {
+               ret = nor->read_reg(nor, SPINOR_OP_RDSR, nor->bouncebuf, 1);
+       }
+
        if (ret < 0) {
                pr_err("error %d reading SR\n", (int) ret);
                return ret;
        }
 
-       return val;
+       return nor->bouncebuf[0];
 }
 
 /*
@@ -315,15 +425,25 @@ static int read_sr(struct spi_nor *nor)
 static int read_fsr(struct spi_nor *nor)
 {
        int ret;
-       u8 val;
 
-       ret = nor->read_reg(nor, SPINOR_OP_RDFSR, &val, 1);
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDFSR, 1),
+                                  SPI_MEM_OP_NO_ADDR,
+                                  SPI_MEM_OP_NO_DUMMY,
+                                  SPI_MEM_OP_DATA_IN(1, nor->bouncebuf, 1));
+
+               ret = spi_mem_exec_op(nor->spimem, &op);
+       } else {
+               ret = nor->read_reg(nor, SPINOR_OP_RDFSR, nor->bouncebuf, 1);
+       }
+
        if (ret < 0) {
                pr_err("error %d reading FSR\n", ret);
                return ret;
        }
 
-       return val;
+       return nor->bouncebuf[0];
 }
 
 /*
@@ -334,15 +454,25 @@ static int read_fsr(struct spi_nor *nor)
 static int read_cr(struct spi_nor *nor)
 {
        int ret;
-       u8 val;
 
-       ret = nor->read_reg(nor, SPINOR_OP_RDCR, &val, 1);
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDCR, 1),
+                                  SPI_MEM_OP_NO_ADDR,
+                                  SPI_MEM_OP_NO_DUMMY,
+                                  SPI_MEM_OP_DATA_IN(1, nor->bouncebuf, 1));
+
+               ret = spi_mem_exec_op(nor->spimem, &op);
+       } else {
+               ret = nor->read_reg(nor, SPINOR_OP_RDCR, nor->bouncebuf, 1);
+       }
+
        if (ret < 0) {
                dev_err(nor->dev, "error %d reading CR\n", ret);
                return ret;
        }
 
-       return val;
+       return nor->bouncebuf[0];
 }
 
 /*
@@ -351,8 +481,18 @@ static int read_cr(struct spi_nor *nor)
  */
 static int write_sr(struct spi_nor *nor, u8 val)
 {
-       nor->cmd_buf[0] = val;
-       return nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 1);
+       nor->bouncebuf[0] = val;
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR, 1),
+                                  SPI_MEM_OP_NO_ADDR,
+                                  SPI_MEM_OP_NO_DUMMY,
+                                  SPI_MEM_OP_DATA_IN(1, nor->bouncebuf, 1));
+
+               return spi_mem_exec_op(nor->spimem, &op);
+       }
+
+       return nor->write_reg(nor, SPINOR_OP_WRSR, nor->bouncebuf, 1);
 }
 
 /*
@@ -361,6 +501,16 @@ static int write_sr(struct spi_nor *nor, u8 val)
  */
 static int write_enable(struct spi_nor *nor)
 {
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WREN, 1),
+                                  SPI_MEM_OP_NO_ADDR,
+                                  SPI_MEM_OP_NO_DUMMY,
+                                  SPI_MEM_OP_NO_DATA);
+
+               return spi_mem_exec_op(nor->spimem, &op);
+       }
+
        return nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0);
 }
 
@@ -369,6 +519,16 @@ static int write_enable(struct spi_nor *nor)
  */
 static int write_disable(struct spi_nor *nor)
 {
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRDI, 1),
+                                  SPI_MEM_OP_NO_ADDR,
+                                  SPI_MEM_OP_NO_DUMMY,
+                                  SPI_MEM_OP_NO_DATA);
+
+               return spi_mem_exec_op(nor->spimem, &op);
+       }
+
        return nor->write_reg(nor, SPINOR_OP_WRDI, NULL, 0);
 }
 
@@ -439,24 +599,12 @@ static u8 spi_nor_convert_3to4_erase(u8 opcode)
 
 static void spi_nor_set_4byte_opcodes(struct spi_nor *nor)
 {
-       /* Do some manufacturer fixups first */
-       switch (JEDEC_MFR(nor->info)) {
-       case SNOR_MFR_SPANSION:
-               /* No small sector erase for 4-byte command set */
-               nor->erase_opcode = SPINOR_OP_SE;
-               nor->mtd.erasesize = nor->info->sector_size;
-               break;
-
-       default:
-               break;
-       }
-
        nor->read_opcode = spi_nor_convert_3to4_read(nor->read_opcode);
        nor->program_opcode = spi_nor_convert_3to4_program(nor->program_opcode);
        nor->erase_opcode = spi_nor_convert_3to4_erase(nor->erase_opcode);
 
        if (!spi_nor_has_uniform_erase(nor)) {
-               struct spi_nor_erase_map *map = &nor->erase_map;
+               struct spi_nor_erase_map *map = &nor->params.erase_map;
                struct spi_nor_erase_type *erase;
                int i;
 
@@ -468,63 +616,131 @@ static void spi_nor_set_4byte_opcodes(struct spi_nor *nor)
        }
 }
 
-/* Enable/disable 4-byte addressing mode. */
-static int set_4byte(struct spi_nor *nor, bool enable)
+static int macronix_set_4byte(struct spi_nor *nor, bool enable)
 {
-       int status;
-       bool need_wren = false;
-       u8 cmd;
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(enable ?
+                                                 SPINOR_OP_EN4B :
+                                                 SPINOR_OP_EX4B,
+                                                 1),
+                                 SPI_MEM_OP_NO_ADDR,
+                                 SPI_MEM_OP_NO_DUMMY,
+                                 SPI_MEM_OP_NO_DATA);
+
+               return spi_mem_exec_op(nor->spimem, &op);
+       }
 
-       switch (JEDEC_MFR(nor->info)) {
-       case SNOR_MFR_ST:
-       case SNOR_MFR_MICRON:
-               /* Some Micron need WREN command; all will accept it */
-               need_wren = true;
-               /* fall through */
-       case SNOR_MFR_MACRONIX:
-       case SNOR_MFR_WINBOND:
-               if (need_wren)
-                       write_enable(nor);
+       return nor->write_reg(nor, enable ? SPINOR_OP_EN4B : SPINOR_OP_EX4B,
+                             NULL, 0);
+}
 
-               cmd = enable ? SPINOR_OP_EN4B : SPINOR_OP_EX4B;
-               status = nor->write_reg(nor, cmd, NULL, 0);
-               if (need_wren)
-                       write_disable(nor);
+static int st_micron_set_4byte(struct spi_nor *nor, bool enable)
+{
+       int ret;
 
-               if (!status && !enable &&
-                   JEDEC_MFR(nor->info) == SNOR_MFR_WINBOND) {
-                       /*
-                        * On Winbond W25Q256FV, leaving 4byte mode causes
-                        * the Extended Address Register to be set to 1, so all
-                        * 3-byte-address reads come from the second 16M.
-                        * We must clear the register to enable normal behavior.
-                        */
-                       write_enable(nor);
-                       nor->cmd_buf[0] = 0;
-                       nor->write_reg(nor, SPINOR_OP_WREAR, nor->cmd_buf, 1);
-                       write_disable(nor);
-               }
+       write_enable(nor);
+       ret = macronix_set_4byte(nor, enable);
+       write_disable(nor);
 
-               return status;
-       default:
-               /* Spansion style */
-               nor->cmd_buf[0] = enable << 7;
-               return nor->write_reg(nor, SPINOR_OP_BRWR, nor->cmd_buf, 1);
+       return ret;
+}
+
+static int spansion_set_4byte(struct spi_nor *nor, bool enable)
+{
+       nor->bouncebuf[0] = enable << 7;
+
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_BRWR, 1),
+                                  SPI_MEM_OP_NO_ADDR,
+                                  SPI_MEM_OP_NO_DUMMY,
+                                  SPI_MEM_OP_DATA_OUT(1, nor->bouncebuf, 1));
+
+               return spi_mem_exec_op(nor->spimem, &op);
+       }
+
+       return nor->write_reg(nor, SPINOR_OP_BRWR, nor->bouncebuf, 1);
+}
+
+static int spi_nor_write_ear(struct spi_nor *nor, u8 ear)
+{
+       nor->bouncebuf[0] = ear;
+
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WREAR, 1),
+                                  SPI_MEM_OP_NO_ADDR,
+                                  SPI_MEM_OP_NO_DUMMY,
+                                  SPI_MEM_OP_DATA_OUT(1, nor->bouncebuf, 1));
+
+               return spi_mem_exec_op(nor->spimem, &op);
+       }
+
+       return nor->write_reg(nor, SPINOR_OP_WREAR, nor->bouncebuf, 1);
+}
+
+static int winbond_set_4byte(struct spi_nor *nor, bool enable)
+{
+       int ret;
+
+       ret = macronix_set_4byte(nor, enable);
+       if (ret || enable)
+               return ret;
+
+       /*
+        * On Winbond W25Q256FV, leaving 4byte mode causes the Extended Address
+        * Register to be set to 1, so all 3-byte-address reads come from the
+        * second 16M. We must clear the register to enable normal behavior.
+        */
+       write_enable(nor);
+       ret = spi_nor_write_ear(nor, 0);
+       write_disable(nor);
+
+       return ret;
+}
+
+static int spi_nor_xread_sr(struct spi_nor *nor, u8 *sr)
+{
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_XRDSR, 1),
+                                  SPI_MEM_OP_NO_ADDR,
+                                  SPI_MEM_OP_NO_DUMMY,
+                                  SPI_MEM_OP_DATA_IN(1, sr, 1));
+
+               return spi_mem_exec_op(nor->spimem, &op);
        }
+
+       return nor->read_reg(nor, SPINOR_OP_XRDSR, sr, 1);
 }
 
 static int s3an_sr_ready(struct spi_nor *nor)
 {
        int ret;
-       u8 val;
 
-       ret = nor->read_reg(nor, SPINOR_OP_XRDSR, &val, 1);
+       ret = spi_nor_xread_sr(nor, nor->bouncebuf);
        if (ret < 0) {
                dev_err(nor->dev, "error %d reading XRDSR\n", (int) ret);
                return ret;
        }
 
-       return !!(val & XSR_RDY);
+       return !!(nor->bouncebuf[0] & XSR_RDY);
+}
+
+static int spi_nor_clear_sr(struct spi_nor *nor)
+{
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_CLSR, 1),
+                                  SPI_MEM_OP_NO_ADDR,
+                                  SPI_MEM_OP_NO_DUMMY,
+                                  SPI_MEM_OP_NO_DATA);
+
+               return spi_mem_exec_op(nor->spimem, &op);
+       }
+
+       return nor->write_reg(nor, SPINOR_OP_CLSR, NULL, 0);
 }
 
 static int spi_nor_sr_ready(struct spi_nor *nor)
@@ -539,13 +755,28 @@ static int spi_nor_sr_ready(struct spi_nor *nor)
                else
                        dev_err(nor->dev, "Programming Error occurred\n");
 
-               nor->write_reg(nor, SPINOR_OP_CLSR, NULL, 0);
+               spi_nor_clear_sr(nor);
                return -EIO;
        }
 
        return !(sr & SR_WIP);
 }
 
+static int spi_nor_clear_fsr(struct spi_nor *nor)
+{
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_CLFSR, 1),
+                                  SPI_MEM_OP_NO_ADDR,
+                                  SPI_MEM_OP_NO_DUMMY,
+                                  SPI_MEM_OP_NO_DATA);
+
+               return spi_mem_exec_op(nor->spimem, &op);
+       }
+
+       return nor->write_reg(nor, SPINOR_OP_CLFSR, NULL, 0);
+}
+
 static int spi_nor_fsr_ready(struct spi_nor *nor)
 {
        int fsr = read_fsr(nor);
@@ -562,7 +793,7 @@ static int spi_nor_fsr_ready(struct spi_nor *nor)
                        dev_err(nor->dev,
                        "Attempted to modify a protected sector.\n");
 
-               nor->write_reg(nor, SPINOR_OP_CLFSR, NULL, 0);
+               spi_nor_clear_fsr(nor);
                return -EIO;
        }
 
@@ -630,6 +861,16 @@ static int erase_chip(struct spi_nor *nor)
 {
        dev_dbg(nor->dev, " %lldKiB\n", (long long)(nor->mtd.size >> 10));
 
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_CHIP_ERASE, 1),
+                                  SPI_MEM_OP_NO_ADDR,
+                                  SPI_MEM_OP_NO_DUMMY,
+                                  SPI_MEM_OP_NO_DATA);
+
+               return spi_mem_exec_op(nor->spimem, &op);
+       }
+
        return nor->write_reg(nor, SPINOR_OP_CHIP_ERASE, NULL, 0);
 }
 
@@ -666,10 +907,9 @@ static void spi_nor_unlock_and_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
  * Addr can safely be unsigned int, the biggest S3AN device is smaller than
  * 4 MiB.
  */
-static loff_t spi_nor_s3an_addr_convert(struct spi_nor *nor, unsigned int addr)
+static u32 s3an_convert_addr(struct spi_nor *nor, u32 addr)
 {
-       unsigned int offset;
-       unsigned int page;
+       u32 offset, page;
 
        offset = addr % nor->page_size;
        page = addr / nor->page_size;
@@ -678,30 +918,47 @@ static loff_t spi_nor_s3an_addr_convert(struct spi_nor *nor, unsigned int addr)
        return page | offset;
 }
 
+static u32 spi_nor_convert_addr(struct spi_nor *nor, loff_t addr)
+{
+       if (!nor->params.convert_addr)
+               return addr;    /* no hook: pass through, narrowed to u32 */
+
+       return nor->params.convert_addr(nor, addr);
+}
+
 /*
  * Initiate the erasure of a single sector
  */
 static int spi_nor_erase_sector(struct spi_nor *nor, u32 addr)
 {
-       u8 buf[SPI_NOR_MAX_ADDR_WIDTH];
        int i;
 
-       if (nor->flags & SNOR_F_S3AN_ADDR_DEFAULT)
-               addr = spi_nor_s3an_addr_convert(nor, addr);
+       addr = spi_nor_convert_addr(nor, addr);
 
        if (nor->erase)
                return nor->erase(nor, addr);
 
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(nor->erase_opcode, 1),
+                                  SPI_MEM_OP_ADDR(nor->addr_width, addr, 1),
+                                  SPI_MEM_OP_NO_DUMMY,
+                                  SPI_MEM_OP_NO_DATA);
+
+               return spi_mem_exec_op(nor->spimem, &op);
+       }
+
        /*
         * Default implementation, if driver doesn't have a specialized HW
         * control
         */
        for (i = nor->addr_width - 1; i >= 0; i--) {
-               buf[i] = addr & 0xff;
+               nor->bouncebuf[i] = addr & 0xff;
                addr >>= 8;
        }
 
-       return nor->write_reg(nor, nor->erase_opcode, buf, nor->addr_width);
+       return nor->write_reg(nor, nor->erase_opcode, nor->bouncebuf,
+                             nor->addr_width);
 }
 
 /**
@@ -876,7 +1133,7 @@ static int spi_nor_init_erase_cmd_list(struct spi_nor *nor,
                                       struct list_head *erase_list,
                                       u64 addr, u32 len)
 {
-       const struct spi_nor_erase_map *map = &nor->erase_map;
+       const struct spi_nor_erase_map *map = &nor->params.erase_map;
        const struct spi_nor_erase_type *erase, *prev_erase = NULL;
        struct spi_nor_erase_region *region;
        struct spi_nor_erase_command *cmd = NULL;
@@ -1349,6 +1606,12 @@ static int stm_is_locked(struct spi_nor *nor, loff_t ofs, uint64_t len)
        return stm_is_locked_sr(nor, ofs, len, status);
 }
 
+static const struct spi_nor_locking_ops stm_locking_ops = {
+       .lock = stm_lock,
+       .unlock = stm_unlock,
+       .is_locked = stm_is_locked,
+};
+
 static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 {
        struct spi_nor *nor = mtd_to_spi_nor(mtd);
@@ -1358,7 +1621,7 @@ static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
        if (ret)
                return ret;
 
-       ret = nor->flash_lock(nor, ofs, len);
+       ret = nor->params.locking_ops->lock(nor, ofs, len);
 
        spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_UNLOCK);
        return ret;
@@ -1373,7 +1636,7 @@ static int spi_nor_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
        if (ret)
                return ret;
 
-       ret = nor->flash_unlock(nor, ofs, len);
+       ret = nor->params.locking_ops->unlock(nor, ofs, len);
 
        spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_LOCK);
        return ret;
@@ -1388,7 +1651,7 @@ static int spi_nor_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
        if (ret)
                return ret;
 
-       ret = nor->flash_is_locked(nor, ofs, len);
+       ret = nor->params.locking_ops->is_locked(nor, ofs, len);
 
        spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_LOCK);
        return ret;
@@ -1406,7 +1669,18 @@ static int write_sr_cr(struct spi_nor *nor, u8 *sr_cr)
 
        write_enable(nor);
 
-       ret = nor->write_reg(nor, SPINOR_OP_WRSR, sr_cr, 2);
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR, 1),
+                                  SPI_MEM_OP_NO_ADDR,
+                                  SPI_MEM_OP_NO_DUMMY,
+                                  SPI_MEM_OP_DATA_OUT(2, sr_cr, 1));
+
+               ret = spi_mem_exec_op(nor->spimem, &op);
+       } else {
+               ret = nor->write_reg(nor, SPINOR_OP_WRSR, sr_cr, 2);
+       }
+
        if (ret < 0) {
                dev_err(nor->dev,
                        "error while writing configuration register\n");
@@ -1485,9 +1759,11 @@ static int macronix_quad_enable(struct spi_nor *nor)
  */
 static int spansion_quad_enable(struct spi_nor *nor)
 {
-       u8 sr_cr[2] = {0, CR_QUAD_EN_SPAN};
+       u8 *sr_cr = nor->bouncebuf;
        int ret;
 
+       sr_cr[0] = 0;
+       sr_cr[1] = CR_QUAD_EN_SPAN;
        ret = write_sr_cr(nor, sr_cr);
        if (ret)
                return ret;
@@ -1517,7 +1793,7 @@ static int spansion_quad_enable(struct spi_nor *nor)
  */
 static int spansion_no_read_cr_quad_enable(struct spi_nor *nor)
 {
-       u8 sr_cr[2];
+       u8 *sr_cr = nor->bouncebuf;
        int ret;
 
        /* Keep the current value of the Status Register. */
@@ -1548,7 +1824,7 @@ static int spansion_no_read_cr_quad_enable(struct spi_nor *nor)
 static int spansion_read_cr_quad_enable(struct spi_nor *nor)
 {
        struct device *dev = nor->dev;
-       u8 sr_cr[2];
+       u8 *sr_cr = nor->bouncebuf;
        int ret;
 
        /* Check current Quad Enable bit value. */
@@ -1585,6 +1861,36 @@ static int spansion_read_cr_quad_enable(struct spi_nor *nor)
        return 0;
 }
 
+static int spi_nor_write_sr2(struct spi_nor *nor, u8 *sr2)
+{
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR2, 1),
+                                  SPI_MEM_OP_NO_ADDR,
+                                  SPI_MEM_OP_NO_DUMMY,
+                                  SPI_MEM_OP_DATA_OUT(1, sr2, 1));
+
+               return spi_mem_exec_op(nor->spimem, &op);
+       }
+
+       return nor->write_reg(nor, SPINOR_OP_WRSR2, sr2, 1);
+}
+
+static int spi_nor_read_sr2(struct spi_nor *nor, u8 *sr2)
+{
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDSR2, 1),
+                                  SPI_MEM_OP_NO_ADDR,
+                                  SPI_MEM_OP_NO_DUMMY,
+                                  SPI_MEM_OP_DATA_IN(1, sr2, 1));
+
+               return spi_mem_exec_op(nor->spimem, &op);
+       }
+
+       return nor->read_reg(nor, SPINOR_OP_RDSR2, sr2, 1);
+}
+
 /**
  * sr2_bit7_quad_enable() - set QE bit in Status Register 2.
  * @nor:       pointer to a 'struct spi_nor'
@@ -1599,22 +1905,22 @@ static int spansion_read_cr_quad_enable(struct spi_nor *nor)
  */
 static int sr2_bit7_quad_enable(struct spi_nor *nor)
 {
-       u8 sr2;
+       u8 *sr2 = nor->bouncebuf;
        int ret;
 
        /* Check current Quad Enable bit value. */
-       ret = nor->read_reg(nor, SPINOR_OP_RDSR2, &sr2, 1);
+       ret = spi_nor_read_sr2(nor, sr2);
        if (ret)
                return ret;
-       if (sr2 & SR2_QUAD_EN_BIT7)
+       if (*sr2 & SR2_QUAD_EN_BIT7)
                return 0;
 
        /* Update the Quad Enable bit. */
-       sr2 |= SR2_QUAD_EN_BIT7;
+       *sr2 |= SR2_QUAD_EN_BIT7;
 
        write_enable(nor);
 
-       ret = nor->write_reg(nor, SPINOR_OP_WRSR2, &sr2, 1);
+       ret = spi_nor_write_sr2(nor, sr2);
        if (ret < 0) {
                dev_err(nor->dev, "error while writing status register 2\n");
                return -EINVAL;
@@ -1627,8 +1933,8 @@ static int sr2_bit7_quad_enable(struct spi_nor *nor)
        }
 
        /* Read back and check it. */
-       ret = nor->read_reg(nor, SPINOR_OP_RDSR2, &sr2, 1);
-       if (!(ret > 0 && (sr2 & SR2_QUAD_EN_BIT7))) {
+       ret = spi_nor_read_sr2(nor, sr2);
+       if (!(ret > 0 && (*sr2 & SR2_QUAD_EN_BIT7))) {
                dev_err(nor->dev, "SR2 Quad bit not set\n");
                return -EINVAL;
        }
@@ -1687,7 +1993,7 @@ static int spi_nor_spansion_clear_sr_bp(struct spi_nor *nor)
 {
        int ret;
        u8 mask = SR_BP2 | SR_BP1 | SR_BP0;
-       u8 sr_cr[2] = {0};
+       u8 *sr_cr =  nor->bouncebuf;
 
        /* Check current Quad Enable bit value. */
        ret = read_cr(nor);
@@ -1822,6 +2128,21 @@ static struct spi_nor_fixups mx25l25635_fixups = {
        .post_bfpt = mx25l25635_post_bfpt_fixups,
 };
 
+static void gd25q256_default_init(struct spi_nor *nor)
+{
+       /*
+        * Some manufacturers, such as GigaDevice, use a different bit to
+        * set QE on different memories, so the manufacturer ID alone
+        * cannot select the quad_enable method. Set it here instead, from
+        * the ->default_init() fixup hook wired up in gd25q256_fixups.
+        */
+       nor->params.quad_enable = macronix_quad_enable;
+}
+
+static struct spi_nor_fixups gd25q256_fixups = {
+       .default_init = gd25q256_default_init,
+};
+
 /* NOTE: double check command sets and memory organization when you add
  * more nor chips.  This current list focusses on newer chips, which
  * have been converging on command sets which including JEDEC ID.
@@ -1914,7 +2235,7 @@ static const struct flash_info spi_nor_ids[] = {
                "gd25q256", INFO(0xc84019, 0, 64 * 1024, 512,
                        SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ |
                        SPI_NOR_4B_OPCODES | SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB)
-                       .quad_enable = macronix_quad_enable,
+                       .fixups = &gd25q256_fixups,
        },
 
        /* Intel/Numonyx -- xxxs33b */
@@ -1988,13 +2309,16 @@ static const struct flash_info spi_nor_ids[] = {
        { "n25q128a13",  INFO(0x20ba18, 0, 64 * 1024,  256, SECT_4K | SPI_NOR_QUAD_READ) },
        { "n25q256a",    INFO(0x20ba19, 0, 64 * 1024,  512, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
        { "n25q256ax1",  INFO(0x20bb19, 0, 64 * 1024,  512, SECT_4K | SPI_NOR_QUAD_READ) },
-       { "n25q512a",    INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
        { "n25q512ax3",  INFO(0x20ba20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
        { "n25q00",      INFO(0x20ba21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) },
        { "n25q00a",     INFO(0x20bb21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) },
        { "mt25ql02g",   INFO(0x20ba22, 0, 64 * 1024, 4096,
                              SECT_4K | USE_FSR | SPI_NOR_QUAD_READ |
                              NO_CHIP_ERASE) },
+       { "mt25qu512a (n25q512a)", INFO(0x20bb20, 0, 64 * 1024, 1024,
+                                       SECT_4K | USE_FSR | SPI_NOR_DUAL_READ |
+                                       SPI_NOR_QUAD_READ |
+                                       SPI_NOR_4B_OPCODES) },
        { "mt25qu02g",   INFO(0x20bb22, 0, 64 * 1024, 4096, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) },
 
        /* Micron */
@@ -2003,6 +2327,9 @@ static const struct flash_info spi_nor_ids[] = {
                        SECT_4K | USE_FSR | SPI_NOR_OCTAL_READ |
                        SPI_NOR_4B_OPCODES)
        },
+       { "mt35xu02g",  INFO(0x2c5b1c, 0, 128 * 1024, 2048,
+                            SECT_4K | USE_FSR | SPI_NOR_OCTAL_READ |
+                            SPI_NOR_4B_OPCODES) },
 
        /* PMC */
        { "pm25lv512",   INFO(0,        0, 32 * 1024,    2, SECT_4K_PMC) },
@@ -2022,7 +2349,7 @@ static const struct flash_info spi_nor_ids[] = {
        { "s25fl256s1", INFO(0x010219, 0x4d01,  64 * 1024, 512, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | USE_CLSR) },
        { "s25fl512s",  INFO6(0x010220, 0x4d0080, 256 * 1024, 256,
                        SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ |
-                       SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB | USE_CLSR) },
+                       SPI_NOR_HAS_LOCK | USE_CLSR) },
        { "s25fs512s",  INFO6(0x010220, 0x4d0081, 256 * 1024, 256, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | USE_CLSR) },
        { "s70fl01gs",  INFO(0x010221, 0x4d00, 256 * 1024, 256, 0) },
        { "s25sl12800", INFO(0x012018, 0x0300, 256 * 1024,  64, 0) },
@@ -2060,6 +2387,8 @@ static const struct flash_info spi_nor_ids[] = {
        { "sst25wf040b", INFO(0x621613, 0, 64 * 1024,  8, SECT_4K) },
        { "sst25wf040",  INFO(0xbf2504, 0, 64 * 1024,  8, SECT_4K | SST_WRITE) },
        { "sst25wf080",  INFO(0xbf2505, 0, 64 * 1024, 16, SECT_4K | SST_WRITE) },
+       { "sst26wf016b", INFO(0xbf2651, 0, 64 * 1024, 32, SECT_4K |
+                             SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
        { "sst26vf064b", INFO(0xbf2643, 0, 64 * 1024, 128, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 
        /* ST Microelectronics -- newer production may have feature updates */
@@ -2151,6 +2480,8 @@ static const struct flash_info spi_nor_ids[] = {
        { "w25q80bl", INFO(0xef4014, 0, 64 * 1024,  16, SECT_4K) },
        { "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) },
        { "w25q256", INFO(0xef4019, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+       { "w25q256jvm", INFO(0xef7019, 0, 64 * 1024, 512,
+                            SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
        { "w25m512jv", INFO(0xef7119, 0, 64 * 1024, 1024,
                        SECT_4K | SPI_NOR_QUAD_READ | SPI_NOR_DUAL_READ) },
 
@@ -2177,10 +2508,21 @@ static const struct flash_info spi_nor_ids[] = {
 static const struct flash_info *spi_nor_read_id(struct spi_nor *nor)
 {
        int                     tmp;
-       u8                      id[SPI_NOR_MAX_ID_LEN];
+       u8                      *id = nor->bouncebuf;
        const struct flash_info *info;
 
-       tmp = nor->read_reg(nor, SPINOR_OP_RDID, id, SPI_NOR_MAX_ID_LEN);
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDID, 1),
+                                  SPI_MEM_OP_NO_ADDR,
+                                  SPI_MEM_OP_NO_DUMMY,
+                                  SPI_MEM_OP_DATA_IN(SPI_NOR_MAX_ID_LEN, id, 1));
+
+               tmp = spi_mem_exec_op(nor->spimem, &op);
+       } else {
+               tmp = nor->read_reg(nor, SPINOR_OP_RDID, id,
+                                   SPI_NOR_MAX_ID_LEN);
+       }
        if (tmp < 0) {
                dev_err(nor->dev, "error %d reading JEDEC ID\n", tmp);
                return ERR_PTR(tmp);
@@ -2213,10 +2555,9 @@ static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len,
        while (len) {
                loff_t addr = from;
 
-               if (nor->flags & SNOR_F_S3AN_ADDR_DEFAULT)
-                       addr = spi_nor_s3an_addr_convert(nor, addr);
+               addr = spi_nor_convert_addr(nor, addr);
 
-               ret = nor->read(nor, addr, len, buf);
+               ret = spi_nor_read_data(nor, addr, len, buf);
                if (ret == 0) {
                        /* We shouldn't see 0-length reads */
                        ret = -EIO;
@@ -2261,7 +2602,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
                nor->program_opcode = SPINOR_OP_BP;
 
                /* write one byte. */
-               ret = nor->write(nor, to, 1, buf);
+               ret = spi_nor_write_data(nor, to, 1, buf);
                if (ret < 0)
                        goto sst_write_err;
                WARN(ret != 1, "While writing 1 byte written %i bytes\n",
@@ -2277,7 +2618,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
                nor->program_opcode = SPINOR_OP_AAI_WP;
 
                /* write two bytes. */
-               ret = nor->write(nor, to, 2, buf + actual);
+               ret = spi_nor_write_data(nor, to, 2, buf + actual);
                if (ret < 0)
                        goto sst_write_err;
                WARN(ret != 2, "While writing 2 bytes written %i bytes\n",
@@ -2300,7 +2641,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
                write_enable(nor);
 
                nor->program_opcode = SPINOR_OP_BP;
-               ret = nor->write(nor, to, 1, buf + actual);
+               ret = spi_nor_write_data(nor, to, 1, buf + actual);
                if (ret < 0)
                        goto sst_write_err;
                WARN(ret != 1, "While writing 1 byte written %i bytes\n",
@@ -2358,11 +2699,10 @@ static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len,
                page_remain = min_t(size_t,
                                    nor->page_size - page_offset, len - i);
 
-               if (nor->flags & SNOR_F_S3AN_ADDR_DEFAULT)
-                       addr = spi_nor_s3an_addr_convert(nor, addr);
+               addr = spi_nor_convert_addr(nor, addr);
 
                write_enable(nor);
-               ret = nor->write(nor, addr, page_remain, buf + i);
+               ret = spi_nor_write_data(nor, addr, page_remain, buf + i);
                if (ret < 0)
                        goto write_err;
                written = ret;
@@ -2381,8 +2721,10 @@ write_err:
 
 static int spi_nor_check(struct spi_nor *nor)
 {
-       if (!nor->dev || !nor->read || !nor->write ||
-               !nor->read_reg || !nor->write_reg) {
+       if (!nor->dev ||
+           (!nor->spimem &&
+           (!nor->read || !nor->write || !nor->read_reg ||
+             !nor->write_reg))) {
                pr_err("spi-nor: please fill all the necessary fields!\n");
                return -EINVAL;
        }
@@ -2390,12 +2732,12 @@ static int spi_nor_check(struct spi_nor *nor)
        return 0;
 }
 
-static int s3an_nor_scan(struct spi_nor *nor)
+static int s3an_nor_setup(struct spi_nor *nor,
+                         const struct spi_nor_hwcaps *hwcaps)
 {
        int ret;
-       u8 val;
 
-       ret = nor->read_reg(nor, SPINOR_OP_XRDSR, &val, 1);
+       ret = spi_nor_xread_sr(nor, nor->bouncebuf);
        if (ret < 0) {
                dev_err(nor->dev, "error %d reading XRDSR\n", (int) ret);
                return ret;
@@ -2417,7 +2759,7 @@ static int s3an_nor_scan(struct spi_nor *nor)
         * The current addressing mode can be read from the XRDSR register
         * and should not be changed, because is a destructive operation.
         */
-       if (val & XSR_PAGESIZE) {
+       if (nor->bouncebuf[0] & XSR_PAGESIZE) {
                /* Flash in Power of 2 mode */
                nor->page_size = (nor->page_size == 264) ? 256 : 512;
                nor->mtd.writebufsize = nor->page_size;
@@ -2425,7 +2767,8 @@ static int s3an_nor_scan(struct spi_nor *nor)
                nor->mtd.erasesize = 8 * nor->page_size;
        } else {
                /* Flash in Default addressing mode */
-               nor->flags |= SNOR_F_S3AN_ADDR_DEFAULT;
+               nor->params.convert_addr = s3an_convert_addr;
+               nor->mtd.erasesize = nor->info->sector_size;
        }
 
        return 0;
@@ -2525,11 +2868,11 @@ static int spi_nor_read_raw(struct spi_nor *nor, u32 addr, size_t len, u8 *buf)
        int ret;
 
        while (len) {
-               ret = nor->read(nor, addr, len, buf);
-               if (!ret || ret > len)
-                       return -EIO;
+               ret = spi_nor_read_data(nor, addr, len, buf);
                if (ret < 0)
                        return ret;
+               if (!ret || ret > len)
+                       return -EIO;
 
                buf += ret;
                addr += ret;
@@ -2575,6 +2918,126 @@ static int spi_nor_read_sfdp(struct spi_nor *nor, u32 addr,
 }
 
 /**
+ * spi_nor_spimem_check_op - check if the operation is supported
+ *                           by controller
+ *@nor:        pointer to a 'struct spi_nor'
+ *@op:         pointer to op template to be checked
+ *
+ * Returns 0 if operation is supported, -ENOTSUPP otherwise.
+ */
+static int spi_nor_spimem_check_op(struct spi_nor *nor,
+                                  struct spi_mem_op *op)
+{
+       /*
+        * First test with 4 address bytes. The opcode itself might
+        * be a 3B addressing opcode but we don't care, because
+        * SPI controller implementation should not check the opcode,
+        * but just the sequence.
+        */
+       op->addr.nbytes = 4;
+       if (!spi_mem_supports_op(nor->spimem, op)) {
+               if (nor->mtd.size > SZ_16M)
+                       return -ENOTSUPP;
+
+               /* If flash size <= 16MB, 3 address bytes are sufficient */
+               op->addr.nbytes = 3;
+               if (!spi_mem_supports_op(nor->spimem, op))
+                       return -ENOTSUPP;
+       }
+
+       return 0;
+}
+
+/**
+ * spi_nor_spimem_check_readop() - check controller support for a read op
+ * @nor:        pointer to a 'struct spi_nor'
+ * @read:       read command settings to build the op template from
+ *
+ * Return: 0 if the operation is supported, -ENOTSUPP otherwise.
+ */
+static int spi_nor_spimem_check_readop(struct spi_nor *nor,
+                                      const struct spi_nor_read_command *read)
+{
+       struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(read->opcode, 1),
+                                         SPI_MEM_OP_ADDR(3, 0, 1),
+                                         SPI_MEM_OP_DUMMY(0, 1),
+                                         SPI_MEM_OP_DATA_IN(0, NULL, 1));
+
+       op.data.buswidth = spi_nor_get_protocol_data_nbits(read->proto);
+       op.addr.buswidth = spi_nor_get_protocol_addr_nbits(read->proto);
+       op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(read->proto);
+       /* Dummy cycles use the address bus width; convert clocks to bytes. */
+       op.dummy.buswidth = op.addr.buswidth;
+       op.dummy.nbytes = (read->num_mode_clocks + read->num_wait_states) *
+                         op.dummy.buswidth / 8;
+
+       return spi_nor_spimem_check_op(nor, &op);
+}
+
+/**
+ * spi_nor_spimem_check_pp() - check controller support for a page program op
+ * @nor:        pointer to a 'struct spi_nor'
+ * @pp:         page program command settings to build the op template from
+ *
+ * Return: 0 if the operation is supported, -ENOTSUPP otherwise.
+ */
+static int spi_nor_spimem_check_pp(struct spi_nor *nor,
+                                  const struct spi_nor_pp_command *pp)
+{
+       struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(pp->opcode, 1),
+                                         SPI_MEM_OP_ADDR(3, 0, 1),
+                                         SPI_MEM_OP_NO_DUMMY,
+                                         SPI_MEM_OP_DATA_OUT(0, NULL, 1));
+
+       /* Replace the template's bus widths with those of the PP protocol. */
+       op.data.buswidth = spi_nor_get_protocol_data_nbits(pp->proto);
+       op.addr.buswidth = spi_nor_get_protocol_addr_nbits(pp->proto);
+       op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(pp->proto);
+
+       return spi_nor_spimem_check_op(nor, &op);
+}
+
+/**
+ * spi_nor_spimem_adjust_hwcaps() - drop the capabilities that the SPI
+ *                                  controller cannot execute
+ * @nor:        pointer to a 'struct spi_nor'
+ * @hwcaps:     capability mask, filtered in place
+ *
+ * Read and Page Program capabilities are probed one by one.
+ */
+static void
+spi_nor_spimem_adjust_hwcaps(struct spi_nor *nor, u32 *hwcaps)
+{
+       const unsigned int nbits = sizeof(*hwcaps) * BITS_PER_BYTE;
+       struct spi_nor_flash_parameter *params = &nor->params;
+       unsigned int bit;
+
+       /* Neither DTR nor X-X-X modes are supported yet, mask them all. */
+       *hwcaps &= ~(SNOR_HWCAPS_DTR | SNOR_HWCAPS_X_X_X);
+
+       /* Ask the controller about every capability that is still set. */
+       for (bit = 0; bit < nbits; bit++) {
+               u32 cap = BIT(bit);
+               int idx;
+
+               if (!(*hwcaps & cap))
+                       continue;
+
+               /* Read capability the controller rejects: clear it. */
+               idx = spi_nor_hwcaps_read2cmd(cap);
+               if (idx >= 0 &&
+                   spi_nor_spimem_check_readop(nor, &params->reads[idx]))
+                       *hwcaps &= ~cap;
+
+               /* Same filtering for the Page Program capabilities. */
+               idx = spi_nor_hwcaps_pp2cmd(cap);
+               if (idx >= 0 &&
+                   spi_nor_spimem_check_pp(nor, &params->page_programs[idx]))
+                       *hwcaps &= ~cap;
+       }
+}
+
+/**
  * spi_nor_read_sfdp_dma_unsafe() - read Serial Flash Discoverable Parameters.
  * @nor:       pointer to a 'struct spi_nor'
  * @addr:      offset in the SFDP area to start reading data from
@@ -2892,7 +3355,7 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor,
                              const struct sfdp_parameter_header *bfpt_header,
                              struct spi_nor_flash_parameter *params)
 {
-       struct spi_nor_erase_map *map = &nor->erase_map;
+       struct spi_nor_erase_map *map = &params->erase_map;
        struct spi_nor_erase_type *erase_type = map->erase_type;
        struct sfdp_bfpt bfpt;
        size_t len;
@@ -2973,7 +3436,7 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor,
         * Erase Types defined in the bfpt table.
         */
        erase_mask = 0;
-       memset(&nor->erase_map, 0, sizeof(nor->erase_map));
+       memset(&params->erase_map, 0, sizeof(params->erase_map));
        for (i = 0; i < ARRAY_SIZE(sfdp_bfpt_erases); i++) {
                const struct sfdp_bfpt_erase *er = &sfdp_bfpt_erases[i];
                u32 erasesize;
@@ -3248,14 +3711,18 @@ spi_nor_region_check_overlay(struct spi_nor_erase_region *region,
 /**
  * spi_nor_init_non_uniform_erase_map() - initialize the non-uniform erase map
  * @nor:       pointer to a 'struct spi_nor'
+ * @params:     pointer to a duplicate 'struct spi_nor_flash_parameter' that is
+ *              used for storing SFDP parsed data
  * @smpt:      pointer to the sector map parameter table
  *
  * Return: 0 on success, -errno otherwise.
  */
-static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
-                                             const u32 *smpt)
+static int
+spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
+                                  struct spi_nor_flash_parameter *params,
+                                  const u32 *smpt)
 {
-       struct spi_nor_erase_map *map = &nor->erase_map;
+       struct spi_nor_erase_map *map = &params->erase_map;
        struct spi_nor_erase_type *erase = map->erase_type;
        struct spi_nor_erase_region *region;
        u64 offset;
@@ -3334,6 +3801,8 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
  * spi_nor_parse_smpt() - parse Sector Map Parameter Table
  * @nor:               pointer to a 'struct spi_nor'
  * @smpt_header:       sector map parameter table header
+ * @params:            pointer to a duplicate 'struct spi_nor_flash_parameter'
+ *                      that is used for storing SFDP parsed data
  *
  * This table is optional, but when available, we parse it to identify the
  * location and size of sectors within the main data array of the flash memory
@@ -3342,7 +3811,8 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
  * Return: 0 on success, -errno otherwise.
  */
 static int spi_nor_parse_smpt(struct spi_nor *nor,
-                             const struct sfdp_parameter_header *smpt_header)
+                             const struct sfdp_parameter_header *smpt_header,
+                             struct spi_nor_flash_parameter *params)
 {
        const u32 *sector_map;
        u32 *smpt;
@@ -3371,11 +3841,11 @@ static int spi_nor_parse_smpt(struct spi_nor *nor,
                goto out;
        }
 
-       ret = spi_nor_init_non_uniform_erase_map(nor, sector_map);
+       ret = spi_nor_init_non_uniform_erase_map(nor, params, sector_map);
        if (ret)
                goto out;
 
-       spi_nor_regions_sort_erase_types(&nor->erase_map);
+       spi_nor_regions_sort_erase_types(&params->erase_map);
        /* fall through */
 out:
        kfree(smpt);
@@ -3431,7 +3901,7 @@ static int spi_nor_parse_4bait(struct spi_nor *nor,
                { 0u /* not used */,            BIT(12) },
        };
        struct spi_nor_pp_command *params_pp = params->page_programs;
-       struct spi_nor_erase_map *map = &nor->erase_map;
+       struct spi_nor_erase_map *map = &params->erase_map;
        struct spi_nor_erase_type *erase_type = map->erase_type;
        u32 *dwords;
        size_t len;
@@ -3453,7 +3923,7 @@ static int spi_nor_parse_4bait(struct spi_nor *nor,
        addr = SFDP_PARAM_HEADER_PTP(param_header);
        ret = spi_nor_read_sfdp(nor, addr, len, dwords);
        if (ret)
-               return ret;
+               goto out;
 
        /* Fix endianness of the 4BAIT DWORDs. */
        for (i = 0; i < SFDP_4BAIT_DWORD_MAX; i++)
@@ -3661,7 +4131,7 @@ static int spi_nor_parse_sfdp(struct spi_nor *nor,
 
                switch (SFDP_PARAM_HEADER_ID(param_header)) {
                case SFDP_SECTOR_MAP_ID:
-                       err = spi_nor_parse_smpt(nor, param_header);
+                       err = spi_nor_parse_smpt(nor, param_header, params);
                        break;
 
                case SFDP_4BAIT_ID:
@@ -3690,137 +4160,11 @@ exit:
        return err;
 }
 
-static int spi_nor_init_params(struct spi_nor *nor,
-                              struct spi_nor_flash_parameter *params)
+static int spi_nor_select_read(struct spi_nor *nor,
+                              u32 shared_hwcaps)
 {
-       struct spi_nor_erase_map *map = &nor->erase_map;
-       const struct flash_info *info = nor->info;
-       u8 i, erase_mask;
-
-       /* Set legacy flash parameters as default. */
-       memset(params, 0, sizeof(*params));
-
-       /* Set SPI NOR sizes. */
-       params->size = (u64)info->sector_size * info->n_sectors;
-       params->page_size = info->page_size;
-
-       /* (Fast) Read settings. */
-       params->hwcaps.mask |= SNOR_HWCAPS_READ;
-       spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ],
-                                 0, 0, SPINOR_OP_READ,
-                                 SNOR_PROTO_1_1_1);
-
-       if (!(info->flags & SPI_NOR_NO_FR)) {
-               params->hwcaps.mask |= SNOR_HWCAPS_READ_FAST;
-               spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_FAST],
-                                         0, 8, SPINOR_OP_READ_FAST,
-                                         SNOR_PROTO_1_1_1);
-       }
-
-       if (info->flags & SPI_NOR_DUAL_READ) {
-               params->hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
-               spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_1_1_2],
-                                         0, 8, SPINOR_OP_READ_1_1_2,
-                                         SNOR_PROTO_1_1_2);
-       }
-
-       if (info->flags & SPI_NOR_QUAD_READ) {
-               params->hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
-               spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_1_1_4],
-                                         0, 8, SPINOR_OP_READ_1_1_4,
-                                         SNOR_PROTO_1_1_4);
-       }
-
-       if (info->flags & SPI_NOR_OCTAL_READ) {
-               params->hwcaps.mask |= SNOR_HWCAPS_READ_1_1_8;
-               spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_1_1_8],
-                                         0, 8, SPINOR_OP_READ_1_1_8,
-                                         SNOR_PROTO_1_1_8);
-       }
-
-       /* Page Program settings. */
-       params->hwcaps.mask |= SNOR_HWCAPS_PP;
-       spi_nor_set_pp_settings(&params->page_programs[SNOR_CMD_PP],
-                               SPINOR_OP_PP, SNOR_PROTO_1_1_1);
-
-       /*
-        * Sector Erase settings. Sort Erase Types in ascending order, with the
-        * smallest erase size starting at BIT(0).
-        */
-       erase_mask = 0;
-       i = 0;
-       if (info->flags & SECT_4K_PMC) {
-               erase_mask |= BIT(i);
-               spi_nor_set_erase_type(&map->erase_type[i], 4096u,
-                                      SPINOR_OP_BE_4K_PMC);
-               i++;
-       } else if (info->flags & SECT_4K) {
-               erase_mask |= BIT(i);
-               spi_nor_set_erase_type(&map->erase_type[i], 4096u,
-                                      SPINOR_OP_BE_4K);
-               i++;
-       }
-       erase_mask |= BIT(i);
-       spi_nor_set_erase_type(&map->erase_type[i], info->sector_size,
-                              SPINOR_OP_SE);
-       spi_nor_init_uniform_erase_map(map, erase_mask, params->size);
-
-       /* Select the procedure to set the Quad Enable bit. */
-       if (params->hwcaps.mask & (SNOR_HWCAPS_READ_QUAD |
-                                  SNOR_HWCAPS_PP_QUAD)) {
-               switch (JEDEC_MFR(info)) {
-               case SNOR_MFR_MACRONIX:
-                       params->quad_enable = macronix_quad_enable;
-                       break;
-
-               case SNOR_MFR_ST:
-               case SNOR_MFR_MICRON:
-                       break;
-
-               default:
-                       /* Kept only for backward compatibility purpose. */
-                       params->quad_enable = spansion_quad_enable;
-                       break;
-               }
-
-               /*
-                * Some manufacturer like GigaDevice may use different
-                * bit to set QE on different memories, so the MFR can't
-                * indicate the quad_enable method for this case, we need
-                * set it in flash info list.
-                */
-               if (info->quad_enable)
-                       params->quad_enable = info->quad_enable;
-       }
-
-       if ((info->flags & (SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ)) &&
-           !(info->flags & SPI_NOR_SKIP_SFDP)) {
-               struct spi_nor_flash_parameter sfdp_params;
-               struct spi_nor_erase_map prev_map;
-
-               memcpy(&sfdp_params, params, sizeof(sfdp_params));
-               memcpy(&prev_map, &nor->erase_map, sizeof(prev_map));
-
-               if (spi_nor_parse_sfdp(nor, &sfdp_params)) {
-                       nor->addr_width = 0;
-                       nor->flags &= ~SNOR_F_4B_OPCODES;
-                       /* restore previous erase map */
-                       memcpy(&nor->erase_map, &prev_map,
-                              sizeof(nor->erase_map));
-               } else {
-                       memcpy(params, &sfdp_params, sizeof(*params));
-               }
-       }
-
-       return 0;
-}
-
-static int spi_nor_select_read(struct spi_nor *nor,
-                              const struct spi_nor_flash_parameter *params,
-                              u32 shared_hwcaps)
-{
-       int cmd, best_match = fls(shared_hwcaps & SNOR_HWCAPS_READ_MASK) - 1;
-       const struct spi_nor_read_command *read;
+       int cmd, best_match = fls(shared_hwcaps & SNOR_HWCAPS_READ_MASK) - 1;
+       const struct spi_nor_read_command *read;
 
        if (best_match < 0)
                return -EINVAL;
@@ -3829,7 +4173,7 @@ static int spi_nor_select_read(struct spi_nor *nor,
        if (cmd < 0)
                return -EINVAL;
 
-       read = &params->reads[cmd];
+       read = &nor->params.reads[cmd];
        nor->read_opcode = read->opcode;
        nor->read_proto = read->proto;
 
@@ -3848,7 +4192,6 @@ static int spi_nor_select_read(struct spi_nor *nor,
 }
 
 static int spi_nor_select_pp(struct spi_nor *nor,
-                            const struct spi_nor_flash_parameter *params,
                             u32 shared_hwcaps)
 {
        int cmd, best_match = fls(shared_hwcaps & SNOR_HWCAPS_PP_MASK) - 1;
@@ -3861,7 +4204,7 @@ static int spi_nor_select_pp(struct spi_nor *nor,
        if (cmd < 0)
                return -EINVAL;
 
-       pp = &params->page_programs[cmd];
+       pp = &nor->params.page_programs[cmd];
        nor->program_opcode = pp->opcode;
        nor->write_proto = pp->proto;
        return 0;
@@ -3920,11 +4263,12 @@ spi_nor_select_uniform_erase(struct spi_nor_erase_map *map,
        return erase;
 }
 
-static int spi_nor_select_erase(struct spi_nor *nor, u32 wanted_size)
+static int spi_nor_select_erase(struct spi_nor *nor)
 {
-       struct spi_nor_erase_map *map = &nor->erase_map;
+       struct spi_nor_erase_map *map = &nor->params.erase_map;
        const struct spi_nor_erase_type *erase = NULL;
        struct mtd_info *mtd = &nor->mtd;
+       u32 wanted_size = nor->info->sector_size;
        int i;
 
        /*
@@ -3967,12 +4311,11 @@ static int spi_nor_select_erase(struct spi_nor *nor, u32 wanted_size)
        return 0;
 }
 
-static int spi_nor_setup(struct spi_nor *nor,
-                        const struct spi_nor_flash_parameter *params,
-                        const struct spi_nor_hwcaps *hwcaps)
+static int spi_nor_default_setup(struct spi_nor *nor,
+                                const struct spi_nor_hwcaps *hwcaps)
 {
+       struct spi_nor_flash_parameter *params = &nor->params;
        u32 ignored_mask, shared_mask;
-       bool enable_quad_io;
        int err;
 
        /*
@@ -3981,20 +4324,29 @@ static int spi_nor_setup(struct spi_nor *nor,
         */
        shared_mask = hwcaps->mask & params->hwcaps.mask;
 
-       /* SPI n-n-n protocols are not supported yet. */
-       ignored_mask = (SNOR_HWCAPS_READ_2_2_2 |
-                       SNOR_HWCAPS_READ_4_4_4 |
-                       SNOR_HWCAPS_READ_8_8_8 |
-                       SNOR_HWCAPS_PP_4_4_4 |
-                       SNOR_HWCAPS_PP_8_8_8);
-       if (shared_mask & ignored_mask) {
-               dev_dbg(nor->dev,
-                       "SPI n-n-n protocols are not supported yet.\n");
-               shared_mask &= ~ignored_mask;
+       if (nor->spimem) {
+               /*
+                * When called from spi_nor_probe(), all caps are set and we
+                * need to discard some of them based on what the SPI
+                * controller actually supports (using spi_mem_supports_op()).
+                */
+               spi_nor_spimem_adjust_hwcaps(nor, &shared_mask);
+       } else {
+               /*
+                * SPI n-n-n protocols are not supported when the SPI
+                * controller directly implements the spi_nor interface.
+                * Yet another reason to switch to spi-mem.
+                */
+               ignored_mask = SNOR_HWCAPS_X_X_X;
+               if (shared_mask & ignored_mask) {
+                       dev_dbg(nor->dev,
+                               "SPI n-n-n protocols are not supported.\n");
+                       shared_mask &= ~ignored_mask;
+               }
        }
 
        /* Select the (Fast) Read command. */
-       err = spi_nor_select_read(nor, params, shared_mask);
+       err = spi_nor_select_read(nor, shared_mask);
        if (err) {
                dev_err(nor->dev,
                        "can't select read settings supported by both the SPI controller and memory.\n");
@@ -4002,7 +4354,7 @@ static int spi_nor_setup(struct spi_nor *nor,
        }
 
        /* Select the Page Program command. */
-       err = spi_nor_select_pp(nor, params, shared_mask);
+       err = spi_nor_select_pp(nor, shared_mask);
        if (err) {
                dev_err(nor->dev,
                        "can't select write settings supported by both the SPI controller and memory.\n");
@@ -4010,30 +4362,325 @@ static int spi_nor_setup(struct spi_nor *nor,
        }
 
        /* Select the Sector Erase command. */
-       err = spi_nor_select_erase(nor, nor->info->sector_size);
+       err = spi_nor_select_erase(nor);
        if (err) {
                dev_err(nor->dev,
                        "can't select erase settings supported by both the SPI controller and memory.\n");
                return err;
        }
 
-       /* Enable Quad I/O if needed. */
-       enable_quad_io = (spi_nor_get_protocol_width(nor->read_proto) == 4 ||
-                         spi_nor_get_protocol_width(nor->write_proto) == 4);
-       if (enable_quad_io && params->quad_enable)
-               nor->quad_enable = params->quad_enable;
-       else
-               nor->quad_enable = NULL;
-
        return 0;
 }
 
+static int spi_nor_setup(struct spi_nor *nor,
+                        const struct spi_nor_hwcaps *hwcaps)
+{
+       if (!nor->params.setup)
+               return 0;
+
+       return nor->params.setup(nor, hwcaps);
+}
+
+static void macronix_set_default_init(struct spi_nor *nor)
+{
+       nor->params.quad_enable = macronix_quad_enable;
+       nor->params.set_4byte = macronix_set_4byte;
+}
+
+static void st_micron_set_default_init(struct spi_nor *nor)
+{
+       nor->flags |= SNOR_F_HAS_LOCK;
+       nor->params.quad_enable = NULL;
+       nor->params.set_4byte = st_micron_set_4byte;
+}
+
+static void winbond_set_default_init(struct spi_nor *nor)
+{
+       nor->params.set_4byte = winbond_set_4byte;
+}
+
+/**
+ * spi_nor_manufacturer_init_params() - Initialize the flash's parameters and
+ * settings based on MFR register and ->default_init() hook.
+ * @nor:       pointer to a 'struct spi_nor'.
+ */
+static void spi_nor_manufacturer_init_params(struct spi_nor *nor)
+{
+       /* Init flash parameters based on MFR */
+       switch (JEDEC_MFR(nor->info)) {
+       case SNOR_MFR_MACRONIX:
+               macronix_set_default_init(nor);
+               break;
+
+       case SNOR_MFR_ST:
+       case SNOR_MFR_MICRON:
+               st_micron_set_default_init(nor);
+               break;
+
+       case SNOR_MFR_WINBOND:
+               winbond_set_default_init(nor);
+               break;
+
+       default:
+               break;
+       }
+
+       if (nor->info->fixups && nor->info->fixups->default_init)
+               nor->info->fixups->default_init(nor);
+}
+
+/**
+ * spi_nor_sfdp_init_params() - Initialize the flash's parameters and settings
+ * based on JESD216 SFDP standard.
+ * @nor:       pointer to a 'struct spi_nor'.
+ *
+ * The method has a roll-back mechanism: in case the SFDP parsing fails, the
+ * legacy flash parameters and settings will be restored.
+ */
+static void spi_nor_sfdp_init_params(struct spi_nor *nor)
+{
+       struct spi_nor_flash_parameter sfdp_params;
+
+       memcpy(&sfdp_params, &nor->params, sizeof(sfdp_params));
+
+       if (spi_nor_parse_sfdp(nor, &sfdp_params)) {
+               nor->addr_width = 0;
+               nor->flags &= ~SNOR_F_4B_OPCODES;
+       } else {
+               memcpy(&nor->params, &sfdp_params, sizeof(nor->params));
+       }
+}
+
+/**
+ * spi_nor_info_init_params() - Initialize the flash's parameters and settings
+ * based on nor->info data.
+ * @nor:       pointer to a 'struct spi_nor'.
+ */
+static void spi_nor_info_init_params(struct spi_nor *nor)
+{
+       struct spi_nor_flash_parameter *params = &nor->params;
+       struct spi_nor_erase_map *map = &params->erase_map;
+       const struct flash_info *info = nor->info;
+       struct device_node *np = spi_nor_get_flash_node(nor);
+       u8 i, erase_mask;
+
+       /* Initialize legacy flash parameters and settings. */
+       params->quad_enable = spansion_quad_enable;
+       params->set_4byte = spansion_set_4byte;
+       params->setup = spi_nor_default_setup;
+
+       /* Set SPI NOR sizes. */
+       params->size = (u64)info->sector_size * info->n_sectors;
+       params->page_size = info->page_size;
+
+       if (!(info->flags & SPI_NOR_NO_FR)) {
+               /* Default to Fast Read for DT and non-DT platform devices. */
+               params->hwcaps.mask |= SNOR_HWCAPS_READ_FAST;
+
+               /* Mask out Fast Read if not requested at DT instantiation. */
+               if (np && !of_property_read_bool(np, "m25p,fast-read"))
+                       params->hwcaps.mask &= ~SNOR_HWCAPS_READ_FAST;
+       }
+
+       /* (Fast) Read settings. */
+       params->hwcaps.mask |= SNOR_HWCAPS_READ;
+       spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ],
+                                 0, 0, SPINOR_OP_READ,
+                                 SNOR_PROTO_1_1_1);
+
+       if (params->hwcaps.mask & SNOR_HWCAPS_READ_FAST)
+               spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_FAST],
+                                         0, 8, SPINOR_OP_READ_FAST,
+                                         SNOR_PROTO_1_1_1);
+
+       if (info->flags & SPI_NOR_DUAL_READ) {
+               params->hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
+               spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_1_1_2],
+                                         0, 8, SPINOR_OP_READ_1_1_2,
+                                         SNOR_PROTO_1_1_2);
+       }
+
+       if (info->flags & SPI_NOR_QUAD_READ) {
+               params->hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
+               spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_1_1_4],
+                                         0, 8, SPINOR_OP_READ_1_1_4,
+                                         SNOR_PROTO_1_1_4);
+       }
+
+       if (info->flags & SPI_NOR_OCTAL_READ) {
+               params->hwcaps.mask |= SNOR_HWCAPS_READ_1_1_8;
+               spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_1_1_8],
+                                         0, 8, SPINOR_OP_READ_1_1_8,
+                                         SNOR_PROTO_1_1_8);
+       }
+
+       /* Page Program settings. */
+       params->hwcaps.mask |= SNOR_HWCAPS_PP;
+       spi_nor_set_pp_settings(&params->page_programs[SNOR_CMD_PP],
+                               SPINOR_OP_PP, SNOR_PROTO_1_1_1);
+
+       /*
+        * Sector Erase settings. Sort Erase Types in ascending order, with the
+        * smallest erase size starting at BIT(0).
+        */
+       erase_mask = 0;
+       i = 0;
+       if (info->flags & SECT_4K_PMC) {
+               erase_mask |= BIT(i);
+               spi_nor_set_erase_type(&map->erase_type[i], 4096u,
+                                      SPINOR_OP_BE_4K_PMC);
+               i++;
+       } else if (info->flags & SECT_4K) {
+               erase_mask |= BIT(i);
+               spi_nor_set_erase_type(&map->erase_type[i], 4096u,
+                                      SPINOR_OP_BE_4K);
+               i++;
+       }
+       erase_mask |= BIT(i);
+       spi_nor_set_erase_type(&map->erase_type[i], info->sector_size,
+                              SPINOR_OP_SE);
+       spi_nor_init_uniform_erase_map(map, erase_mask, params->size);
+}
+
+static void spansion_post_sfdp_fixups(struct spi_nor *nor)
+{
+       struct mtd_info *mtd = &nor->mtd;
+
+       if (mtd->size <= SZ_16M)
+               return;
+
+       nor->flags |= SNOR_F_4B_OPCODES;
+       /* No small sector erase for 4-byte command set */
+       nor->erase_opcode = SPINOR_OP_SE;
+       nor->mtd.erasesize = nor->info->sector_size;
+}
+
+static void s3an_post_sfdp_fixups(struct spi_nor *nor)
+{
+       nor->params.setup = s3an_nor_setup;
+}
+
+/**
+ * spi_nor_post_sfdp_fixups() - Updates the flash's parameters and settings
+ * after SFDP has been parsed (is also called for SPI NORs that do not
+ * support RDSFDP).
+ * @nor:       pointer to a 'struct spi_nor'
+ *
+ * Typically used to tweak various parameters that could not be extracted by
+ * other means (i.e. when information provided by the SFDP/flash_info tables
+ * is incomplete or wrong).
+ */
+static void spi_nor_post_sfdp_fixups(struct spi_nor *nor)
+{
+       switch (JEDEC_MFR(nor->info)) {
+       case SNOR_MFR_SPANSION:
+               spansion_post_sfdp_fixups(nor);
+               break;
+
+       default:
+               break;
+       }
+
+       if (nor->info->flags & SPI_S3AN)
+               s3an_post_sfdp_fixups(nor);
+
+       if (nor->info->fixups && nor->info->fixups->post_sfdp)
+               nor->info->fixups->post_sfdp(nor);
+}
+
+/**
+ * spi_nor_late_init_params() - Late initialization of default flash parameters.
+ * @nor:       pointer to a 'struct spi_nor'
+ *
+ * Used to set default flash parameters and settings when the ->default_init()
+ * hook or the SFDP parser leave them unset.
+ */
+static void spi_nor_late_init_params(struct spi_nor *nor)
+{
+       /*
+        * NOR protection support. When locking_ops are not provided, we pick
+        * the default ones.
+        */
+       if (nor->flags & SNOR_F_HAS_LOCK && !nor->params.locking_ops)
+               nor->params.locking_ops = &stm_locking_ops;
+}
+
+/**
+ * spi_nor_init_params() - Initialize the flash's parameters and settings.
+ * @nor:       pointer to a 'struct spi_nor'.
+ *
+ * The flash parameters and settings are initialized based on a sequence of
+ * calls that are ordered by priority:
+ *
+ * 1/ Default flash parameters initialization. The initializations are done
+ *    based on nor->info data:
+ *             spi_nor_info_init_params()
+ *
+ * which can be overwritten by:
+ * 2/ Manufacturer flash parameters initialization. The initializations are
+ *    done based on MFR register, or when the decisions cannot be made solely
+ *    based on MFR, by using specific flash_info tweaks, ->default_init():
+ *             spi_nor_manufacturer_init_params()
+ *
+ * which can be overwritten by:
+ * 3/ SFDP flash parameters initialization. JESD216 SFDP is a standard and
+ *    should be more accurate than the above.
+ *             spi_nor_sfdp_init_params()
+ *
+ *    Please note that there is a ->post_bfpt() fixup hook that can overwrite
+ *    the flash parameters and settings immediately after parsing the Basic
+ *    Flash Parameter Table.
+ *
+ * which can be overwritten by:
+ * 4/ Post SFDP flash parameters initialization. Used to tweak various
+ *    parameters that could not be extracted by other means (i.e. when
+ *    information provided by the SFDP/flash_info tables is incomplete or
+ *    wrong).
+ *             spi_nor_post_sfdp_fixups()
+ *
+ * 5/ Late default flash parameters initialization, used when the
+ * ->default_init() hook or the SFDP parser do not set specific params.
+ *             spi_nor_late_init_params()
+ */
+static void spi_nor_init_params(struct spi_nor *nor)
+{
+       spi_nor_info_init_params(nor);
+
+       spi_nor_manufacturer_init_params(nor);
+
+       if ((nor->info->flags & (SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ)) &&
+           !(nor->info->flags & SPI_NOR_SKIP_SFDP))
+               spi_nor_sfdp_init_params(nor);
+
+       spi_nor_post_sfdp_fixups(nor);
+
+       spi_nor_late_init_params(nor);
+}
+
+/**
+ * spi_nor_quad_enable() - enable Quad I/O if needed.
+ * @nor:                pointer to a 'struct spi_nor'
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_quad_enable(struct spi_nor *nor)
+{
+       if (!nor->params.quad_enable)
+               return 0;
+
+       if (!(spi_nor_get_protocol_width(nor->read_proto) == 4 ||
+             spi_nor_get_protocol_width(nor->write_proto) == 4))
+               return 0;
+
+       return nor->params.quad_enable(nor);
+}
+
 static int spi_nor_init(struct spi_nor *nor)
 {
        int err;
 
        if (nor->clear_sr_bp) {
-               if (nor->quad_enable == spansion_quad_enable)
+               if (nor->params.quad_enable == spansion_quad_enable)
                        nor->clear_sr_bp = spi_nor_spansion_clear_sr_bp;
 
                err = nor->clear_sr_bp(nor);
@@ -4044,12 +4691,10 @@ static int spi_nor_init(struct spi_nor *nor)
                }
        }
 
-       if (nor->quad_enable) {
-               err = nor->quad_enable(nor);
-               if (err) {
-                       dev_err(nor->dev, "quad mode not supported\n");
-                       return err;
-               }
+       err = spi_nor_quad_enable(nor);
+       if (err) {
+               dev_err(nor->dev, "quad mode not supported\n");
+               return err;
        }
 
        if (nor->addr_width == 4 && !(nor->flags & SNOR_F_4B_OPCODES)) {
@@ -4062,7 +4707,7 @@ static int spi_nor_init(struct spi_nor *nor)
                 */
                WARN_ONCE(nor->flags & SNOR_F_BROKEN_RESET,
                          "enabling reset hack; may not recover from unexpected reboots\n");
-               set_4byte(nor, true);
+               nor->params.set_4byte(nor, true);
        }
 
        return 0;
@@ -4086,7 +4731,7 @@ void spi_nor_restore(struct spi_nor *nor)
        /* restore the addressing mode */
        if (nor->addr_width == 4 && !(nor->flags & SNOR_F_4B_OPCODES) &&
            nor->flags & SNOR_F_BROKEN_RESET)
-               set_4byte(nor, false);
+               nor->params.set_4byte(nor, false);
 }
 EXPORT_SYMBOL_GPL(spi_nor_restore);
 
@@ -4102,25 +4747,47 @@ static const struct flash_info *spi_nor_match_id(const char *name)
        return NULL;
 }
 
-int spi_nor_scan(struct spi_nor *nor, const char *name,
-                const struct spi_nor_hwcaps *hwcaps)
+static int spi_nor_set_addr_width(struct spi_nor *nor)
+{
+       if (nor->addr_width) {
+               /* already configured from SFDP */
+       } else if (nor->info->addr_width) {
+               nor->addr_width = nor->info->addr_width;
+       } else if (nor->mtd.size > 0x1000000) {
+               /* enable 4-byte addressing if the device exceeds 16MiB */
+               nor->addr_width = 4;
+       } else {
+               nor->addr_width = 3;
+       }
+
+       if (nor->addr_width > SPI_NOR_MAX_ADDR_WIDTH) {
+               dev_err(nor->dev, "address width is too large: %u\n",
+                       nor->addr_width);
+               return -EINVAL;
+       }
+
+       /* Set 4byte opcodes when possible. */
+       if (nor->addr_width == 4 && nor->flags & SNOR_F_4B_OPCODES &&
+           !(nor->flags & SNOR_F_HAS_4BAIT))
+               spi_nor_set_4byte_opcodes(nor);
+
+       return 0;
+}
+
+static void spi_nor_debugfs_init(struct spi_nor *nor,
+                                const struct flash_info *info)
 {
-       struct spi_nor_flash_parameter params;
-       const struct flash_info *info = NULL;
-       struct device *dev = nor->dev;
        struct mtd_info *mtd = &nor->mtd;
-       struct device_node *np = spi_nor_get_flash_node(nor);
-       int ret;
-       int i;
 
-       ret = spi_nor_check(nor);
-       if (ret)
-               return ret;
+       mtd->dbg.partname = info->name;
+       mtd->dbg.partid = devm_kasprintf(nor->dev, GFP_KERNEL, "spi-nor:%*phN",
+                                        info->id_len, info->id);
+}
 
-       /* Reset SPI protocol for all commands. */
-       nor->reg_proto = SNOR_PROTO_1_1_1;
-       nor->read_proto = SNOR_PROTO_1_1_1;
-       nor->write_proto = SNOR_PROTO_1_1_1;
+static const struct flash_info *spi_nor_get_flash_info(struct spi_nor *nor,
+                                                      const char *name)
+{
+       const struct flash_info *info = NULL;
 
        if (name)
                info = spi_nor_match_id(name);
@@ -4128,7 +4795,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
        if (!info)
                info = spi_nor_read_id(nor);
        if (IS_ERR_OR_NULL(info))
-               return -ENOENT;
+               return ERR_PTR(-ENOENT);
 
        /*
         * If caller has specified name of flash model that can normally be
@@ -4139,7 +4806,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
 
                jinfo = spi_nor_read_id(nor);
                if (IS_ERR(jinfo)) {
-                       return PTR_ERR(jinfo);
+                       return jinfo;
                } else if (jinfo != info) {
                        /*
                         * JEDEC knows better, so overwrite platform ID. We
@@ -4148,14 +4815,57 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
                         * marked read-only, and we don't want to lose that
                         * information, even if it's not 100% accurate.
                         */
-                       dev_warn(dev, "found %s, expected %s\n",
+                       dev_warn(nor->dev, "found %s, expected %s\n",
                                 jinfo->name, info->name);
                        info = jinfo;
                }
        }
 
+       return info;
+}
+
+int spi_nor_scan(struct spi_nor *nor, const char *name,
+                const struct spi_nor_hwcaps *hwcaps)
+{
+       const struct flash_info *info;
+       struct device *dev = nor->dev;
+       struct mtd_info *mtd = &nor->mtd;
+       struct device_node *np = spi_nor_get_flash_node(nor);
+       struct spi_nor_flash_parameter *params = &nor->params;
+       int ret;
+       int i;
+
+       ret = spi_nor_check(nor);
+       if (ret)
+               return ret;
+
+       /* Reset SPI protocol for all commands. */
+       nor->reg_proto = SNOR_PROTO_1_1_1;
+       nor->read_proto = SNOR_PROTO_1_1_1;
+       nor->write_proto = SNOR_PROTO_1_1_1;
+
+       /*
+        * We need the bounce buffer early to read/write registers when going
+        * through the spi-mem layer (buffers have to be DMA-able).
+        * For spi-mem drivers, we'll reallocate a new buffer if
+        * nor->page_size turns out to be greater than PAGE_SIZE (which
+        * shouldn't happen before long since NOR pages are usually less
+        * than 1KB) after spi_nor_scan() returns.
+        */
+       nor->bouncebuf_size = PAGE_SIZE;
+       nor->bouncebuf = devm_kmalloc(dev, nor->bouncebuf_size,
+                                     GFP_KERNEL);
+       if (!nor->bouncebuf)
+               return -ENOMEM;
+
+       info = spi_nor_get_flash_info(nor, name);
+       if (IS_ERR(info))
+               return PTR_ERR(info);
+
        nor->info = info;
 
+       spi_nor_debugfs_init(nor, info);
+
        mutex_init(&nor->lock);
 
        /*
@@ -4163,9 +4873,12 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
         * spi_nor_wait_till_ready(). Xilinx S3AN share MFR
         * with Atmel spi-nor
         */
-       if (info->flags & SPI_S3AN)
+       if (info->flags & SPI_NOR_XSR_RDY)
                nor->flags |=  SNOR_F_READY_XSR_RDY;
 
+       if (info->flags & SPI_NOR_HAS_LOCK)
+               nor->flags |= SNOR_F_HAS_LOCK;
+
        /*
         * Atmel, SST, Intel/Numonyx, and others serial NOR tend to power up
         * with the software protection bits set.
@@ -4176,10 +4889,8 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
            nor->info->flags & SPI_NOR_HAS_LOCK)
                nor->clear_sr_bp = spi_nor_clear_sr_bp;
 
-       /* Parse the Serial Flash Discoverable Parameters table. */
-       ret = spi_nor_init_params(nor, &params);
-       if (ret)
-               return ret;
+       /* Init flash parameters based on flash_info struct and SFDP */
+       spi_nor_init_params(nor);
 
        if (!mtd->name)
                mtd->name = dev_name(dev);
@@ -4187,21 +4898,12 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
        mtd->type = MTD_NORFLASH;
        mtd->writesize = 1;
        mtd->flags = MTD_CAP_NORFLASH;
-       mtd->size = params.size;
+       mtd->size = params->size;
        mtd->_erase = spi_nor_erase;
        mtd->_read = spi_nor_read;
        mtd->_resume = spi_nor_resume;
 
-       /* NOR protection support for STmicro/Micron chips and similar */
-       if (JEDEC_MFR(info) == SNOR_MFR_ST ||
-           JEDEC_MFR(info) == SNOR_MFR_MICRON ||
-           info->flags & SPI_NOR_HAS_LOCK) {
-               nor->flash_lock = stm_lock;
-               nor->flash_unlock = stm_unlock;
-               nor->flash_is_locked = stm_is_locked;
-       }
-
-       if (nor->flash_lock && nor->flash_unlock && nor->flash_is_locked) {
+       if (nor->params.locking_ops) {
                mtd->_lock = spi_nor_lock;
                mtd->_unlock = spi_nor_unlock;
                mtd->_is_locked = spi_nor_is_locked;
@@ -4226,68 +4928,28 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
                mtd->flags |= MTD_NO_ERASE;
 
        mtd->dev.parent = dev;
-       nor->page_size = params.page_size;
+       nor->page_size = params->page_size;
        mtd->writebufsize = nor->page_size;
 
-       if (np) {
-               /* If we were instantiated by DT, use it */
-               if (of_property_read_bool(np, "m25p,fast-read"))
-                       params.hwcaps.mask |= SNOR_HWCAPS_READ_FAST;
-               else
-                       params.hwcaps.mask &= ~SNOR_HWCAPS_READ_FAST;
-       } else {
-               /* If we weren't instantiated by DT, default to fast-read */
-               params.hwcaps.mask |= SNOR_HWCAPS_READ_FAST;
-       }
-
        if (of_property_read_bool(np, "broken-flash-reset"))
                nor->flags |= SNOR_F_BROKEN_RESET;
 
-       /* Some devices cannot do fast-read, no matter what DT tells us */
-       if (info->flags & SPI_NOR_NO_FR)
-               params.hwcaps.mask &= ~SNOR_HWCAPS_READ_FAST;
-
        /*
         * Configure the SPI memory:
         * - select op codes for (Fast) Read, Page Program and Sector Erase.
         * - set the number of dummy cycles (mode cycles + wait states).
         * - set the SPI protocols for register and memory accesses.
-        * - set the Quad Enable bit if needed (required by SPI x-y-4 protos).
         */
-       ret = spi_nor_setup(nor, &params, hwcaps);
+       ret = spi_nor_setup(nor, hwcaps);
        if (ret)
                return ret;
 
-       if (nor->addr_width) {
-               /* already configured from SFDP */
-       } else if (info->addr_width) {
-               nor->addr_width = info->addr_width;
-       } else if (mtd->size > 0x1000000) {
-               /* enable 4-byte addressing if the device exceeds 16MiB */
-               nor->addr_width = 4;
-       } else {
-               nor->addr_width = 3;
-       }
-
-       if (info->flags & SPI_NOR_4B_OPCODES ||
-           (JEDEC_MFR(info) == SNOR_MFR_SPANSION && mtd->size > SZ_16M))
+       if (info->flags & SPI_NOR_4B_OPCODES)
                nor->flags |= SNOR_F_4B_OPCODES;
 
-       if (nor->addr_width == 4 && nor->flags & SNOR_F_4B_OPCODES &&
-           !(nor->flags & SNOR_F_HAS_4BAIT))
-               spi_nor_set_4byte_opcodes(nor);
-
-       if (nor->addr_width > SPI_NOR_MAX_ADDR_WIDTH) {
-               dev_err(dev, "address width is too large: %u\n",
-                       nor->addr_width);
-               return -EINVAL;
-       }
-
-       if (info->flags & SPI_S3AN) {
-               ret = s3an_nor_scan(nor);
-               if (ret)
-                       return ret;
-       }
+       ret = spi_nor_set_addr_width(nor);
+       if (ret)
+               return ret;
 
        /* Send all the required SPI flash commands to initialize device */
        ret = spi_nor_init(nor);
@@ -4317,6 +4979,174 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
 }
 EXPORT_SYMBOL_GPL(spi_nor_scan);
 
+static int spi_nor_probe(struct spi_mem *spimem)
+{
+       struct spi_device *spi = spimem->spi;
+       struct flash_platform_data *data = dev_get_platdata(&spi->dev);
+       struct spi_nor *nor;
+       /*
+        * Enable all caps by default. The core will mask them after
+        * checking what's really supported using spi_mem_supports_op().
+        */
+       const struct spi_nor_hwcaps hwcaps = { .mask = SNOR_HWCAPS_ALL };
+       char *flash_name;
+       int ret;
+
+       nor = devm_kzalloc(&spi->dev, sizeof(*nor), GFP_KERNEL);
+       if (!nor)
+               return -ENOMEM;
+
+       nor->spimem = spimem;
+       nor->dev = &spi->dev;
+       spi_nor_set_flash_node(nor, spi->dev.of_node);
+
+       spi_mem_set_drvdata(spimem, nor);
+
+       if (data && data->name)
+               nor->mtd.name = data->name;
+
+       if (!nor->mtd.name)
+               nor->mtd.name = spi_mem_get_name(spimem);
+
+       /*
+        * For some (historical?) reason many platforms provide two different
+        * names in flash_platform_data: "name" and "type". Quite often name is
+        * set to "m25p80" and then "type" provides a real chip name.
+        * If that's the case, respect "type" and ignore a "name".
+        */
+       if (data && data->type)
+               flash_name = data->type;
+       else if (!strcmp(spi->modalias, "spi-nor"))
+               flash_name = NULL; /* auto-detect */
+       else
+               flash_name = spi->modalias;
+
+       ret = spi_nor_scan(nor, flash_name, &hwcaps);
+       if (ret)
+               return ret;
+
+       /*
+        * None of the existing parts have > 512B pages, but let's play safe
+        * and add this logic so that if anyone ever adds support for such
+        * a NOR we don't end up with buffer overflows.
+        */
+       if (nor->page_size > PAGE_SIZE) {
+               nor->bouncebuf_size = nor->page_size;
+               devm_kfree(nor->dev, nor->bouncebuf);
+               nor->bouncebuf = devm_kmalloc(nor->dev,
+                                             nor->bouncebuf_size,
+                                             GFP_KERNEL);
+               if (!nor->bouncebuf)
+                       return -ENOMEM;
+       }
+
+       return mtd_device_register(&nor->mtd, data ? data->parts : NULL,
+                                  data ? data->nr_parts : 0);
+}
+
+static int spi_nor_remove(struct spi_mem *spimem)
+{
+       struct spi_nor *nor = spi_mem_get_drvdata(spimem);
+
+       spi_nor_restore(nor);
+
+       /* Clean up MTD stuff. */
+       return mtd_device_unregister(&nor->mtd);
+}
+
+static void spi_nor_shutdown(struct spi_mem *spimem)
+{
+       struct spi_nor *nor = spi_mem_get_drvdata(spimem);
+
+       spi_nor_restore(nor);
+}
+
+/*
+ * Do NOT add to this array without reading the following:
+ *
+ * Historically, many flash devices are bound to this driver by their name. But
+ * since most of these flash are compatible to some extent, and their
+ * differences can often be differentiated by the JEDEC read-ID command, we
+ * encourage new users to add support to the spi-nor library, and simply bind
+ * against a generic string here (e.g., "jedec,spi-nor").
+ *
+ * Many flash names are kept here in this list (as well as in spi-nor.c) to
+ * keep them available as module aliases for existing platforms.
+ */
+static const struct spi_device_id spi_nor_dev_ids[] = {
+       /*
+        * Allow non-DT platform devices to bind to the "spi-nor" modalias, and
+        * hack around the fact that the SPI core does not provide uevent
+        * matching for .of_match_table
+        */
+       {"spi-nor"},
+
+       /*
+        * Entries not used in DTs that should be safe to drop after replacing
+        * them with "spi-nor" in platform data.
+        */
+       {"s25sl064a"},  {"w25x16"},     {"m25p10"},     {"m25px64"},
+
+       /*
+        * Entries that were used in DTs without "jedec,spi-nor" fallback and
+        * should be kept for backward compatibility.
+        */
+       {"at25df321a"}, {"at25df641"},  {"at26df081a"},
+       {"mx25l4005a"}, {"mx25l1606e"}, {"mx25l6405d"}, {"mx25l12805d"},
+       {"mx25l25635e"},{"mx66l51235l"},
+       {"n25q064"},    {"n25q128a11"}, {"n25q128a13"}, {"n25q512a"},
+       {"s25fl256s1"}, {"s25fl512s"},  {"s25sl12801"}, {"s25fl008k"},
+       {"s25fl064k"},
+       {"sst25vf040b"},{"sst25vf016b"},{"sst25vf032b"},{"sst25wf040"},
+       {"m25p40"},     {"m25p80"},     {"m25p16"},     {"m25p32"},
+       {"m25p64"},     {"m25p128"},
+       {"w25x80"},     {"w25x32"},     {"w25q32"},     {"w25q32dw"},
+       {"w25q80bl"},   {"w25q128"},    {"w25q256"},
+
+       /* Flashes that can't be detected using JEDEC */
+       {"m25p05-nonjedec"},    {"m25p10-nonjedec"},    {"m25p20-nonjedec"},
+       {"m25p40-nonjedec"},    {"m25p80-nonjedec"},    {"m25p16-nonjedec"},
+       {"m25p32-nonjedec"},    {"m25p64-nonjedec"},    {"m25p128-nonjedec"},
+
+       /* Everspin MRAMs (non-JEDEC) */
+       { "mr25h128" }, /* 128 Kib, 40 MHz */
+       { "mr25h256" }, /* 256 Kib, 40 MHz */
+       { "mr25h10" },  /*   1 Mib, 40 MHz */
+       { "mr25h40" },  /*   4 Mib, 40 MHz */
+
+       { },
+};
+MODULE_DEVICE_TABLE(spi, spi_nor_dev_ids);
+
+static const struct of_device_id spi_nor_of_table[] = {
+       /*
+        * Generic compatibility for SPI NOR that can be identified by the
+        * JEDEC READ ID opcode (0x9F). Use this, if possible.
+        */
+       { .compatible = "jedec,spi-nor" },
+       { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, spi_nor_of_table);
+
+/*
+ * REVISIT: many of these chips have deep power-down modes, which
+ * should clearly be entered on suspend() to minimize power use.
+ * And also when they're otherwise idle...
+ */
+static struct spi_mem_driver spi_nor_driver = {
+       .spidrv = {
+               .driver = {
+                       .name = "spi-nor",
+                       .of_match_table = spi_nor_of_table,
+               },
+               .id_table = spi_nor_dev_ids,
+       },
+       .probe = spi_nor_probe,
+       .remove = spi_nor_remove,
+       .shutdown = spi_nor_shutdown,
+};
+module_spi_mem_driver(spi_nor_driver);
+
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Huang Shijie <shijie8@gmail.com>");
 MODULE_AUTHOR("Mike Lavender");
index 6025398..e1a2ae2 100644 (file)
@@ -345,15 +345,36 @@ static const struct blk_mq_ops ubiblock_mq_ops = {
        .init_request   = ubiblock_init_request,
 };
 
+static int calc_disk_capacity(struct ubi_volume_info *vi, u64 *disk_capacity)
+{
+       u64 size = vi->used_bytes >> 9;
+
+       if (vi->used_bytes % 512) {
+               pr_warn("UBI: block: volume size is not a multiple of 512, "
+                       "last %llu bytes are ignored!\n",
+                       vi->used_bytes - (size << 9));
+       }
+
+       if ((sector_t)size != size)
+               return -EFBIG;
+
+       *disk_capacity = size;
+
+       return 0;
+}
+
 int ubiblock_create(struct ubi_volume_info *vi)
 {
        struct ubiblock *dev;
        struct gendisk *gd;
-       u64 disk_capacity = vi->used_bytes >> 9;
+       u64 disk_capacity;
        int ret;
 
-       if ((sector_t)disk_capacity != disk_capacity)
-               return -EFBIG;
+       ret = calc_disk_capacity(vi, &disk_capacity);
+       if (ret) {
+               return ret;
+       }
+
        /* Check that the volume isn't already handled */
        mutex_lock(&devices_mutex);
        if (find_dev_nolock(vi->ubi_num, vi->vol_id)) {
@@ -507,7 +528,8 @@ out_unlock:
 static int ubiblock_resize(struct ubi_volume_info *vi)
 {
        struct ubiblock *dev;
-       u64 disk_capacity = vi->used_bytes >> 9;
+       u64 disk_capacity;
+       int ret;
 
        /*
         * Need to lock the device list until we stop using the device,
@@ -520,11 +542,16 @@ static int ubiblock_resize(struct ubi_volume_info *vi)
                mutex_unlock(&devices_mutex);
                return -ENODEV;
        }
-       if ((sector_t)disk_capacity != disk_capacity) {
+
+       ret = calc_disk_capacity(vi, &disk_capacity);
+       if (ret) {
                mutex_unlock(&devices_mutex);
-               dev_warn(disk_to_dev(dev->gd), "the volume is too big (%d LEBs), cannot resize",
-                        vi->size);
-               return -EFBIG;
+               if (ret == -EFBIG) {
+                       dev_warn(disk_to_dev(dev->gd),
+                                "the volume is too big (%d LEBs), cannot resize",
+                                vi->size);
+               }
+               return ret;
        }
 
        mutex_lock(&dev->dev_mutex);
index d9e2e3a..c44c847 100644 (file)
@@ -196,7 +196,7 @@ static int produce_free_peb(struct ubi_device *ubi)
  */
 int ubi_wl_get_peb(struct ubi_device *ubi)
 {
-       int ret, retried = 0;
+       int ret, attempts = 0;
        struct ubi_fm_pool *pool = &ubi->fm_pool;
        struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool;
 
@@ -221,12 +221,12 @@ again:
 
        if (pool->used == pool->size) {
                spin_unlock(&ubi->wl_lock);
-               if (retried) {
+               attempts++;
+               if (attempts == 10) {
                        ubi_err(ubi, "Unable to get a free PEB from user WL pool");
                        ret = -ENOSPC;
                        goto out;
                }
-               retried = 1;
                up_read(&ubi->fm_eba_sem);
                ret = produce_free_peb(ubi);
                if (ret < 0) {
index 949700a..3fcdefe 100644 (file)
@@ -710,6 +710,12 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
                if (!e2)
                        goto out_cancel;
 
+               /*
+                * Anchor move within the anchor area is useless.
+                */
+               if (e2->pnum < UBI_FM_MAX_START)
+                       goto out_cancel;
+
                self_check_in_wl_tree(ubi, e1, &ubi->used);
                rb_erase(&e1->u.rb, &ubi->used);
                dbg_wl("anchor-move PEB %d to PEB %d", e1->pnum, e2->pnum);
index 34cd679..6c51b1b 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/io.h>
 #include <linux/netdevice.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/crc32.h>
 #include <linux/if_vlan.h>
 #include <linux/timecounter.h>
index 6d52cf5..25aa400 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
index bc86dff..01c3804 100644 (file)
@@ -188,8 +188,7 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index,
                /* new rate limit */
                err = mlx5_set_pp_rate_limit_cmd(dev, entry->index, rl);
                if (err) {
-                       mlx5_core_err(dev, "Failed configuring rate limit(err %d): \
-                                     rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
+                       mlx5_core_err(dev, "Failed configuring rate limit(err %d): rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
                                      err, rl->rate, rl->max_burst_sz,
                                      rl->typical_pkt_sz);
                        goto out;
@@ -218,8 +217,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl)
        mutex_lock(&table->rl_lock);
        entry = find_rl_entry(table, rl);
        if (!entry || !entry->refcount) {
-               mlx5_core_warn(dev, "Rate %u, max_burst_sz %u typical_pkt_sz %u \
-                              are not configured\n",
+               mlx5_core_warn(dev, "Rate %u, max_burst_sz %u typical_pkt_sz %u are not configured\n",
                               rl->rate, rl->max_burst_sz, rl->typical_pkt_sz);
                goto out;
        }
index 158ac07..38b1f40 100644 (file)
@@ -798,9 +798,8 @@ static int qed_rdma_add_user(void *rdma_cxt,
        /* Calculate the corresponding DPI address */
        dpi_start_offset = p_hwfn->dpi_start_offset;
 
-       out_params->dpi_addr = (u64)((u8 __iomem *)p_hwfn->doorbells +
-                                    dpi_start_offset +
-                                    ((out_params->dpi) * p_hwfn->dpi_size));
+       out_params->dpi_addr = p_hwfn->doorbells + dpi_start_offset +
+                              out_params->dpi * p_hwfn->dpi_size;
 
        out_params->dpi_phys_addr = p_hwfn->db_phys_addr +
                                    dpi_start_offset +
index 0ef01db..74f81fe 100644 (file)
@@ -28,7 +28,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/pm_runtime.h>
 #include <linux/prefetch.h>
-#include <linux/pci-aspm.h>
 #include <linux/ipv6.h>
 #include <net/ip6_checksum.h>
 
index ebd64e0..1255302 100644 (file)
@@ -654,8 +654,7 @@ void i2400m_tx_close(struct i2400m *i2400m)
        padding = aligned_size - tx_msg_moved->size;
        if (padding > 0) {
                pad_buf = i2400m_tx_fifo_push(i2400m, padding, 0, 0);
-               if (unlikely(WARN_ON(pad_buf == NULL
-                                    || pad_buf == TAIL_FULL))) {
+               if (WARN_ON(pad_buf == NULL || pad_buf == TAIL_FULL)) {
                        /* This should not happen -- append should verify
                         * there is always space left at least to append
                         * tx_block_size */
index c6156cc..d5ee32c 100644 (file)
@@ -18,7 +18,6 @@
 
 #include <linux/nl80211.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/etherdevice.h>
 #include <linux/module.h>
 #include "../ath.h"
index b82da75..4fbcc7f 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/slab.h>
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
index fa2c028..ffb705b 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/slab.h>
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
index 5ab87a8..f8a1f98 100644 (file)
@@ -62,7 +62,6 @@
  *
  *****************************************************************************/
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/interrupt.h>
 #include <linux/debugfs.h>
 #include <linux/sched.h>
index 2859cc9..156c2a1 100644 (file)
@@ -78,7 +78,7 @@ static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx)
        if (idx < 0 || idx > ndev->mw_count)
                return -EINVAL;
 
-       return 1 << idx;
+       return ndev->dev_data->mw_idx << idx;
 }
 
 static int amd_ntb_mw_count(struct ntb_dev *ntb, int pidx)
@@ -909,7 +909,7 @@ static int amd_init_ntb(struct amd_ntb_dev *ndev)
 {
        void __iomem *mmio = ndev->self_mmio;
 
-       ndev->mw_count = AMD_MW_CNT;
+       ndev->mw_count = ndev->dev_data->mw_count;
        ndev->spad_count = AMD_SPADS_CNT;
        ndev->db_count = AMD_DB_CNT;
 
@@ -1069,6 +1069,8 @@ static int amd_ntb_pci_probe(struct pci_dev *pdev,
                goto err_ndev;
        }
 
+       ndev->dev_data = (struct ntb_dev_data *)id->driver_data;
+
        ndev_init_struct(ndev, pdev);
 
        rc = amd_ntb_init_pci(ndev, pdev);
@@ -1123,9 +1125,21 @@ static const struct file_operations amd_ntb_debugfs_info = {
        .read = ndev_debugfs_read,
 };
 
+static const struct ntb_dev_data dev_data[] = {
+       { /* for device 145b */
+               .mw_count = 3,
+               .mw_idx = 1,
+       },
+       { /* for device 148b */
+               .mw_count = 2,
+               .mw_idx = 2,
+       },
+};
+
 static const struct pci_device_id amd_ntb_pci_tbl[] = {
-       {PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NTB)},
-       {0}
+       { PCI_VDEVICE(AMD, 0x145b), (kernel_ulong_t)&dev_data[0] },
+       { PCI_VDEVICE(AMD, 0x148b), (kernel_ulong_t)&dev_data[1] },
+       { 0, }
 };
 MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl);
 
index 8f3617a..139a307 100644 (file)
@@ -52,7 +52,6 @@
 #include <linux/ntb.h>
 #include <linux/pci.h>
 
-#define PCI_DEVICE_ID_AMD_NTB  0x145B
 #define AMD_LINK_HB_TIMEOUT    msecs_to_jiffies(1000)
 #define AMD_LINK_STATUS_OFFSET 0x68
 #define NTB_LIN_STA_ACTIVE_BIT 0x00000002
@@ -93,7 +92,6 @@ static inline void _write64(u64 val, void __iomem *mmio)
 
 enum {
        /* AMD NTB Capability */
-       AMD_MW_CNT              = 3,
        AMD_DB_CNT              = 16,
        AMD_MSIX_VECTOR_CNT     = 24,
        AMD_SPADS_CNT           = 16,
@@ -170,6 +168,11 @@ enum {
        AMD_PEER_OFFSET         = 0x400,
 };
 
+struct ntb_dev_data {
+       const unsigned char mw_count;
+       const unsigned int mw_idx;
+};
+
 struct amd_ntb_dev;
 
 struct amd_ntb_vec {
@@ -185,6 +188,7 @@ struct amd_ntb_dev {
        u32 cntl_sta;
        u32 peer_sta;
 
+       struct ntb_dev_data *dev_data;
        unsigned char mw_count;
        unsigned char spad_count;
        unsigned char db_count;
index bfc7cac..c79b54c 100644 (file)
@@ -4,11 +4,11 @@ config NTB_IDT
        depends on PCI
        select HWMON
        help
-        This driver supports NTB of cappable IDT PCIe-switches.
+        This driver supports NTB of capable IDT PCIe-switches.
 
         Some of the pre-initializations must be made before IDT PCIe-switch
-        exposes it NT-functions correctly. It should be done by either proper
-        initialisation of EEPROM connected to master smbus of the switch or
+        exposes its NT-functions correctly. It should be done by either proper
+        initialization of EEPROM connected to master SMBus of the switch or
         by BIOS using slave-SMBus interface changing corresponding registers
         value. Evidently it must be done before PCI bus enumeration is
         finished in Linux kernel.
index f495945..86ffa71 100644 (file)
@@ -306,7 +306,7 @@ static int switchtec_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
        if (rc)
                return rc;
 
-       if (addr == 0 || size == 0) {
+       if (size == 0) {
                if (widx < nr_direct_mw)
                        switchtec_ntb_mw_clr_direct(sndev, widx);
                else
index 40c90ca..00a5d57 100644 (file)
@@ -292,7 +292,7 @@ static int ntb_transport_bus_match(struct device *dev,
 static int ntb_transport_bus_probe(struct device *dev)
 {
        const struct ntb_transport_client *client;
-       int rc = -EINVAL;
+       int rc;
 
        get_device(dev);
 
index d028331..e9b7c2d 100644 (file)
@@ -1378,7 +1378,7 @@ static int perf_setup_peer_mw(struct perf_peer *peer)
        int ret;
 
        /* Get outbound MW parameters and map it */
-       ret = ntb_peer_mw_get_addr(perf->ntb, peer->gidx, &phys_addr,
+       ret = ntb_peer_mw_get_addr(perf->ntb, perf->gidx, &phys_addr,
                                   &peer->outbuf_size);
        if (ret)
                return ret;
index a5fde15..36af7af 100644 (file)
@@ -118,4 +118,16 @@ config NVDIMM_KEYS
        depends on ENCRYPTED_KEYS
        depends on (LIBNVDIMM=ENCRYPTED_KEYS) || LIBNVDIMM=m
 
+config NVDIMM_TEST_BUILD
+       tristate "Build the unit test core"
+       depends on m
+       depends on COMPILE_TEST && X86_64
+       default m if COMPILE_TEST
+       help
+         Build the core of the unit test infrastructure. The result of
+         this build is non-functional for unit test execution, but it
+         otherwise helps catch build errors induced by changes to the
+         core devm_memremap_pages() implementation and other
+         infrastructure.
+
 endif
index cefe233..29203f3 100644 (file)
@@ -29,3 +29,7 @@ libnvdimm-$(CONFIG_BTT) += btt_devs.o
 libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o
 libnvdimm-$(CONFIG_NVDIMM_DAX) += dax_devs.o
 libnvdimm-$(CONFIG_NVDIMM_KEYS) += security.o
+
+TOOLS := ../../tools
+TEST_SRC := $(TOOLS)/testing/nvdimm/test
+obj-$(CONFIG_NVDIMM_TEST_BUILD) += $(TEST_SRC)/iomap.o
index 798c5c4..75a58a6 100644 (file)
@@ -95,10 +95,9 @@ static int nvdimm_bus_probe(struct device *dev)
        rc = nd_drv->probe(dev);
        debug_nvdimm_unlock(dev);
 
-       if (rc == 0)
-               nd_region_probe_success(nvdimm_bus, dev);
-       else
-               nd_region_disable(nvdimm_bus, dev);
+       if ((rc == 0 || rc == -EOPNOTSUPP) &&
+                       dev->parent && is_nd_region(dev->parent))
+               nd_region_advance_seeds(to_nd_region(dev->parent), dev);
        nvdimm_bus_probe_end(nvdimm_bus);
 
        dev_dbg(&nvdimm_bus->dev, "END: %s.probe(%s) = %d\n", dev->driver->name,
@@ -121,7 +120,6 @@ static int nvdimm_bus_remove(struct device *dev)
                rc = nd_drv->remove(dev);
                debug_nvdimm_unlock(dev);
        }
-       nd_region_disable(nvdimm_bus, dev);
 
        dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name,
                        dev_name(dev), rc);
@@ -400,7 +398,7 @@ static int child_unregister(struct device *dev, void *data)
 
                /* We are shutting down. Make state frozen artificially. */
                nvdimm_bus_lock(dev);
-               nvdimm->sec.state = NVDIMM_SECURITY_FROZEN;
+               set_bit(NVDIMM_SECURITY_FROZEN, &nvdimm->sec.flags);
                if (test_and_clear_bit(NDD_WORK_PENDING, &nvdimm->flags))
                        dev_put = true;
                nvdimm_bus_unlock(dev);
index 29a065e..196aa44 100644 (file)
@@ -372,106 +372,26 @@ __weak ssize_t security_show(struct device *dev,
 {
        struct nvdimm *nvdimm = to_nvdimm(dev);
 
-       switch (nvdimm->sec.state) {
-       case NVDIMM_SECURITY_DISABLED:
+       if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags))
                return sprintf(buf, "disabled\n");
-       case NVDIMM_SECURITY_UNLOCKED:
+       if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags))
                return sprintf(buf, "unlocked\n");
-       case NVDIMM_SECURITY_LOCKED:
+       if (test_bit(NVDIMM_SECURITY_LOCKED, &nvdimm->sec.flags))
                return sprintf(buf, "locked\n");
-       case NVDIMM_SECURITY_FROZEN:
-               return sprintf(buf, "frozen\n");
-       case NVDIMM_SECURITY_OVERWRITE:
+       if (test_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags))
                return sprintf(buf, "overwrite\n");
-       default:
-               return -ENOTTY;
-       }
-
        return -ENOTTY;
 }
 
-#define OPS                                                    \
-       C( OP_FREEZE,           "freeze",               1),     \
-       C( OP_DISABLE,          "disable",              2),     \
-       C( OP_UPDATE,           "update",               3),     \
-       C( OP_ERASE,            "erase",                2),     \
-       C( OP_OVERWRITE,        "overwrite",            2),     \
-       C( OP_MASTER_UPDATE,    "master_update",        3),     \
-       C( OP_MASTER_ERASE,     "master_erase",         2)
-#undef C
-#define C(a, b, c) a
-enum nvdimmsec_op_ids { OPS };
-#undef C
-#define C(a, b, c) { b, c }
-static struct {
-       const char *name;
-       int args;
-} ops[] = { OPS };
-#undef C
-
-#define SEC_CMD_SIZE 32
-#define KEY_ID_SIZE 10
-
-static ssize_t __security_store(struct device *dev, const char *buf, size_t len)
+static ssize_t frozen_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
 {
        struct nvdimm *nvdimm = to_nvdimm(dev);
-       ssize_t rc;
-       char cmd[SEC_CMD_SIZE+1], keystr[KEY_ID_SIZE+1],
-               nkeystr[KEY_ID_SIZE+1];
-       unsigned int key, newkey;
-       int i;
 
-       if (atomic_read(&nvdimm->busy))
-               return -EBUSY;
-
-       rc = sscanf(buf, "%"__stringify(SEC_CMD_SIZE)"s"
-                       " %"__stringify(KEY_ID_SIZE)"s"
-                       " %"__stringify(KEY_ID_SIZE)"s",
-                       cmd, keystr, nkeystr);
-       if (rc < 1)
-               return -EINVAL;
-       for (i = 0; i < ARRAY_SIZE(ops); i++)
-               if (sysfs_streq(cmd, ops[i].name))
-                       break;
-       if (i >= ARRAY_SIZE(ops))
-               return -EINVAL;
-       if (ops[i].args > 1)
-               rc = kstrtouint(keystr, 0, &key);
-       if (rc >= 0 && ops[i].args > 2)
-               rc = kstrtouint(nkeystr, 0, &newkey);
-       if (rc < 0)
-               return rc;
-
-       if (i == OP_FREEZE) {
-               dev_dbg(dev, "freeze\n");
-               rc = nvdimm_security_freeze(nvdimm);
-       } else if (i == OP_DISABLE) {
-               dev_dbg(dev, "disable %u\n", key);
-               rc = nvdimm_security_disable(nvdimm, key);
-       } else if (i == OP_UPDATE) {
-               dev_dbg(dev, "update %u %u\n", key, newkey);
-               rc = nvdimm_security_update(nvdimm, key, newkey, NVDIMM_USER);
-       } else if (i == OP_ERASE) {
-               dev_dbg(dev, "erase %u\n", key);
-               rc = nvdimm_security_erase(nvdimm, key, NVDIMM_USER);
-       } else if (i == OP_OVERWRITE) {
-               dev_dbg(dev, "overwrite %u\n", key);
-               rc = nvdimm_security_overwrite(nvdimm, key);
-       } else if (i == OP_MASTER_UPDATE) {
-               dev_dbg(dev, "master_update %u %u\n", key, newkey);
-               rc = nvdimm_security_update(nvdimm, key, newkey,
-                               NVDIMM_MASTER);
-       } else if (i == OP_MASTER_ERASE) {
-               dev_dbg(dev, "master_erase %u\n", key);
-               rc = nvdimm_security_erase(nvdimm, key,
-                               NVDIMM_MASTER);
-       } else
-               return -EINVAL;
-
-       if (rc == 0)
-               rc = len;
-       return rc;
+       return sprintf(buf, "%d\n", test_bit(NVDIMM_SECURITY_FROZEN,
+                               &nvdimm->sec.flags));
 }
+static DEVICE_ATTR_RO(frozen);
 
 static ssize_t security_store(struct device *dev,
                struct device_attribute *attr, const char *buf, size_t len)
@@ -487,7 +407,7 @@ static ssize_t security_store(struct device *dev,
        nd_device_lock(dev);
        nvdimm_bus_lock(dev);
        wait_nvdimm_bus_probe_idle(dev);
-       rc = __security_store(dev, buf, len);
+       rc = nvdimm_security_store(dev, buf, len);
        nvdimm_bus_unlock(dev);
        nd_device_unlock(dev);
 
@@ -501,6 +421,7 @@ static struct attribute *nvdimm_attributes[] = {
        &dev_attr_commands.attr,
        &dev_attr_available_slots.attr,
        &dev_attr_security.attr,
+       &dev_attr_frozen.attr,
        NULL,
 };
 
@@ -509,17 +430,24 @@ static umode_t nvdimm_visible(struct kobject *kobj, struct attribute *a, int n)
        struct device *dev = container_of(kobj, typeof(*dev), kobj);
        struct nvdimm *nvdimm = to_nvdimm(dev);
 
-       if (a != &dev_attr_security.attr)
+       if (a != &dev_attr_security.attr && a != &dev_attr_frozen.attr)
                return a->mode;
-       if (nvdimm->sec.state < 0)
+       if (!nvdimm->sec.flags)
                return 0;
-       /* Are there any state mutation ops? */
-       if (nvdimm->sec.ops->freeze || nvdimm->sec.ops->disable
-                       || nvdimm->sec.ops->change_key
-                       || nvdimm->sec.ops->erase
-                       || nvdimm->sec.ops->overwrite)
+
+       if (a == &dev_attr_security.attr) {
+               /* Are there any state mutation ops (make writable)? */
+               if (nvdimm->sec.ops->freeze || nvdimm->sec.ops->disable
+                               || nvdimm->sec.ops->change_key
+                               || nvdimm->sec.ops->erase
+                               || nvdimm->sec.ops->overwrite)
+                       return a->mode;
+               return 0444;
+       }
+
+       if (nvdimm->sec.ops->freeze)
                return a->mode;
-       return 0444;
+       return 0;
 }
 
 struct attribute_group nvdimm_attribute_group = {
@@ -569,8 +497,8 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus,
         * attribute visibility.
         */
        /* get security state and extended (master) state */
-       nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
-       nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, NVDIMM_MASTER);
+       nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
+       nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER);
        nd_device_register(dev);
 
        return nvdimm;
@@ -588,7 +516,7 @@ int nvdimm_security_setup_events(struct device *dev)
 {
        struct nvdimm *nvdimm = to_nvdimm(dev);
 
-       if (nvdimm->sec.state < 0 || !nvdimm->sec.ops
+       if (!nvdimm->sec.flags || !nvdimm->sec.ops
                        || !nvdimm->sec.ops->overwrite)
                return 0;
        nvdimm->sec.overwrite_state = sysfs_get_dirent(dev->kobj.sd, "security");
@@ -614,7 +542,7 @@ int nvdimm_security_freeze(struct nvdimm *nvdimm)
        if (!nvdimm->sec.ops || !nvdimm->sec.ops->freeze)
                return -EOPNOTSUPP;
 
-       if (nvdimm->sec.state < 0)
+       if (!nvdimm->sec.flags)
                return -EIO;
 
        if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
@@ -623,7 +551,7 @@ int nvdimm_security_freeze(struct nvdimm *nvdimm)
        }
 
        rc = nvdimm->sec.ops->freeze(nvdimm);
-       nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+       nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
 
        return rc;
 }
index 73e197b..47a4828 100644 (file)
@@ -353,11 +353,6 @@ static bool slot_valid(struct nvdimm_drvdata *ndd,
        if (slot != __le32_to_cpu(nd_label->slot))
                return false;
 
-       /* check that DPA allocations are page aligned */
-       if ((__le64_to_cpu(nd_label->dpa)
-                               | __le64_to_cpu(nd_label->rawsize)) % SZ_4K)
-               return false;
-
        /* check checksum */
        if (namespace_label_has(ndd, checksum)) {
                u64 sum, sum_save;
index a16e522..4340132 100644 (file)
@@ -1006,10 +1006,10 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
                return -ENXIO;
        }
 
-       div_u64_rem(val, SZ_4K * nd_region->ndr_mappings, &remainder);
+       div_u64_rem(val, PAGE_SIZE * nd_region->ndr_mappings, &remainder);
        if (remainder) {
-               dev_dbg(dev, "%llu is not %dK aligned\n", val,
-                               (SZ_4K * nd_region->ndr_mappings) / SZ_1K);
+               dev_dbg(dev, "%llu is not %ldK aligned\n", val,
+                               (PAGE_SIZE * nd_region->ndr_mappings) / SZ_1K);
                return -EINVAL;
        }
 
@@ -2462,6 +2462,27 @@ static struct device **create_namespaces(struct nd_region *nd_region)
        return devs;
 }
 
+static void deactivate_labels(void *region)
+{
+       struct nd_region *nd_region = region;
+       int i;
+
+       for (i = 0; i < nd_region->ndr_mappings; i++) {
+               struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+               struct nvdimm_drvdata *ndd = nd_mapping->ndd;
+               struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+               mutex_lock(&nd_mapping->lock);
+               nd_mapping_free_labels(nd_mapping);
+               mutex_unlock(&nd_mapping->lock);
+
+               put_ndd(ndd);
+               nd_mapping->ndd = NULL;
+               if (ndd)
+                       atomic_dec(&nvdimm->busy);
+       }
+}
+
 static int init_active_labels(struct nd_region *nd_region)
 {
        int i;
@@ -2519,16 +2540,17 @@ static int init_active_labels(struct nd_region *nd_region)
                        mutex_unlock(&nd_mapping->lock);
                }
 
-               if (j >= count)
-                       continue;
+               if (j < count)
+                       break;
+       }
 
-               mutex_lock(&nd_mapping->lock);
-               nd_mapping_free_labels(nd_mapping);
-               mutex_unlock(&nd_mapping->lock);
+       if (i < nd_region->ndr_mappings) {
+               deactivate_labels(nd_region);
                return -ENOMEM;
        }
 
-       return 0;
+       return devm_add_action_or_reset(&nd_region->dev, deactivate_labels,
+                       nd_region);
 }
 
 int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
index 0ac52b6..25fa121 100644 (file)
@@ -39,53 +39,40 @@ struct nvdimm {
        const char *dimm_id;
        struct {
                const struct nvdimm_security_ops *ops;
-               enum nvdimm_security_state state;
-               enum nvdimm_security_state ext_state;
+               unsigned long flags;
+               unsigned long ext_flags;
                unsigned int overwrite_tmo;
                struct kernfs_node *overwrite_state;
        } sec;
        struct delayed_work dwork;
 };
 
-static inline enum nvdimm_security_state nvdimm_security_state(
+static inline unsigned long nvdimm_security_flags(
                struct nvdimm *nvdimm, enum nvdimm_passphrase_type ptype)
 {
+       u64 flags;
+       const u64 state_flags = 1UL << NVDIMM_SECURITY_DISABLED
+               | 1UL << NVDIMM_SECURITY_LOCKED
+               | 1UL << NVDIMM_SECURITY_UNLOCKED
+               | 1UL << NVDIMM_SECURITY_OVERWRITE;
+
        if (!nvdimm->sec.ops)
-               return -ENXIO;
+               return 0;
 
-       return nvdimm->sec.ops->state(nvdimm, ptype);
+       flags = nvdimm->sec.ops->get_flags(nvdimm, ptype);
+       /* disabled, locked, unlocked, and overwrite are mutually exclusive */
+       dev_WARN_ONCE(&nvdimm->dev, hweight64(flags & state_flags) > 1,
+                       "reported invalid security state: %#llx\n",
+                       (unsigned long long) flags);
+       return flags;
 }
 int nvdimm_security_freeze(struct nvdimm *nvdimm);
 #if IS_ENABLED(CONFIG_NVDIMM_KEYS)
-int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid);
-int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid,
-               unsigned int new_keyid,
-               enum nvdimm_passphrase_type pass_type);
-int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid,
-               enum nvdimm_passphrase_type pass_type);
-int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid);
+ssize_t nvdimm_security_store(struct device *dev, const char *buf, size_t len);
 void nvdimm_security_overwrite_query(struct work_struct *work);
 #else
-static inline int nvdimm_security_disable(struct nvdimm *nvdimm,
-               unsigned int keyid)
-{
-       return -EOPNOTSUPP;
-}
-static inline int nvdimm_security_update(struct nvdimm *nvdimm,
-               unsigned int keyid,
-               unsigned int new_keyid,
-               enum nvdimm_passphrase_type pass_type)
-{
-       return -EOPNOTSUPP;
-}
-static inline int nvdimm_security_erase(struct nvdimm *nvdimm,
-               unsigned int keyid,
-               enum nvdimm_passphrase_type pass_type)
-{
-       return -EOPNOTSUPP;
-}
-static inline int nvdimm_security_overwrite(struct nvdimm *nvdimm,
-               unsigned int keyid)
+static inline ssize_t nvdimm_security_store(struct device *dev,
+               const char *buf, size_t len)
 {
        return -EOPNOTSUPP;
 }
@@ -128,13 +115,12 @@ int __init nvdimm_bus_init(void);
 void nvdimm_bus_exit(void);
 void nvdimm_devs_exit(void);
 void nd_region_devs_exit(void);
-void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev);
 struct nd_region;
+void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev);
 void nd_region_create_ns_seed(struct nd_region *nd_region);
 void nd_region_create_btt_seed(struct nd_region *nd_region);
 void nd_region_create_pfn_seed(struct nd_region *nd_region);
 void nd_region_create_dax_seed(struct nd_region *nd_region);
-void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev);
 int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus);
 void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus);
 void nd_synchronize(void);
index 1b99556..e89af4b 100644 (file)
@@ -375,6 +375,10 @@ unsigned int pmem_sector_size(struct nd_namespace_common *ndns);
 void nvdimm_badblocks_populate(struct nd_region *nd_region,
                struct badblocks *bb, const struct resource *res);
 #if IS_ENABLED(CONFIG_ND_CLAIM)
+
+/* max struct page size independent of kernel config */
+#define MAX_STRUCT_PAGE_SIZE 64
+
 int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap);
 int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio);
 void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio);
index a0c8dcf..97187d6 100644 (file)
@@ -42,7 +42,7 @@ static int of_pmem_region_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        priv->bus_desc.attr_groups = bus_attr_groups;
-       priv->bus_desc.provider_name = "of_pmem";
+       priv->bus_desc.provider_name = kstrdup(pdev->name, GFP_KERNEL);
        priv->bus_desc.module = THIS_MODULE;
        priv->bus_desc.of_node = np;
 
index 7381673..acb1951 100644 (file)
@@ -29,7 +29,10 @@ struct nd_pfn_sb {
        /* minor-version-2 record the base alignment of the mapping */
        __le32 align;
        /* minor-version-3 guarantee the padding and flags are zero */
-       u8 padding[4000];
+       /* minor-version-4 record the page size and struct page size */
+       __le32 page_size;
+       __le16 page_struct_size;
+       u8 padding[3994];
        __le64 checksum;
 };
 
index cb98b8f..bb9cc5c 100644 (file)
@@ -460,6 +460,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
        if (__le16_to_cpu(pfn_sb->version_minor) < 2)
                pfn_sb->align = 0;
 
+       if (__le16_to_cpu(pfn_sb->version_minor) < 4) {
+               pfn_sb->page_struct_size = cpu_to_le16(64);
+               pfn_sb->page_size = cpu_to_le32(PAGE_SIZE);
+       }
+
        switch (le32_to_cpu(pfn_sb->mode)) {
        case PFN_MODE_RAM:
        case PFN_MODE_PMEM:
@@ -475,6 +480,22 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
                align = 1UL << ilog2(offset);
        mode = le32_to_cpu(pfn_sb->mode);
 
+       if ((le32_to_cpu(pfn_sb->page_size) > PAGE_SIZE) &&
+                       (mode == PFN_MODE_PMEM)) {
+               dev_err(&nd_pfn->dev,
+                               "init failed, page size mismatch %d\n",
+                               le32_to_cpu(pfn_sb->page_size));
+               return -EOPNOTSUPP;
+       }
+
+       if ((le16_to_cpu(pfn_sb->page_struct_size) < sizeof(struct page)) &&
+                       (mode == PFN_MODE_PMEM)) {
+               dev_err(&nd_pfn->dev,
+                               "init failed, struct page size mismatch %d\n",
+                               le16_to_cpu(pfn_sb->page_struct_size));
+               return -EOPNOTSUPP;
+       }
+
        if (!nd_pfn->uuid) {
                /*
                 * When probing a namepace via nd_pfn_probe() the uuid
@@ -703,8 +724,16 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
                 * The altmap should be padded out to the block size used
                 * when populating the vmemmap. This *should* be equal to
                 * PMD_SIZE for most architectures.
+                *
+                * Also make sure size of struct page is at most 64 bytes. We
+                * want to make sure we use large enough size here so that
+                * we don't have a dynamic reserve space depending on
+                * struct page size. But we also want to make sure we notice
+                * when we end up adding new elements to struct page.
                 */
-               offset = ALIGN(start + SZ_8K + 64 * npfns, align) - start;
+               BUILD_BUG_ON(sizeof(struct page) > MAX_STRUCT_PAGE_SIZE);
+               offset = ALIGN(start + SZ_8K + MAX_STRUCT_PAGE_SIZE * npfns, align)
+                       - start;
        } else if (nd_pfn->mode == PFN_MODE_RAM)
                offset = ALIGN(start + SZ_8K, align) - start;
        else
@@ -724,9 +753,11 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
        memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
        memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
        pfn_sb->version_major = cpu_to_le16(1);
-       pfn_sb->version_minor = cpu_to_le16(3);
+       pfn_sb->version_minor = cpu_to_le16(4);
        pfn_sb->end_trunc = cpu_to_le32(end_trunc);
        pfn_sb->align = cpu_to_le32(nd_pfn->align);
+       pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE);
+       pfn_sb->page_size = cpu_to_le32(PAGE_SIZE);
        checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
        pfn_sb->checksum = cpu_to_le64(checksum);
 
index 4c121dd..f9f76f6 100644 (file)
@@ -490,6 +490,7 @@ static int pmem_attach_disk(struct device *dev,
 
 static int nd_pmem_probe(struct device *dev)
 {
+       int ret;
        struct nd_namespace_common *ndns;
 
        ndns = nvdimm_namespace_common_probe(dev);
@@ -505,12 +506,32 @@ static int nd_pmem_probe(struct device *dev)
        if (is_nd_pfn(dev))
                return pmem_attach_disk(dev, ndns);
 
-       /* if we find a valid info-block we'll come back as that personality */
-       if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0
-                       || nd_dax_probe(dev, ndns) == 0)
+       ret = nd_btt_probe(dev, ndns);
+       if (ret == 0)
                return -ENXIO;
 
-       /* ...otherwise we're just a raw pmem device */
+       /*
+        * We have two failure conditions here: either there is no
+        * info reserve block, or we found a valid info reserve block
+        * but failed to initialize the pfn superblock.
+        *
+        * For the first case consider namespace as a raw pmem namespace
+        * and attach a disk.
+        *
+        * For the latter, consider this a success and advance the namespace
+        * seed.
+        */
+       ret = nd_pfn_probe(dev, ndns);
+       if (ret == 0)
+               return -ENXIO;
+       else if (ret == -EOPNOTSUPP)
+               return ret;
+
+       ret = nd_dax_probe(dev, ndns);
+       if (ret == 0)
+               return -ENXIO;
+       else if (ret == -EOPNOTSUPP)
+               return ret;
        return pmem_attach_disk(dev, ndns);
 }
 
index af30cbe..3fd6b59 100644 (file)
@@ -715,85 +715,37 @@ void nd_mapping_free_labels(struct nd_mapping *nd_mapping)
 }
 
 /*
- * Upon successful probe/remove, take/release a reference on the
- * associated interleave set (if present), and plant new btt + namespace
- * seeds.  Also, on the removal of a BLK region, notify the provider to
- * disable the region.
+ * When a namespace is activated create new seeds for the next
+ * namespace, or namespace-personality to be configured.
  */
-static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
-               struct device *dev, bool probe)
+void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev)
 {
-       struct nd_region *nd_region;
-
-       if (!probe && is_nd_region(dev)) {
-               int i;
-
-               nd_region = to_nd_region(dev);
-               for (i = 0; i < nd_region->ndr_mappings; i++) {
-                       struct nd_mapping *nd_mapping = &nd_region->mapping[i];
-                       struct nvdimm_drvdata *ndd = nd_mapping->ndd;
-                       struct nvdimm *nvdimm = nd_mapping->nvdimm;
-
-                       mutex_lock(&nd_mapping->lock);
-                       nd_mapping_free_labels(nd_mapping);
-                       mutex_unlock(&nd_mapping->lock);
-
-                       put_ndd(ndd);
-                       nd_mapping->ndd = NULL;
-                       if (ndd)
-                               atomic_dec(&nvdimm->busy);
-               }
-       }
-       if (dev->parent && is_nd_region(dev->parent) && probe) {
-               nd_region = to_nd_region(dev->parent);
-               nvdimm_bus_lock(dev);
-               if (nd_region->ns_seed == dev)
-                       nd_region_create_ns_seed(nd_region);
-               nvdimm_bus_unlock(dev);
-       }
-       if (is_nd_btt(dev) && probe) {
+       nvdimm_bus_lock(dev);
+       if (nd_region->ns_seed == dev) {
+               nd_region_create_ns_seed(nd_region);
+       } else if (is_nd_btt(dev)) {
                struct nd_btt *nd_btt = to_nd_btt(dev);
 
-               nd_region = to_nd_region(dev->parent);
-               nvdimm_bus_lock(dev);
                if (nd_region->btt_seed == dev)
                        nd_region_create_btt_seed(nd_region);
                if (nd_region->ns_seed == &nd_btt->ndns->dev)
                        nd_region_create_ns_seed(nd_region);
-               nvdimm_bus_unlock(dev);
-       }
-       if (is_nd_pfn(dev) && probe) {
+       } else if (is_nd_pfn(dev)) {
                struct nd_pfn *nd_pfn = to_nd_pfn(dev);
 
-               nd_region = to_nd_region(dev->parent);
-               nvdimm_bus_lock(dev);
                if (nd_region->pfn_seed == dev)
                        nd_region_create_pfn_seed(nd_region);
                if (nd_region->ns_seed == &nd_pfn->ndns->dev)
                        nd_region_create_ns_seed(nd_region);
-               nvdimm_bus_unlock(dev);
-       }
-       if (is_nd_dax(dev) && probe) {
+       } else if (is_nd_dax(dev)) {
                struct nd_dax *nd_dax = to_nd_dax(dev);
 
-               nd_region = to_nd_region(dev->parent);
-               nvdimm_bus_lock(dev);
                if (nd_region->dax_seed == dev)
                        nd_region_create_dax_seed(nd_region);
                if (nd_region->ns_seed == &nd_dax->nd_pfn.ndns->dev)
                        nd_region_create_ns_seed(nd_region);
-               nvdimm_bus_unlock(dev);
        }
-}
-
-void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev)
-{
-       nd_region_notify_driver_action(nvdimm_bus, dev, true);
-}
-
-void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev)
-{
-       nd_region_notify_driver_action(nvdimm_bus, dev, false);
+       nvdimm_bus_unlock(dev);
 }
 
 static ssize_t mappingN(struct device *dev, char *buf, int n)
@@ -992,10 +944,10 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
                struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
                struct nvdimm *nvdimm = mapping->nvdimm;
 
-               if ((mapping->start | mapping->size) % SZ_4K) {
-                       dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not 4K aligned\n",
-                                       caller, dev_name(&nvdimm->dev), i);
-
+               if ((mapping->start | mapping->size) % PAGE_SIZE) {
+                       dev_err(&nvdimm_bus->dev,
+                               "%s: %s mapping%d is not %ld aligned\n",
+                               caller, dev_name(&nvdimm->dev), i, PAGE_SIZE);
                        return NULL;
                }
 
@@ -1025,10 +977,9 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
                }
                region_buf = ndbr;
        } else {
-               nd_region = kzalloc(sizeof(struct nd_region)
-                               + sizeof(struct nd_mapping)
-                               * ndr_desc->num_mappings,
-                               GFP_KERNEL);
+               nd_region = kzalloc(struct_size(nd_region, mapping,
+                                               ndr_desc->num_mappings),
+                                   GFP_KERNEL);
                region_buf = nd_region;
        }
 
index a570f22..9e45b20 100644 (file)
@@ -158,7 +158,7 @@ static int nvdimm_key_revalidate(struct nvdimm *nvdimm)
        }
 
        nvdimm_put_key(key);
-       nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+       nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
        return 0;
 }
 
@@ -174,7 +174,7 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm)
        lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
 
        if (!nvdimm->sec.ops || !nvdimm->sec.ops->unlock
-                       || nvdimm->sec.state < 0)
+                       || !nvdimm->sec.flags)
                return -EIO;
 
        if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
@@ -189,7 +189,7 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm)
         * freeze of the security configuration. I.e. if the OS does not
         * have the key, security is being managed pre-OS.
         */
-       if (nvdimm->sec.state == NVDIMM_SECURITY_UNLOCKED) {
+       if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags)) {
                if (!key_revalidate)
                        return 0;
 
@@ -202,7 +202,7 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm)
                        rc == 0 ? "success" : "fail");
 
        nvdimm_put_key(key);
-       nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+       nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
        return rc;
 }
 
@@ -217,7 +217,25 @@ int nvdimm_security_unlock(struct device *dev)
        return rc;
 }
 
-int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid)
+static int check_security_state(struct nvdimm *nvdimm)
+{
+       struct device *dev = &nvdimm->dev;
+
+       if (test_bit(NVDIMM_SECURITY_FROZEN, &nvdimm->sec.flags)) {
+               dev_dbg(dev, "Incorrect security state: %#lx\n",
+                               nvdimm->sec.flags);
+               return -EIO;
+       }
+
+       if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
+               dev_dbg(dev, "Security operation in progress.\n");
+               return -EBUSY;
+       }
+
+       return 0;
+}
+
+static int security_disable(struct nvdimm *nvdimm, unsigned int keyid)
 {
        struct device *dev = &nvdimm->dev;
        struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
@@ -229,19 +247,12 @@ int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid)
        lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
 
        if (!nvdimm->sec.ops || !nvdimm->sec.ops->disable
-                       || nvdimm->sec.state < 0)
+                       || !nvdimm->sec.flags)
                return -EOPNOTSUPP;
 
-       if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) {
-               dev_dbg(dev, "Incorrect security state: %d\n",
-                               nvdimm->sec.state);
-               return -EIO;
-       }
-
-       if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
-               dev_dbg(dev, "Security operation in progress.\n");
-               return -EBUSY;
-       }
+       rc = check_security_state(nvdimm);
+       if (rc)
+               return rc;
 
        data = nvdimm_get_user_key_payload(nvdimm, keyid,
                        NVDIMM_BASE_KEY, &key);
@@ -253,11 +264,11 @@ int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid)
                        rc == 0 ? "success" : "fail");
 
        nvdimm_put_key(key);
-       nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+       nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
        return rc;
 }
 
-int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid,
+static int security_update(struct nvdimm *nvdimm, unsigned int keyid,
                unsigned int new_keyid,
                enum nvdimm_passphrase_type pass_type)
 {
@@ -271,14 +282,12 @@ int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid,
        lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
 
        if (!nvdimm->sec.ops || !nvdimm->sec.ops->change_key
-                       || nvdimm->sec.state < 0)
+                       || !nvdimm->sec.flags)
                return -EOPNOTSUPP;
 
-       if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) {
-               dev_dbg(dev, "Incorrect security state: %d\n",
-                               nvdimm->sec.state);
-               return -EIO;
-       }
+       rc = check_security_state(nvdimm);
+       if (rc)
+               return rc;
 
        data = nvdimm_get_user_key_payload(nvdimm, keyid,
                        NVDIMM_BASE_KEY, &key);
@@ -301,15 +310,15 @@ int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid,
        nvdimm_put_key(newkey);
        nvdimm_put_key(key);
        if (pass_type == NVDIMM_MASTER)
-               nvdimm->sec.ext_state = nvdimm_security_state(nvdimm,
+               nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm,
                                NVDIMM_MASTER);
        else
-               nvdimm->sec.state = nvdimm_security_state(nvdimm,
+               nvdimm->sec.flags = nvdimm_security_flags(nvdimm,
                                NVDIMM_USER);
        return rc;
 }
 
-int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid,
+static int security_erase(struct nvdimm *nvdimm, unsigned int keyid,
                enum nvdimm_passphrase_type pass_type)
 {
        struct device *dev = &nvdimm->dev;
@@ -322,26 +331,14 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid,
        lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
 
        if (!nvdimm->sec.ops || !nvdimm->sec.ops->erase
-                       || nvdimm->sec.state < 0)
+                       || !nvdimm->sec.flags)
                return -EOPNOTSUPP;
 
-       if (atomic_read(&nvdimm->busy)) {
-               dev_dbg(dev, "Unable to secure erase while DIMM active.\n");
-               return -EBUSY;
-       }
-
-       if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) {
-               dev_dbg(dev, "Incorrect security state: %d\n",
-                               nvdimm->sec.state);
-               return -EIO;
-       }
-
-       if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
-               dev_dbg(dev, "Security operation in progress.\n");
-               return -EBUSY;
-       }
+       rc = check_security_state(nvdimm);
+       if (rc)
+               return rc;
 
-       if (nvdimm->sec.ext_state != NVDIMM_SECURITY_UNLOCKED
+       if (!test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.ext_flags)
                        && pass_type == NVDIMM_MASTER) {
                dev_dbg(dev,
                        "Attempt to secure erase in wrong master state.\n");
@@ -359,11 +356,11 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid,
                        rc == 0 ? "success" : "fail");
 
        nvdimm_put_key(key);
-       nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+       nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
        return rc;
 }
 
-int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid)
+static int security_overwrite(struct nvdimm *nvdimm, unsigned int keyid)
 {
        struct device *dev = &nvdimm->dev;
        struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
@@ -375,29 +372,17 @@ int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid)
        lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
 
        if (!nvdimm->sec.ops || !nvdimm->sec.ops->overwrite
-                       || nvdimm->sec.state < 0)
+                       || !nvdimm->sec.flags)
                return -EOPNOTSUPP;
 
-       if (atomic_read(&nvdimm->busy)) {
-               dev_dbg(dev, "Unable to overwrite while DIMM active.\n");
-               return -EBUSY;
-       }
-
        if (dev->driver == NULL) {
                dev_dbg(dev, "Unable to overwrite while DIMM active.\n");
                return -EINVAL;
        }
 
-       if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) {
-               dev_dbg(dev, "Incorrect security state: %d\n",
-                               nvdimm->sec.state);
-               return -EIO;
-       }
-
-       if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
-               dev_dbg(dev, "Security operation in progress.\n");
-               return -EBUSY;
-       }
+       rc = check_security_state(nvdimm);
+       if (rc)
+               return rc;
 
        data = nvdimm_get_user_key_payload(nvdimm, keyid,
                        NVDIMM_BASE_KEY, &key);
@@ -412,7 +397,7 @@ int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid)
        if (rc == 0) {
                set_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags);
                set_bit(NDD_WORK_PENDING, &nvdimm->flags);
-               nvdimm->sec.state = NVDIMM_SECURITY_OVERWRITE;
+               set_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags);
                /*
                 * Make sure we don't lose device while doing overwrite
                 * query.
@@ -443,7 +428,7 @@ void __nvdimm_security_overwrite_query(struct nvdimm *nvdimm)
        tmo = nvdimm->sec.overwrite_tmo;
 
        if (!nvdimm->sec.ops || !nvdimm->sec.ops->query_overwrite
-                       || nvdimm->sec.state < 0)
+                       || !nvdimm->sec.flags)
                return;
 
        rc = nvdimm->sec.ops->query_overwrite(nvdimm);
@@ -467,8 +452,8 @@ void __nvdimm_security_overwrite_query(struct nvdimm *nvdimm)
        clear_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags);
        clear_bit(NDD_WORK_PENDING, &nvdimm->flags);
        put_device(&nvdimm->dev);
-       nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
-       nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, NVDIMM_MASTER);
+       nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
+       nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER);
 }
 
 void nvdimm_security_overwrite_query(struct work_struct *work)
@@ -480,3 +465,105 @@ void nvdimm_security_overwrite_query(struct work_struct *work)
        __nvdimm_security_overwrite_query(nvdimm);
        nvdimm_bus_unlock(&nvdimm->dev);
 }
+
+/*
+ * Security commands accepted by nvdimm_security_store(): enum id,
+ * command string, and token count (the command word itself plus the
+ * key ids it takes).
+ */
+#define OPS                                                    \
+       C( OP_FREEZE,           "freeze",               1),     \
+       C( OP_DISABLE,          "disable",              2),     \
+       C( OP_UPDATE,           "update",               3),     \
+       C( OP_ERASE,            "erase",                2),     \
+       C( OP_OVERWRITE,        "overwrite",            2),     \
+       C( OP_MASTER_UPDATE,    "master_update",        3),     \
+       C( OP_MASTER_ERASE,     "master_erase",         2)
+#undef C
+#define C(a, b, c) a
+enum nvdimmsec_op_ids { OPS };
+#undef C
+#define C(a, b, c) { b, c }
+static struct {
+       const char *name;
+       int args;
+} ops[] = { OPS };
+#undef C
+
+#define SEC_CMD_SIZE 32
+#define KEY_ID_SIZE 10
+
+/*
+ * Parse "<cmd> [<keyid> [<new keyid>]]" from @buf and run the matching
+ * security operation on the nvdimm behind @dev.
+ *
+ * Returns @len when the operation returns 0. Unknown commands and
+ * commands missing a required key id yield -EINVAL, a malformed key id
+ * yields the kstrtouint() error, and erase/overwrite yield -EBUSY while
+ * nvdimm->busy is non-zero; any other result comes from the operation.
+ */
+ssize_t nvdimm_security_store(struct device *dev, const char *buf, size_t len)
+{
+       struct nvdimm *nvdimm = to_nvdimm(dev);
+       ssize_t rc;
+       char cmd[SEC_CMD_SIZE+1], keystr[KEY_ID_SIZE+1],
+               nkeystr[KEY_ID_SIZE+1];
+       unsigned int key, newkey;
+       int i;
+
+       rc = sscanf(buf, "%"__stringify(SEC_CMD_SIZE)"s"
+                       " %"__stringify(KEY_ID_SIZE)"s"
+                       " %"__stringify(KEY_ID_SIZE)"s",
+                       cmd, keystr, nkeystr);
+       if (rc < 1)
+               return -EINVAL;
+       for (i = 0; i < ARRAY_SIZE(ops); i++)
+               if (sysfs_streq(cmd, ops[i].name))
+                       break;
+       if (i >= ARRAY_SIZE(ops))
+               return -EINVAL;
+       /*
+        * sscanf() leaves unmatched buffers untouched, so make sure every
+        * key id this command needs was actually supplied before parsing.
+        */
+       if (rc < ops[i].args)
+               return -EINVAL;
+       if (ops[i].args > 1)
+               rc = kstrtouint(keystr, 0, &key);
+       if (rc >= 0 && ops[i].args > 2)
+               rc = kstrtouint(nkeystr, 0, &newkey);
+       if (rc < 0)
+               return rc;
+
+       if (i == OP_FREEZE) {
+               dev_dbg(dev, "freeze\n");
+               rc = nvdimm_security_freeze(nvdimm);
+       } else if (i == OP_DISABLE) {
+               dev_dbg(dev, "disable %u\n", key);
+               rc = security_disable(nvdimm, key);
+       } else if (i == OP_UPDATE || i == OP_MASTER_UPDATE) {
+               dev_dbg(dev, "%s %u %u\n", ops[i].name, key, newkey);
+               rc = security_update(nvdimm, key, newkey, i == OP_UPDATE
+                               ? NVDIMM_USER : NVDIMM_MASTER);
+       } else if (i == OP_ERASE || i == OP_MASTER_ERASE) {
+               dev_dbg(dev, "%s %u\n", ops[i].name, key);
+               if (atomic_read(&nvdimm->busy)) {
+                       dev_dbg(dev, "Unable to secure erase while DIMM active.\n");
+                       return -EBUSY;
+               }
+               rc = security_erase(nvdimm, key, i == OP_ERASE
+                               ? NVDIMM_USER : NVDIMM_MASTER);
+       } else if (i == OP_OVERWRITE) {
+               dev_dbg(dev, "overwrite %u\n", key);
+               if (atomic_read(&nvdimm->busy)) {
+                       dev_dbg(dev, "Unable to overwrite while DIMM active.\n");
+                       return -EBUSY;
+               }
+               rc = security_overwrite(nvdimm, key);
+       } else
+               return -EINVAL;
+
+       if (rc == 0)
+               rc = len;
+       return rc;
+}
index 1ede176..108f60b 100644 (file)
@@ -666,8 +666,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
                        if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
                                return BLK_STS_NOTSUPP;
                        control |= NVME_RW_PRINFO_PRACT;
-               } else if (req_op(req) == REQ_OP_WRITE) {
-                       t10_pi_prepare(req, ns->pi_type);
                }
 
                switch (ns->pi_type) {
@@ -690,13 +688,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 
 void nvme_cleanup_cmd(struct request *req)
 {
-       if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
-           nvme_req(req)->status == 0) {
-               struct nvme_ns *ns = req->rq_disk->private_data;
-
-               t10_pi_complete(req, ns->pi_type,
-                               blk_rq_bytes(req) >> ns->lba_shift);
-       }
        if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
                struct nvme_ns *ns = req->rq_disk->private_data;
                struct page *page = req->special_vec.bv_page;
index 6b4d7b0..c0808f9 100644 (file)
@@ -549,8 +549,10 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 
        WARN_ON_ONCE(!iod->nents);
 
-       /* P2PDMA requests do not need to be unmapped */
-       if (!is_pci_p2pdma_page(sg_page(iod->sg)))
+       if (is_pci_p2pdma_page(sg_page(iod->sg)))
+               pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents,
+                                   rq_dma_dir(req));
+       else
                dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req));
 
 
@@ -834,8 +836,8 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
                goto out;
 
        if (is_pci_p2pdma_page(sg_page(iod->sg)))
-               nr_mapped = pci_p2pdma_map_sg(dev->dev, iod->sg, iod->nents,
-                                             rq_dma_dir(req));
+               nr_mapped = pci_p2pdma_map_sg_attrs(dev->dev, iod->sg,
+                               iod->nents, rq_dma_dir(req), DMA_ATTR_NO_WARN);
        else
                nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
                                             rq_dma_dir(req), DMA_ATTR_NO_WARN);
index c313de9..a304f5e 100644 (file)
@@ -52,7 +52,7 @@ config PCI_MSI
           If you don't know what to do here, say Y.
 
 config PCI_MSI_IRQ_DOMAIN
-       def_bool ARC || ARM || ARM64 || X86
+       def_bool ARC || ARM || ARM64 || X86 || RISCV
        depends on PCI_MSI
        select GENERIC_MSI_IRQ_DOMAIN
 
@@ -170,7 +170,7 @@ config PCI_P2PDMA
 
          Many PCIe root complexes do not support P2P transactions and
          it's hard to tell which support it at all, so at this time,
-         P2P DMA transations must be between devices behind the same root
+         P2P DMA transactions must be between devices behind the same root
          port.
 
          If unsure, say N.
@@ -181,7 +181,7 @@ config PCI_LABEL
 
 config PCI_HYPERV
         tristate "Hyper-V PCI Frontend"
-        depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
+        depends on X86_64 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && SYSFS
        select PCI_HYPERV_INTERFACE
         help
           The PCI device frontend driver allows the kernel to import arbitrary
index 544922f..2fccb57 100644 (file)
@@ -336,15 +336,6 @@ static inline int pcie_cap_version(const struct pci_dev *dev)
        return pcie_caps_reg(dev) & PCI_EXP_FLAGS_VERS;
 }
 
-static bool pcie_downstream_port(const struct pci_dev *dev)
-{
-       int type = pci_pcie_type(dev);
-
-       return type == PCI_EXP_TYPE_ROOT_PORT ||
-              type == PCI_EXP_TYPE_DOWNSTREAM ||
-              type == PCI_EXP_TYPE_PCIE_BRIDGE;
-}
-
 bool pcie_cap_has_lnkctl(const struct pci_dev *dev)
 {
        int type = pci_pcie_type(dev);
index 495059d..8e40b3e 100644 (file)
@@ -417,11 +417,9 @@ struct pci_bus *pci_bus_get(struct pci_bus *bus)
                get_device(&bus->dev);
        return bus;
 }
-EXPORT_SYMBOL(pci_bus_get);
 
 void pci_bus_put(struct pci_bus *bus)
 {
        if (bus)
                put_device(&bus->dev);
 }
-EXPORT_SYMBOL(pci_bus_put);
index 6ea778a..0ba988b 100644 (file)
@@ -131,13 +131,29 @@ config PCI_KEYSTONE_EP
          DesignWare core functions to implement the driver.
 
 config PCI_LAYERSCAPE
-       bool "Freescale Layerscape PCIe controller"
+       bool "Freescale Layerscape PCIe controller - Host mode"
        depends on OF && (ARM || ARCH_LAYERSCAPE || COMPILE_TEST)
        depends on PCI_MSI_IRQ_DOMAIN
        select MFD_SYSCON
        select PCIE_DW_HOST
        help
-         Say Y here if you want PCIe controller support on Layerscape SoCs.
+         Say Y here if you want to enable PCIe controller support on Layerscape
+         SoCs to work in Host mode.
+         This controller can work either as EP or RC. The RCW[HOST_AGT_PEX]
+         determines which PCIe controller works in EP mode and which PCIe
+         controller works in RC mode.
+
+config PCI_LAYERSCAPE_EP
+       bool "Freescale Layerscape PCIe controller - Endpoint mode"
+       depends on OF && (ARM || ARCH_LAYERSCAPE || COMPILE_TEST)
+       depends on PCI_ENDPOINT
+       select PCIE_DW_EP
+       help
+         Say Y here if you want to enable PCIe controller support on Layerscape
+         SoCs to work in Endpoint mode.
+         This controller can work either as EP or RC. The RCW[HOST_AGT_PEX]
+         determines which PCIe controller works in EP mode and which PCIe
+         controller works in RC mode.
 
 config PCI_HISI
        depends on OF && (ARM64 || COMPILE_TEST)
@@ -220,6 +236,16 @@ config PCI_MESON
          and therefore the driver re-uses the DesignWare core functions to
          implement the driver.
 
+config PCIE_TEGRA194
+       tristate "NVIDIA Tegra194 (and later) PCIe controller"
+       depends on ARCH_TEGRA_194_SOC || COMPILE_TEST
+       depends on PCI_MSI_IRQ_DOMAIN
+       select PCIE_DW_HOST
+       select PHY_TEGRA194_P2U
+       help
+         Say Y here if you want support for DesignWare core based PCIe host
+         controller found in NVIDIA Tegra194 SoC.
+
 config PCIE_UNIPHIER
        bool "Socionext UniPhier PCIe controllers"
        depends on ARCH_UNIPHIER || COMPILE_TEST
@@ -230,4 +256,16 @@ config PCIE_UNIPHIER
          Say Y here if you want PCIe controller support on UniPhier SoCs.
          This driver supports LD20 and PXs3 SoCs.
 
+config PCIE_AL
+       bool "Amazon Annapurna Labs PCIe controller"
+       depends on OF && (ARM64 || COMPILE_TEST)
+       depends on PCI_MSI_IRQ_DOMAIN
+       select PCIE_DW_HOST
+       help
+         Say Y here to enable support of Amazon's Annapurna Labs PCIe
+         controller IP on Amazon SoCs. The PCIe controller uses the DesignWare
+         core plus Annapurna Labs proprietary hardware wrappers. This is
+         required only for DT-based platforms. ACPI platforms with the
+         Annapurna Labs PCIe controller don't need to enable this.
+
 endmenu
index b085dfd..69faff3 100644 (file)
@@ -8,13 +8,15 @@ obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o
 obj-$(CONFIG_PCI_IMX6) += pci-imx6.o
 obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o
 obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone.o
-obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o pci-layerscape-ep.o
+obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
+obj-$(CONFIG_PCI_LAYERSCAPE_EP) += pci-layerscape-ep.o
 obj-$(CONFIG_PCIE_QCOM) += pcie-qcom.o
 obj-$(CONFIG_PCIE_ARMADA_8K) += pcie-armada8k.o
 obj-$(CONFIG_PCIE_ARTPEC6) += pcie-artpec6.o
 obj-$(CONFIG_PCIE_KIRIN) += pcie-kirin.o
 obj-$(CONFIG_PCIE_HISI_STB) += pcie-histb.o
 obj-$(CONFIG_PCI_MESON) += pci-meson.o
+obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
 obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
 
 # The following drivers are for devices that use the generic ACPI
index cee5f2f..14a6ba4 100644 (file)
@@ -465,7 +465,7 @@ static int __init exynos_pcie_probe(struct platform_device *pdev)
 
        ep->phy = devm_of_phy_get(dev, np, NULL);
        if (IS_ERR(ep->phy)) {
-               if (PTR_ERR(ep->phy) == -EPROBE_DEFER)
+               if (PTR_ERR(ep->phy) != -ENODEV)
                        return PTR_ERR(ep->phy);
 
                ep->phy = NULL;
index 9b5cb5b..acfbd34 100644 (file)
@@ -57,6 +57,7 @@ enum imx6_pcie_variants {
 struct imx6_pcie_drvdata {
        enum imx6_pcie_variants variant;
        u32 flags;
+       int dbi_length;
 };
 
 struct imx6_pcie {
@@ -1173,8 +1174,8 @@ static int imx6_pcie_probe(struct platform_device *pdev)
 
        imx6_pcie->vpcie = devm_regulator_get_optional(&pdev->dev, "vpcie");
        if (IS_ERR(imx6_pcie->vpcie)) {
-               if (PTR_ERR(imx6_pcie->vpcie) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
+               if (PTR_ERR(imx6_pcie->vpcie) != -ENODEV)
+                       return PTR_ERR(imx6_pcie->vpcie);
                imx6_pcie->vpcie = NULL;
        }
 
@@ -1212,6 +1213,7 @@ static const struct imx6_pcie_drvdata drvdata[] = {
                .variant = IMX6Q,
                .flags = IMX6_PCIE_FLAG_IMX6_PHY |
                         IMX6_PCIE_FLAG_IMX6_SPEED_CHANGE,
+               .dbi_length = 0x200,
        },
        [IMX6SX] = {
                .variant = IMX6SX,
@@ -1254,6 +1256,37 @@ static struct platform_driver imx6_pcie_driver = {
        .shutdown = imx6_pcie_shutdown,
 };
 
+static void imx6_pcie_quirk(struct pci_dev *dev)
+{
+       struct pci_bus *bus = dev->bus;
+       struct pcie_port *pp = bus->sysdata;
+
+       /* Bus parent is the PCI bridge, its parent is this platform driver */
+       if (!bus->dev.parent || !bus->dev.parent->parent)
+               return;
+
+       /* Make sure we only quirk devices associated with this driver */
+       if (bus->dev.parent->parent->driver != &imx6_pcie_driver.driver)
+               return;
+
+       if (bus->number == pp->root_bus_nr) {
+               struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+               struct imx6_pcie *imx6_pcie = to_imx6_pcie(pci);
+
+               /*
+                * Limit config length to avoid the kernel reading beyond
+                * the register set and causing an abort on i.MX 6Quad
+                */
+               if (imx6_pcie->drvdata->dbi_length) {
+                       dev->cfg_size = imx6_pcie->drvdata->dbi_length;
+                       dev_info(&dev->dev, "Limiting cfg_size to %d\n",
+                                       dev->cfg_size);
+               }
+       }
+}
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_SYNOPSYS, 0xabcd,
+                       PCI_CLASS_BRIDGE_PCI, 8, imx6_pcie_quirk);
+
 static int __init imx6_pcie_init(void)
 {
 #ifdef CONFIG_ARM
index be61d96..ca9aa45 100644 (file)
@@ -44,6 +44,7 @@ static const struct pci_epc_features ls_pcie_epc_features = {
        .linkup_notifier = false,
        .msi_capable = true,
        .msix_capable = false,
+       .bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4),
 };
 
 static const struct pci_epc_features*
index 3ab58f0..1eeda2f 100644 (file)
@@ -91,3 +91,368 @@ struct pci_ecam_ops al_pcie_ops = {
 };
 
 #endif /* defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS) */
+
+#ifdef CONFIG_PCIE_AL
+
+#include <linux/of_pci.h>
+#include "pcie-designware.h"
+
+#define AL_PCIE_REV_ID_2       2 /* chosen for DEVICE_REV_ID_DEV_ID_X4 */
+#define AL_PCIE_REV_ID_3       3 /* chosen for DEVICE_REV_ID_DEV_ID_X8 */
+#define AL_PCIE_REV_ID_4       4 /* chosen for DEVICE_REV_ID_DEV_ID_X16 */
+
+#define AXI_BASE_OFFSET                0x0 /* added to every controller_base offset */
+
+#define DEVICE_ID_OFFSET       0x16c /* read by al_pcie_rev_id_get() */
+
+#define DEVICE_REV_ID                  0x0 /* offset from DEVICE_ID_OFFSET */
+#define DEVICE_REV_ID_DEV_ID_MASK      GENMASK(31, 16) /* dev_id field */
+
+#define DEVICE_REV_ID_DEV_ID_X4                0
+#define DEVICE_REV_ID_DEV_ID_X8                2
+#define DEVICE_REV_ID_DEV_ID_X16       4
+
+#define OB_CTRL_REV1_2_OFFSET  0x0040 /* ob_ctrl for AL_PCIE_REV_ID_2 */
+#define OB_CTRL_REV3_5_OFFSET  0x0030 /* ob_ctrl for AL_PCIE_REV_ID_3/_4 */
+
+#define CFG_TARGET_BUS                 0x0 /* offset from ob_ctrl */
+#define CFG_TARGET_BUS_MASK_MASK       GENMASK(7, 0) /* gets reg_mask */
+#define CFG_TARGET_BUS_BUSNUM_MASK     GENMASK(15, 8) /* gets reg_val */
+
+#define CFG_CONTROL                    0x4 /* offset from ob_ctrl */
+#define CFG_CONTROL_SUBBUS_MASK                GENMASK(15, 8) /* subordinate bus */
+#define CFG_CONTROL_SEC_BUS_MASK       GENMASK(23, 16) /* secondary bus */
+
+struct al_pcie_reg_offsets {
+       unsigned int ob_ctrl; /* OB_CTRL_*_OFFSET set by al_pcie_reg_offsets_set() */
+};
+
+struct al_pcie_target_bus_cfg {
+       u8 reg_val; /* bus-number bits last written to CFG_TARGET_BUS */
+       u8 reg_mask; /* bus-number bits supplied by CFG_TARGET_BUS */
+       u8 ecam_mask; /* bus-number bits taken from the transaction address */
+};
+
+struct al_pcie {
+       struct dw_pcie *pci; /* DesignWare core state allocated in probe */
+       void __iomem *controller_base; /* base of PCIe unit (not DW core) */
+       struct device *dev; /* &pdev->dev of the probed platform device */
+       resource_size_t ecam_size; /* size of the "config" resource */
+       unsigned int controller_rev_id; /* AL_PCIE_REV_ID_* value */
+       struct al_pcie_reg_offsets reg_offsets; /* revision-dependent offsets */
+       struct al_pcie_target_bus_cfg target_bus_cfg; /* bus-number decoding */
+};
+
+#define PCIE_ECAM_DEVFN(x)             (((x) & 0xff) << 12) /* devfn -> address bits 19:12 */
+
+#define to_al_pcie(x)          dev_get_drvdata((x)->dev) /* drvdata is set in al_pcie_probe() */
+
+static inline u32 al_pcie_controller_readl(struct al_pcie *pcie, u32 offset)
+{
+       return readl_relaxed(pcie->controller_base + offset); /* PCIe unit register, not DW core */
+}
+
+static inline void al_pcie_controller_writel(struct al_pcie *pcie, u32 offset,
+                                            u32 val)
+{
+       writel_relaxed(val, pcie->controller_base + offset); /* PCIe unit register, not DW core */
+}
+
+/*
+ * Read the device ID from the controller registers and translate it into an
+ * AL_PCIE_REV_ID_* value stored in *rev_id. Returns 0, or -EINVAL if unknown.
+ */
+static int al_pcie_rev_id_get(struct al_pcie *pcie, unsigned int *rev_id)
+{
+       const u32 reg_off = AXI_BASE_OFFSET + DEVICE_ID_OFFSET + DEVICE_REV_ID;
+       u32 raw, dev_id_val;
+
+       /* The device ID field is what tells the controller revisions apart */
+       raw = al_pcie_controller_readl(pcie, reg_off);
+       dev_id_val = FIELD_GET(DEVICE_REV_ID_DEV_ID_MASK, raw);
+
+       if (dev_id_val == DEVICE_REV_ID_DEV_ID_X4) {
+               *rev_id = AL_PCIE_REV_ID_2;
+       } else if (dev_id_val == DEVICE_REV_ID_DEV_ID_X8) {
+               *rev_id = AL_PCIE_REV_ID_3;
+       } else if (dev_id_val == DEVICE_REV_ID_DEV_ID_X16) {
+               *rev_id = AL_PCIE_REV_ID_4;
+       } else {
+               /* Unknown ID: report it and leave *rev_id untouched */
+               dev_err(pcie->dev, "Unsupported dev_id_val (0x%x)\n",
+                       dev_id_val);
+               return -EINVAL;
+       }
+
+       dev_dbg(pcie->dev, "dev_id_val: 0x%x\n", dev_id_val);
+
+       return 0;
+}
+
+/* Pick the ob_ctrl register base that matches the detected revision */
+static int al_pcie_reg_offsets_set(struct al_pcie *pcie)
+{
+       const unsigned int rev = pcie->controller_rev_id;
+
+       if (rev == AL_PCIE_REV_ID_2) {
+               pcie->reg_offsets.ob_ctrl = OB_CTRL_REV1_2_OFFSET;
+               return 0;
+       }
+
+       if (rev == AL_PCIE_REV_ID_3 || rev == AL_PCIE_REV_ID_4) {
+               pcie->reg_offsets.ob_ctrl = OB_CTRL_REV3_5_OFFSET;
+               return 0;
+       }
+
+       dev_err(pcie->dev, "Unsupported controller rev_id: 0x%x\n", rev);
+       return -EINVAL;
+}
+
+/* Program the bus number and bus-number mask fields of CFG_TARGET_BUS */
+static inline void al_pcie_target_bus_set(struct al_pcie *pcie,
+                                         u8 target_bus,
+                                         u8 mask_target_bus)
+{
+       u32 offset = AXI_BASE_OFFSET + pcie->reg_offsets.ob_ctrl +
+                    CFG_TARGET_BUS;
+       u32 reg = FIELD_PREP(CFG_TARGET_BUS_BUSNUM_MASK, target_bus);
+
+       reg |= FIELD_PREP(CFG_TARGET_BUS_MASK_MASK, mask_target_bus);
+
+       al_pcie_controller_writel(pcie, offset, reg);
+}
+
+/*
+ * Return the config space address of @devfn on bus @busnr, re-programming
+ * CFG_TARGET_BUS first when the register-supplied bus bits have changed.
+ */
+static void __iomem *al_pcie_conf_addr_map(struct al_pcie *pcie,
+                                          unsigned int busnr,
+                                          unsigned int devfn)
+{
+       struct al_pcie_target_bus_cfg *cfg = &pcie->target_bus_cfg;
+       unsigned int reg_bits = busnr & cfg->reg_mask;
+       uintptr_t addr = (uintptr_t)pcie->pci->pp.va_cfg0_base;
+
+       if (cfg->reg_val != reg_bits) {
+               dev_dbg(pcie->pci->dev, "Changing target bus busnum val from 0x%x to 0x%x\n",
+                       cfg->reg_val, reg_bits);
+               cfg->reg_val = reg_bits;
+               al_pcie_target_bus_set(pcie, cfg->reg_val, cfg->reg_mask);
+       }
+
+       /* The ECAM-masked bus bits and devfn are carried by the address */
+       addr += (busnr & cfg->ecam_mask) << 20;
+       addr += PCIE_ECAM_DEVFN(devfn);
+
+       return (void __iomem *)addr;
+}
+
+/* rd_other_conf hook: read @size bytes at offset @where of @devfn on @bus */
+static int al_pcie_rd_other_conf(struct pcie_port *pp, struct pci_bus *bus,
+                                unsigned int devfn, int where, int size,
+                                u32 *val)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+       void __iomem *cfg_addr;
+       int ret;
+
+       cfg_addr = al_pcie_conf_addr_map(to_al_pcie(pci), bus->number, devfn);
+       cfg_addr += where;
+
+       ret = dw_pcie_read(cfg_addr, size, val);
+
+       dev_dbg(pci->dev, "%d-byte config read from %04x:%02x:%02x.%d offset 0x%x (pci_addr: 0x%px) - val:0x%x\n",
+               size, pci_domain_nr(bus), bus->number,
+               PCI_SLOT(devfn), PCI_FUNC(devfn), where,
+               cfg_addr, *val);
+
+       return ret;
+}
+
+/* wr_other_conf hook: write @size bytes of @val at @where of @devfn on @bus */
+static int al_pcie_wr_other_conf(struct pcie_port *pp, struct pci_bus *bus,
+                                unsigned int devfn, int where, int size,
+                                u32 val)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+       void __iomem *cfg_addr;
+       int ret;
+
+       cfg_addr = al_pcie_conf_addr_map(to_al_pcie(pci), bus->number, devfn);
+       cfg_addr += where;
+
+       ret = dw_pcie_write(cfg_addr, size, val);
+
+       dev_dbg(pci->dev, "%d-byte config write to %04x:%02x:%02x.%d offset 0x%x (pci_addr: 0x%px) - val:0x%x\n",
+               size, pci_domain_nr(bus), bus->number,
+               PCI_SLOT(devfn), PCI_FUNC(devfn), where,
+               cfg_addr, val);
+
+       return ret;
+}
+
+/*
+ * Decide which bus-number bits are decoded from the ECAM address and which
+ * come from CFG_TARGET_BUS, based on the ECAM window size, program
+ * CFG_TARGET_BUS for the first bus, and then write the secondary and
+ * subordinate bus numbers into CFG_CONTROL.
+ */
+static void al_pcie_config_prepare(struct al_pcie *pcie)
+{
+       struct al_pcie_target_bus_cfg *cfg = &pcie->target_bus_cfg;
+       struct pcie_port *pp = &pcie->pci->pp;
+       unsigned int bus_mask;
+       u32 ctrl_off;
+       u8 sec_bus;
+       u8 sub_bus;
+       u32 reg;
+
+       /* One bus per 1MB of ECAM window, capped at 256 buses */
+       bus_mask = (pcie->ecam_size >> 20) - 1;
+       if (bus_mask > 255) {
+               dev_warn(pcie->dev, "ECAM window size is larger than 256MB. Cutting off at 256\n");
+               bus_mask = 255;
+       }
+
+       /* Bits covered by the ECAM window come from the transaction address */
+       cfg->ecam_mask = bus_mask;
+       /* The remaining bits come from the cfg_target_bus register */
+       cfg->reg_mask = ~cfg->ecam_mask;
+       cfg->reg_val = pp->busn->start & cfg->reg_mask;
+
+       al_pcie_target_bus_set(pcie, cfg->reg_val, cfg->reg_mask);
+
+       /* Secondary is the bus after the first one; subordinate is the last */
+       sec_bus = pp->busn->start + 1;
+       sub_bus = pp->busn->end;
+
+       /* Update only the secondary/subordinate fields of CFG_CONTROL */
+       ctrl_off = AXI_BASE_OFFSET + pcie->reg_offsets.ob_ctrl +
+                  CFG_CONTROL;
+       reg = al_pcie_controller_readl(pcie, ctrl_off);
+       reg &= ~(CFG_CONTROL_SEC_BUS_MASK | CFG_CONTROL_SUBBUS_MASK);
+       reg |= FIELD_PREP(CFG_CONTROL_SUBBUS_MASK, sub_bus) |
+              FIELD_PREP(CFG_CONTROL_SEC_BUS_MASK, sec_bus);
+
+       al_pcie_controller_writel(pcie, ctrl_off, reg);
+}
+
+/*
+ * host_init hook: find the revision, pick offsets, program bus decoding.
+ */
+static int al_pcie_host_init(struct pcie_port *pp)
+{
+       struct al_pcie *pcie = to_al_pcie(to_dw_pcie_from_pp(pp));
+       int rc;
+
+       rc = al_pcie_rev_id_get(pcie, &pcie->controller_rev_id);
+       if (!rc)
+               rc = al_pcie_reg_offsets_set(pcie);
+       if (rc)
+               return rc;
+
+       al_pcie_config_prepare(pcie);
+
+       return 0;
+}
+
+static const struct dw_pcie_host_ops al_pcie_host_ops = {
+       .host_init = al_pcie_host_init,
+       .rd_other_conf = al_pcie_rd_other_conf,
+       .wr_other_conf = al_pcie_wr_other_conf,
+};
+
+/*
+ * Install al_pcie_host_ops and return the result of dw_pcie_host_init().
+ */
+static int al_add_pcie_port(struct pcie_port *pp,
+                           struct platform_device *pdev)
+{
+       int ret;
+
+       pp->ops = &al_pcie_host_ops;
+
+       ret = dw_pcie_host_init(pp);
+       if (ret)
+               dev_err(&pdev->dev, "failed to initialize host\n");
+
+       return ret;
+}
+
+static const struct dw_pcie_ops dw_pcie_ops = { /* no callbacks are set */
+};
+
+static int al_pcie_probe(struct platform_device *pdev)
+{
+       struct resource *dbi_res, *ecam_res, *controller_res;
+       struct device *dev = &pdev->dev;
+       struct al_pcie *al_pcie;
+       struct dw_pcie *pci;
+
+       al_pcie = devm_kzalloc(dev, sizeof(*al_pcie), GFP_KERNEL);
+       if (!al_pcie)
+               return -ENOMEM;
+
+       pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+       if (!pci)
+               return -ENOMEM;
+
+       pci->dev = dev;
+       pci->ops = &dw_pcie_ops;
+
+       al_pcie->pci = pci;
+       al_pcie->dev = dev;
+
+       dbi_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
+       pci->dbi_base = devm_pci_remap_cfg_resource(dev, dbi_res);
+       if (IS_ERR(pci->dbi_base)) {
+               dev_err(dev, "couldn't remap dbi base %pR\n", dbi_res);
+               return PTR_ERR(pci->dbi_base);
+       }
+
+       /* Only the size of the "config" (ECAM) resource is recorded here */
+       ecam_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "config");
+       if (!ecam_res) {
+               dev_err(dev, "couldn't find 'config' reg in DT\n");
+               return -ENOENT;
+       }
+       al_pcie->ecam_size = resource_size(ecam_res);
+
+       controller_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+                                                     "controller");
+       al_pcie->controller_base = devm_ioremap_resource(dev, controller_res);
+       if (IS_ERR(al_pcie->controller_base)) {
+               dev_err(dev, "couldn't remap controller base %pR\n",
+                       controller_res);
+               return PTR_ERR(al_pcie->controller_base);
+       }
+
+       dev_dbg(dev, "From DT: dbi_base: %pR, controller_base: %pR\n",
+               dbi_res, controller_res);
+
+       /* to_al_pcie() reads drvdata, so set it before adding the port */
+       platform_set_drvdata(pdev, al_pcie);
+
+       return al_add_pcie_port(&pci->pp, pdev);
+}
+
+/* Device tree compatible strings matched by this driver */
+static const struct of_device_id al_pcie_of_match[] = {
+       { .compatible = "amazon,al-alpine-v2-pcie" },
+       { .compatible = "amazon,al-alpine-v3-pcie" },
+       /* sentinel */
+       {},
+};
+
+static struct platform_driver al_pcie_driver = {
+       .probe = al_pcie_probe,
+       .driver = {
+               .name   = "al-pcie",
+               .of_match_table = al_pcie_of_match,
+               .suppress_bind_attrs = true,
+       },
+};
+builtin_platform_driver(al_pcie_driver);
+
+#endif /* CONFIG_PCIE_AL */
index 3d55dc7..4959654 100644 (file)
@@ -118,11 +118,10 @@ static int armada8k_pcie_setup_phys(struct armada8k_pcie *pcie)
 
        for (i = 0; i < ARMADA8K_PCIE_MAX_LANES; i++) {
                pcie->phy[i] = devm_of_phy_get_by_index(dev, node, i);
-               if (IS_ERR(pcie->phy[i]) &&
-                   (PTR_ERR(pcie->phy[i]) == -EPROBE_DEFER))
-                       return PTR_ERR(pcie->phy[i]);
-
                if (IS_ERR(pcie->phy[i])) {
+                       if (PTR_ERR(pcie->phy[i]) != -ENODEV)
+                               return PTR_ERR(pcie->phy[i]);
+
                        pcie->phy[i] = NULL;
                        continue;
                }
index 2bf5a35..3dd2e26 100644 (file)
@@ -40,39 +40,6 @@ void dw_pcie_ep_reset_bar(struct dw_pcie *pci, enum pci_barno bar)
        __dw_pcie_ep_reset_bar(pci, bar, 0);
 }
 
-static u8 __dw_pcie_ep_find_next_cap(struct dw_pcie *pci, u8 cap_ptr,
-                             u8 cap)
-{
-       u8 cap_id, next_cap_ptr;
-       u16 reg;
-
-       if (!cap_ptr)
-               return 0;
-
-       reg = dw_pcie_readw_dbi(pci, cap_ptr);
-       cap_id = (reg & 0x00ff);
-
-       if (cap_id > PCI_CAP_ID_MAX)
-               return 0;
-
-       if (cap_id == cap)
-               return cap_ptr;
-
-       next_cap_ptr = (reg & 0xff00) >> 8;
-       return __dw_pcie_ep_find_next_cap(pci, next_cap_ptr, cap);
-}
-
-static u8 dw_pcie_ep_find_capability(struct dw_pcie *pci, u8 cap)
-{
-       u8 next_cap_ptr;
-       u16 reg;
-
-       reg = dw_pcie_readw_dbi(pci, PCI_CAPABILITY_LIST);
-       next_cap_ptr = (reg & 0x00ff);
-
-       return __dw_pcie_ep_find_next_cap(pci, next_cap_ptr, cap);
-}
-
 static int dw_pcie_ep_write_header(struct pci_epc *epc, u8 func_no,
                                   struct pci_epf_header *hdr)
 {
@@ -531,6 +498,7 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
        int ret;
        u32 reg;
        void *addr;
+       u8 hdr_type;
        unsigned int nbars;
        unsigned int offset;
        struct pci_epc *epc;
@@ -595,6 +563,13 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
        if (ep->ops->ep_init)
                ep->ops->ep_init(ep);
 
+       hdr_type = dw_pcie_readb_dbi(pci, PCI_HEADER_TYPE);
+       if (hdr_type != PCI_HEADER_TYPE_NORMAL) {
+               dev_err(pci->dev, "PCIe controller is not set to EP mode (hdr_type:0x%x)!\n",
+                       hdr_type);
+               return -EIO;
+       }
+
        ret = of_property_read_u8(np, "max-functions", &epc->max_functions);
        if (ret < 0)
                epc->max_functions = 1;
@@ -612,9 +587,9 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
                dev_err(dev, "Failed to reserve memory for MSI/MSI-X\n");
                return -ENOMEM;
        }
-       ep->msi_cap = dw_pcie_ep_find_capability(pci, PCI_CAP_ID_MSI);
+       ep->msi_cap = dw_pcie_find_capability(pci, PCI_CAP_ID_MSI);
 
-       ep->msix_cap = dw_pcie_ep_find_capability(pci, PCI_CAP_ID_MSIX);
+       ep->msix_cap = dw_pcie_find_capability(pci, PCI_CAP_ID_MSIX);
 
        offset = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_REBAR);
        if (offset) {
index f93252d..0f36a92 100644 (file)
@@ -323,6 +323,7 @@ int dw_pcie_host_init(struct pcie_port *pp)
        struct pci_bus *child;
        struct pci_host_bridge *bridge;
        struct resource *cfg_res;
+       u32 hdr_type;
        int ret;
 
        raw_spin_lock_init(&pci->pp.lock);
@@ -464,6 +465,21 @@ int dw_pcie_host_init(struct pcie_port *pp)
                        goto err_free_msi;
        }
 
+       ret = dw_pcie_rd_own_conf(pp, PCI_HEADER_TYPE, 1, &hdr_type);
+       if (ret != PCIBIOS_SUCCESSFUL) {
+               dev_err(pci->dev, "Failed reading PCI_HEADER_TYPE cfg space reg (ret: 0x%x)\n",
+                       ret);
+               ret = pcibios_err_to_errno(ret);
+               goto err_free_msi;
+       }
+       if (hdr_type != PCI_HEADER_TYPE_BRIDGE) {
+               dev_err(pci->dev,
+                       "PCIe controller is not set to bridge type (hdr_type: 0x%x)!\n",
+                       hdr_type);
+               ret = -EIO;
+               goto err_free_msi;
+       }
+
        pp->root_bus_nr = pp->busn->start;
 
        bridge->dev.parent = dev;
@@ -628,6 +644,12 @@ void dw_pcie_setup_rc(struct pcie_port *pp)
        u32 val, ctrl, num_ctrls;
        struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
 
+       /*
+        * Enable DBI read-only registers for writing/updating configuration.
+        * Write permission gets disabled towards the end of this function.
+        */
+       dw_pcie_dbi_ro_wr_en(pci);
+
        dw_pcie_setup(pci);
 
        if (!pp->ops->msi_host_init) {
@@ -650,12 +672,10 @@ void dw_pcie_setup_rc(struct pcie_port *pp)
        dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_1, 0x00000000);
 
        /* Setup interrupt pins */
-       dw_pcie_dbi_ro_wr_en(pci);
        val = dw_pcie_readl_dbi(pci, PCI_INTERRUPT_LINE);
        val &= 0xffff00ff;
        val |= 0x00000100;
        dw_pcie_writel_dbi(pci, PCI_INTERRUPT_LINE, val);
-       dw_pcie_dbi_ro_wr_dis(pci);
 
        /* Setup bus numbers */
        val = dw_pcie_readl_dbi(pci, PCI_PRIMARY_BUS);
@@ -687,15 +707,13 @@ void dw_pcie_setup_rc(struct pcie_port *pp)
 
        dw_pcie_wr_own_conf(pp, PCI_BASE_ADDRESS_0, 4, 0);
 
-       /* Enable write permission for the DBI read-only register */
-       dw_pcie_dbi_ro_wr_en(pci);
        /* Program correct class for RC */
        dw_pcie_wr_own_conf(pp, PCI_CLASS_DEVICE, 2, PCI_CLASS_BRIDGE_PCI);
-       /* Better disable write permission right after the update */
-       dw_pcie_dbi_ro_wr_dis(pci);
 
        dw_pcie_rd_own_conf(pp, PCIE_LINK_WIDTH_SPEED_CONTROL, 4, &val);
        val |= PORT_LOGIC_SPEED_CHANGE;
        dw_pcie_wr_own_conf(pp, PCIE_LINK_WIDTH_SPEED_CONTROL, 4, val);
+
+       dw_pcie_dbi_ro_wr_dis(pci);
 }
 EXPORT_SYMBOL_GPL(dw_pcie_setup_rc);
index 7d25102..820488d 100644 (file)
 
 #include "pcie-designware.h"
 
+/*
+ * These interfaces resemble the pci_find_*capability() interfaces, but these
+ * are for configuring host controllers, which are bridges *to* PCI devices but
+ * are not PCI devices themselves.
+ */
+static u8 __dw_pcie_find_next_cap(struct dw_pcie *pci, u8 cap_ptr,
+                                 u8 cap)
+{
+       int ttl = 48;   /* (256 - 64) / 4: most entries config space can hold */
+       u8 cap_id;
+       u16 reg;
+
+       /* Iterate with a TTL so a looped or corrupt list cannot hang us */
+       while (cap_ptr && ttl--) {
+               reg = dw_pcie_readw_dbi(pci, cap_ptr);
+               cap_id = (reg & 0x00ff);
+               if (cap_id > PCI_CAP_ID_MAX)
+                       return 0;
+               if (cap_id == cap)
+                       return cap_ptr;
+
+               cap_ptr = (reg & 0xff00) >> 8;
+       }
+
+       return 0;
+}
+
+u8 dw_pcie_find_capability(struct dw_pcie *pci, u8 cap)
+{
+       u16 reg = dw_pcie_readw_dbi(pci, PCI_CAPABILITY_LIST);
+       u8 first_cap_ptr;
+
+       /* The low byte at PCI_CAPABILITY_LIST points at the first entry */
+       first_cap_ptr = reg & 0x00ff;
+
+       return __dw_pcie_find_next_cap(pci, first_cap_ptr, cap);
+}
+EXPORT_SYMBOL_GPL(dw_pcie_find_capability);
+
+/*
+ * Search the extended capability list for ID @cap. The walk begins at
+ * @start, or at PCI_CFG_SPACE_SIZE when @start is 0, and an entry located
+ * at @start itself is never reported. Returns the offset of the match, or
+ * 0 when nothing is found.
+ */
+static u16 dw_pcie_find_next_ext_capability(struct dw_pcie *pci, u16 start,
+                                           u8 cap)
+{
+       /* Each capability takes at least 8 bytes, which bounds the walk */
+       int ttl = (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8;
+       int pos = start ? start : PCI_CFG_SPACE_SIZE;
+       u32 header = dw_pcie_readl_dbi(pci, pos);
+
+       /*
+        * An all-zero first header (cap ID, cap version and next pointer)
+        * means there are no capabilities.
+        */
+       if (!header)
+               return 0;
+
+       for (; ttl > 0; ttl--) {
+               if (pos != start && PCI_EXT_CAP_ID(header) == cap)
+                       return pos;
+
+               pos = PCI_EXT_CAP_NEXT(header);
+               if (pos < PCI_CFG_SPACE_SIZE)
+                       return 0;
+
+               header = dw_pcie_readl_dbi(pci, pos);
+       }
+
+       return 0;
+}
+
+u16 dw_pcie_find_ext_capability(struct dw_pcie *pci, u8 cap)
+{
+       return dw_pcie_find_next_ext_capability(pci, 0, cap); /* start 0: walk from PCI_CFG_SPACE_SIZE */
+}
+EXPORT_SYMBOL_GPL(dw_pcie_find_ext_capability);
+
 int dw_pcie_read(void __iomem *addr, int size, u32 *val)
 {
        if (!IS_ALIGNED((uintptr_t)addr, size)) {
@@ -376,10 +456,11 @@ int dw_pcie_wait_for_link(struct dw_pcie *pci)
                usleep_range(LINK_WAIT_USLEEP_MIN, LINK_WAIT_USLEEP_MAX);
        }
 
-       dev_err(pci->dev, "Phy link never came up\n");
+       dev_info(pci->dev, "Phy link never came up\n");
 
        return -ETIMEDOUT;
 }
+EXPORT_SYMBOL_GPL(dw_pcie_wait_for_link);
 
 int dw_pcie_link_up(struct dw_pcie *pci)
 {
@@ -423,8 +504,10 @@ void dw_pcie_setup(struct dw_pcie *pci)
 
 
        ret = of_property_read_u32(np, "num-lanes", &lanes);
-       if (ret)
-               lanes = 0;
+       if (ret) {
+               dev_dbg(pci->dev, "property num-lanes isn't found\n");
+               return;
+       }
 
        /* Set the number of lanes */
        val = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL);
@@ -466,4 +549,11 @@ void dw_pcie_setup(struct dw_pcie *pci)
                break;
        }
        dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
+
+       if (of_property_read_bool(np, "snps,enable-cdm-check")) {
+               val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS);
+               val |= PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS |
+                      PCIE_PL_CHK_REG_CHK_REG_START;
+               dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val);
+       }
 }
index ffed084..5a18e94 100644 (file)
 #define PCIE_MISC_CONTROL_1_OFF                0x8BC
 #define PCIE_DBI_RO_WR_EN              BIT(0)
 
+#define PCIE_PL_CHK_REG_CONTROL_STATUS                 0xB20
+#define PCIE_PL_CHK_REG_CHK_REG_START                  BIT(0)
+#define PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS             BIT(1)
+#define PCIE_PL_CHK_REG_CHK_REG_COMPARISON_ERROR       BIT(16)
+#define PCIE_PL_CHK_REG_CHK_REG_LOGIC_ERROR            BIT(17)
+#define PCIE_PL_CHK_REG_CHK_REG_COMPLETE               BIT(18)
+
+#define PCIE_PL_CHK_REG_ERR_ADDR                       0xB28
+
 /*
  * iATU Unroll-specific register definitions
  * From 4.80 core version the address translation will be made by unroll
@@ -251,6 +260,9 @@ struct dw_pcie {
 #define to_dw_pcie_from_ep(endpoint)   \
                container_of((endpoint), struct dw_pcie, ep)
 
+u8 dw_pcie_find_capability(struct dw_pcie *pci, u8 cap);
+u16 dw_pcie_find_ext_capability(struct dw_pcie *pci, u8 cap);
+
 int dw_pcie_read(void __iomem *addr, int size, u32 *val);
 int dw_pcie_write(void __iomem *addr, int size, u32 val);
 
index 954bc2b..811b5c6 100644 (file)
@@ -340,8 +340,8 @@ static int histb_pcie_probe(struct platform_device *pdev)
 
        hipcie->vpcie = devm_regulator_get_optional(dev, "vpcie");
        if (IS_ERR(hipcie->vpcie)) {
-               if (PTR_ERR(hipcie->vpcie) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
+               if (PTR_ERR(hipcie->vpcie) != -ENODEV)
+                       return PTR_ERR(hipcie->vpcie);
                hipcie->vpcie = NULL;
        }
 
index 8df1914..c19617a 100644 (file)
@@ -436,7 +436,7 @@ static int kirin_pcie_host_init(struct pcie_port *pp)
        return 0;
 }
 
-static struct dw_pcie_ops kirin_dw_pcie_ops = {
+static const struct dw_pcie_ops kirin_dw_pcie_ops = {
        .read_dbi = kirin_pcie_read_dbi,
        .write_dbi = kirin_pcie_write_dbi,
        .link_up = kirin_pcie_link_up,
diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c
new file mode 100644 (file)
index 0000000..f89f5ac
--- /dev/null
@@ -0,0 +1,1732 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCIe host controller driver for Tegra194 SoC
+ *
+ * Copyright (C) 2019 NVIDIA Corporation.
+ *
+ * Author: Vidya Sagar <vidyas@nvidia.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/random.h>
+#include <linux/reset.h>
+#include <linux/resource.h>
+#include <linux/types.h>
+#include "pcie-designware.h"
+#include <soc/tegra/bpmp.h>
+#include <soc/tegra/bpmp-abi.h>
+#include "../../pci.h"
+
+#define APPL_PINMUX                            0x0
+#define APPL_PINMUX_PEX_RST                    BIT(0)
+#define APPL_PINMUX_CLKREQ_OVERRIDE_EN         BIT(2)
+#define APPL_PINMUX_CLKREQ_OVERRIDE            BIT(3)
+#define APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE_EN  BIT(4)
+#define APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE     BIT(5)
+#define APPL_PINMUX_CLKREQ_OUT_OVRD_EN         BIT(9)
+#define APPL_PINMUX_CLKREQ_OUT_OVRD            BIT(10)
+
+#define APPL_CTRL                              0x4
+#define APPL_CTRL_SYS_PRE_DET_STATE            BIT(6)
+#define APPL_CTRL_LTSSM_EN                     BIT(7)
+#define APPL_CTRL_HW_HOT_RST_EN                        BIT(20)
+#define APPL_CTRL_HW_HOT_RST_MODE_MASK         GENMASK(1, 0)
+#define APPL_CTRL_HW_HOT_RST_MODE_SHIFT                22
+#define APPL_CTRL_HW_HOT_RST_MODE_IMDT_RST     0x1
+
+#define APPL_INTR_EN_L0_0                      0x8
+#define APPL_INTR_EN_L0_0_LINK_STATE_INT_EN    BIT(0)
+#define APPL_INTR_EN_L0_0_MSI_RCV_INT_EN       BIT(4)
+#define APPL_INTR_EN_L0_0_INT_INT_EN           BIT(8)
+#define APPL_INTR_EN_L0_0_CDM_REG_CHK_INT_EN   BIT(19)
+#define APPL_INTR_EN_L0_0_SYS_INTR_EN          BIT(30)
+#define APPL_INTR_EN_L0_0_SYS_MSI_INTR_EN      BIT(31)
+
+#define APPL_INTR_STATUS_L0                    0xC
+#define APPL_INTR_STATUS_L0_LINK_STATE_INT     BIT(0)
+#define APPL_INTR_STATUS_L0_INT_INT            BIT(8)
+#define APPL_INTR_STATUS_L0_CDM_REG_CHK_INT    BIT(18)
+
+#define APPL_INTR_EN_L1_0_0                            0x1C
+#define APPL_INTR_EN_L1_0_0_LINK_REQ_RST_NOT_INT_EN    BIT(1)
+
+#define APPL_INTR_STATUS_L1_0_0                                0x20
+#define APPL_INTR_STATUS_L1_0_0_LINK_REQ_RST_NOT_CHGED BIT(1)
+
+#define APPL_INTR_STATUS_L1_1                  0x2C
+#define APPL_INTR_STATUS_L1_2                  0x30
+#define APPL_INTR_STATUS_L1_3                  0x34
+#define APPL_INTR_STATUS_L1_6                  0x3C
+#define APPL_INTR_STATUS_L1_7                  0x40
+
+#define APPL_INTR_EN_L1_8_0                    0x44
+#define APPL_INTR_EN_L1_8_BW_MGT_INT_EN                BIT(2)
+#define APPL_INTR_EN_L1_8_AUTO_BW_INT_EN       BIT(3)
+#define APPL_INTR_EN_L1_8_INTX_EN              BIT(11)
+#define APPL_INTR_EN_L1_8_AER_INT_EN           BIT(15)
+
+#define APPL_INTR_STATUS_L1_8_0                        0x4C
+#define APPL_INTR_STATUS_L1_8_0_EDMA_INT_MASK  GENMASK(11, 6)
+#define APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS BIT(2)
+#define APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS        BIT(3)
+
+#define APPL_INTR_STATUS_L1_9                  0x54
+#define APPL_INTR_STATUS_L1_10                 0x58
+#define APPL_INTR_STATUS_L1_11                 0x64
+#define APPL_INTR_STATUS_L1_13                 0x74
+#define APPL_INTR_STATUS_L1_14                 0x78
+#define APPL_INTR_STATUS_L1_15                 0x7C
+#define APPL_INTR_STATUS_L1_17                 0x88
+
+#define APPL_INTR_EN_L1_18                             0x90
+#define APPL_INTR_EN_L1_18_CDM_REG_CHK_CMPLT           BIT(2)
+#define APPL_INTR_EN_L1_18_CDM_REG_CHK_CMP_ERR         BIT(1)
+#define APPL_INTR_EN_L1_18_CDM_REG_CHK_LOGIC_ERR       BIT(0)
+
+#define APPL_INTR_STATUS_L1_18                         0x94
+#define APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMPLT       BIT(2)
+#define APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMP_ERR     BIT(1)
+#define APPL_INTR_STATUS_L1_18_CDM_REG_CHK_LOGIC_ERR   BIT(0)
+
+#define APPL_MSI_CTRL_2                                0xB0
+
+#define APPL_LTR_MSG_1                         0xC4
+#define LTR_MSG_REQ                            BIT(15)
+#define LTR_MST_NO_SNOOP_SHIFT                 16
+
+#define APPL_LTR_MSG_2                         0xC8
+#define APPL_LTR_MSG_2_LTR_MSG_REQ_STATE       BIT(3)
+
+#define APPL_LINK_STATUS                       0xCC
+#define APPL_LINK_STATUS_RDLH_LINK_UP          BIT(0)
+
+#define APPL_DEBUG                             0xD0
+#define APPL_DEBUG_PM_LINKST_IN_L2_LAT         BIT(21)
+#define APPL_DEBUG_PM_LINKST_IN_L0             0x11
+#define APPL_DEBUG_LTSSM_STATE_MASK            GENMASK(8, 3)
+#define APPL_DEBUG_LTSSM_STATE_SHIFT           3
+#define LTSSM_STATE_PRE_DETECT                 5
+
+#define APPL_RADM_STATUS                       0xE4
+#define APPL_PM_XMT_TURNOFF_STATE              BIT(0)
+
+#define APPL_DM_TYPE                           0x100
+#define APPL_DM_TYPE_MASK                      GENMASK(3, 0)
+#define APPL_DM_TYPE_RP                                0x4
+#define APPL_DM_TYPE_EP                                0x0
+
+#define APPL_CFG_BASE_ADDR                     0x104
+#define APPL_CFG_BASE_ADDR_MASK                        GENMASK(31, 12)
+
+#define APPL_CFG_IATU_DMA_BASE_ADDR            0x108
+#define APPL_CFG_IATU_DMA_BASE_ADDR_MASK       GENMASK(31, 18)
+
+#define APPL_CFG_MISC                          0x110
+#define APPL_CFG_MISC_SLV_EP_MODE              BIT(14)
+#define APPL_CFG_MISC_ARCACHE_MASK             GENMASK(13, 10)
+#define APPL_CFG_MISC_ARCACHE_SHIFT            10
+#define APPL_CFG_MISC_ARCACHE_VAL              3
+
+#define APPL_CFG_SLCG_OVERRIDE                 0x114
+#define APPL_CFG_SLCG_OVERRIDE_SLCG_EN_MASTER  BIT(0)
+
+#define APPL_CAR_RESET_OVRD                            0x12C
+#define APPL_CAR_RESET_OVRD_CYA_OVERRIDE_CORE_RST_N    BIT(0)
+
+#define IO_BASE_IO_DECODE                              BIT(0)
+#define IO_BASE_IO_DECODE_BIT8                         BIT(8)
+
+#define CFG_PREF_MEM_LIMIT_BASE_MEM_DECODE             BIT(0)
+#define CFG_PREF_MEM_LIMIT_BASE_MEM_LIMIT_DECODE       BIT(16)
+
+#define CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF        0x718
+#define CFG_TIMER_CTRL_ACK_NAK_SHIFT   (19)
+
+#define EVENT_COUNTER_ALL_CLEAR                0x3
+#define EVENT_COUNTER_ENABLE_ALL       0x7
+#define EVENT_COUNTER_ENABLE_SHIFT     2
+#define EVENT_COUNTER_EVENT_SEL_MASK   GENMASK(7, 0)
+#define EVENT_COUNTER_EVENT_SEL_SHIFT  16
+#define EVENT_COUNTER_EVENT_Tx_L0S     0x2
+#define EVENT_COUNTER_EVENT_Rx_L0S     0x3
+#define EVENT_COUNTER_EVENT_L1         0x5
+#define EVENT_COUNTER_EVENT_L1_1       0x7
+#define EVENT_COUNTER_EVENT_L1_2       0x8
+#define EVENT_COUNTER_GROUP_SEL_SHIFT  24
+#define EVENT_COUNTER_GROUP_5          0x5
+
+#define PORT_LOGIC_ACK_F_ASPM_CTRL                     0x70C
+#define ENTER_ASPM                                     BIT(30)
+#define L0S_ENTRANCE_LAT_SHIFT                         24
+#define L0S_ENTRANCE_LAT_MASK                          GENMASK(26, 24)
+#define L1_ENTRANCE_LAT_SHIFT                          27
+#define L1_ENTRANCE_LAT_MASK                           GENMASK(29, 27)
+#define N_FTS_SHIFT                                    8
+#define N_FTS_MASK                                     GENMASK(7, 0)
+#define N_FTS_VAL                                      52
+
+#define PORT_LOGIC_GEN2_CTRL                           0x80C
+#define PORT_LOGIC_GEN2_CTRL_DIRECT_SPEED_CHANGE       BIT(17)
+#define FTS_MASK                                       GENMASK(7, 0)
+#define FTS_VAL                                                52
+
+#define PORT_LOGIC_MSI_CTRL_INT_0_EN           0x828
+
+#define GEN3_EQ_CONTROL_OFF                    0x8a8
+#define GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_SHIFT 8
+#define GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_MASK  GENMASK(23, 8)
+#define GEN3_EQ_CONTROL_OFF_FB_MODE_MASK       GENMASK(3, 0)
+
+#define GEN3_RELATED_OFF                       0x890
+#define GEN3_RELATED_OFF_GEN3_ZRXDC_NONCOMPL   BIT(0)
+#define GEN3_RELATED_OFF_GEN3_EQ_DISABLE       BIT(16)
+#define GEN3_RELATED_OFF_RATE_SHADOW_SEL_SHIFT 24
+#define GEN3_RELATED_OFF_RATE_SHADOW_SEL_MASK  GENMASK(25, 24)
+
+/*
+ * CRS response selection.  AMBA_ERROR_RESPONSE_CRS_MASK is an unshifted
+ * mask; the field is positioned with AMBA_ERROR_RESPONSE_CRS_SHIFT.
+ */
+#define PORT_LOGIC_AMBA_ERROR_RESPONSE_DEFAULT 0x8D0
+#define AMBA_ERROR_RESPONSE_CRS_SHIFT          3
+#define AMBA_ERROR_RESPONSE_CRS_MASK           GENMASK(1, 0)
+#define AMBA_ERROR_RESPONSE_CRS_OKAY           0
+#define AMBA_ERROR_RESPONSE_CRS_OKAY_FFFFFFFF  1
+#define AMBA_ERROR_RESPONSE_CRS_OKAY_FFFF0001  2
+
+#define PORT_LOGIC_MSIX_DOORBELL                       0x948
+
+#define CAP_SPCIE_CAP_OFF                      0x154
+#define CAP_SPCIE_CAP_OFF_DSP_TX_PRESET0_MASK  GENMASK(3, 0)
+#define CAP_SPCIE_CAP_OFF_USP_TX_PRESET0_MASK  GENMASK(11, 8)
+#define CAP_SPCIE_CAP_OFF_USP_TX_PRESET0_SHIFT 8
+
+/* NOTE(review): unit not visible here, presumably microseconds - confirm */
+#define PME_ACK_TIMEOUT 10000
+
+#define LTSSM_TIMEOUT 50000    /* 50ms */
+
+/* Initial TX preset programmed for every lane for Gen-3 and Gen-4 */
+#define GEN3_GEN4_EQ_PRESET_INIT       5
+
+/* Core clock rates passed to clk_set_rate(), one per PCIe generation */
+#define GEN1_CORE_CLK_FREQ     62500000
+#define GEN2_CORE_CLK_FREQ     125000000
+#define GEN3_CORE_CLK_FREQ     250000000
+#define GEN4_CORE_CLK_FREQ     500000000
+
+/*
+ * Core clock rate per PCIe generation: entry [n - 1] holds the rate for
+ * Gen-n.  It is indexed with the Current Link Speed field of the Link
+ * Status register minus one.
+ */
+static const unsigned int pcie_gen_freq[] = {
+       GEN1_CORE_CLK_FREQ,
+       GEN2_CORE_CLK_FREQ,
+       GEN3_CORE_CLK_FREQ,
+       GEN4_CORE_CLK_FREQ
+};
+
+/*
+ * DBI offset of the event counter control register, indexed by controller
+ * ID (pcie->cid).  Only six entries exist, so valid IDs are 0..5.
+ */
+static const u32 event_cntr_ctrl_offset[] = {
+       0x1d8,
+       0x1a8,
+       0x1a8,
+       0x1a8,
+       0x1c4,
+       0x1d8
+};
+
+/*
+ * DBI offset of the event counter data register, indexed by controller
+ * ID (pcie->cid).  Only six entries exist, so valid IDs are 0..5.
+ */
+static const u32 event_cntr_data_offset[] = {
+       0x1dc,
+       0x1ac,
+       0x1ac,
+       0x1ac,
+       0x1c8,
+       0x1dc
+};
+
+/*
+ * Per-controller driver state.
+ *
+ * The embedded 'pci' member is what to_tegra_pcie() uses to get back to this
+ * structure.  Fields filled from the device tree by tegra_pcie_dw_parse_dt():
+ * aspm_cmrt, aspm_pwr_on_t, aspm_l0s_enter_lat, num_lanes, max_speed, cid
+ * (second cell of "nvidia,bpmp"), phy_count (number of "phy-names" strings),
+ * update_fc_fixup, supports_clkreq and enable_cdm_check.
+ *
+ * init_link_width is the negotiated link width sampled when interrupts are
+ * enabled; it is later compared against the current width to detect a
+ * degraded link.  cfg_link_cap_l1sub is the DBI offset of the L1 substates
+ * capability register, computed in init_host_aspm().
+ */
+struct tegra_pcie_dw {
+       struct device *dev;
+       struct resource *appl_res;
+       struct resource *dbi_res;
+       struct resource *atu_dma_res;
+       void __iomem *appl_base;
+       struct clk *core_clk;
+       struct reset_control *core_apb_rst;
+       struct reset_control *core_rst;
+       struct dw_pcie pci;
+       struct tegra_bpmp *bpmp;
+
+       bool supports_clkreq;
+       bool enable_cdm_check;
+       bool link_state;
+       bool update_fc_fixup;
+       u8 init_link_width;
+       u32 msi_ctrl_int;
+       u32 num_lanes;
+       u32 max_speed;
+       u32 cid;
+       u32 cfg_link_cap_l1sub;
+       u32 pcie_cap_base;
+       u32 aspm_cmrt;
+       u32 aspm_pwr_on_t;
+       u32 aspm_l0s_enter_lat;
+
+       struct regulator *pex_ctl_supply;
+       struct regulator *slot_ctl_3v3;
+       struct regulator *slot_ctl_12v;
+
+       unsigned int phy_count;
+       struct phy **phys;
+
+       struct dentry *debugfs;
+};
+
+/* Map an embedded struct dw_pcie back to its enclosing tegra_pcie_dw. */
+static inline struct tegra_pcie_dw *to_tegra_pcie(struct dw_pcie *pci)
+{
+       struct tegra_pcie_dw *pcie;
+
+       pcie = container_of(pci, struct tegra_pcie_dw, pci);
+
+       return pcie;
+}
+
+/* Relaxed 32-bit write of 'value' to APPL register offset 'reg'. */
+static inline void appl_writel(struct tegra_pcie_dw *pcie, const u32 value,
+                              const u32 reg)
+{
+       void __iomem *addr = pcie->appl_base + reg;
+
+       writel_relaxed(value, addr);
+}
+
+/* Relaxed 32-bit read of APPL register offset 'reg'. */
+static inline u32 appl_readl(struct tegra_pcie_dw *pcie, const u32 reg)
+{
+       void __iomem *addr = pcie->appl_base + reg;
+
+       return readl_relaxed(addr);
+}
+
+/* Per-SoC match data; currently only the controller operating mode. */
+struct tegra_pcie_soc {
+       enum dw_pcie_device_mode mode;
+};
+
+/*
+ * If Link Bandwidth Management Status is set and the link is narrower than
+ * the width recorded in pcie->init_link_width, limit the target link speed
+ * to 2.5 GT/s and request a link retrain.
+ */
+static void apply_bad_link_workaround(struct pcie_port *pp)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+       struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+       u32 cap = pcie->pcie_cap_base;
+       u32 width;
+       u16 reg;
+
+       /*
+        * NOTE:- Since this scenario is uncommon and link as such is not
+        * stable anyway, not waiting to confirm if link is really
+        * transitioning to Gen-2 speed
+        */
+       reg = dw_pcie_readw_dbi(pci, cap + PCI_EXP_LNKSTA);
+       if (!(reg & PCI_EXP_LNKSTA_LBMS))
+               return;
+
+       width = (reg & PCI_EXP_LNKSTA_NLW) >> PCI_EXP_LNKSTA_NLW_SHIFT;
+       if (pcie->init_link_width <= width)
+               return;
+
+       dev_warn(pci->dev, "PCIe link is bad, width reduced\n");
+
+       /* Cap the target link speed at 2.5 GT/s */
+       reg = dw_pcie_readw_dbi(pci, cap + PCI_EXP_LNKCTL2);
+       reg &= ~PCI_EXP_LNKCTL2_TLS;
+       reg |= PCI_EXP_LNKCTL2_TLS_2_5GT;
+       dw_pcie_writew_dbi(pci, cap + PCI_EXP_LNKCTL2, reg);
+
+       /* Kick off link retraining */
+       reg = dw_pcie_readw_dbi(pci, cap + PCI_EXP_LNKCTL);
+       reg |= PCI_EXP_LNKCTL_RL;
+       dw_pcie_writew_dbi(pci, cap + PCI_EXP_LNKCTL, reg);
+}
+
+/*
+ * Root-port interrupt service routine.
+ *
+ * Decodes the level-0 status register and services, in order:
+ *  - link state interrupts (applies the SBR / surprise link down workaround
+ *    and re-arms the direct speed change),
+ *  - bandwidth interrupts (auto bandwidth -> bad link workaround, bandwidth
+ *    management -> log the current link speed),
+ *  - CDM register check interrupts.
+ *
+ * The level-0 status is kept in its own variable: the scratch variable 'val'
+ * is reused for level-1 status and other registers while servicing the link
+ * state interrupt, so it must not be used to decode the remaining level-0
+ * bits.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t tegra_pcie_rp_irq_handler(struct tegra_pcie_dw *pcie)
+{
+       struct dw_pcie *pci = &pcie->pci;
+       struct pcie_port *pp = &pci->pp;
+       u32 status_l0, val, tmp;
+       u16 val_w;
+
+       status_l0 = appl_readl(pcie, APPL_INTR_STATUS_L0);
+       if (status_l0 & APPL_INTR_STATUS_L0_LINK_STATE_INT) {
+               val = appl_readl(pcie, APPL_INTR_STATUS_L1_0_0);
+               if (val & APPL_INTR_STATUS_L1_0_0_LINK_REQ_RST_NOT_CHGED) {
+                       appl_writel(pcie, val, APPL_INTR_STATUS_L1_0_0);
+
+                       /* SBR & Surprise Link Down WAR */
+                       val = appl_readl(pcie, APPL_CAR_RESET_OVRD);
+                       val &= ~APPL_CAR_RESET_OVRD_CYA_OVERRIDE_CORE_RST_N;
+                       appl_writel(pcie, val, APPL_CAR_RESET_OVRD);
+                       udelay(1);
+                       val = appl_readl(pcie, APPL_CAR_RESET_OVRD);
+                       val |= APPL_CAR_RESET_OVRD_CYA_OVERRIDE_CORE_RST_N;
+                       appl_writel(pcie, val, APPL_CAR_RESET_OVRD);
+
+                       val = dw_pcie_readl_dbi(pci, PORT_LOGIC_GEN2_CTRL);
+                       val |= PORT_LOGIC_GEN2_CTRL_DIRECT_SPEED_CHANGE;
+                       dw_pcie_writel_dbi(pci, PORT_LOGIC_GEN2_CTRL, val);
+               }
+       }
+
+       /* Decode against the level-0 status, not whatever 'val' holds now */
+       if (status_l0 & APPL_INTR_STATUS_L0_INT_INT) {
+               val = appl_readl(pcie, APPL_INTR_STATUS_L1_8_0);
+               if (val & APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS) {
+                       appl_writel(pcie,
+                                   APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS,
+                                   APPL_INTR_STATUS_L1_8_0);
+                       apply_bad_link_workaround(pp);
+               }
+               if (val & APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS) {
+                       appl_writel(pcie,
+                                   APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS,
+                                   APPL_INTR_STATUS_L1_8_0);
+
+                       val_w = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base +
+                                                 PCI_EXP_LNKSTA);
+                       dev_dbg(pci->dev, "Link Speed : Gen-%u\n", val_w &
+                               PCI_EXP_LNKSTA_CLS);
+               }
+       }
+
+       /* Re-sample level-0 status before checking for CDM events */
+       status_l0 = appl_readl(pcie, APPL_INTR_STATUS_L0);
+       if (status_l0 & APPL_INTR_STATUS_L0_CDM_REG_CHK_INT) {
+               val = appl_readl(pcie, APPL_INTR_STATUS_L1_18);
+               tmp = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS);
+               if (val & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMPLT) {
+                       dev_info(pci->dev, "CDM check complete\n");
+                       tmp |= PCIE_PL_CHK_REG_CHK_REG_COMPLETE;
+               }
+               if (val & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMP_ERR) {
+                       dev_err(pci->dev, "CDM comparison mismatch\n");
+                       tmp |= PCIE_PL_CHK_REG_CHK_REG_COMPARISON_ERROR;
+               }
+               if (val & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_LOGIC_ERR) {
+                       dev_err(pci->dev, "CDM Logic error\n");
+                       tmp |= PCIE_PL_CHK_REG_CHK_REG_LOGIC_ERROR;
+               }
+               /* Write the status bits back to the control/status register */
+               dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, tmp);
+               tmp = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_ERR_ADDR);
+               dev_err(pci->dev, "CDM Error Address Offset = 0x%08X\n", tmp);
+       }
+
+       return IRQ_HANDLED;
+}
+
+/* IRQ entry point; 'arg' is the struct tegra_pcie_dw of the controller. */
+static irqreturn_t tegra_pcie_irq_handler(int irq, void *arg)
+{
+       return tegra_pcie_rp_irq_handler(arg);
+}
+
+/*
+ * Read 'size' bytes at offset 'where' of the root port's own configuration
+ * space into *val.  Reads of PORT_LOGIC_MSIX_DOORBELL never touch the
+ * hardware and yield zero.
+ */
+static int tegra_pcie_dw_rd_own_conf(struct pcie_port *pp, int where, int size,
+                                    u32 *val)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+
+       if (where != PORT_LOGIC_MSIX_DOORBELL)
+               return dw_pcie_read(pci->dbi_base + where, size, val);
+
+       /*
+        * This register is specific to endpoint mode but still shows up in
+        * root port mode; touching it while the link is in ASPM-L1 hangs the
+        * system, so never access it.
+        */
+       *val = 0x00000000;
+
+       return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Write 'size' bytes of 'val' at offset 'where' of the root port's own
+ * configuration space.  Writes to PORT_LOGIC_MSIX_DOORBELL are silently
+ * dropped.
+ */
+static int tegra_pcie_dw_wr_own_conf(struct pcie_port *pp, int where, int size,
+                                    u32 val)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+
+       if (where != PORT_LOGIC_MSIX_DOORBELL)
+               return dw_pcie_write(pci->dbi_base + where, size, val);
+
+       /*
+        * This register is specific to endpoint mode but still shows up in
+        * root port mode; touching it while the link is in ASPM-L1 hangs the
+        * system, so never access it.
+        */
+       return PCIBIOS_SUCCESSFUL;
+}
+
+#if defined(CONFIG_PCIEASPM)
+/* Clear the ASPM L1.1 bit in the L1 substates capability register. */
+static void disable_aspm_l11(struct tegra_pcie_dw *pcie)
+{
+       struct dw_pcie *pci = &pcie->pci;
+       u32 l1ss_cap;
+
+       l1ss_cap = dw_pcie_readl_dbi(pci, pcie->cfg_link_cap_l1sub);
+       l1ss_cap &= ~PCI_L1SS_CAP_ASPM_L1_1;
+       dw_pcie_writel_dbi(pci, pcie->cfg_link_cap_l1sub, l1ss_cap);
+}
+
+/* Clear the ASPM L1.2 bit in the L1 substates capability register. */
+static void disable_aspm_l12(struct tegra_pcie_dw *pcie)
+{
+       struct dw_pcie *pci = &pcie->pci;
+       u32 l1ss_cap;
+
+       l1ss_cap = dw_pcie_readl_dbi(pci, pcie->cfg_link_cap_l1sub);
+       l1ss_cap &= ~PCI_L1SS_CAP_ASPM_L1_2;
+       dw_pcie_writel_dbi(pci, pcie->cfg_link_cap_l1sub, l1ss_cap);
+}
+
+/*
+ * Select 'event' in group 5 of this controller's event counter, enable
+ * counting, and return the value then read from the counter data register.
+ */
+static inline u32 event_counter_prog(struct tegra_pcie_dw *pcie, u32 event)
+{
+       struct dw_pcie *pci = &pcie->pci;
+       u32 ctrl;
+
+       ctrl = dw_pcie_readl_dbi(pci, event_cntr_ctrl_offset[pcie->cid]);
+       ctrl &= ~(EVENT_COUNTER_EVENT_SEL_MASK << EVENT_COUNTER_EVENT_SEL_SHIFT);
+       ctrl |= EVENT_COUNTER_GROUP_5 << EVENT_COUNTER_GROUP_SEL_SHIFT;
+       ctrl |= event << EVENT_COUNTER_EVENT_SEL_SHIFT;
+       ctrl |= EVENT_COUNTER_ENABLE_ALL << EVENT_COUNTER_ENABLE_SHIFT;
+       dw_pcie_writel_dbi(pci, event_cntr_ctrl_offset[pcie->cid], ctrl);
+
+       return dw_pcie_readl_dbi(pci, event_cntr_data_offset[pcie->cid]);
+}
+
+/*
+ * debugfs show callback: print the Tx L0s, Rx L0s, L1, L1.1 and L1.2 entry
+ * counts, then clear all counters and re-enable counting.  Returns 0.
+ */
+static int aspm_state_cnt(struct seq_file *s, void *data)
+{
+       struct tegra_pcie_dw *pcie = dev_get_drvdata(s->private);
+       struct dw_pcie *pci = &pcie->pci;
+       u32 count, ctrl;
+
+       count = event_counter_prog(pcie, EVENT_COUNTER_EVENT_Tx_L0S);
+       seq_printf(s, "Tx L0s entry count : %u\n", count);
+
+       count = event_counter_prog(pcie, EVENT_COUNTER_EVENT_Rx_L0S);
+       seq_printf(s, "Rx L0s entry count : %u\n", count);
+
+       count = event_counter_prog(pcie, EVENT_COUNTER_EVENT_L1);
+       seq_printf(s, "Link L1 entry count : %u\n", count);
+
+       count = event_counter_prog(pcie, EVENT_COUNTER_EVENT_L1_1);
+       seq_printf(s, "Link L1.1 entry count : %u\n", count);
+
+       count = event_counter_prog(pcie, EVENT_COUNTER_EVENT_L1_2);
+       seq_printf(s, "Link L1.2 entry count : %u\n", count);
+
+       /* Reading is destructive by design: wipe every counter ... */
+       dw_pcie_writel_dbi(pci, event_cntr_ctrl_offset[pcie->cid],
+                          EVENT_COUNTER_ALL_CLEAR);
+
+       /* ... and start counting again from zero */
+       ctrl = EVENT_COUNTER_ENABLE_ALL << EVENT_COUNTER_ENABLE_SHIFT;
+       ctrl |= EVENT_COUNTER_GROUP_5 << EVENT_COUNTER_GROUP_SEL_SHIFT;
+       dw_pcie_writel_dbi(pci, event_cntr_ctrl_offset[pcie->cid], ctrl);
+
+       return 0;
+}
+
+/*
+ * Set up ASPM on the root port: locate the L1 substates capability, enable
+ * the ASPM event counters, and program the T_cmrt, T_pwr_on and L0s entrance
+ * latency values held in 'pcie'.
+ */
+static void init_host_aspm(struct tegra_pcie_dw *pcie)
+{
+       struct dw_pcie *pci = &pcie->pci;
+       u32 val;
+
+       /*
+        * Remember where the L1SS capability register lives for later use.
+        * NOTE(review): the lookup result is not checked here - confirm the
+        * capability is always present.
+        */
+       val = dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_L1SS);
+       pcie->cfg_link_cap_l1sub = val + PCI_L1SS_CAP;
+
+       /* Enable ASPM counters */
+       val = EVENT_COUNTER_ENABLE_ALL << EVENT_COUNTER_ENABLE_SHIFT;
+       val |= EVENT_COUNTER_GROUP_5 << EVENT_COUNTER_GROUP_SEL_SHIFT;
+       dw_pcie_writel_dbi(pci, event_cntr_ctrl_offset[pcie->cid], val);
+
+       /* Program T_cmrt and T_pwr_on values */
+       val = dw_pcie_readl_dbi(pci, pcie->cfg_link_cap_l1sub);
+       val &= ~(PCI_L1SS_CAP_CM_RESTORE_TIME | PCI_L1SS_CAP_P_PWR_ON_VALUE);
+       val |= (pcie->aspm_cmrt << 8);
+       val |= (pcie->aspm_pwr_on_t << 19);
+       dw_pcie_writel_dbi(pci, pcie->cfg_link_cap_l1sub, val);
+
+       /*
+        * Program the L0s entrance latency and set ENTER_ASPM.
+        * NOTE(review): only the L0s field is written here; the L1 entrance
+        * latency field is left at its current value.
+        */
+       val = dw_pcie_readl_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL);
+       val &= ~L0S_ENTRANCE_LAT_MASK;
+       val |= (pcie->aspm_l0s_enter_lat << L0S_ENTRANCE_LAT_SHIFT);
+       val |= ENTER_ASPM;
+       dw_pcie_writel_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL, val);
+}
+
+/*
+ * Create the "aspm_state_cnt" debugfs file under pcie->debugfs.  A failure
+ * is only logged; the function always returns 0.
+ */
+static int init_debugfs(struct tegra_pcie_dw *pcie)
+{
+       struct dentry *entry = debugfs_create_devm_seqfile(pcie->dev,
+                                                          "aspm_state_cnt",
+                                                          pcie->debugfs,
+                                                          aspm_state_cnt);
+
+       if (IS_ERR_OR_NULL(entry)) {
+               dev_err(pcie->dev,
+                       "Failed to create debugfs file \"aspm_state_cnt\"\n");
+       }
+
+       return 0;
+}
+#else
+/* No-op stand-ins used when CONFIG_PCIEASPM is not enabled */
+static inline void disable_aspm_l12(struct tegra_pcie_dw *pcie) { return; }
+static inline void disable_aspm_l11(struct tegra_pcie_dw *pcie) { return; }
+static inline void init_host_aspm(struct tegra_pcie_dw *pcie) { return; }
+static inline int init_debugfs(struct tegra_pcie_dw *pcie) { return 0; }
+#endif
+
+/*
+ * Enable the link state interrupt (and, when pcie->enable_cdm_check is set,
+ * the CDM register check interrupts), record the current negotiated link
+ * width in pcie->init_link_width and enable the Link Bandwidth Management
+ * interrupt in Link Control.
+ */
+static void tegra_pcie_enable_system_interrupts(struct pcie_port *pp)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+       struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+       u32 val;
+       u16 val_w;
+
+       /* Level-0 and level-1 enables for link state changes */
+       val = appl_readl(pcie, APPL_INTR_EN_L0_0);
+       val |= APPL_INTR_EN_L0_0_LINK_STATE_INT_EN;
+       appl_writel(pcie, val, APPL_INTR_EN_L0_0);
+
+       val = appl_readl(pcie, APPL_INTR_EN_L1_0_0);
+       val |= APPL_INTR_EN_L1_0_0_LINK_REQ_RST_NOT_INT_EN;
+       appl_writel(pcie, val, APPL_INTR_EN_L1_0_0);
+
+       /* CDM check: only comparison and logic errors are enabled at level 1 */
+       if (pcie->enable_cdm_check) {
+               val = appl_readl(pcie, APPL_INTR_EN_L0_0);
+               val |= APPL_INTR_EN_L0_0_CDM_REG_CHK_INT_EN;
+               appl_writel(pcie, val, APPL_INTR_EN_L0_0);
+
+               val = appl_readl(pcie, APPL_INTR_EN_L1_18);
+               val |= APPL_INTR_EN_L1_18_CDM_REG_CHK_CMP_ERR;
+               val |= APPL_INTR_EN_L1_18_CDM_REG_CHK_LOGIC_ERR;
+               appl_writel(pcie, val, APPL_INTR_EN_L1_18);
+       }
+
+       /* Baseline width used later by apply_bad_link_workaround() */
+       val_w = dw_pcie_readw_dbi(&pcie->pci, pcie->pcie_cap_base +
+                                 PCI_EXP_LNKSTA);
+       pcie->init_link_width = (val_w & PCI_EXP_LNKSTA_NLW) >>
+                               PCI_EXP_LNKSTA_NLW_SHIFT;
+
+       val_w = dw_pcie_readw_dbi(&pcie->pci, pcie->pcie_cap_base +
+                                 PCI_EXP_LNKCTL);
+       val_w |= PCI_EXP_LNKCTL_LBMIE;
+       dw_pcie_writew_dbi(&pcie->pci, pcie->pcie_cap_base + PCI_EXP_LNKCTL,
+                          val_w);
+}
+
+/*
+ * Enable legacy (INTx) interrupt delivery together with the auto bandwidth
+ * and bandwidth management interrupts; the AER interrupt is enabled as well
+ * when CONFIG_PCIEAER is set.
+ */
+static void tegra_pcie_enable_legacy_interrupts(struct pcie_port *pp)
+{
+       struct tegra_pcie_dw *pcie = to_tegra_pcie(to_dw_pcie_from_pp(pp));
+       u32 en;
+
+       /* Enable legacy interrupt generation */
+       en = appl_readl(pcie, APPL_INTR_EN_L0_0);
+       en |= APPL_INTR_EN_L0_0_SYS_INTR_EN | APPL_INTR_EN_L0_0_INT_INT_EN;
+       appl_writel(pcie, en, APPL_INTR_EN_L0_0);
+
+       en = appl_readl(pcie, APPL_INTR_EN_L1_8_0);
+       en |= APPL_INTR_EN_L1_8_INTX_EN |
+             APPL_INTR_EN_L1_8_AUTO_BW_INT_EN |
+             APPL_INTR_EN_L1_8_BW_MGT_INT_EN;
+       if (IS_ENABLED(CONFIG_PCIEAER))
+               en |= APPL_INTR_EN_L1_8_AER_INT_EN;
+       appl_writel(pcie, en, APPL_INTR_EN_L1_8_0);
+}
+
+/* Initialise the DesignWare MSI logic and enable MSI interrupt delivery. */
+static void tegra_pcie_enable_msi_interrupts(struct pcie_port *pp)
+{
+       struct tegra_pcie_dw *pcie = to_tegra_pcie(to_dw_pcie_from_pp(pp));
+       u32 en;
+
+       dw_pcie_msi_init(pp);
+
+       /* Enable MSI interrupt generation */
+       en = appl_readl(pcie, APPL_INTR_EN_L0_0);
+       en |= APPL_INTR_EN_L0_0_SYS_MSI_INTR_EN |
+             APPL_INTR_EN_L0_0_MSI_RCV_INT_EN;
+       appl_writel(pcie, en, APPL_INTR_EN_L0_0);
+}
+
+/*
+ * Clear every APPL interrupt status register listed below, then enable the
+ * system and legacy interrupts, plus MSI interrupts when CONFIG_PCI_MSI is
+ * set.
+ */
+static void tegra_pcie_enable_interrupts(struct pcie_port *pp)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+       struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+
+       /* Clear interrupt statuses before enabling interrupts */
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L0);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_0_0);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_1);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_2);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_3);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_6);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_7);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_8_0);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_9);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_10);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_11);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_13);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_14);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_15);
+       appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_17);
+
+       tegra_pcie_enable_system_interrupts(pp);
+       tegra_pcie_enable_legacy_interrupts(pp);
+       if (IS_ENABLED(CONFIG_PCI_MSI))
+               tegra_pcie_enable_msi_interrupts(pp);
+}
+
+/*
+ * Program the initial equalization presets for every lane and the preset
+ * request vectors.  The request vector in GEN3_EQ_CONTROL_OFF is written
+ * twice, once with the rate shadow select field cleared (0x3ff) and once
+ * with it set to 1 (0x360); the select field is cleared again at the end.
+ */
+static void config_gen3_gen4_eq_presets(struct tegra_pcie_dw *pcie)
+{
+       struct dw_pcie *pci = &pcie->pci;
+       u32 val, offset, i;
+
+       /* Program init preset */
+       for (i = 0; i < pcie->num_lanes; i++) {
+               /* One 16-bit entry per lane, starting at CAP_SPCIE_CAP_OFF */
+               dw_pcie_read(pci->dbi_base + CAP_SPCIE_CAP_OFF
+                                + (i * 2), 2, &val);
+               val &= ~CAP_SPCIE_CAP_OFF_DSP_TX_PRESET0_MASK;
+               val |= GEN3_GEN4_EQ_PRESET_INIT;
+               val &= ~CAP_SPCIE_CAP_OFF_USP_TX_PRESET0_MASK;
+               val |= (GEN3_GEN4_EQ_PRESET_INIT <<
+                          CAP_SPCIE_CAP_OFF_USP_TX_PRESET0_SHIFT);
+               dw_pcie_write(pci->dbi_base + CAP_SPCIE_CAP_OFF
+                                + (i * 2), 2, val);
+
+               /*
+                * One 8-bit entry per lane in the 16 GT/s capability.  The
+                * capability lookup is repeated on every iteration although
+                * its result does not depend on the lane index.
+                */
+               offset = dw_pcie_find_ext_capability(pci,
+                                                    PCI_EXT_CAP_ID_PL_16GT) +
+                               PCI_PL_16GT_LE_CTRL;
+               dw_pcie_read(pci->dbi_base + offset + i, 1, &val);
+               val &= ~PCI_PL_16GT_LE_CTRL_DSP_TX_PRESET_MASK;
+               val |= GEN3_GEN4_EQ_PRESET_INIT;
+               val &= ~PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK;
+               val |= (GEN3_GEN4_EQ_PRESET_INIT <<
+                       PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT);
+               dw_pcie_write(pci->dbi_base + offset + i, 1, val);
+       }
+
+       /* Rate shadow select = 0 */
+       val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
+       val &= ~GEN3_RELATED_OFF_RATE_SHADOW_SEL_MASK;
+       dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
+
+       val = dw_pcie_readl_dbi(pci, GEN3_EQ_CONTROL_OFF);
+       val &= ~GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_MASK;
+       val |= (0x3ff << GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_SHIFT);
+       val &= ~GEN3_EQ_CONTROL_OFF_FB_MODE_MASK;
+       dw_pcie_writel_dbi(pci, GEN3_EQ_CONTROL_OFF, val);
+
+       /* Rate shadow select = 1 */
+       val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
+       val &= ~GEN3_RELATED_OFF_RATE_SHADOW_SEL_MASK;
+       val |= (0x1 << GEN3_RELATED_OFF_RATE_SHADOW_SEL_SHIFT);
+       dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
+
+       val = dw_pcie_readl_dbi(pci, GEN3_EQ_CONTROL_OFF);
+       val &= ~GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_MASK;
+       val |= (0x360 << GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_SHIFT);
+       val &= ~GEN3_EQ_CONTROL_OFF_FB_MODE_MASK;
+       dw_pcie_writel_dbi(pci, GEN3_EQ_CONTROL_OFF, val);
+
+       /* Leave rate shadow select at 0 */
+       val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
+       val &= ~GEN3_RELATED_OFF_RATE_SHADOW_SEL_MASK;
+       dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
+}
+
+/*
+ * Program the root port's DBI registers (decode bits, FTS, CRS response,
+ * link capabilities, equalization presets, ASPM), run the DesignWare RC
+ * setup, set the core clock to the Gen-4 rate, then toggle the PEX reset bit
+ * in APPL_PINMUX around enabling the LTSSM and wait 100 ms.
+ */
+static void tegra_pcie_prepare_host(struct pcie_port *pp)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+       struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+       u32 val;
+
+       val = dw_pcie_readl_dbi(pci, PCI_IO_BASE);
+       val &= ~(IO_BASE_IO_DECODE | IO_BASE_IO_DECODE_BIT8);
+       dw_pcie_writel_dbi(pci, PCI_IO_BASE, val);
+
+       val = dw_pcie_readl_dbi(pci, PCI_PREF_MEMORY_BASE);
+       val |= CFG_PREF_MEM_LIMIT_BASE_MEM_DECODE;
+       val |= CFG_PREF_MEM_LIMIT_BASE_MEM_LIMIT_DECODE;
+       dw_pcie_writel_dbi(pci, PCI_PREF_MEMORY_BASE, val);
+
+       dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, 0);
+
+       /* Configure FTS */
+       val = dw_pcie_readl_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL);
+       val &= ~(N_FTS_MASK << N_FTS_SHIFT);
+       val |= N_FTS_VAL << N_FTS_SHIFT;
+       dw_pcie_writel_dbi(pci, PORT_LOGIC_ACK_F_ASPM_CTRL, val);
+
+       val = dw_pcie_readl_dbi(pci, PORT_LOGIC_GEN2_CTRL);
+       val &= ~FTS_MASK;
+       val |= FTS_VAL;
+       dw_pcie_writel_dbi(pci, PORT_LOGIC_GEN2_CTRL, val);
+
+       /* Enable as 0xFFFF0001 response for CRS */
+       val = dw_pcie_readl_dbi(pci, PORT_LOGIC_AMBA_ERROR_RESPONSE_DEFAULT);
+       val &= ~(AMBA_ERROR_RESPONSE_CRS_MASK << AMBA_ERROR_RESPONSE_CRS_SHIFT);
+       val |= (AMBA_ERROR_RESPONSE_CRS_OKAY_FFFF0001 <<
+               AMBA_ERROR_RESPONSE_CRS_SHIFT);
+       dw_pcie_writel_dbi(pci, PORT_LOGIC_AMBA_ERROR_RESPONSE_DEFAULT, val);
+
+       /*
+        * Configure Max Speed from DT.  max_speed is a u32 holding the
+        * result of of_pci_get_max_link_speed(), hence the comparison
+        * against -EINVAL.
+        */
+       if (pcie->max_speed && pcie->max_speed != -EINVAL) {
+               val = dw_pcie_readl_dbi(pci, pcie->pcie_cap_base +
+                                       PCI_EXP_LNKCAP);
+               val &= ~PCI_EXP_LNKCAP_SLS;
+               val |= pcie->max_speed;
+               dw_pcie_writel_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKCAP,
+                                  val);
+       }
+
+       /*
+        * Configure Max lane width from DT.
+        * NOTE(review): the LNKCAP MLW field is positioned with the LNKSTA
+        * NLW shift - presumably both fields sit at the same bit offset;
+        * confirm.
+        */
+       val = dw_pcie_readl_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKCAP);
+       val &= ~PCI_EXP_LNKCAP_MLW;
+       val |= (pcie->num_lanes << PCI_EXP_LNKSTA_NLW_SHIFT);
+       dw_pcie_writel_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKCAP, val);
+
+       config_gen3_gen4_eq_presets(pcie);
+
+       init_host_aspm(pcie);
+
+       val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
+       val &= ~GEN3_RELATED_OFF_GEN3_ZRXDC_NONCOMPL;
+       dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
+
+       /* Optional fixup requested through "nvidia,update-fc-fixup" */
+       if (pcie->update_fc_fixup) {
+               val = dw_pcie_readl_dbi(pci, CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF);
+               val |= 0x1 << CFG_TIMER_CTRL_ACK_NAK_SHIFT;
+               dw_pcie_writel_dbi(pci, CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF, val);
+       }
+
+       dw_pcie_setup_rc(pp);
+
+       /* Start at the Gen-4 rate; the return value is not checked */
+       clk_set_rate(pcie->core_clk, GEN4_CORE_CLK_FREQ);
+
+       /* Assert RST */
+       val = appl_readl(pcie, APPL_PINMUX);
+       val &= ~APPL_PINMUX_PEX_RST;
+       appl_writel(pcie, val, APPL_PINMUX);
+
+       usleep_range(100, 200);
+
+       /* Enable LTSSM */
+       val = appl_readl(pcie, APPL_CTRL);
+       val |= APPL_CTRL_LTSSM_EN;
+       appl_writel(pcie, val, APPL_CTRL);
+
+       /* De-assert RST */
+       val = appl_readl(pcie, APPL_PINMUX);
+       val |= APPL_PINMUX_PEX_RST;
+       appl_writel(pcie, val, APPL_PINMUX);
+
+       msleep(100);
+}
+
+/*
+ * dw_pcie host_init callback: bring the link up and enable interrupts.
+ *
+ * If the first link-up attempt fails while the LTSSM state field reads 0x11
+ * and the data link layer is not up, the attempt is repeated once with the
+ * Data Link Feature exchange disabled.  When the link stays down the
+ * function returns without enabling interrupts.  Once the link is up the
+ * core clock is set to the rate matching the negotiated speed.
+ *
+ * Always returns 0; a link that does not come up is not an error.
+ */
+static int tegra_pcie_dw_host_init(struct pcie_port *pp)
+{
+       struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+       struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+       u32 val, tmp, offset, speed;
+
+       tegra_pcie_prepare_host(pp);
+
+       if (dw_pcie_wait_for_link(pci)) {
+               /*
+                * There are some endpoints which can't get the link up if
+                * root port has Data Link Feature (DLF) enabled.
+                * Refer Spec rev 4.0 ver 1.0 sec 3.4.2 & 7.7.4 for more info
+                * on Scaled Flow Control and DLF.
+                * So, need to confirm that is indeed the case here and attempt
+                * link up once again with DLF disabled.
+                */
+               val = appl_readl(pcie, APPL_DEBUG);
+               val &= APPL_DEBUG_LTSSM_STATE_MASK;
+               val >>= APPL_DEBUG_LTSSM_STATE_SHIFT;
+               tmp = appl_readl(pcie, APPL_LINK_STATUS);
+               tmp &= APPL_LINK_STATUS_RDLH_LINK_UP;
+               if (!(val == 0x11 && !tmp)) {
+                       /* Link is down for all good reasons */
+                       return 0;
+               }
+
+               dev_info(pci->dev, "Link is down in DLL\n");
+               dev_info(pci->dev, "Trying again with DLFE disabled\n");
+               /* Disable LTSSM */
+               val = appl_readl(pcie, APPL_CTRL);
+               val &= ~APPL_CTRL_LTSSM_EN;
+               appl_writel(pcie, val, APPL_CTRL);
+
+               reset_control_assert(pcie->core_rst);
+               reset_control_deassert(pcie->core_rst);
+
+               /*
+                * Read-modify-write of the DLF capability register: the
+                * write must target the same register that was read, not
+                * the extended capability header at 'offset'.
+                */
+               offset = dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_DLF);
+               val = dw_pcie_readl_dbi(pci, offset + PCI_DLF_CAP);
+               val &= ~PCI_DLF_EXCHANGE_ENABLE;
+               dw_pcie_writel_dbi(pci, offset + PCI_DLF_CAP, val);
+
+               tegra_pcie_prepare_host(pp);
+
+               if (dw_pcie_wait_for_link(pci))
+                       return 0;
+       }
+
+       speed = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKSTA) &
+               PCI_EXP_LNKSTA_CLS;
+
+       /*
+        * The speed field comes straight from hardware; never index the
+        * frequency table with a value it has no entry for.  Fall back to
+        * the Gen-1 rate in that case.
+        */
+       if (speed < 1 || speed > ARRAY_SIZE(pcie_gen_freq)) {
+               dev_warn(pci->dev, "Unexpected link speed %u, using Gen-1 clock\n",
+                        speed);
+               speed = 1;
+       }
+       clk_set_rate(pcie->core_clk, pcie_gen_freq[speed - 1]);
+
+       tegra_pcie_enable_interrupts(pp);
+
+       return 0;
+}
+
+/* Return 1 when Data Link Layer Link Active is set in Link Status, else 0. */
+static int tegra_pcie_dw_link_up(struct dw_pcie *pci)
+{
+       struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+       u32 lnksta;
+
+       lnksta = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKSTA);
+       if (lnksta & PCI_EXP_LNKSTA_DLLLA)
+               return 1;
+
+       return 0;
+}
+
+/* set_num_vectors callback: request MAX_MSI_IRQS MSI vectors. */
+static void tegra_pcie_set_msi_vec_num(struct pcie_port *pp)
+{
+       pp->num_vectors = MAX_MSI_IRQS;
+}
+
+/* Core callbacks: link state is taken from the Link Status register */
+static const struct dw_pcie_ops tegra_dw_pcie_ops = {
+       .link_up = tegra_pcie_dw_link_up,
+};
+
+/*
+ * Host-mode callbacks.  Own-config accessors are overridden so that
+ * PORT_LOGIC_MSIX_DOORBELL is never touched.
+ */
+static struct dw_pcie_host_ops tegra_pcie_dw_host_ops = {
+       .rd_own_conf = tegra_pcie_dw_rd_own_conf,
+       .wr_own_conf = tegra_pcie_dw_wr_own_conf,
+       .host_init = tegra_pcie_dw_host_init,
+       .set_num_vectors = tegra_pcie_set_msi_vec_num,
+};
+
+/* Power off and exit every PHY, last one first. */
+static void tegra_pcie_disable_phy(struct tegra_pcie_dw *pcie)
+{
+       unsigned int i;
+
+       for (i = pcie->phy_count; i > 0; i--) {
+               struct phy *phy = pcie->phys[i - 1];
+
+               phy_power_off(phy);
+               phy_exit(phy);
+       }
+}
+
+/*
+ * Initialise and power on every PHY in order.
+ *
+ * On failure, every PHY that was already brought up is powered off and
+ * exited in reverse order, and the error code of the failing call is
+ * returned.  Returns 0 on success.
+ */
+static int tegra_pcie_enable_phy(struct tegra_pcie_dw *pcie)
+{
+       unsigned int i;
+       int ret;
+
+       for (i = 0; i < pcie->phy_count; i++) {
+               ret = phy_init(pcie->phys[i]);
+               if (ret < 0)
+                       goto unwind;
+
+               ret = phy_power_on(pcie->phys[i]);
+               if (ret < 0) {
+                       /* This PHY was initialised but never powered on */
+                       phy_exit(pcie->phys[i]);
+                       goto unwind;
+               }
+       }
+
+       return 0;
+
+unwind:
+       /* PHYs [0, i) are fully up; tear them down last to first */
+       while (i--) {
+               phy_power_off(pcie->phys[i]);
+               phy_exit(pcie->phys[i]);
+       }
+
+       return ret;
+}
+
+/*
+ * Fill 'pcie' from the device tree node of pcie->dev.
+ *
+ * "nvidia,aspm-cmrt-us", "num-lanes", the second cell of "nvidia,bpmp" and
+ * "phy-names" are mandatory: a failure to read any of them is returned to
+ * the caller.  The remaining ASPM properties are optional and a failure is
+ * only logged.  Returns 0 on success.
+ */
+static int tegra_pcie_dw_parse_dt(struct tegra_pcie_dw *pcie)
+{
+       struct device *dev = pcie->dev;
+       struct device_node *np = dev->of_node;
+       int err, count;
+
+       err = of_property_read_u32(np, "nvidia,aspm-cmrt-us", &pcie->aspm_cmrt);
+       if (err < 0) {
+               dev_info(dev, "Failed to read ASPM T_cmrt: %d\n", err);
+               return err;
+       }
+
+       err = of_property_read_u32(np, "nvidia,aspm-pwr-on-t-us",
+                                  &pcie->aspm_pwr_on_t);
+       if (err < 0)
+               dev_info(dev, "Failed to read ASPM Power On time: %d\n", err);
+
+       err = of_property_read_u32(np, "nvidia,aspm-l0s-entrance-latency-us",
+                                  &pcie->aspm_l0s_enter_lat);
+       if (err < 0)
+               dev_info(dev, "Failed to read ASPM L0s Entrance latency: %d\n",
+                        err);
+
+       err = of_property_read_u32(np, "num-lanes", &pcie->num_lanes);
+       if (err < 0) {
+               dev_err(dev, "Failed to read num-lanes: %d\n", err);
+               return err;
+       }
+
+       pcie->max_speed = of_pci_get_max_link_speed(np);
+
+       /* The controller ID is the second cell of the BPMP phandle property */
+       err = of_property_read_u32_index(np, "nvidia,bpmp", 1, &pcie->cid);
+       if (err) {
+               dev_err(dev, "Failed to read Controller-ID: %d\n", err);
+               return err;
+       }
+
+       count = of_property_count_strings(np, "phy-names");
+       if (count < 0) {
+               dev_err(dev, "Failed to find PHY entries: %d\n", count);
+               return count;
+       }
+       pcie->phy_count = count;
+
+       if (of_property_read_bool(np, "nvidia,update-fc-fixup"))
+               pcie->update_fc_fixup = true;
+
+       pcie->supports_clkreq = of_property_read_bool(np, "supports-clkreq");
+       pcie->enable_cdm_check =
+               of_property_read_bool(np, "snps,enable-cdm-check");
+
+       return 0;
+}
+
+/*
+ * Send an MRQ_UPHY request to the BPMP to enable or disable this controller.
+ * Returns 0 without sending anything for controller 5, otherwise the result
+ * of tegra_bpmp_transfer().
+ */
+static int tegra_pcie_bpmp_set_ctrl_state(struct tegra_pcie_dw *pcie,
+                                         bool enable)
+{
+       struct mrq_uphy_request request;
+       struct mrq_uphy_response response;
+       struct tegra_bpmp_message message;
+
+       /* Controller-5 doesn't need to have its state set by BPMP-FW */
+       if (pcie->cid == 5)
+               return 0;
+
+       memset(&request, 0, sizeof(request));
+       memset(&response, 0, sizeof(response));
+
+       request.cmd = CMD_UPHY_PCIE_CONTROLLER_STATE;
+       request.controller_state.pcie_controller = pcie->cid;
+       request.controller_state.enable = enable;
+
+       memset(&message, 0, sizeof(message));
+       message.mrq = MRQ_UPHY;
+       message.tx.data = &request;
+       message.tx.size = sizeof(request);
+       message.rx.data = &response;
+       message.rx.size = sizeof(response);
+
+       return tegra_bpmp_transfer(pcie->bpmp, &message);
+}
+
+/*
+ * Put the devices in slot 0 of the first bus below the root bus into D0.
+ * Failures are logged and otherwise ignored.
+ */
+static void tegra_pcie_downstream_dev_to_D0(struct tegra_pcie_dw *pcie)
+{
+       struct pci_bus *root = pcie->pci.pp.root_bus;
+       struct pci_bus *bus, *downstream = NULL;
+       struct pci_dev *pdev;
+
+       /*
+        * link doesn't go into L2 state with some of the endpoints with Tegra
+        * if they are not in D0 state. So, need to make sure that immediate
+        * downstream devices are in D0 state before sending PME_TurnOff to put
+        * link into L2 state.
+        * This is as per PCI Express Base r4.0 v1.0 September 27-2017,
+        * 5.2 Link State Power Management (Page #428).
+        */
+
+       /* Pick the first child bus whose parent is the root bus */
+       list_for_each_entry(bus, &root->children, node) {
+               if (bus->parent != root)
+                       continue;
+
+               downstream = bus;
+               break;
+       }
+
+       if (!downstream) {
+               dev_err(pcie->dev, "Failed to find downstream devices\n");
+               return;
+       }
+
+       /* Bring downstream devices to D0 if they are not already in */
+       list_for_each_entry(pdev, &downstream->devices, bus_list) {
+               if (PCI_SLOT(pdev->devfn) != 0)
+                       continue;
+
+               if (pci_set_power_state(pdev, PCI_D0))
+                       dev_err(pcie->dev,
+                               "Failed to transition %s to D0 state\n",
+                               dev_name(&pdev->dev));
+       }
+}
+
+/*
+ * Look up the optional "vpcie3v3" and "vpcie12v" slot supplies.
+ *
+ * A supply that is reported as -ENODEV is treated as absent and its
+ * handle is stored as NULL; any other lookup error is returned.
+ */
+static int tegra_pcie_get_slot_regulators(struct tegra_pcie_dw *pcie)
+{
+       int err;
+
+       pcie->slot_ctl_3v3 = devm_regulator_get_optional(pcie->dev, "vpcie3v3");
+       if (IS_ERR(pcie->slot_ctl_3v3)) {
+               err = PTR_ERR(pcie->slot_ctl_3v3);
+               if (err != -ENODEV)
+                       return err;
+
+               pcie->slot_ctl_3v3 = NULL;
+       }
+
+       pcie->slot_ctl_12v = devm_regulator_get_optional(pcie->dev, "vpcie12v");
+       if (IS_ERR(pcie->slot_ctl_12v)) {
+               err = PTR_ERR(pcie->slot_ctl_12v);
+               if (err != -ENODEV)
+                       return err;
+
+               pcie->slot_ctl_12v = NULL;
+       }
+
+       return 0;
+}
+
+/*
+ * Enable whichever slot supplies are present (3.3V first, then 12V) and
+ * wait for power to stabilise. If the 12V supply fails to enable, the
+ * 3.3V supply is switched off again before the error is returned.
+ */
+static int tegra_pcie_enable_slot_regulators(struct tegra_pcie_dw *pcie)
+{
+       int err;
+
+       if (pcie->slot_ctl_3v3) {
+               err = regulator_enable(pcie->slot_ctl_3v3);
+               if (err < 0) {
+                       dev_err(pcie->dev,
+                               "Failed to enable 3.3V slot supply: %d\n", err);
+                       return err;
+               }
+       }
+
+       if (pcie->slot_ctl_12v) {
+               err = regulator_enable(pcie->slot_ctl_12v);
+               if (err < 0) {
+                       dev_err(pcie->dev,
+                               "Failed to enable 12V slot supply: %d\n", err);
+                       if (pcie->slot_ctl_3v3)
+                               regulator_disable(pcie->slot_ctl_3v3);
+                       return err;
+               }
+       }
+
+       /* Nothing was powered up, so there is nothing to wait for */
+       if (!pcie->slot_ctl_3v3 && !pcie->slot_ctl_12v)
+               return 0;
+
+       /*
+        * According to PCI Express Card Electromechanical Specification
+        * Revision 1.1, Table-2.4, T_PVPERL (Power stable to PERST# inactive)
+        * should be a minimum of 100ms.
+        */
+       msleep(100);
+
+       return 0;
+}
+
+/*
+ * Disable the slot supplies that are present, in the reverse of the order
+ * used by tegra_pcie_enable_slot_regulators(): 12V first, then 3.3V.
+ */
+static void tegra_pcie_disable_slot_regulators(struct tegra_pcie_dw *pcie)
+{
+       if (pcie->slot_ctl_12v)
+               regulator_disable(pcie->slot_ctl_12v);
+       if (pcie->slot_ctl_3v3)
+               regulator_disable(pcie->slot_ctl_3v3);
+}
+
+/*
+ * Power up the controller and program it for root port operation.
+ *
+ * The sequence is: enable the controller through BPMP, enable the slot
+ * and "pex_ctl" supplies, enable the core clock, deassert the APB reset,
+ * optionally enable HW_HOT_RST mode, enable the PHY, program the APPL
+ * registers, deassert the core reset and finally look up the PCIe
+ * capability offset.
+ *
+ * @pcie: controller instance
+ * @en_hw_hot_rst: when true, HW_HOT_RST mode is enabled in APPL_CTRL
+ *
+ * Returns 0 on success. On failure every step taken so far is undone in
+ * reverse order and the error of the failing step is returned.
+ */
+static int tegra_pcie_config_controller(struct tegra_pcie_dw *pcie,
+                                       bool en_hw_hot_rst)
+{
+       int ret;
+       u32 val;
+
+       ret = tegra_pcie_bpmp_set_ctrl_state(pcie, true);
+       if (ret) {
+               dev_err(pcie->dev,
+                       "Failed to enable controller %u: %d\n", pcie->cid, ret);
+               return ret;
+       }
+
+       ret = tegra_pcie_enable_slot_regulators(pcie);
+       if (ret < 0)
+               goto fail_slot_reg_en;
+
+       ret = regulator_enable(pcie->pex_ctl_supply);
+       if (ret < 0) {
+               dev_err(pcie->dev, "Failed to enable regulator: %d\n", ret);
+               goto fail_reg_en;
+       }
+
+       ret = clk_prepare_enable(pcie->core_clk);
+       if (ret) {
+               dev_err(pcie->dev, "Failed to enable core clock: %d\n", ret);
+               goto fail_core_clk;
+       }
+
+       ret = reset_control_deassert(pcie->core_apb_rst);
+       if (ret) {
+               dev_err(pcie->dev, "Failed to deassert core APB reset: %d\n",
+                       ret);
+               goto fail_core_apb_rst;
+       }
+
+       if (en_hw_hot_rst) {
+               /* Enable HW_HOT_RST mode */
+               val = appl_readl(pcie, APPL_CTRL);
+               val &= ~(APPL_CTRL_HW_HOT_RST_MODE_MASK <<
+                        APPL_CTRL_HW_HOT_RST_MODE_SHIFT);
+               val |= APPL_CTRL_HW_HOT_RST_EN;
+               appl_writel(pcie, val, APPL_CTRL);
+       }
+
+       ret = tegra_pcie_enable_phy(pcie);
+       if (ret) {
+               dev_err(pcie->dev, "Failed to enable PHY: %d\n", ret);
+               goto fail_phy;
+       }
+
+       /* Update CFG base address */
+       appl_writel(pcie, pcie->dbi_res->start & APPL_CFG_BASE_ADDR_MASK,
+                   APPL_CFG_BASE_ADDR);
+
+       /* Configure this core for RP mode operation */
+       appl_writel(pcie, APPL_DM_TYPE_RP, APPL_DM_TYPE);
+
+       appl_writel(pcie, 0x0, APPL_CFG_SLCG_OVERRIDE);
+
+       val = appl_readl(pcie, APPL_CTRL);
+       appl_writel(pcie, val | APPL_CTRL_SYS_PRE_DET_STATE, APPL_CTRL);
+
+       val = appl_readl(pcie, APPL_CFG_MISC);
+       val |= (APPL_CFG_MISC_ARCACHE_VAL << APPL_CFG_MISC_ARCACHE_SHIFT);
+       appl_writel(pcie, val, APPL_CFG_MISC);
+
+       /* Without CLKREQ support, set both CLKREQ output override bits */
+       if (!pcie->supports_clkreq) {
+               val = appl_readl(pcie, APPL_PINMUX);
+               val |= APPL_PINMUX_CLKREQ_OUT_OVRD_EN;
+               val |= APPL_PINMUX_CLKREQ_OUT_OVRD;
+               appl_writel(pcie, val, APPL_PINMUX);
+       }
+
+       /* Update iATU_DMA base address */
+       appl_writel(pcie,
+                   pcie->atu_dma_res->start & APPL_CFG_IATU_DMA_BASE_ADDR_MASK,
+                   APPL_CFG_IATU_DMA_BASE_ADDR);
+
+       /* NOTE(review): the result of this deassert is not checked */
+       reset_control_deassert(pcie->core_rst);
+
+       pcie->pcie_cap_base = dw_pcie_find_capability(&pcie->pci,
+                                                     PCI_CAP_ID_EXP);
+
+       /* Disable ASPM-L1SS advertisement as there is no CLKREQ routing */
+       if (!pcie->supports_clkreq) {
+               disable_aspm_l11(pcie);
+               disable_aspm_l12(pcie);
+       }
+
+       /* ret is 0 here: it still holds the result of tegra_pcie_enable_phy() */
+       return ret;
+
+fail_phy:
+       reset_control_assert(pcie->core_apb_rst);
+fail_core_apb_rst:
+       clk_disable_unprepare(pcie->core_clk);
+fail_core_clk:
+       regulator_disable(pcie->pex_ctl_supply);
+fail_reg_en:
+       tegra_pcie_disable_slot_regulators(pcie);
+fail_slot_reg_en:
+       tegra_pcie_bpmp_set_ctrl_state(pcie, false);
+
+       return ret;
+}
+
+/*
+ * Power the controller down: assert the core reset, disable the PHY,
+ * assert the APB reset, disable the core clock, disable the "pex_ctl" and
+ * slot supplies and finally ask BPMP to disable the controller.
+ *
+ * The teardown stops at the first step that reports an error and returns
+ * that error, leaving the remaining steps undone. Returns 0 when every
+ * step succeeded.
+ */
+static int __deinit_controller(struct tegra_pcie_dw *pcie)
+{
+       int ret;
+
+       ret = reset_control_assert(pcie->core_rst);
+       if (ret) {
+               dev_err(pcie->dev, "Failed to assert \"core\" reset: %d\n",
+                       ret);
+               return ret;
+       }
+
+       tegra_pcie_disable_phy(pcie);
+
+       ret = reset_control_assert(pcie->core_apb_rst);
+       if (ret) {
+               dev_err(pcie->dev, "Failed to assert APB reset: %d\n", ret);
+               return ret;
+       }
+
+       clk_disable_unprepare(pcie->core_clk);
+
+       ret = regulator_disable(pcie->pex_ctl_supply);
+       if (ret) {
+               dev_err(pcie->dev, "Failed to disable regulator: %d\n", ret);
+               return ret;
+       }
+
+       tegra_pcie_disable_slot_regulators(pcie);
+
+       ret = tegra_pcie_bpmp_set_ctrl_state(pcie, false);
+       if (ret) {
+               dev_err(pcie->dev, "Failed to disable controller %d: %d\n",
+                       pcie->cid, ret);
+               return ret;
+       }
+
+       return ret;
+}
+
+/*
+ * Configure the controller and register it with the DesignWare host core.
+ *
+ * Returns 0 on success or a negative error code. When dw_pcie_host_init()
+ * fails the controller is powered down again and the host init error is
+ * returned to the caller.
+ */
+static int tegra_pcie_init_controller(struct tegra_pcie_dw *pcie)
+{
+       struct dw_pcie *pci = &pcie->pci;
+       struct pcie_port *pp = &pci->pp;
+       int ret;
+
+       ret = tegra_pcie_config_controller(pcie, false);
+       if (ret < 0)
+               return ret;
+
+       pp->ops = &tegra_pcie_dw_host_ops;
+
+       ret = dw_pcie_host_init(pp);
+       if (ret < 0) {
+               dev_err(pcie->dev, "Failed to add PCIe port: %d\n", ret);
+               goto fail_host_init;
+       }
+
+       return 0;
+
+fail_host_init:
+       /*
+        * Return the host init error, not the teardown result: a successful
+        * teardown returns 0 and would make this failure look like success.
+        */
+       __deinit_controller(pcie);
+       return ret;
+}
+
+/*
+ * Set the PM turn-off transmit bit and poll APPL_DEBUG until the link
+ * reports L2 or PME_ACK_TIMEOUT expires.
+ *
+ * Returns 0 if the link is already down or reached L2, otherwise the
+ * error from the poll helper.
+ */
+static int tegra_pcie_try_link_l2(struct tegra_pcie_dw *pcie)
+{
+       u32 reg;
+
+       if (!tegra_pcie_dw_link_up(&pcie->pci))
+               return 0;
+
+       reg = appl_readl(pcie, APPL_RADM_STATUS) | APPL_PM_XMT_TURNOFF_STATE;
+       appl_writel(pcie, reg, APPL_RADM_STATUS);
+
+       return readl_poll_timeout_atomic(pcie->appl_base + APPL_DEBUG, reg,
+                                reg & APPL_DEBUG_PM_LINKST_IN_L2_LAT,
+                                1, PME_ACK_TIMEOUT);
+}
+
+/*
+ * Prepare the link for power down.
+ *
+ * Does nothing if the link is already down. Otherwise it tries to move
+ * the link into L2. If that fails, the PEX_RST bit in APPL_PINMUX is
+ * cleared and the LTSSM is polled for the detect state; once there, the
+ * LTSSM is disabled. In every case where the link was up, the CLKREQ
+ * override is set and the reference clock to the slot is cut.
+ */
+static void tegra_pcie_dw_pme_turnoff(struct tegra_pcie_dw *pcie)
+{
+       u32 data;
+       int err;
+
+       if (!tegra_pcie_dw_link_up(&pcie->pci)) {
+               dev_dbg(pcie->dev, "PCIe link is not up...!\n");
+               return;
+       }
+
+       if (tegra_pcie_try_link_l2(pcie)) {
+               dev_info(pcie->dev, "Link didn't transition to L2 state\n");
+               /*
+                * TX lane clock freq will reset to Gen1 only if link is in L2
+                * or detect state.
+                * So apply pex_rst to end point to force RP to go into detect
+                * state
+                */
+               data = appl_readl(pcie, APPL_PINMUX);
+               data &= ~APPL_PINMUX_PEX_RST;
+               appl_writel(pcie, data, APPL_PINMUX);
+
+               /* Wait for the LTSSM state field to read as pre-detect */
+               err = readl_poll_timeout_atomic(pcie->appl_base + APPL_DEBUG,
+                                               data,
+                                               ((data &
+                                               APPL_DEBUG_LTSSM_STATE_MASK) >>
+                                               APPL_DEBUG_LTSSM_STATE_SHIFT) ==
+                                               LTSSM_STATE_PRE_DETECT,
+                                               1, LTSSM_TIMEOUT);
+               if (err) {
+                       dev_info(pcie->dev, "Link didn't go to detect state\n");
+               } else {
+                       /* Disable LTSSM after link is in detect state */
+                       data = appl_readl(pcie, APPL_CTRL);
+                       data &= ~APPL_CTRL_LTSSM_EN;
+                       appl_writel(pcie, data, APPL_CTRL);
+               }
+       }
+       /*
+        * DBI registers may not be accessible after this as PLL-E would be
+        * down depending on how CLKREQ is pulled by end point
+        */
+       data = appl_readl(pcie, APPL_PINMUX);
+       data |= (APPL_PINMUX_CLKREQ_OVERRIDE_EN | APPL_PINMUX_CLKREQ_OVERRIDE);
+       /* Cut REFCLK to slot */
+       data |= APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE_EN;
+       data &= ~APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE;
+       appl_writel(pcie, data, APPL_PINMUX);
+}
+
+/*
+ * Full teardown of an initialised root port: bring the immediate
+ * downstream devices to D0, unregister the DesignWare host, prepare the
+ * link for power down and then power the controller down.
+ *
+ * Returns the result of __deinit_controller().
+ */
+static int tegra_pcie_deinit_controller(struct tegra_pcie_dw *pcie)
+{
+       tegra_pcie_downstream_dev_to_D0(pcie);
+       dw_pcie_host_deinit(&pcie->pci.pp);
+       tegra_pcie_dw_pme_turnoff(pcie);
+
+       return __deinit_controller(pcie);
+}
+
+/*
+ * Bring the controller up as a root port.
+ *
+ * Looks up the MSI interrupt (when CONFIG_PCI_MSI is enabled), takes a
+ * runtime PM reference, selects the default pin state, initialises the
+ * controller and, if the link came up, creates the debugfs directory.
+ *
+ * Returns 0 on success, -ENOMEDIUM when the link did not come up (the
+ * controller is powered down again in that case) or another negative
+ * error code. On every failure after pm_runtime_enable() the runtime PM
+ * reference is dropped and runtime PM is disabled.
+ */
+static int tegra_pcie_config_rp(struct tegra_pcie_dw *pcie)
+{
+       struct pcie_port *pp = &pcie->pci.pp;
+       struct device *dev = pcie->dev;
+       char *name;
+       int ret;
+
+       if (IS_ENABLED(CONFIG_PCI_MSI)) {
+               pp->msi_irq = of_irq_get_byname(dev->of_node, "msi");
+               /*
+                * of_irq_get_byname() reports failure as 0 or as a negative
+                * errno; neither is a usable interrupt number.
+                */
+               if (pp->msi_irq <= 0) {
+                       dev_err(dev, "Failed to get MSI interrupt\n");
+                       return pp->msi_irq ? pp->msi_irq : -ENODEV;
+               }
+       }
+
+       pm_runtime_enable(dev);
+
+       ret = pm_runtime_get_sync(dev);
+       if (ret < 0) {
+               dev_err(dev, "Failed to get runtime sync for PCIe dev: %d\n",
+                       ret);
+               /*
+                * pm_runtime_get_sync() raises the usage count even when it
+                * fails, so the reference still has to be dropped.
+                */
+               goto fail_pm_put;
+       }
+
+       ret = pinctrl_pm_select_default_state(dev);
+       if (ret < 0) {
+               dev_err(dev, "Failed to configure sideband pins: %d\n", ret);
+               goto fail_pm_put;
+       }
+
+       /*
+        * A failed init has already powered the controller down, so skip
+        * tegra_pcie_deinit_controller() and only undo runtime PM.
+        */
+       ret = tegra_pcie_init_controller(pcie);
+       if (ret < 0) {
+               dev_err(dev, "Failed to initialize controller: %d\n", ret);
+               goto fail_pm_put;
+       }
+
+       pcie->link_state = tegra_pcie_dw_link_up(&pcie->pci);
+       if (!pcie->link_state) {
+               ret = -ENOMEDIUM;
+               goto fail_host_init;
+       }
+
+       name = devm_kasprintf(dev, GFP_KERNEL, "%pOFP", dev->of_node);
+       if (!name) {
+               ret = -ENOMEM;
+               goto fail_host_init;
+       }
+
+       pcie->debugfs = debugfs_create_dir(name, NULL);
+       if (!pcie->debugfs)
+               dev_err(dev, "Failed to create debugfs\n");
+       else
+               init_debugfs(pcie);
+
+       return 0;
+
+fail_host_init:
+       tegra_pcie_deinit_controller(pcie);
+fail_pm_put:
+       pm_runtime_put_sync(dev);
+       pm_runtime_disable(dev);
+       return ret;
+}
+
+/*
+ * Platform driver probe.
+ *
+ * Parses the device tree, acquires the supplies, clock, resets, PHYs,
+ * register regions, the "intr" interrupt and the BPMP handle, then brings
+ * the controller up as a root port.
+ *
+ * Returns 0 on success and also when the link did not come up
+ * (-ENOMEDIUM from tegra_pcie_config_rp()); in the latter case
+ * pcie->link_state stays false. Any other failure returns a negative
+ * error code.
+ */
+static int tegra_pcie_dw_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct resource *atu_dma_res;
+       struct tegra_pcie_dw *pcie;
+       struct resource *dbi_res;
+       struct pcie_port *pp;
+       struct dw_pcie *pci;
+       struct phy **phys;
+       char *name;
+       int ret;
+       u32 i;
+
+       pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+       if (!pcie)
+               return -ENOMEM;
+
+       pci = &pcie->pci;
+       pci->dev = &pdev->dev;
+       pci->ops = &tegra_dw_pcie_ops;
+       pp = &pci->pp;
+       pcie->dev = &pdev->dev;
+
+       ret = tegra_pcie_dw_parse_dt(pcie);
+       if (ret < 0) {
+               dev_err(dev, "Failed to parse device tree: %d\n", ret);
+               return ret;
+       }
+
+       ret = tegra_pcie_get_slot_regulators(pcie);
+       if (ret < 0) {
+               dev_err(dev, "Failed to get slot regulators: %d\n", ret);
+               return ret;
+       }
+
+       pcie->pex_ctl_supply = devm_regulator_get(dev, "vddio-pex-ctl");
+       if (IS_ERR(pcie->pex_ctl_supply)) {
+               ret = PTR_ERR(pcie->pex_ctl_supply);
+               if (ret != -EPROBE_DEFER)
+                       dev_err(dev, "Failed to get regulator: %ld\n",
+                               PTR_ERR(pcie->pex_ctl_supply));
+               return ret;
+       }
+
+       pcie->core_clk = devm_clk_get(dev, "core");
+       if (IS_ERR(pcie->core_clk)) {
+               dev_err(dev, "Failed to get core clock: %ld\n",
+                       PTR_ERR(pcie->core_clk));
+               return PTR_ERR(pcie->core_clk);
+       }
+
+       pcie->appl_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+                                                     "appl");
+       if (!pcie->appl_res) {
+               dev_err(dev, "Failed to find \"appl\" region\n");
+               return -ENODEV;
+       }
+
+       pcie->appl_base = devm_ioremap_resource(dev, pcie->appl_res);
+       if (IS_ERR(pcie->appl_base))
+               return PTR_ERR(pcie->appl_base);
+
+       pcie->core_apb_rst = devm_reset_control_get(dev, "apb");
+       if (IS_ERR(pcie->core_apb_rst)) {
+               dev_err(dev, "Failed to get APB reset: %ld\n",
+                       PTR_ERR(pcie->core_apb_rst));
+               return PTR_ERR(pcie->core_apb_rst);
+       }
+
+       phys = devm_kcalloc(dev, pcie->phy_count, sizeof(*phys), GFP_KERNEL);
+       if (!phys)
+               return -ENOMEM;
+
+       /* One PHY per lane, named "p2u-<index>" */
+       for (i = 0; i < pcie->phy_count; i++) {
+               name = kasprintf(GFP_KERNEL, "p2u-%u", i);
+               if (!name) {
+                       dev_err(dev, "Failed to create P2U string\n");
+                       return -ENOMEM;
+               }
+               phys[i] = devm_phy_get(dev, name);
+               kfree(name);
+               if (IS_ERR(phys[i])) {
+                       ret = PTR_ERR(phys[i]);
+                       if (ret != -EPROBE_DEFER)
+                               dev_err(dev, "Failed to get PHY: %d\n", ret);
+                       return ret;
+               }
+       }
+
+       pcie->phys = phys;
+
+       dbi_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
+       if (!dbi_res) {
+               dev_err(dev, "Failed to find \"dbi\" region\n");
+               return -ENODEV;
+       }
+       pcie->dbi_res = dbi_res;
+
+       pci->dbi_base = devm_ioremap_resource(dev, dbi_res);
+       if (IS_ERR(pci->dbi_base))
+               return PTR_ERR(pci->dbi_base);
+
+       /* Tegra HW locates DBI2 at a fixed offset from DBI */
+       pci->dbi_base2 = pci->dbi_base + 0x1000;
+
+       atu_dma_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+                                                  "atu_dma");
+       if (!atu_dma_res) {
+               dev_err(dev, "Failed to find \"atu_dma\" region\n");
+               return -ENODEV;
+       }
+       pcie->atu_dma_res = atu_dma_res;
+
+       pci->atu_base = devm_ioremap_resource(dev, atu_dma_res);
+       if (IS_ERR(pci->atu_base))
+               return PTR_ERR(pci->atu_base);
+
+       pcie->core_rst = devm_reset_control_get(dev, "core");
+       if (IS_ERR(pcie->core_rst)) {
+               dev_err(dev, "Failed to get core reset: %ld\n",
+                       PTR_ERR(pcie->core_rst));
+               return PTR_ERR(pcie->core_rst);
+       }
+
+       /*
+        * platform_get_irq_byname() reports failure as a negative errno, so
+        * checking only for 0 would pass the error on as an IRQ number.
+        */
+       pp->irq = platform_get_irq_byname(pdev, "intr");
+       if (pp->irq <= 0) {
+               dev_err(dev, "Failed to get \"intr\" interrupt\n");
+               return pp->irq ? pp->irq : -ENODEV;
+       }
+
+       ret = devm_request_irq(dev, pp->irq, tegra_pcie_irq_handler,
+                              IRQF_SHARED, "tegra-pcie-intr", pcie);
+       if (ret) {
+               dev_err(dev, "Failed to request IRQ %d: %d\n", pp->irq, ret);
+               return ret;
+       }
+
+       pcie->bpmp = tegra_bpmp_get(dev);
+       if (IS_ERR(pcie->bpmp))
+               return PTR_ERR(pcie->bpmp);
+
+       platform_set_drvdata(pdev, pcie);
+
+       ret = tegra_pcie_config_rp(pcie);
+       if (ret && ret != -ENOMEDIUM) {
+               tegra_bpmp_put(pcie->bpmp);
+               return ret;
+       }
+
+       /* A link that is down (-ENOMEDIUM) does not fail the probe */
+       return 0;
+}
+
+/*
+ * Platform driver remove.
+ *
+ * When the link was up at probe time the debugfs entries are removed, the
+ * controller is torn down and the runtime PM reference is released. When
+ * the link was down, tegra_pcie_config_rp() already powered the controller
+ * down and disabled runtime PM, but probe still returned 0 while holding
+ * the BPMP reference, so that reference is dropped in both cases.
+ */
+static int tegra_pcie_dw_remove(struct platform_device *pdev)
+{
+       struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);
+
+       if (pcie->link_state) {
+               debugfs_remove_recursive(pcie->debugfs);
+               tegra_pcie_deinit_controller(pcie);
+               pm_runtime_put_sync(pcie->dev);
+               pm_runtime_disable(pcie->dev);
+       }
+
+       tegra_bpmp_put(pcie->bpmp);
+
+       return 0;
+}
+
+/*
+ * suspend_late callback: clear the HW_HOT_RST mode field and set the
+ * HW_HOT_RST enable bit in APPL_CTRL. Does nothing when the link was not
+ * up. Always returns 0.
+ */
+static int tegra_pcie_dw_suspend_late(struct device *dev)
+{
+       struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+       u32 ctrl;
+
+       if (!pcie->link_state)
+               return 0;
+
+       /* Enable HW_HOT_RST mode */
+       ctrl = appl_readl(pcie, APPL_CTRL);
+       ctrl = (ctrl & ~(APPL_CTRL_HW_HOT_RST_MODE_MASK <<
+                        APPL_CTRL_HW_HOT_RST_MODE_SHIFT)) |
+              APPL_CTRL_HW_HOT_RST_EN;
+       appl_writel(pcie, ctrl, APPL_CTRL);
+
+       return 0;
+}
+
+/*
+ * suspend_noirq callback: save the MSI interrupt enable register, bring
+ * the immediate downstream devices to D0, prepare the link for power down
+ * and power the controller down. Does nothing when the link was not up.
+ *
+ * Returns the result of __deinit_controller().
+ */
+static int tegra_pcie_dw_suspend_noirq(struct device *dev)
+{
+       struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+
+       if (!pcie->link_state)
+               return 0;
+
+       /* Save MSI interrupt vector */
+       pcie->msi_ctrl_int = dw_pcie_readl_dbi(&pcie->pci,
+                                              PORT_LOGIC_MSI_CTRL_INT_0_EN);
+       tegra_pcie_downstream_dev_to_D0(pcie);
+       tegra_pcie_dw_pme_turnoff(pcie);
+
+       return __deinit_controller(pcie);
+}
+
+/*
+ * resume_noirq callback: reconfigure the controller with HW_HOT_RST mode
+ * enabled, rerun the host init hook and restore the MSI interrupt enable
+ * register saved at suspend. Does nothing when the link was not up.
+ *
+ * Returns 0 on success or a negative error code. When the host init hook
+ * fails the controller is powered down again and that error is returned.
+ */
+static int tegra_pcie_dw_resume_noirq(struct device *dev)
+{
+       struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+       int ret;
+
+       if (!pcie->link_state)
+               return 0;
+
+       ret = tegra_pcie_config_controller(pcie, true);
+       if (ret < 0)
+               return ret;
+
+       ret = tegra_pcie_dw_host_init(&pcie->pci.pp);
+       if (ret < 0) {
+               dev_err(dev, "Failed to init host: %d\n", ret);
+               goto fail_host_init;
+       }
+
+       /* Restore MSI interrupt vector */
+       dw_pcie_writel_dbi(&pcie->pci, PORT_LOGIC_MSI_CTRL_INT_0_EN,
+                          pcie->msi_ctrl_int);
+
+       return 0;
+
+fail_host_init:
+       /*
+        * Return the host init error, not the teardown result: a successful
+        * teardown returns 0 and would make a failed resume look successful.
+        */
+       __deinit_controller(pcie);
+       return ret;
+}
+
+/*
+ * resume_early callback: set the HW_HOT_RST mode field in APPL_CTRL to
+ * the IMDT_RST value and clear the HW_HOT_RST enable bit. Does nothing
+ * when the link was not up. Always returns 0.
+ */
+static int tegra_pcie_dw_resume_early(struct device *dev)
+{
+       struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+       u32 ctrl;
+
+       if (!pcie->link_state)
+               return 0;
+
+       /* Disable HW_HOT_RST mode */
+       ctrl = appl_readl(pcie, APPL_CTRL);
+       ctrl = (ctrl & ~(APPL_CTRL_HW_HOT_RST_MODE_MASK <<
+                        APPL_CTRL_HW_HOT_RST_MODE_SHIFT)) |
+              (APPL_CTRL_HW_HOT_RST_MODE_IMDT_RST <<
+               APPL_CTRL_HW_HOT_RST_MODE_SHIFT);
+       ctrl &= ~APPL_CTRL_HW_HOT_RST_EN;
+       appl_writel(pcie, ctrl, APPL_CTRL);
+
+       return 0;
+}
+
+/*
+ * Platform driver shutdown: remove the debugfs entries, bring the
+ * immediate downstream devices to D0, disable the controller interrupt
+ * (and the MSI interrupt when CONFIG_PCI_MSI is enabled), prepare the link
+ * for power down and power the controller down. Does nothing when the
+ * link was not up.
+ */
+static void tegra_pcie_dw_shutdown(struct platform_device *pdev)
+{
+       struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);
+
+       if (!pcie->link_state)
+               return;
+
+       debugfs_remove_recursive(pcie->debugfs);
+       tegra_pcie_downstream_dev_to_D0(pcie);
+
+       disable_irq(pcie->pci.pp.irq);
+       if (IS_ENABLED(CONFIG_PCI_MSI))
+               disable_irq(pcie->pci.pp.msi_irq);
+
+       tegra_pcie_dw_pme_turnoff(pcie);
+       /* The teardown result is ignored here */
+       __deinit_controller(pcie);
+}
+
+/* Device tree compatibles handled by this driver */
+static const struct of_device_id tegra_pcie_dw_of_match[] = {
+       {
+               .compatible = "nvidia,tegra194-pcie",
+       },
+       {},
+};
+
+/* System sleep callbacks for the late/noirq/early phases */
+static const struct dev_pm_ops tegra_pcie_dw_pm_ops = {
+       .suspend_late = tegra_pcie_dw_suspend_late,
+       .suspend_noirq = tegra_pcie_dw_suspend_noirq,
+       .resume_noirq = tegra_pcie_dw_resume_noirq,
+       .resume_early = tegra_pcie_dw_resume_early,
+};
+
+/* Platform driver registration */
+static struct platform_driver tegra_pcie_dw_driver = {
+       .probe = tegra_pcie_dw_probe,
+       .remove = tegra_pcie_dw_remove,
+       .shutdown = tegra_pcie_dw_shutdown,
+       .driver = {
+               .name   = "tegra194-pcie",
+               .pm = &tegra_pcie_dw_pm_ops,
+               .of_match_table = tegra_pcie_dw_of_match,
+       },
+};
+module_platform_driver(tegra_pcie_dw_driver);
+
+MODULE_DEVICE_TABLE(of, tegra_pcie_dw_of_match);
+
+MODULE_AUTHOR("Vidya Sagar <vidyas@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA PCIe host controller driver");
+MODULE_LICENSE("GPL v2");
index c742881..c8cb9c5 100644 (file)
@@ -43,9 +43,8 @@ static struct pci_config_window *gen_pci_init(struct device *dev,
                goto err_out;
        }
 
-       err = devm_add_action(dev, gen_pci_unmap_cfg, cfg);
+       err = devm_add_action_or_reset(dev, gen_pci_unmap_cfg, cfg);
        if (err) {
-               gen_pci_unmap_cfg(cfg);
                goto err_out;
        }
        return cfg;
index 0ca73c8..f1f3002 100644 (file)
@@ -2809,6 +2809,48 @@ static void put_hvpcibus(struct hv_pcibus_device *hbus)
                complete(&hbus->remove_event);
 }
 
+#define HVPCI_DOM_MAP_SIZE (64 * 1024)
+static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE);
+
+/*
+ * PCI domain number 0 is used by emulated devices on Gen1 VMs, so define 0
+ * as invalid for passthrough PCI devices of this driver.
+ */
+#define HVPCI_DOM_INVALID 0
+
+/**
+ * hv_get_dom_num() - Get a valid PCI domain number
+ * @dom: Requested domain number
+ *
+ * Claim @dom in hvpci_dom_map if it is not in use. Otherwise scan the map
+ * and claim the first clear bit that can be set.
+ *
+ * Return: domain number on success, HVPCI_DOM_INVALID on failure
+ */
+static u16 hv_get_dom_num(u16 dom)
+{
+       unsigned int candidate;
+
+       if (!test_and_set_bit(dom, hvpci_dom_map))
+               return dom;
+
+       for_each_clear_bit(candidate, hvpci_dom_map, HVPCI_DOM_MAP_SIZE) {
+               /* Re-check atomically; skip the bit if it is already set */
+               if (test_and_set_bit(candidate, hvpci_dom_map))
+                       continue;
+
+               return candidate;
+       }
+
+       return HVPCI_DOM_INVALID;
+}
+
+/**
+ * hv_put_dom_num() - Mark the PCI domain number as free
+ * @dom: Domain number to be freed
+ *
+ * Clears the bit for @dom in hvpci_dom_map so that hv_get_dom_num() can
+ * hand the number out again.
+ */
+static void hv_put_dom_num(u16 dom)
+{
+       clear_bit(dom, hvpci_dom_map);
+}
+
 /**
  * hv_pci_probe() - New VMBus channel probe, for a root PCI bus
  * @hdev:      VMBus's tracking struct for this root PCI bus
@@ -2820,6 +2862,7 @@ static int hv_pci_probe(struct hv_device *hdev,
                        const struct hv_vmbus_device_id *dev_id)
 {
        struct hv_pcibus_device *hbus;
+       u16 dom_req, dom;
        char *name;
        int ret;
 
@@ -2835,19 +2878,34 @@ static int hv_pci_probe(struct hv_device *hdev,
        hbus->state = hv_pcibus_init;
 
        /*
-        * The PCI bus "domain" is what is called "segment" in ACPI and
-        * other specs.  Pull it from the instance ID, to get something
-        * unique.  Bytes 8 and 9 are what is used in Windows guests, so
-        * do the same thing for consistency.  Note that, since this code
-        * only runs in a Hyper-V VM, Hyper-V can (and does) guarantee
-        * that (1) the only domain in use for something that looks like
-        * a physical PCI bus (which is actually emulated by the
-        * hypervisor) is domain 0 and (2) there will be no overlap
-        * between domains derived from these instance IDs in the same
-        * VM.
+        * The PCI bus "domain" is what is called "segment" in ACPI and other
+        * specs. Pull it from the instance ID, to get something usually
+        * unique. In rare cases of collision, we will find out another number
+        * not in use.
+        *
+        * Note that, since this code only runs in a Hyper-V VM, Hyper-V
+        * together with this guest driver can guarantee that (1) The only
+        * domain used by Gen1 VMs for something that looks like a physical
+        * PCI bus (which is actually emulated by the hypervisor) is domain 0.
+        * (2) There will be no overlap between domains (after fixing possible
+        * collisions) in the same VM.
         */
-       hbus->sysdata.domain = hdev->dev_instance.b[9] |
-                              hdev->dev_instance.b[8] << 8;
+       dom_req = hdev->dev_instance.b[5] << 8 | hdev->dev_instance.b[4];
+       dom = hv_get_dom_num(dom_req);
+
+       if (dom == HVPCI_DOM_INVALID) {
+               dev_err(&hdev->device,
+                       "Unable to use dom# 0x%hx or other numbers", dom_req);
+               ret = -EINVAL;
+               goto free_bus;
+       }
+
+       if (dom != dom_req)
+               dev_info(&hdev->device,
+                        "PCI dom# 0x%hx has collision, using 0x%hx",
+                        dom_req, dom);
+
+       hbus->sysdata.domain = dom;
 
        hbus->hdev = hdev;
        refcount_set(&hbus->remove_lock, 1);
@@ -2862,7 +2920,7 @@ static int hv_pci_probe(struct hv_device *hdev,
                                           hbus->sysdata.domain);
        if (!hbus->wq) {
                ret = -ENOMEM;
-               goto free_bus;
+               goto free_dom;
        }
 
        ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
@@ -2946,6 +3004,8 @@ close:
        vmbus_close(hdev->channel);
 destroy_wq:
        destroy_workqueue(hbus->wq);
+free_dom:
+       hv_put_dom_num(hbus->sysdata.domain);
 free_bus:
        free_page((unsigned long)hbus);
        return ret;
@@ -3008,8 +3068,8 @@ static int hv_pci_remove(struct hv_device *hdev)
                /* Remove the bus from PCI's point of view. */
                pci_lock_rescan_remove();
                pci_stop_root_bus(hbus->pci_bus);
-               pci_remove_root_bus(hbus->pci_bus);
                hv_pci_remove_slots(hbus);
+               pci_remove_root_bus(hbus->pci_bus);
                pci_unlock_rescan_remove();
                hbus->state = hv_pcibus_removed;
        }
@@ -3027,6 +3087,9 @@ static int hv_pci_remove(struct hv_device *hdev)
        put_hvpcibus(hbus);
        wait_for_completion(&hbus->remove_event);
        destroy_workqueue(hbus->wq);
+
+       hv_put_dom_num(hbus->sysdata.domain);
+
        free_page((unsigned long)hbus);
        return 0;
 }
@@ -3058,6 +3121,9 @@ static void __exit exit_hv_pci_drv(void)
 
 static int __init init_hv_pci_drv(void)
 {
+       /* Set the invalid domain number's bit, so it will not be used */
+       set_bit(HVPCI_DOM_INVALID, hvpci_dom_map);
+
        /* Initialize PCI block r/w interface */
        hvpci_block_ops.read_block = hv_read_config_block;
        hvpci_block_ops.write_block = hv_write_config_block;
index 9a917b2..673a172 100644 (file)
@@ -2237,14 +2237,15 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie)
                err = of_pci_get_devfn(port);
                if (err < 0) {
                        dev_err(dev, "failed to parse address: %d\n", err);
-                       return err;
+                       goto err_node_put;
                }
 
                index = PCI_SLOT(err);
 
                if (index < 1 || index > soc->num_ports) {
                        dev_err(dev, "invalid port number: %d\n", index);
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto err_node_put;
                }
 
                index--;
@@ -2253,12 +2254,13 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie)
                if (err < 0) {
                        dev_err(dev, "failed to parse # of lanes: %d\n",
                                err);
-                       return err;
+                       goto err_node_put;
                }
 
                if (value > 16) {
                        dev_err(dev, "invalid # of lanes: %u\n", value);
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto err_node_put;
                }
 
                lanes |= value << (index << 3);
@@ -2272,13 +2274,15 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie)
                lane += value;
 
                rp = devm_kzalloc(dev, sizeof(*rp), GFP_KERNEL);
-               if (!rp)
-                       return -ENOMEM;
+               if (!rp) {
+                       err = -ENOMEM;
+                       goto err_node_put;
+               }
 
                err = of_address_to_resource(port, 0, &rp->regs);
                if (err < 0) {
                        dev_err(dev, "failed to parse address: %d\n", err);
-                       return err;
+                       goto err_node_put;
                }
 
                INIT_LIST_HEAD(&rp->list);
@@ -2330,6 +2334,10 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie)
                return err;
 
        return 0;
+
+err_node_put:
+       of_node_put(port);
+       return err;
 }
 
 /*
index 5a3550b..9ee6200 100644 (file)
@@ -93,12 +93,9 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
        pcie->need_ib_cfg = of_property_read_bool(np, "dma-ranges");
 
        /* PHY use is optional */
-       pcie->phy = devm_phy_get(dev, "pcie-phy");
-       if (IS_ERR(pcie->phy)) {
-               if (PTR_ERR(pcie->phy) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
-               pcie->phy = NULL;
-       }
+       pcie->phy = devm_phy_optional_get(dev, "pcie-phy");
+       if (IS_ERR(pcie->phy))
+               return PTR_ERR(pcie->phy);
 
        ret = devm_of_pci_get_host_bridge_resources(dev, 0, 0xff, &resources,
                                                    &iobase);
index 80601e1..626a7c3 100644 (file)
@@ -73,6 +73,7 @@
 #define PCIE_MSI_VECTOR                0x0c0
 
 #define PCIE_CONF_VEND_ID      0x100
+#define PCIE_CONF_DEVICE_ID    0x102
 #define PCIE_CONF_CLASS_ID     0x106
 
 #define PCIE_INT_MASK          0x420
@@ -141,12 +142,16 @@ struct mtk_pcie_port;
 /**
  * struct mtk_pcie_soc - differentiate between host generations
  * @need_fix_class_id: whether this host's class ID needed to be fixed or not
+ * @need_fix_device_id: whether this host's device ID needed to be fixed or not
+ * @device_id: device ID which this host need to be fixed
  * @ops: pointer to configuration access functions
  * @startup: pointer to controller setting functions
  * @setup_irq: pointer to initialize IRQ functions
  */
 struct mtk_pcie_soc {
        bool need_fix_class_id;
+       bool need_fix_device_id;
+       unsigned int device_id;
        struct pci_ops *ops;
        int (*startup)(struct mtk_pcie_port *port);
        int (*setup_irq)(struct mtk_pcie_port *port, struct device_node *node);
@@ -630,8 +635,6 @@ static void mtk_pcie_intr_handler(struct irq_desc *desc)
        }
 
        chained_irq_exit(irqchip, desc);
-
-       return;
 }
 
 static int mtk_pcie_setup_irq(struct mtk_pcie_port *port,
@@ -696,6 +699,9 @@ static int mtk_pcie_startup_port_v2(struct mtk_pcie_port *port)
                writew(val, port->base + PCIE_CONF_CLASS_ID);
        }
 
+       if (soc->need_fix_device_id)
+               writew(soc->device_id, port->base + PCIE_CONF_DEVICE_ID);
+
        /* 100ms timeout value should be enough for Gen1/2 training */
        err = readl_poll_timeout(port->base + PCIE_LINK_STATUS_V2, val,
                                 !!(val & PCIE_PORT_LINKUP_V2), 20,
@@ -1216,11 +1222,21 @@ static const struct mtk_pcie_soc mtk_pcie_soc_mt7622 = {
        .setup_irq = mtk_pcie_setup_irq,
 };
 
+static const struct mtk_pcie_soc mtk_pcie_soc_mt7629 = {
+       .need_fix_class_id = true,
+       .need_fix_device_id = true,
+       .device_id = PCI_DEVICE_ID_MEDIATEK_7629,
+       .ops = &mtk_pcie_ops_v2,
+       .startup = mtk_pcie_startup_port_v2,
+       .setup_irq = mtk_pcie_setup_irq,
+};
+
 static const struct of_device_id mtk_pcie_ids[] = {
        { .compatible = "mediatek,mt2701-pcie", .data = &mtk_pcie_soc_v1 },
        { .compatible = "mediatek,mt7623-pcie", .data = &mtk_pcie_soc_v1 },
        { .compatible = "mediatek,mt2712-pcie", .data = &mtk_pcie_soc_mt2712 },
        { .compatible = "mediatek,mt7622-pcie", .data = &mtk_pcie_soc_mt7622 },
+       { .compatible = "mediatek,mt7629-pcie", .data = &mtk_pcie_soc_mt7629 },
        {},
 };
 
index 672e633..a45a644 100644 (file)
@@ -88,6 +88,7 @@
 #define  AMAP_CTRL_TYPE_MASK           3
 
 #define PAB_EXT_PEX_AMAP_SIZEN(win)    PAB_EXT_REG_ADDR(0xbef0, win)
+#define PAB_EXT_PEX_AMAP_AXI_WIN(win)  PAB_EXT_REG_ADDR(0xb4a0, win)
 #define PAB_PEX_AMAP_AXI_WIN(win)      PAB_REG_ADDR(0x4ba4, win)
 #define PAB_PEX_AMAP_PEX_WIN_L(win)    PAB_REG_ADDR(0x4ba8, win)
 #define PAB_PEX_AMAP_PEX_WIN_H(win)    PAB_REG_ADDR(0x4bac, win)
@@ -462,7 +463,7 @@ static int mobiveil_pcie_parse_dt(struct mobiveil_pcie *pcie)
 }
 
 static void program_ib_windows(struct mobiveil_pcie *pcie, int win_num,
-                              u64 pci_addr, u32 type, u64 size)
+                              u64 cpu_addr, u64 pci_addr, u32 type, u64 size)
 {
        u32 value;
        u64 size64 = ~(size - 1);
@@ -482,7 +483,10 @@ static void program_ib_windows(struct mobiveil_pcie *pcie, int win_num,
        csr_writel(pcie, upper_32_bits(size64),
                   PAB_EXT_PEX_AMAP_SIZEN(win_num));
 
-       csr_writel(pcie, pci_addr, PAB_PEX_AMAP_AXI_WIN(win_num));
+       csr_writel(pcie, lower_32_bits(cpu_addr),
+                  PAB_PEX_AMAP_AXI_WIN(win_num));
+       csr_writel(pcie, upper_32_bits(cpu_addr),
+                  PAB_EXT_PEX_AMAP_AXI_WIN(win_num));
 
        csr_writel(pcie, lower_32_bits(pci_addr),
                   PAB_PEX_AMAP_PEX_WIN_L(win_num));
@@ -624,7 +628,7 @@ static int mobiveil_host_init(struct mobiveil_pcie *pcie)
                           CFG_WINDOW_TYPE, resource_size(pcie->ob_io_res));
 
        /* memory inbound translation window */
-       program_ib_windows(pcie, WIN_NUM_0, 0, MEM_WINDOW_TYPE, IB_WIN_SIZE);
+       program_ib_windows(pcie, WIN_NUM_0, 0, 0, MEM_WINDOW_TYPE, IB_WIN_SIZE);
 
        /* Get the I/O and memory ranges from DT */
        resource_list_for_each_entry(win, &pcie->resources) {
index 8d20f17..ef8e677 100644 (file)
@@ -608,29 +608,29 @@ static int rockchip_pcie_parse_host_dt(struct rockchip_pcie *rockchip)
 
        rockchip->vpcie12v = devm_regulator_get_optional(dev, "vpcie12v");
        if (IS_ERR(rockchip->vpcie12v)) {
-               if (PTR_ERR(rockchip->vpcie12v) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
+               if (PTR_ERR(rockchip->vpcie12v) != -ENODEV)
+                       return PTR_ERR(rockchip->vpcie12v);
                dev_info(dev, "no vpcie12v regulator found\n");
        }
 
        rockchip->vpcie3v3 = devm_regulator_get_optional(dev, "vpcie3v3");
        if (IS_ERR(rockchip->vpcie3v3)) {
-               if (PTR_ERR(rockchip->vpcie3v3) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
+               if (PTR_ERR(rockchip->vpcie3v3) != -ENODEV)
+                       return PTR_ERR(rockchip->vpcie3v3);
                dev_info(dev, "no vpcie3v3 regulator found\n");
        }
 
        rockchip->vpcie1v8 = devm_regulator_get_optional(dev, "vpcie1v8");
        if (IS_ERR(rockchip->vpcie1v8)) {
-               if (PTR_ERR(rockchip->vpcie1v8) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
+               if (PTR_ERR(rockchip->vpcie1v8) != -ENODEV)
+                       return PTR_ERR(rockchip->vpcie1v8);
                dev_info(dev, "no vpcie1v8 regulator found\n");
        }
 
        rockchip->vpcie0v9 = devm_regulator_get_optional(dev, "vpcie0v9");
        if (IS_ERR(rockchip->vpcie0v9)) {
-               if (PTR_ERR(rockchip->vpcie0v9) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
+               if (PTR_ERR(rockchip->vpcie0v9) != -ENODEV)
+                       return PTR_ERR(rockchip->vpcie0v9);
                dev_info(dev, "no vpcie0v9 regulator found\n");
        }
 
index 4575e0c..a35d3f3 100644 (file)
@@ -31,6 +31,9 @@
 #define PCI_REG_VMLOCK         0x70
 #define MB2_SHADOW_EN(vmlock)  (vmlock & 0x2)
 
+#define MB2_SHADOW_OFFSET      0x2000
+#define MB2_SHADOW_SIZE                16
+
 enum vmd_features {
        /*
         * Device may contain registers which hint the physical location of the
@@ -94,6 +97,7 @@ struct vmd_dev {
        struct resource         resources[3];
        struct irq_domain       *irq_domain;
        struct pci_bus          *bus;
+       u8                      busn_start;
 
        struct dma_map_ops      dma_ops;
        struct dma_domain       dma_domain;
@@ -440,7 +444,8 @@ static char __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus,
                                  unsigned int devfn, int reg, int len)
 {
        char __iomem *addr = vmd->cfgbar +
-                            (bus->number << 20) + (devfn << 12) + reg;
+                            ((bus->number - vmd->busn_start) << 20) +
+                            (devfn << 12) + reg;
 
        if ((addr - vmd->cfgbar) + len >=
            resource_size(&vmd->dev->resource[VMD_CFGBAR]))
@@ -563,7 +568,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
        unsigned long flags;
        LIST_HEAD(resources);
        resource_size_t offset[2] = {0};
-       resource_size_t membar2_offset = 0x2000, busn_start = 0;
+       resource_size_t membar2_offset = 0x2000;
        struct pci_bus *child;
 
        /*
@@ -576,7 +581,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
                u32 vmlock;
                int ret;
 
-               membar2_offset = 0x2018;
+               membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
                ret = pci_read_config_dword(vmd->dev, PCI_REG_VMLOCK, &vmlock);
                if (ret || vmlock == ~0)
                        return -ENODEV;
@@ -588,9 +593,9 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
                        if (!membar2)
                                return -ENOMEM;
                        offset[0] = vmd->dev->resource[VMD_MEMBAR1].start -
-                                               readq(membar2 + 0x2008);
+                                       readq(membar2 + MB2_SHADOW_OFFSET);
                        offset[1] = vmd->dev->resource[VMD_MEMBAR2].start -
-                                               readq(membar2 + 0x2010);
+                                       readq(membar2 + MB2_SHADOW_OFFSET + 8);
                        pci_iounmap(vmd->dev, membar2);
                }
        }
@@ -606,14 +611,14 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
                pci_read_config_dword(vmd->dev, PCI_REG_VMCONFIG, &vmconfig);
                if (BUS_RESTRICT_CAP(vmcap) &&
                    (BUS_RESTRICT_CFG(vmconfig) == 0x1))
-                       busn_start = 128;
+                       vmd->busn_start = 128;
        }
 
        res = &vmd->dev->resource[VMD_CFGBAR];
        vmd->resources[0] = (struct resource) {
                .name  = "VMD CFGBAR",
-               .start = busn_start,
-               .end   = busn_start + (resource_size(res) >> 20) - 1,
+               .start = vmd->busn_start,
+               .end   = vmd->busn_start + (resource_size(res) >> 20) - 1,
                .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED,
        };
 
@@ -681,8 +686,8 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
        pci_add_resource_offset(&resources, &vmd->resources[1], offset[0]);
        pci_add_resource_offset(&resources, &vmd->resources[2], offset[1]);
 
-       vmd->bus = pci_create_root_bus(&vmd->dev->dev, busn_start, &vmd_ops,
-                                      sd, &resources);
+       vmd->bus = pci_create_root_bus(&vmd->dev->dev, vmd->busn_start,
+                                      &vmd_ops, sd, &resources);
        if (!vmd->bus) {
                pci_free_resource_list(&resources);
                irq_domain_remove(vmd->irq_domain);
index 603eadf..d0559d2 100644 (file)
@@ -563,7 +563,6 @@ cleanup_slots(void)
        }
 cleanup_null:
        up_write(&list_rwsem);
-       return;
 }
 
 int
index 16bbb18..b8aacb4 100644 (file)
@@ -173,7 +173,6 @@ static void pci_print_IRQ_route(void)
                dbg("%d %d %d %d\n", tbus, tdevice >> 3, tdevice & 0x7, tslot);
 
        }
-       return;
 }
 
 
index b7f4e1f..68de958 100644 (file)
@@ -1872,8 +1872,6 @@ static void interrupt_event_handler(struct controller *ctrl)
                        }
                }               /* End of FOR loop */
        }
-
-       return;
 }
 
 
@@ -1943,8 +1941,6 @@ void cpqhp_pushbutton_thread(struct timer_list *t)
 
                p_slot->state = STATIC_STATE;
        }
-
-       return;
 }
 
 
index 918ff8d..70e879b 100644 (file)
 
 #ifndef CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM
 
-static inline void compaq_nvram_init(void __iomem *rom_start)
-{
-       return;
-}
+static inline void compaq_nvram_init(void __iomem *rom_start) { }
 
 static inline int compaq_nvram_load(void __iomem *rom_start, struct controller *ctrl)
 {
index 5e8caf7..5c93aa1 100644 (file)
@@ -1941,6 +1941,7 @@ static int __init update_bridge_ranges(struct bus_node **bus)
                                                break;
                                        case PCI_HEADER_TYPE_BRIDGE:
                                                function = 0x8;
+                                               /* fall through */
                                        case PCI_HEADER_TYPE_MULTIBRIDGE:
                                                /* We assume here that only 1 bus behind the bridge
                                                   TO DO: add functionality for several:
index 8c51a04..654c972 100644 (file)
@@ -110,9 +110,9 @@ struct controller {
  *
  * @OFF_STATE: slot is powered off, no subordinate devices are enumerated
  * @BLINKINGON_STATE: slot will be powered on after the 5 second delay,
- *     green led is blinking
+ *     Power Indicator is blinking
  * @BLINKINGOFF_STATE: slot will be powered off after the 5 second delay,
- *     green led is blinking
+ *     Power Indicator is blinking
  * @POWERON_STATE: slot is currently powering on
  * @POWEROFF_STATE: slot is currently powering off
  * @ON_STATE: slot is powered on, subordinate devices have been enumerated
@@ -167,12 +167,11 @@ int pciehp_power_on_slot(struct controller *ctrl);
 void pciehp_power_off_slot(struct controller *ctrl);
 void pciehp_get_power_status(struct controller *ctrl, u8 *status);
 
-void pciehp_set_attention_status(struct controller *ctrl, u8 status);
+#define INDICATOR_NOOP -1      /* Leave indicator unchanged */
+void pciehp_set_indicators(struct controller *ctrl, int pwr, int attn);
+
 void pciehp_get_latch_status(struct controller *ctrl, u8 *status);
 int pciehp_query_power_fault(struct controller *ctrl);
-void pciehp_green_led_on(struct controller *ctrl);
-void pciehp_green_led_off(struct controller *ctrl);
-void pciehp_green_led_blink(struct controller *ctrl);
 bool pciehp_card_present(struct controller *ctrl);
 bool pciehp_card_present_or_link_active(struct controller *ctrl);
 int pciehp_check_link_status(struct controller *ctrl);
index 6ad0d86..b3122c1 100644 (file)
@@ -95,15 +95,20 @@ static void cleanup_slot(struct controller *ctrl)
 }
 
 /*
- * set_attention_status - Turns the Amber LED for a slot on, off or blink
+ * set_attention_status - Turns the Attention Indicator on, off or blinking
  */
 static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
 {
        struct controller *ctrl = to_ctrl(hotplug_slot);
        struct pci_dev *pdev = ctrl->pcie->port;
 
+       if (status)
+               status <<= PCI_EXP_SLTCTL_ATTN_IND_SHIFT;
+       else
+               status = PCI_EXP_SLTCTL_ATTN_IND_OFF;
+
        pci_config_pm_runtime_get(pdev);
-       pciehp_set_attention_status(ctrl, status);
+       pciehp_set_indicators(ctrl, INDICATOR_NOOP, status);
        pci_config_pm_runtime_put(pdev);
        return 0;
 }
index 631ced0..21af7b1 100644 (file)
 
 static void set_slot_off(struct controller *ctrl)
 {
-       /* turn off slot, turn on Amber LED, turn off Green LED if supported*/
+       /*
+        * Turn off slot, turn on attention indicator, turn off power
+        * indicator
+        */
        if (POWER_CTRL(ctrl)) {
                pciehp_power_off_slot(ctrl);
 
@@ -42,8 +45,8 @@ static void set_slot_off(struct controller *ctrl)
                msleep(1000);
        }
 
-       pciehp_green_led_off(ctrl);
-       pciehp_set_attention_status(ctrl, 1);
+       pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+                             PCI_EXP_SLTCTL_ATTN_IND_ON);
 }
 
 /**
@@ -65,7 +68,8 @@ static int board_added(struct controller *ctrl)
                        return retval;
        }
 
-       pciehp_green_led_blink(ctrl);
+       pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK,
+                             INDICATOR_NOOP);
 
        /* Check link training status */
        retval = pciehp_check_link_status(ctrl);
@@ -90,8 +94,8 @@ static int board_added(struct controller *ctrl)
                }
        }
 
-       pciehp_green_led_on(ctrl);
-       pciehp_set_attention_status(ctrl, 0);
+       pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON,
+                             PCI_EXP_SLTCTL_ATTN_IND_OFF);
        return 0;
 
 err_exit:
@@ -100,7 +104,7 @@ err_exit:
 }
 
 /**
- * remove_board - Turns off slot and LEDs
+ * remove_board - Turn off slot and Power Indicator
  * @ctrl: PCIe hotplug controller where board is being removed
  * @safe_removal: whether the board is safely removed (versus surprise removed)
  */
@@ -123,8 +127,8 @@ static void remove_board(struct controller *ctrl, bool safe_removal)
                           &ctrl->pending_events);
        }
 
-       /* turn off Green LED */
-       pciehp_green_led_off(ctrl);
+       pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+                             INDICATOR_NOOP);
 }
 
 static int pciehp_enable_slot(struct controller *ctrl);
@@ -171,9 +175,9 @@ void pciehp_handle_button_press(struct controller *ctrl)
                        ctrl_info(ctrl, "Slot(%s) Powering on due to button press\n",
                                  slot_name(ctrl));
                }
-               /* blink green LED and turn off amber */
-               pciehp_green_led_blink(ctrl);
-               pciehp_set_attention_status(ctrl, 0);
+               /* blink power indicator and turn off attention */
+               pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK,
+                                     PCI_EXP_SLTCTL_ATTN_IND_OFF);
                schedule_delayed_work(&ctrl->button_work, 5 * HZ);
                break;
        case BLINKINGOFF_STATE:
@@ -187,12 +191,13 @@ void pciehp_handle_button_press(struct controller *ctrl)
                cancel_delayed_work(&ctrl->button_work);
                if (ctrl->state == BLINKINGOFF_STATE) {
                        ctrl->state = ON_STATE;
-                       pciehp_green_led_on(ctrl);
+                       pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON,
+                                             PCI_EXP_SLTCTL_ATTN_IND_OFF);
                } else {
                        ctrl->state = OFF_STATE;
-                       pciehp_green_led_off(ctrl);
+                       pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+                                             PCI_EXP_SLTCTL_ATTN_IND_OFF);
                }
-               pciehp_set_attention_status(ctrl, 0);
                ctrl_info(ctrl, "Slot(%s): Action canceled due to button press\n",
                          slot_name(ctrl));
                break;
@@ -310,7 +315,9 @@ static int pciehp_enable_slot(struct controller *ctrl)
        pm_runtime_get_sync(&ctrl->pcie->port->dev);
        ret = __pciehp_enable_slot(ctrl);
        if (ret && ATTN_BUTTN(ctrl))
-               pciehp_green_led_off(ctrl); /* may be blinking */
+               /* may be blinking */
+               pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+                                     INDICATOR_NOOP);
        pm_runtime_put(&ctrl->pcie->port->dev);
 
        mutex_lock(&ctrl->state_lock);
index bd990e3..1a522c1 100644 (file)
@@ -418,65 +418,40 @@ int pciehp_set_raw_indicator_status(struct hotplug_slot *hotplug_slot,
        return 0;
 }
 
-void pciehp_set_attention_status(struct controller *ctrl, u8 value)
+/**
+ * pciehp_set_indicators() - set attention indicator, power indicator, or both
+ * @ctrl: PCIe hotplug controller
+ * @pwr: one of:
+ *     PCI_EXP_SLTCTL_PWR_IND_ON
+ *     PCI_EXP_SLTCTL_PWR_IND_BLINK
+ *     PCI_EXP_SLTCTL_PWR_IND_OFF
+ * @attn: one of:
+ *     PCI_EXP_SLTCTL_ATTN_IND_ON
+ *     PCI_EXP_SLTCTL_ATTN_IND_BLINK
+ *     PCI_EXP_SLTCTL_ATTN_IND_OFF
+ *
+ * Either @pwr or @attn can also be INDICATOR_NOOP to leave that indicator
+ * unchanged.
+ */
+void pciehp_set_indicators(struct controller *ctrl, int pwr, int attn)
 {
-       u16 slot_cmd;
+       u16 cmd = 0, mask = 0;
 
-       if (!ATTN_LED(ctrl))
-               return;
-
-       switch (value) {
-       case 0:         /* turn off */
-               slot_cmd = PCI_EXP_SLTCTL_ATTN_IND_OFF;
-               break;
-       case 1:         /* turn on */
-               slot_cmd = PCI_EXP_SLTCTL_ATTN_IND_ON;
-               break;
-       case 2:         /* turn blink */
-               slot_cmd = PCI_EXP_SLTCTL_ATTN_IND_BLINK;
-               break;
-       default:
-               return;
+       if (PWR_LED(ctrl) && pwr != INDICATOR_NOOP) {
+               cmd |= (pwr & PCI_EXP_SLTCTL_PIC);
+               mask |= PCI_EXP_SLTCTL_PIC;
        }
-       pcie_write_cmd_nowait(ctrl, slot_cmd, PCI_EXP_SLTCTL_AIC);
-       ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
-                pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_cmd);
-}
 
-void pciehp_green_led_on(struct controller *ctrl)
-{
-       if (!PWR_LED(ctrl))
-               return;
-
-       pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON,
-                             PCI_EXP_SLTCTL_PIC);
-       ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
-                pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL,
-                PCI_EXP_SLTCTL_PWR_IND_ON);
-}
-
-void pciehp_green_led_off(struct controller *ctrl)
-{
-       if (!PWR_LED(ctrl))
-               return;
-
-       pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
-                             PCI_EXP_SLTCTL_PIC);
-       ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
-                pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL,
-                PCI_EXP_SLTCTL_PWR_IND_OFF);
-}
-
-void pciehp_green_led_blink(struct controller *ctrl)
-{
-       if (!PWR_LED(ctrl))
-               return;
+       if (ATTN_LED(ctrl) && attn != INDICATOR_NOOP) {
+               cmd |= (attn & PCI_EXP_SLTCTL_AIC);
+               mask |= PCI_EXP_SLTCTL_AIC;
+       }
 
-       pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK,
-                             PCI_EXP_SLTCTL_PIC);
-       ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
-                pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL,
-                PCI_EXP_SLTCTL_PWR_IND_BLINK);
+       if (cmd) {
+               pcie_write_cmd_nowait(ctrl, cmd, mask);
+               ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
+                        pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, cmd);
+       }
 }
 
 int pciehp_power_on_slot(struct controller *ctrl)
@@ -638,8 +613,8 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
        if ((events & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
                ctrl->power_fault_detected = 1;
                ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(ctrl));
-               pciehp_set_attention_status(ctrl, 1);
-               pciehp_green_led_off(ctrl);
+               pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+                                     PCI_EXP_SLTCTL_ATTN_IND_ON);
        }
 
        /*
index 182f9e3..977946e 100644 (file)
@@ -473,7 +473,6 @@ int __init rpadlpar_io_init(void)
 void rpadlpar_io_exit(void)
 {
        dlpar_sysfs_exit();
-       return;
 }
 
 module_init(rpadlpar_io_init);
index c3899ee..18627bb 100644 (file)
@@ -408,7 +408,6 @@ static void __exit cleanup_slots(void)
                pci_hp_deregister(&slot->hotplug_slot);
                dealloc_slot_struct(slot);
        }
-       return;
 }
 
 static int __init rpaphp_init(void)
index 525fd3f..b3f972e 100644 (file)
@@ -240,6 +240,173 @@ void pci_iov_remove_virtfn(struct pci_dev *dev, int id)
        pci_dev_put(dev);
 }
 
+static ssize_t sriov_totalvfs_show(struct device *dev,
+                                  struct device_attribute *attr,
+                                  char *buf)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+
+       return sprintf(buf, "%u\n", pci_sriov_get_totalvfs(pdev));
+}
+
+static ssize_t sriov_numvfs_show(struct device *dev,
+                                struct device_attribute *attr,
+                                char *buf)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+
+       return sprintf(buf, "%u\n", pdev->sriov->num_VFs);
+}
+
+/*
+ * num_vfs > 0; number of VFs to enable
+ * num_vfs = 0; disable all VFs
+ *
+ * Note: SRIOV spec does not allow partial VF
+ *      disable, so it's all or none.
+ */
+static ssize_t sriov_numvfs_store(struct device *dev,
+                                 struct device_attribute *attr,
+                                 const char *buf, size_t count)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+       int ret;
+       u16 num_vfs;
+
+       ret = kstrtou16(buf, 0, &num_vfs);
+       if (ret < 0)
+               return ret;
+
+       if (num_vfs > pci_sriov_get_totalvfs(pdev))
+               return -ERANGE;
+
+       device_lock(&pdev->dev);
+
+       if (num_vfs == pdev->sriov->num_VFs)
+               goto exit;
+
+       /* is PF driver loaded w/callback */
+       if (!pdev->driver || !pdev->driver->sriov_configure) {
+               pci_info(pdev, "Driver does not support SRIOV configuration via sysfs\n");
+               ret = -ENOENT;
+               goto exit;
+       }
+
+       if (num_vfs == 0) {
+               /* disable VFs */
+               ret = pdev->driver->sriov_configure(pdev, 0);
+               goto exit;
+       }
+
+       /* enable VFs */
+       if (pdev->sriov->num_VFs) {
+               pci_warn(pdev, "%d VFs already enabled. Disable before enabling %d VFs\n",
+                        pdev->sriov->num_VFs, num_vfs);
+               ret = -EBUSY;
+               goto exit;
+       }
+
+       ret = pdev->driver->sriov_configure(pdev, num_vfs);
+       if (ret < 0)
+               goto exit;
+
+       if (ret != num_vfs)
+               pci_warn(pdev, "%d VFs requested; only %d enabled\n",
+                        num_vfs, ret);
+
+exit:
+       device_unlock(&pdev->dev);
+
+       if (ret < 0)
+               return ret;
+
+       return count;
+}
+
+static ssize_t sriov_offset_show(struct device *dev,
+                                struct device_attribute *attr,
+                                char *buf)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+
+       return sprintf(buf, "%u\n", pdev->sriov->offset);
+}
+
+static ssize_t sriov_stride_show(struct device *dev,
+                                struct device_attribute *attr,
+                                char *buf)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+
+       return sprintf(buf, "%u\n", pdev->sriov->stride);
+}
+
+static ssize_t sriov_vf_device_show(struct device *dev,
+                                   struct device_attribute *attr,
+                                   char *buf)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+
+       return sprintf(buf, "%x\n", pdev->sriov->vf_device);
+}
+
+static ssize_t sriov_drivers_autoprobe_show(struct device *dev,
+                                           struct device_attribute *attr,
+                                           char *buf)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+
+       return sprintf(buf, "%u\n", pdev->sriov->drivers_autoprobe);
+}
+
+static ssize_t sriov_drivers_autoprobe_store(struct device *dev,
+                                            struct device_attribute *attr,
+                                            const char *buf, size_t count)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+       bool drivers_autoprobe;
+
+       if (kstrtobool(buf, &drivers_autoprobe) < 0)
+               return -EINVAL;
+
+       pdev->sriov->drivers_autoprobe = drivers_autoprobe;
+
+       return count;
+}
+
+static DEVICE_ATTR_RO(sriov_totalvfs);
+static DEVICE_ATTR_RW(sriov_numvfs);
+static DEVICE_ATTR_RO(sriov_offset);
+static DEVICE_ATTR_RO(sriov_stride);
+static DEVICE_ATTR_RO(sriov_vf_device);
+static DEVICE_ATTR_RW(sriov_drivers_autoprobe);
+
+static struct attribute *sriov_dev_attrs[] = {
+       &dev_attr_sriov_totalvfs.attr,
+       &dev_attr_sriov_numvfs.attr,
+       &dev_attr_sriov_offset.attr,
+       &dev_attr_sriov_stride.attr,
+       &dev_attr_sriov_vf_device.attr,
+       &dev_attr_sriov_drivers_autoprobe.attr,
+       NULL,
+};
+
+static umode_t sriov_attrs_are_visible(struct kobject *kobj,
+                                      struct attribute *a, int n)
+{
+       struct device *dev = kobj_to_dev(kobj);
+
+       if (!dev_is_pf(dev))
+               return 0;
+
+       return a->mode;
+}
+
+const struct attribute_group sriov_dev_attr_group = {
+       .attrs = sriov_dev_attrs,
+       .is_visible = sriov_attrs_are_visible,
+};
+
 int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 {
        return 0;
@@ -557,8 +724,8 @@ static void sriov_restore_state(struct pci_dev *dev)
        ctrl |= iov->ctrl & PCI_SRIOV_CTRL_ARI;
        pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, ctrl);
 
-       for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++)
-               pci_update_resource(dev, i);
+       for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
+               pci_update_resource(dev, i + PCI_IOV_RESOURCES);
 
        pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
        pci_iov_set_numvfs(dev, iov->num_VFs);
index bc7b27a..36891e7 100644 (file)
@@ -353,7 +353,7 @@ EXPORT_SYMBOL_GPL(devm_of_pci_get_host_bridge_resources);
 /**
  * of_irq_parse_pci - Resolve the interrupt for a PCI device
  * @pdev:       the device whose interrupt is to be resolved
- * @out_irq:    structure of_irq filled by this function
+ * @out_irq:    structure of_phandle_args filled by this function
  *
  * This function resolves the PCI interrupt for a given PCI device. If a
  * device-node exists for a given pci_dev, it will use normal OF tree
index 2344762..0608aae 100644 (file)
 #include <linux/percpu-refcount.h>
 #include <linux/random.h>
 #include <linux/seq_buf.h>
-#include <linux/iommu.h>
+#include <linux/xarray.h>
+
+enum pci_p2pdma_map_type {
+       PCI_P2PDMA_MAP_UNKNOWN = 0,
+       PCI_P2PDMA_MAP_NOT_SUPPORTED,
+       PCI_P2PDMA_MAP_BUS_ADDR,
+       PCI_P2PDMA_MAP_THRU_HOST_BRIDGE,
+};
 
 struct pci_p2pdma {
        struct gen_pool *pool;
        bool p2pmem_published;
+       struct xarray map_types;
 };
 
+struct pci_p2pdma_pagemap {
+       struct dev_pagemap pgmap;
+       struct pci_dev *provider;
+       u64 bus_offset;
+};
+
+static struct pci_p2pdma_pagemap *to_p2p_pgmap(struct dev_pagemap *pgmap)
+{
+       return container_of(pgmap, struct pci_p2pdma_pagemap, pgmap);
+}
+
 static ssize_t size_show(struct device *dev, struct device_attribute *attr,
                         char *buf)
 {
@@ -87,6 +106,7 @@ static void pci_p2pdma_release(void *data)
 
        gen_pool_destroy(p2pdma->pool);
        sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group);
+       xa_destroy(&p2pdma->map_types);
 }
 
 static int pci_p2pdma_setup(struct pci_dev *pdev)
@@ -98,6 +118,8 @@ static int pci_p2pdma_setup(struct pci_dev *pdev)
        if (!p2p)
                return -ENOMEM;
 
+       xa_init(&p2p->map_types);
+
        p2p->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev));
        if (!p2p->pool)
                goto out;
@@ -135,6 +157,7 @@ out:
 int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
                            u64 offset)
 {
+       struct pci_p2pdma_pagemap *p2p_pgmap;
        struct dev_pagemap *pgmap;
        void *addr;
        int error;
@@ -157,14 +180,18 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
                        return error;
        }
 
-       pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL);
-       if (!pgmap)
+       p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL);
+       if (!p2p_pgmap)
                return -ENOMEM;
+
+       pgmap = &p2p_pgmap->pgmap;
        pgmap->res.start = pci_resource_start(pdev, bar) + offset;
        pgmap->res.end = pgmap->res.start + size - 1;
        pgmap->res.flags = pci_resource_flags(pdev, bar);
        pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
-       pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) -
+
+       p2p_pgmap->provider = pdev;
+       p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) -
                pci_resource_start(pdev, bar);
 
        addr = devm_memremap_pages(&pdev->dev, pgmap);
@@ -246,19 +273,32 @@ static void seq_buf_print_bus_devfn(struct seq_buf *buf, struct pci_dev *pdev)
        seq_buf_printf(buf, "%s;", pci_name(pdev));
 }
 
-/*
- * If we can't find a common upstream bridge take a look at the root
- * complex and compare it to a whitelist of known good hardware.
- */
-static bool root_complex_whitelist(struct pci_dev *dev)
+static const struct pci_p2pdma_whitelist_entry {
+       unsigned short vendor;
+       unsigned short device;
+       enum {
+               REQ_SAME_HOST_BRIDGE    = 1 << 0,
+       } flags;
+} pci_p2pdma_whitelist[] = {
+       /* AMD ZEN */
+       {PCI_VENDOR_ID_AMD,     0x1450, 0},
+
+       /* Intel Xeon E5/Core i7 */
+       {PCI_VENDOR_ID_INTEL,   0x3c00, REQ_SAME_HOST_BRIDGE},
+       {PCI_VENDOR_ID_INTEL,   0x3c01, REQ_SAME_HOST_BRIDGE},
+       /* Intel Xeon E7 v3/Xeon E5 v3/Core i7 */
+       {PCI_VENDOR_ID_INTEL,   0x2f00, REQ_SAME_HOST_BRIDGE},
+       {PCI_VENDOR_ID_INTEL,   0x2f01, REQ_SAME_HOST_BRIDGE},
+       {}
+};
+
+static bool __host_bridge_whitelist(struct pci_host_bridge *host,
+                                   bool same_host_bridge)
 {
-       struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
        struct pci_dev *root = pci_get_slot(host->bus, PCI_DEVFN(0, 0));
+       const struct pci_p2pdma_whitelist_entry *entry;
        unsigned short vendor, device;
 
-       if (iommu_present(dev->dev.bus))
-               return false;
-
        if (!root)
                return false;
 
@@ -266,65 +306,49 @@ static bool root_complex_whitelist(struct pci_dev *dev)
        device = root->device;
        pci_dev_put(root);
 
-       /* AMD ZEN host bridges can do peer to peer */
-       if (vendor == PCI_VENDOR_ID_AMD && device == 0x1450)
+       for (entry = pci_p2pdma_whitelist; entry->vendor; entry++) {
+               if (vendor != entry->vendor || device != entry->device)
+                       continue;
+               if (entry->flags & REQ_SAME_HOST_BRIDGE && !same_host_bridge)
+                       return false;
+
                return true;
+       }
 
        return false;
 }
 
 /*
- * Find the distance through the nearest common upstream bridge between
- * two PCI devices.
- *
- * If the two devices are the same device then 0 will be returned.
- *
- * If there are two virtual functions of the same device behind the same
- * bridge port then 2 will be returned (one step down to the PCIe switch,
- * then one step back to the same device).
- *
- * In the case where two devices are connected to the same PCIe switch, the
- * value 4 will be returned. This corresponds to the following PCI tree:
- *
- *     -+  Root Port
- *      \+ Switch Upstream Port
- *       +-+ Switch Downstream Port
- *       + \- Device A
- *       \-+ Switch Downstream Port
- *         \- Device B
- *
- * The distance is 4 because we traverse from Device A through the downstream
- * port of the switch, to the common upstream port, back up to the second
- * downstream port and then to Device B.
- *
- * Any two devices that don't have a common upstream bridge will return -1.
- * In this way devices on separate PCIe root ports will be rejected, which
- * is what we want for peer-to-peer seeing each PCIe root port defines a
- * separate hierarchy domain and there's no way to determine whether the root
- * complex supports forwarding between them.
- *
- * In the case where two devices are connected to different PCIe switches,
- * this function will still return a positive distance as long as both
- * switches eventually have a common upstream bridge. Note this covers
- * the case of using multiple PCIe switches to achieve a desired level of
- * fan-out from a root port. The exact distance will be a function of the
- * number of switches between Device A and Device B.
- *
- * If a bridge which has any ACS redirection bits set is in the path
- * then this functions will return -2. This is so we reject any
- * cases where the TLPs are forwarded up into the root complex.
- * In this case, a list of all infringing bridge addresses will be
- * populated in acs_list (assuming it's non-null) for printk purposes.
+ * If we can't find a common upstream bridge take a look at the root
+ * complex and compare it to a whitelist of known good hardware.
  */
-static int upstream_bridge_distance(struct pci_dev *provider,
-                                   struct pci_dev *client,
-                                   struct seq_buf *acs_list)
+static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b)
+{
+       struct pci_host_bridge *bridge_a, *bridge_b;
+
+       bridge_a = pci_find_host_bridge(a->bus);
+       bridge_b = pci_find_host_bridge(b->bus);
+
+       /* One shared bridge: a single lookup, same-bridge entries allowed. */
+       if (bridge_a == bridge_b)
+               return __host_bridge_whitelist(bridge_a, true);
+
+       /*
+        * Two distinct bridges: each must be listed on its own, and the
+        * second is only looked up when the first one passed.
+        */
+       return __host_bridge_whitelist(bridge_a, false) &&
+              __host_bridge_whitelist(bridge_b, false);
+}
+
+static enum pci_p2pdma_map_type
+__upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client,
+               int *dist, bool *acs_redirects, struct seq_buf *acs_list)
 {
        struct pci_dev *a = provider, *b = client, *bb;
        int dist_a = 0;
        int dist_b = 0;
        int acs_cnt = 0;
 
+       if (acs_redirects)
+               *acs_redirects = false;
+
        /*
         * Note, we don't need to take references to devices returned by
         * pci_upstream_bridge() seeing we hold a reference to a child
@@ -353,15 +377,10 @@ static int upstream_bridge_distance(struct pci_dev *provider,
                dist_a++;
        }
 
-       /*
-        * Allow the connection if both devices are on a whitelisted root
-        * complex, but add an arbitrary large value to the distance.
-        */
-       if (root_complex_whitelist(provider) &&
-           root_complex_whitelist(client))
-               return 0x1000 + dist_a + dist_b;
+       if (dist)
+               *dist = dist_a + dist_b;
 
-       return -1;
+       return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE;
 
 check_b_path_acs:
        bb = b;
@@ -378,33 +397,110 @@ check_b_path_acs:
                bb = pci_upstream_bridge(bb);
        }
 
-       if (acs_cnt)
-               return -2;
+       if (dist)
+               *dist = dist_a + dist_b;
+
+       if (acs_cnt) {
+               if (acs_redirects)
+                       *acs_redirects = true;
+
+               return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE;
+       }
+
+       return PCI_P2PDMA_MAP_BUS_ADDR;
+}
+
+/*
+ * Build the key under which the map type for @client is stored in a
+ * provider's map_types xarray:
+ * (domain number << 16) | (bus number << 8) | devfn.
+ */
+static unsigned long map_types_idx(struct pci_dev *client)
+{
+       /*
+        * Widen the domain number before shifting.  pci_domain_nr() yields
+        * an int, and shifting an int of 0x8000 or more left by 16 overflows
+        * it.
+        */
+       return ((unsigned long)pci_domain_nr(client->bus) << 16) |
+               (client->bus->number << 8) | client->devfn;
+}
+
+/*
+ * Find the distance through the nearest common upstream bridge between
+ * two PCI devices.
+ *
+ * If the two devices are the same device then 0 will be returned.
+ *
+ * If there are two virtual functions of the same device behind the same
+ * bridge port then 2 will be returned (one step down to the PCIe switch,
+ * then one step back to the same device).
+ *
+ * In the case where two devices are connected to the same PCIe switch, the
+ * value 4 will be returned. This corresponds to the following PCI tree:
+ *
+ *     -+  Root Port
+ *      \+ Switch Upstream Port
+ *       +-+ Switch Downstream Port
+ *       + \- Device A
+ *       \-+ Switch Downstream Port
+ *         \- Device B
+ *
+ * The distance is 4 because we traverse from Device A through the downstream
+ * port of the switch, to the common upstream port, back up to the second
+ * downstream port and then to Device B.
+ *
+ * Any two devices that cannot communicate using p2pdma will return
+ * PCI_P2PDMA_MAP_NOT_SUPPORTED.
+ *
+ * Any two devices that have a data path that goes through the host bridge
+ * will consult a whitelist. If the host bridges are on the whitelist,
+ * this function will return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE.
+ *
+ * If either bridge is not on the whitelist this function returns
+ * PCI_P2PDMA_MAP_NOT_SUPPORTED.
+ *
+ * If a bridge which has any ACS redirection bits set is in the path,
+ * acs_redirects will be set to true. In this case, a list of all infringing
+ * bridge addresses will be populated in acs_list (assuming it's non-null)
+ * for printk purposes.
+ */
+static enum pci_p2pdma_map_type
+upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client,
+               int *dist, bool *acs_redirects, struct seq_buf *acs_list)
+{
+       enum pci_p2pdma_map_type map_type;
+
+       map_type = __upstream_bridge_distance(provider, client, dist,
+                                             acs_redirects, acs_list);
+
+       if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) {
+               if (!host_bridge_whitelist(provider, client))
+                       map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED;
+       }
+
+       if (provider->p2pdma)
+               xa_store(&provider->p2pdma->map_types, map_types_idx(client),
+                        xa_mk_value(map_type), GFP_KERNEL);
 
-       return dist_a + dist_b;
+       return map_type;
 }
 
-static int upstream_bridge_distance_warn(struct pci_dev *provider,
-                                        struct pci_dev *client)
+static enum pci_p2pdma_map_type
+upstream_bridge_distance_warn(struct pci_dev *provider, struct pci_dev *client,
+                             int *dist)
 {
        struct seq_buf acs_list;
+       bool acs_redirects;
        int ret;
 
        seq_buf_init(&acs_list, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
        if (!acs_list.buffer)
                return -ENOMEM;
 
-       ret = upstream_bridge_distance(provider, client, &acs_list);
-       if (ret == -2) {
-               pci_warn(client, "cannot be used for peer-to-peer DMA as ACS redirect is set between the client and provider (%s)\n",
+       ret = upstream_bridge_distance(provider, client, dist, &acs_redirects,
+                                      &acs_list);
+       if (acs_redirects) {
+               pci_warn(client, "ACS redirect is set between the client and provider (%s)\n",
                         pci_name(provider));
                /* Drop final semicolon */
                acs_list.buffer[acs_list.len-1] = 0;
                pci_warn(client, "to disable ACS redirect for this path, add the kernel parameter: pci=disable_acs_redir=%s\n",
                         acs_list.buffer);
+       }
 
-       } else if (ret < 0) {
-               pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge\n",
+       if (ret == PCI_P2PDMA_MAP_NOT_SUPPORTED) {
+               pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge or whitelisted host bridge\n",
                         pci_name(provider));
        }
 
@@ -421,22 +517,22 @@ static int upstream_bridge_distance_warn(struct pci_dev *provider,
  * @num_clients: number of clients in the array
  * @verbose: if true, print warnings for devices when we return -1
  *
- * Returns -1 if any of the clients are not compatible (behind the same
- * root port as the provider), otherwise returns a positive number where
- * a lower number is the preferable choice. (If there's one client
- * that's the same as the provider it will return 0, which is best choice).
+ * Returns -1 if any of the clients are not compatible, otherwise returns a
+ * positive number where a lower number is the preferable choice. (If there's
+ * one client that's the same as the provider it will return 0, which is best
+ * choice).
  *
- * For now, "compatible" means the provider and the clients are all behind
- * the same PCI root port. This cuts out cases that may work but is safest
- * for the user. Future work can expand this to white-list root complexes that
- * can safely forward between each ports.
+ * "compatible" means the provider and the clients are either all behind
+ * the same PCI root port or the host bridges connected to each of the devices
+ * are listed in the 'pci_p2pdma_whitelist'.
  */
 int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients,
                             int num_clients, bool verbose)
 {
        bool not_supported = false;
        struct pci_dev *pci_client;
-       int distance = 0;
+       int total_dist = 0;
+       int distance;
        int i, ret;
 
        if (num_clients == 0)
@@ -461,26 +557,26 @@ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients,
 
                if (verbose)
                        ret = upstream_bridge_distance_warn(provider,
-                                                           pci_client);
+                                       pci_client, &distance);
                else
                        ret = upstream_bridge_distance(provider, pci_client,
-                                                      NULL);
+                                                      &distance, NULL, NULL);
 
                pci_dev_put(pci_client);
 
-               if (ret < 0)
+               if (ret == PCI_P2PDMA_MAP_NOT_SUPPORTED)
                        not_supported = true;
 
                if (not_supported && !verbose)
                        break;
 
-               distance += ret;
+               total_dist += distance;
        }
 
        if (not_supported)
                return -1;
 
-       return distance;
+       return total_dist;
 }
 EXPORT_SYMBOL_GPL(pci_p2pdma_distance_many);
 
@@ -706,21 +802,19 @@ void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
 }
 EXPORT_SYMBOL_GPL(pci_p2pmem_publish);
 
-/**
- * pci_p2pdma_map_sg - map a PCI peer-to-peer scatterlist for DMA
- * @dev: device doing the DMA request
- * @sg: scatter list to map
- * @nents: elements in the scatterlist
- * @dir: DMA direction
- *
- * Scatterlists mapped with this function should not be unmapped in any way.
- *
- * Returns the number of SG entries mapped or 0 on error.
- */
-int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-                     enum dma_data_direction dir)
+/*
+ * Fetch the mapping type stored for @client in @provider's map_types
+ * xarray.  A provider without p2pdma state yields
+ * PCI_P2PDMA_MAP_NOT_SUPPORTED.
+ */
+static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct pci_dev *provider,
+                                                   struct pci_dev *client)
+{
+       void *entry;
+
+       if (provider->p2pdma) {
+               entry = xa_load(&provider->p2pdma->map_types,
+                               map_types_idx(client));
+               return xa_to_value(entry);
+       }
+
+       return PCI_P2PDMA_MAP_NOT_SUPPORTED;
+}
+
+static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap,
+               struct device *dev, struct scatterlist *sg, int nents)
 {
-       struct dev_pagemap *pgmap;
        struct scatterlist *s;
        phys_addr_t paddr;
        int i;
@@ -736,16 +830,80 @@ int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
                return 0;
 
        for_each_sg(sg, s, nents, i) {
-               pgmap = sg_page(s)->pgmap;
                paddr = sg_phys(s);
 
-               s->dma_address = paddr - pgmap->pci_p2pdma_bus_offset;
+               s->dma_address = paddr - p2p_pgmap->bus_offset;
                sg_dma_len(s) = s->length;
        }
 
        return nents;
 }
-EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg);
+
+/**
+ * pci_p2pdma_map_sg_attrs - map a PCI peer-to-peer scatterlist for DMA
+ * @dev: device doing the DMA request
+ * @sg: scatter list to map
+ * @nents: elements in the scatterlist
+ * @dir: DMA direction
+ * @attrs: DMA attributes passed to dma_map_sg_attrs() (if called)
+ *
+ * Scatterlists mapped with this function should be unmapped using
+ * pci_p2pdma_unmap_sg_attrs().
+ *
+ * Returns the number of SG entries mapped or 0 on error.  A @dev that is
+ * not a PCI device, or a provider/client pair with no usable map type,
+ * triggers a one-time warning and returns 0.
+ */
+int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
+               int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+       /* The provider is looked up through the page of the first SG entry */
+       struct pci_p2pdma_pagemap *p2p_pgmap =
+               to_p2p_pgmap(sg_page(sg)->pgmap);
+       struct pci_dev *client;
+
+       if (WARN_ON_ONCE(!dev_is_pci(dev)))
+               return 0;
+
+       client = to_pci_dev(dev);
+
+       switch (pci_p2pdma_map_type(p2p_pgmap->provider, client)) {
+       case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+               /* Handed to the regular DMA mapping call */
+               return dma_map_sg_attrs(dev, sg, nents, dir, attrs);
+       case PCI_P2PDMA_MAP_BUS_ADDR:
+               /* Mapped by the p2pdma-specific helper instead */
+               return __pci_p2pdma_map_sg(p2p_pgmap, dev, sg, nents);
+       default:
+               WARN_ON_ONCE(1);
+               return 0;
+       }
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg_attrs);
+
+/**
+ * pci_p2pdma_unmap_sg_attrs - unmap a PCI peer-to-peer scatterlist that was
+ *     mapped with pci_p2pdma_map_sg_attrs()
+ * @dev: device doing the DMA request
+ * @sg: scatter list to unmap
+ * @nents: number of elements returned by pci_p2pdma_map_sg_attrs()
+ * @dir: DMA direction
+ * @attrs: DMA attributes passed to dma_unmap_sg_attrs() (if called)
+ */
+void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
+               int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+       /* The provider is looked up through the page of the first SG entry */
+       struct pci_p2pdma_pagemap *p2p_pgmap =
+               to_p2p_pgmap(sg_page(sg)->pgmap);
+       enum pci_p2pdma_map_type map_type;
+       struct pci_dev *client;
+
+       if (WARN_ON_ONCE(!dev_is_pci(dev)))
+               return;
+
+       client = to_pci_dev(dev);
+
+       map_type = pci_p2pdma_map_type(p2p_pgmap->provider, client);
+
+       /*
+        * Only the host-bridge map type is mapped through
+        * dma_map_sg_attrs(), so it is the only one that is unmapped here;
+        * every other map type is left untouched.
+        */
+       if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
+               dma_unmap_sg_attrs(dev, sg, nents, dir, attrs);
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_unmap_sg_attrs);
 
 /**
  * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store
index 45049f5..0c02d50 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/msi.h>
 #include <linux/pci_hotplug.h>
 #include <linux/module.h>
-#include <linux/pci-aspm.h>
 #include <linux/pci-acpi.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm_qos.h>
@@ -118,8 +117,58 @@ phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle)
        return (phys_addr_t)mcfg_addr;
 }
 
+/* _HPX PCI Setting Record (Type 0); same as _HPP */
+struct hpx_type0 {
+       u32 revision;           /* Not present in _HPP */
+       u8  cache_line_size;    /* Not applicable to PCIe */
+       u8  latency_timer;      /* Not applicable to PCIe */
+       u8  enable_serr;        /* Non-zero: set PCI_COMMAND_SERR */
+       u8  enable_perr;        /* Non-zero: set the parity enable bits */
+};
+
+/*
+ * Settings program_hpx_type0() falls back to when it is given no record or
+ * a record whose revision it does not support.
+ */
+static struct hpx_type0 pci_default_type0 = {
+       .revision = 1,
+       .cache_line_size = 8,
+       .latency_timer = 0x40,
+       .enable_serr = 0,
+       .enable_perr = 0,
+};
+
+/*
+ * Write the Type 0 (conventional PCI) settings to @dev.
+ *
+ * @dev: device to program
+ * @hpx: settings to apply; NULL selects pci_default_type0
+ *
+ * A record with a revision above 1 is reported and replaced by the
+ * defaults.  SERR and parity enables are only ever set here, never cleared.
+ */
+static void program_hpx_type0(struct pci_dev *dev, struct hpx_type0 *hpx)
+{
+       u16 pci_cmd, pci_bctl;
+
+       if (!hpx)
+               hpx = &pci_default_type0;
+
+       if (hpx->revision > 1) {
+               pci_warn(dev, "PCI settings rev %d not supported; using defaults\n",
+                        hpx->revision);
+               hpx = &pci_default_type0;
+       }
+
+       pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, hpx->cache_line_size);
+       pci_write_config_byte(dev, PCI_LATENCY_TIMER, hpx->latency_timer);
+       /* Read-modify-write so the other command bits keep their values */
+       pci_read_config_word(dev, PCI_COMMAND, &pci_cmd);
+       if (hpx->enable_serr)
+               pci_cmd |= PCI_COMMAND_SERR;
+       if (hpx->enable_perr)
+               pci_cmd |= PCI_COMMAND_PARITY;
+       pci_write_config_word(dev, PCI_COMMAND, pci_cmd);
+
+       /* Program bridge control value */
+       if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+               /* The secondary side gets the same latency timer value */
+               pci_write_config_byte(dev, PCI_SEC_LATENCY_TIMER,
+                                     hpx->latency_timer);
+               pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &pci_bctl);
+               if (hpx->enable_perr)
+                       pci_bctl |= PCI_BRIDGE_CTL_PARITY;
+               pci_write_config_word(dev, PCI_BRIDGE_CONTROL, pci_bctl);
+       }
+}
+
 static acpi_status decode_type0_hpx_record(union acpi_object *record,
-                                          struct hpp_type0 *hpx0)
+                                          struct hpx_type0 *hpx0)
 {
        int i;
        union acpi_object *fields = record->package.elements;
@@ -146,8 +195,30 @@ static acpi_status decode_type0_hpx_record(union acpi_object *record,
        return AE_OK;
 }
 
+/*
+ * _HPX PCI-X Setting Record (Type 1)
+ *
+ * program_hpx_type1() reads none of these fields; it only warns that
+ * PCI-X settings are not supported.
+ */
+struct hpx_type1 {
+       u32 revision;
+       u8  max_mem_read;
+       u8  avg_max_split;
+       u16 tot_max_split;
+};
+
+/*
+ * Type 1 (PCI-X) records are not applied.  When a record is supplied and
+ * @dev has a PCI-X capability, log that the settings are unsupported.
+ */
+static void program_hpx_type1(struct pci_dev *dev, struct hpx_type1 *hpx)
+{
+       /* Without a record the capability lookup is not performed at all */
+       if (hpx && pci_find_capability(dev, PCI_CAP_ID_PCIX))
+               pci_warn(dev, "PCI-X settings not supported\n");
+}
+
 static acpi_status decode_type1_hpx_record(union acpi_object *record,
-                                          struct hpp_type1 *hpx1)
+                                          struct hpx_type1 *hpx1)
 {
        int i;
        union acpi_object *fields = record->package.elements;
@@ -173,8 +244,130 @@ static acpi_status decode_type1_hpx_record(union acpi_object *record,
        return AE_OK;
 }
 
+/*
+ * Report whether the Root Port returned by pcie_find_root_port() for @dev
+ * has PCI_EXP_LNKCTL_RCB set in its Link Control register.  Returns false
+ * when no Root Port is found.
+ */
+static bool pcie_root_rcb_set(struct pci_dev *dev)
+{
+       struct pci_dev *root_port;
+       u16 link_ctl;
+
+       root_port = pcie_find_root_port(dev);
+       if (root_port) {
+               pcie_capability_read_word(root_port, PCI_EXP_LNKCTL, &link_ctl);
+               return (link_ctl & PCI_EXP_LNKCTL_RCB) != 0;
+       }
+
+       return false;
+}
+
+/*
+ * _HPX PCI Express Setting Record (Type 2)
+ *
+ * Each register comes as an AND/OR mask pair.  program_hpx_type2() applies
+ * the AER pairs as (current & and) | or, and hands the Device Control and
+ * Link Control pairs to pcie_capability_clear_and_set_word() with ~and as
+ * the clear mask and or as the set mask.
+ */
+struct hpx_type2 {
+       u32 revision;
+       u32 unc_err_mask_and;
+       u32 unc_err_mask_or;
+       u32 unc_err_sever_and;
+       u32 unc_err_sever_or;
+       u32 cor_err_mask_and;
+       u32 cor_err_mask_or;
+       u32 adv_err_cap_and;
+       u32 adv_err_cap_or;
+       u16 pci_exp_devctl_and;
+       u16 pci_exp_devctl_or;
+       u16 pci_exp_lnkctl_and;
+       u16 pci_exp_lnkctl_or;
+       /* The four sec_* masks are not programmed by program_hpx_type2() */
+       u32 sec_unc_err_sever_and;
+       u32 sec_unc_err_sever_or;
+       u32 sec_unc_err_mask_and;
+       u32 sec_unc_err_mask_or;
+};
+
+/*
+ * Apply a Type 2 (PCI Express) setting record to @dev.
+ *
+ * @dev: device to program; anything that is not PCIe is left alone
+ * @hpx: record to apply; NULL means nothing to do
+ *
+ * Records with a revision above 1 are reported and ignored.  Note that the
+ * Device Control and Link Control masks in @hpx are modified in place
+ * before they are used.  The AER registers are only programmed when the
+ * device has the AER extended capability.
+ */
+static void program_hpx_type2(struct pci_dev *dev, struct hpx_type2 *hpx)
+{
+       int pos;
+       u32 reg32;
+
+       if (!hpx)
+               return;
+
+       if (!pci_is_pcie(dev))
+               return;
+
+       if (hpx->revision > 1) {
+               pci_warn(dev, "PCIe settings rev %d not supported\n",
+                        hpx->revision);
+               return;
+       }
+
+       /*
+        * Don't allow _HPX to change MPS or MRRS settings.  We manage
+        * those to make sure they're consistent with the rest of the
+        * platform.
+        */
+       hpx->pci_exp_devctl_and |= PCI_EXP_DEVCTL_PAYLOAD |
+                                   PCI_EXP_DEVCTL_READRQ;
+       hpx->pci_exp_devctl_or &= ~(PCI_EXP_DEVCTL_PAYLOAD |
+                                   PCI_EXP_DEVCTL_READRQ);
+
+       /* Initialize Device Control Register */
+       pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
+                       ~hpx->pci_exp_devctl_and, hpx->pci_exp_devctl_or);
+
+       /* Initialize Link Control Register */
+       if (pcie_cap_has_lnkctl(dev)) {
+
+               /*
+                * If the Root Port supports Read Completion Boundary of
+                * 128, set RCB to 128.  Otherwise, clear it.
+                *
+                * NOTE(review): RCB is forced into the AND mask, so it is
+                * excluded from the clear mask passed below.  When the Root
+                * Port bit is not set, the device's current RCB bit appears
+                * to be kept rather than cleared - confirm the intent.
+                */
+               hpx->pci_exp_lnkctl_and |= PCI_EXP_LNKCTL_RCB;
+               hpx->pci_exp_lnkctl_or &= ~PCI_EXP_LNKCTL_RCB;
+               if (pcie_root_rcb_set(dev))
+                       hpx->pci_exp_lnkctl_or |= PCI_EXP_LNKCTL_RCB;
+
+               pcie_capability_clear_and_set_word(dev, PCI_EXP_LNKCTL,
+                       ~hpx->pci_exp_lnkctl_and, hpx->pci_exp_lnkctl_or);
+       }
+
+       /* Find Advanced Error Reporting Enhanced Capability */
+       pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+       if (!pos)
+               return;
+
+       /* Initialize Uncorrectable Error Mask Register */
+       pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &reg32);
+       reg32 = (reg32 & hpx->unc_err_mask_and) | hpx->unc_err_mask_or;
+       pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, reg32);
+
+       /* Initialize Uncorrectable Error Severity Register */
+       pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &reg32);
+       reg32 = (reg32 & hpx->unc_err_sever_and) | hpx->unc_err_sever_or;
+       pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, reg32);
+
+       /* Initialize Correctable Error Mask Register */
+       pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &reg32);
+       reg32 = (reg32 & hpx->cor_err_mask_and) | hpx->cor_err_mask_or;
+       pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg32);
+
+       /* Initialize Advanced Error Capabilities and Control Register */
+       pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32);
+       reg32 = (reg32 & hpx->adv_err_cap_and) | hpx->adv_err_cap_or;
+
+       /* Don't enable ECRC generation or checking if unsupported */
+       if (!(reg32 & PCI_ERR_CAP_ECRC_GENC))
+               reg32 &= ~PCI_ERR_CAP_ECRC_GENE;
+       if (!(reg32 & PCI_ERR_CAP_ECRC_CHKC))
+               reg32 &= ~PCI_ERR_CAP_ECRC_CHKE;
+       pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32);
+
+       /*
+        * FIXME: The following two registers are not supported yet.
+        *
+        *   o Secondary Uncorrectable Error Severity Register
+        *   o Secondary Uncorrectable Error Mask Register
+        */
+}
+
 static acpi_status decode_type2_hpx_record(union acpi_object *record,
-                                          struct hpp_type2 *hpx2)
+                                          struct hpx_type2 *hpx2)
 {
        int i;
        union acpi_object *fields = record->package.elements;
@@ -213,6 +406,164 @@ static acpi_status decode_type2_hpx_record(union acpi_object *record,
        return AE_OK;
 }
 
+/*
+ * _HPX PCI Express Setting Record (Type 3)
+ *
+ * One register descriptor, as consumed by program_hpx_type3_register().
+ */
+struct hpx_type3 {
+       u16 device_type;        /* Mask of HPX_TYPE_* bits this applies to */
+       u16 function_type;      /* Mask of HPX_FN_* bits this applies to */
+       u16 config_space_location;      /* One of enum hpx_type3_cfg_loc */
+       u16 pci_exp_cap_id;     /* Capability ID to search for */
+       u16 pci_exp_cap_ver;    /* Checked for extended capabilities only */
+       u16 pci_exp_vendor_id;
+       u16 dvsec_id;
+       u16 dvsec_rev;
+       u16 match_offset;       /* Offset of the register to match on */
+       u32 match_mask_and;     /* (match reg & mask) must equal match_value */
+       u32 match_value;
+       u16 reg_offset;         /* Offset of the register to modify */
+       u32 reg_mask_and;       /* New value = (old & and) | or */
+       u32 reg_mask_or;
+};
+
+/*
+ * Device type bits for hpx_type3.device_type.  hpx3_device_type() maps each
+ * PCI_EXP_TYPE_* value to the bit of the same name.
+ */
+enum hpx_type3_dev_type {
+       HPX_TYPE_ENDPOINT       = BIT(0),
+       HPX_TYPE_LEG_END        = BIT(1),
+       HPX_TYPE_RC_END         = BIT(2),
+       HPX_TYPE_RC_EC          = BIT(3),
+       HPX_TYPE_ROOT_PORT      = BIT(4),
+       HPX_TYPE_UPSTREAM       = BIT(5),
+       HPX_TYPE_DOWNSTREAM     = BIT(6),
+       HPX_TYPE_PCI_BRIDGE     = BIT(7),
+       HPX_TYPE_PCIE_BRIDGE    = BIT(8),
+};
+
+/*
+ * Translate the PCIe device/port type of @dev into the HPX_TYPE_* bit of
+ * the same name.  Returns 0 for a type value beyond the end of the table;
+ * values inside the table range that have no entry also yield 0.
+ */
+static u16 hpx3_device_type(struct pci_dev *dev)
+{
+       /* static const: a single read-only table, not rebuilt on each call */
+       static const int pcie_to_hpx3_type[] = {
+               [PCI_EXP_TYPE_ENDPOINT]    = HPX_TYPE_ENDPOINT,
+               [PCI_EXP_TYPE_LEG_END]     = HPX_TYPE_LEG_END,
+               [PCI_EXP_TYPE_RC_END]      = HPX_TYPE_RC_END,
+               [PCI_EXP_TYPE_RC_EC]       = HPX_TYPE_RC_EC,
+               [PCI_EXP_TYPE_ROOT_PORT]   = HPX_TYPE_ROOT_PORT,
+               [PCI_EXP_TYPE_UPSTREAM]    = HPX_TYPE_UPSTREAM,
+               [PCI_EXP_TYPE_DOWNSTREAM]  = HPX_TYPE_DOWNSTREAM,
+               [PCI_EXP_TYPE_PCI_BRIDGE]  = HPX_TYPE_PCI_BRIDGE,
+               [PCI_EXP_TYPE_PCIE_BRIDGE] = HPX_TYPE_PCIE_BRIDGE,
+       };
+       u16 pcie_type = pci_pcie_type(dev);
+
+       if (pcie_type >= ARRAY_SIZE(pcie_to_hpx3_type))
+               return 0;
+
+       return pcie_to_hpx3_type[pcie_type];
+}
+
+/*
+ * Function type bits for hpx_type3.function_type; hpx3_function_type()
+ * returns exactly one of them for a device.
+ */
+enum hpx_type3_fn_type {
+       HPX_FN_NORMAL           = BIT(0),
+       HPX_FN_SRIOV_PHYS       = BIT(1),
+       HPX_FN_SRIOV_VIRT       = BIT(2),
+};
+
+/*
+ * Classify @dev as an SR-IOV virtual function, an SR-IOV physical function
+ * (it has the SR-IOV extended capability), or a normal function, and return
+ * the matching HPX_FN_* bit.
+ */
+static u8 hpx3_function_type(struct pci_dev *dev)
+{
+       /* The VF flag is tested first; the capability is only searched after */
+       if (dev->is_virtfn)
+               return HPX_FN_SRIOV_VIRT;
+
+       if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV) > 0)
+               return HPX_FN_SRIOV_PHYS;
+
+       return HPX_FN_NORMAL;
+}
+
+/*
+ * Decide whether a capability version satisfies the version field of an
+ * _HPX Type 3 record.
+ *
+ * @pcie_cap_id: value to test (the caller passes the version taken from the
+ *               extended capability header)
+ * @hpx3_cap_id: record field; bits 3:0 hold a version and bit 4 turns the
+ *               exact match into a range match
+ *
+ * NOTE(review): as written, the range match succeeds when the record's
+ * version is greater than or equal to the device's - confirm the direction
+ * against the ACPI description of this field.
+ */
+static bool hpx3_cap_ver_matches(u8 pcie_cap_id, u8 hpx3_cap_id)
+{
+       u8 wanted = hpx3_cap_id & 0xf;
+       bool range_match = hpx3_cap_id & BIT(4);
+
+       /* An exact version match is accepted with or without bit 4 */
+       if (wanted == pcie_cap_id)
+               return true;
+
+       return range_match && wanted > pcie_cap_id;
+}
+
+/*
+ * Values of hpx_type3.config_space_location: how
+ * program_hpx_type3_register() finds the base that the descriptor's offsets
+ * are added to.
+ */
+enum hpx_type3_cfg_loc {
+       HPX_CFG_PCICFG          = 0,    /* Base is offset 0 */
+       HPX_CFG_PCIE_CAP        = 1,    /* Base from pci_find_capability() */
+       HPX_CFG_PCIE_CAP_EXT    = 2,    /* Base from pci_find_ext_capability() */
+       HPX_CFG_VEND_CAP        = 3,    /* Rejected with a warning */
+       HPX_CFG_DVSEC           = 4,    /* Rejected with a warning */
+       HPX_CFG_MAX,
+};
+
+/*
+ * Apply one _HPX Type 3 register descriptor to @dev.
+ *
+ * @dev: device to program
+ * @reg: descriptor to apply
+ *
+ * The descriptor is skipped unless the device type and function type of
+ * @dev are both selected by its masks, the addressed capability is found
+ * (with a matching version for extended capabilities), and the match
+ * register holds the expected value.  The target register is then updated
+ * as (old & reg_mask_and) | reg_mask_or, and only written when that changes
+ * its value.
+ */
+static void program_hpx_type3_register(struct pci_dev *dev,
+                                      const struct hpx_type3 *reg)
+{
+       u32 match_reg, write_reg, header, orig_value;
+       u16 pos;
+
+       if (!(hpx3_device_type(dev) & reg->device_type))
+               return;
+
+       if (!(hpx3_function_type(dev) & reg->function_type))
+               return;
+
+       /* Find the base position that both descriptor offsets are added to */
+       switch (reg->config_space_location) {
+       case HPX_CFG_PCICFG:
+               pos = 0;
+               break;
+       case HPX_CFG_PCIE_CAP:
+               pos = pci_find_capability(dev, reg->pci_exp_cap_id);
+               if (pos == 0)
+                       return;
+
+               break;
+       case HPX_CFG_PCIE_CAP_EXT:
+               pos = pci_find_ext_capability(dev, reg->pci_exp_cap_id);
+               if (pos == 0)
+                       return;
+
+               /* The version in the capability header must match too */
+               pci_read_config_dword(dev, pos, &header);
+               if (!hpx3_cap_ver_matches(PCI_EXT_CAP_VER(header),
+                                         reg->pci_exp_cap_ver))
+                       return;
+
+               break;
+       case HPX_CFG_VEND_CAP:  /* Fall through */
+       case HPX_CFG_DVSEC:     /* Fall through */
+       default:
+               pci_warn(dev, "Encountered _HPX type 3 with unsupported config space location\n");
+               return;
+       }
+
+       pci_read_config_dword(dev, pos + reg->match_offset, &match_reg);
+
+       if ((match_reg & reg->match_mask_and) != reg->match_value)
+               return;
+
+       pci_read_config_dword(dev, pos + reg->reg_offset, &write_reg);
+       orig_value = write_reg;
+       write_reg &= reg->reg_mask_and;
+       write_reg |= reg->reg_mask_or;
+
+       /* Nothing to do when the masks leave the register unchanged */
+       if (orig_value == write_reg)
+               return;
+
+       pci_write_config_dword(dev, pos + reg->reg_offset, write_reg);
+
+       pci_dbg(dev, "Applied _HPX3 at [0x%x]: 0x%08x -> 0x%08x\n",
+               pos, orig_value, write_reg);
+}
+
+/*
+ * Apply a Type 3 register descriptor to @dev.  Nothing is done without a
+ * descriptor or for a device that is not PCIe.
+ */
+static void program_hpx_type3(struct pci_dev *dev, struct hpx_type3 *hpx)
+{
+       if (hpx && pci_is_pcie(dev))
+               program_hpx_type3_register(dev, hpx);
+}
+
 static void parse_hpx3_register(struct hpx_type3 *hpx3_reg,
                                union acpi_object *reg_fields)
 {
@@ -233,8 +584,7 @@ static void parse_hpx3_register(struct hpx_type3 *hpx3_reg,
 }
 
 static acpi_status program_type3_hpx_record(struct pci_dev *dev,
-                                          union acpi_object *record,
-                                          const struct hotplug_program_ops *hp_ops)
+                                          union acpi_object *record)
 {
        union acpi_object *fields = record->package.elements;
        u32 desc_count, expected_length, revision;
@@ -258,7 +608,7 @@ static acpi_status program_type3_hpx_record(struct pci_dev *dev,
                for (i = 0; i < desc_count; i++) {
                        reg_fields = fields + 3 + i * 14;
                        parse_hpx3_register(&hpx3, reg_fields);
-                       hp_ops->program_type3(dev, &hpx3);
+                       program_hpx_type3(dev, &hpx3);
                }
 
                break;
@@ -271,15 +621,14 @@ static acpi_status program_type3_hpx_record(struct pci_dev *dev,
        return AE_OK;
 }
 
-static acpi_status acpi_run_hpx(struct pci_dev *dev, acpi_handle handle,
-                               const struct hotplug_program_ops *hp_ops)
+static acpi_status acpi_run_hpx(struct pci_dev *dev, acpi_handle handle)
 {
        acpi_status status;
        struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
        union acpi_object *package, *record, *fields;
-       struct hpp_type0 hpx0;
-       struct hpp_type1 hpx1;
-       struct hpp_type2 hpx2;
+       struct hpx_type0 hpx0;
+       struct hpx_type1 hpx1;
+       struct hpx_type2 hpx2;
        u32 type;
        int i;
 
@@ -314,24 +663,24 @@ static acpi_status acpi_run_hpx(struct pci_dev *dev, acpi_handle handle,
                        status = decode_type0_hpx_record(record, &hpx0);
                        if (ACPI_FAILURE(status))
                                goto exit;
-                       hp_ops->program_type0(dev, &hpx0);
+                       program_hpx_type0(dev, &hpx0);
                        break;
                case 1:
                        memset(&hpx1, 0, sizeof(hpx1));
                        status = decode_type1_hpx_record(record, &hpx1);
                        if (ACPI_FAILURE(status))
                                goto exit;
-                       hp_ops->program_type1(dev, &hpx1);
+                       program_hpx_type1(dev, &hpx1);
                        break;
                case 2:
                        memset(&hpx2, 0, sizeof(hpx2));
                        status = decode_type2_hpx_record(record, &hpx2);
                        if (ACPI_FAILURE(status))
                                goto exit;
-                       hp_ops->program_type2(dev, &hpx2);
+                       program_hpx_type2(dev, &hpx2);
                        break;
                case 3:
-                       status = program_type3_hpx_record(dev, record, hp_ops);
+                       status = program_type3_hpx_record(dev, record);
                        if (ACPI_FAILURE(status))
                                goto exit;
                        break;
@@ -347,16 +696,15 @@ static acpi_status acpi_run_hpx(struct pci_dev *dev, acpi_handle handle,
        return status;
 }
 
-static acpi_status acpi_run_hpp(struct pci_dev *dev, acpi_handle handle,
-                               const struct hotplug_program_ops *hp_ops)
+static acpi_status acpi_run_hpp(struct pci_dev *dev, acpi_handle handle)
 {
        acpi_status status;
        struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
        union acpi_object *package, *fields;
-       struct hpp_type0 hpp0;
+       struct hpx_type0 hpx0;
        int i;
 
-       memset(&hpp0, 0, sizeof(hpp0));
+       memset(&hpx0, 0, sizeof(hpx0));
 
        status = acpi_evaluate_object(handle, "_HPP", NULL, &buffer);
        if (ACPI_FAILURE(status))
@@ -377,26 +725,24 @@ static acpi_status acpi_run_hpp(struct pci_dev *dev, acpi_handle handle,
                }
        }
 
-       hpp0.revision        = 1;
-       hpp0.cache_line_size = fields[0].integer.value;
-       hpp0.latency_timer   = fields[1].integer.value;
-       hpp0.enable_serr     = fields[2].integer.value;
-       hpp0.enable_perr     = fields[3].integer.value;
+       hpx0.revision        = 1;
+       hpx0.cache_line_size = fields[0].integer.value;
+       hpx0.latency_timer   = fields[1].integer.value;
+       hpx0.enable_serr     = fields[2].integer.value;
+       hpx0.enable_perr     = fields[3].integer.value;
 
-       hp_ops->program_type0(dev, &hpp0);
+       program_hpx_type0(dev, &hpx0);
 
 exit:
        kfree(buffer.pointer);
        return status;
 }
 
-/* pci_get_hp_params
+/* pci_acpi_program_hp_params
  *
  * @dev - the pci_dev for which we want parameters
- * @hpp - allocated by the caller
  */
-int pci_acpi_program_hp_params(struct pci_dev *dev,
-                              const struct hotplug_program_ops *hp_ops)
+int pci_acpi_program_hp_params(struct pci_dev *dev)
 {
        acpi_status status;
        acpi_handle handle, phandle;
@@ -419,10 +765,10 @@ int pci_acpi_program_hp_params(struct pci_dev *dev,
         * this pci dev.
         */
        while (handle) {
-               status = acpi_run_hpx(dev, handle, hp_ops);
+               status = acpi_run_hpx(dev, handle);
                if (ACPI_SUCCESS(status))
                        return 0;
-               status = acpi_run_hpp(dev, handle, hp_ops);
+               status = acpi_run_hpp(dev, handle);
                if (ACPI_SUCCESS(status))
                        return 0;
                if (acpi_is_root_bridge(handle))
index 06083b8..5fd9010 100644 (file)
@@ -38,7 +38,7 @@ struct pci_bridge_reg_behavior {
        u32 rsvd;
 };
 
-const static struct pci_bridge_reg_behavior pci_regs_behavior[] = {
+static const struct pci_bridge_reg_behavior pci_regs_behavior[] = {
        [PCI_VENDOR_ID / 4] = { .ro = ~0 },
        [PCI_COMMAND / 4] = {
                .rw = (PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
@@ -173,7 +173,7 @@ const static struct pci_bridge_reg_behavior pci_regs_behavior[] = {
        },
 };
 
-const static struct pci_bridge_reg_behavior pcie_cap_regs_behavior[] = {
+static const struct pci_bridge_reg_behavior pcie_cap_regs_behavior[] = {
        [PCI_CAP_LIST_ID / 4] = {
                /*
                 * Capability ID, Next Capability Pointer and
index 965c721..7934129 100644 (file)
@@ -464,9 +464,7 @@ static ssize_t dev_rescan_store(struct device *dev,
        }
        return count;
 }
-static struct device_attribute dev_rescan_attr = __ATTR(rescan,
-                                                       (S_IWUSR|S_IWGRP),
-                                                       NULL, dev_rescan_store);
+static DEVICE_ATTR_WO(dev_rescan);
 
 static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
                            const char *buf, size_t count)
@@ -480,13 +478,12 @@ static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
                pci_stop_and_remove_bus_device_locked(to_pci_dev(dev));
        return count;
 }
-static struct device_attribute dev_remove_attr = __ATTR_IGNORE_LOCKDEP(remove,
-                                                       (S_IWUSR|S_IWGRP),
-                                                       NULL, remove_store);
+static DEVICE_ATTR_IGNORE_LOCKDEP(remove, 0220, NULL,
+                                 remove_store);
 
-static ssize_t dev_bus_rescan_store(struct device *dev,
-                                   struct device_attribute *attr,
-                                   const char *buf, size_t count)
+static ssize_t bus_rescan_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
 {
        unsigned long val;
        struct pci_bus *bus = to_pci_bus(dev);
@@ -504,7 +501,7 @@ static ssize_t dev_bus_rescan_store(struct device *dev,
        }
        return count;
 }
-static DEVICE_ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_bus_rescan_store);
+static DEVICE_ATTR_WO(bus_rescan);
 
 #if defined(CONFIG_PM) && defined(CONFIG_ACPI)
 static ssize_t d3cold_allowed_store(struct device *dev,
@@ -551,154 +548,6 @@ static ssize_t devspec_show(struct device *dev,
 static DEVICE_ATTR_RO(devspec);
 #endif
 
-#ifdef CONFIG_PCI_IOV
-static ssize_t sriov_totalvfs_show(struct device *dev,
-                                  struct device_attribute *attr,
-                                  char *buf)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-
-       return sprintf(buf, "%u\n", pci_sriov_get_totalvfs(pdev));
-}
-
-
-static ssize_t sriov_numvfs_show(struct device *dev,
-                                struct device_attribute *attr,
-                                char *buf)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-
-       return sprintf(buf, "%u\n", pdev->sriov->num_VFs);
-}
-
-/*
- * num_vfs > 0; number of VFs to enable
- * num_vfs = 0; disable all VFs
- *
- * Note: SRIOV spec doesn't allow partial VF
- *       disable, so it's all or none.
- */
-static ssize_t sriov_numvfs_store(struct device *dev,
-                                 struct device_attribute *attr,
-                                 const char *buf, size_t count)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-       int ret;
-       u16 num_vfs;
-
-       ret = kstrtou16(buf, 0, &num_vfs);
-       if (ret < 0)
-               return ret;
-
-       if (num_vfs > pci_sriov_get_totalvfs(pdev))
-               return -ERANGE;
-
-       device_lock(&pdev->dev);
-
-       if (num_vfs == pdev->sriov->num_VFs)
-               goto exit;
-
-       /* is PF driver loaded w/callback */
-       if (!pdev->driver || !pdev->driver->sriov_configure) {
-               pci_info(pdev, "Driver doesn't support SRIOV configuration via sysfs\n");
-               ret = -ENOENT;
-               goto exit;
-       }
-
-       if (num_vfs == 0) {
-               /* disable VFs */
-               ret = pdev->driver->sriov_configure(pdev, 0);
-               goto exit;
-       }
-
-       /* enable VFs */
-       if (pdev->sriov->num_VFs) {
-               pci_warn(pdev, "%d VFs already enabled. Disable before enabling %d VFs\n",
-                        pdev->sriov->num_VFs, num_vfs);
-               ret = -EBUSY;
-               goto exit;
-       }
-
-       ret = pdev->driver->sriov_configure(pdev, num_vfs);
-       if (ret < 0)
-               goto exit;
-
-       if (ret != num_vfs)
-               pci_warn(pdev, "%d VFs requested; only %d enabled\n",
-                        num_vfs, ret);
-
-exit:
-       device_unlock(&pdev->dev);
-
-       if (ret < 0)
-               return ret;
-
-       return count;
-}
-
-static ssize_t sriov_offset_show(struct device *dev,
-                                struct device_attribute *attr,
-                                char *buf)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-
-       return sprintf(buf, "%u\n", pdev->sriov->offset);
-}
-
-static ssize_t sriov_stride_show(struct device *dev,
-                                struct device_attribute *attr,
-                                char *buf)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-
-       return sprintf(buf, "%u\n", pdev->sriov->stride);
-}
-
-static ssize_t sriov_vf_device_show(struct device *dev,
-                                   struct device_attribute *attr,
-                                   char *buf)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-
-       return sprintf(buf, "%x\n", pdev->sriov->vf_device);
-}
-
-static ssize_t sriov_drivers_autoprobe_show(struct device *dev,
-                                           struct device_attribute *attr,
-                                           char *buf)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-
-       return sprintf(buf, "%u\n", pdev->sriov->drivers_autoprobe);
-}
-
-static ssize_t sriov_drivers_autoprobe_store(struct device *dev,
-                                            struct device_attribute *attr,
-                                            const char *buf, size_t count)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-       bool drivers_autoprobe;
-
-       if (kstrtobool(buf, &drivers_autoprobe) < 0)
-               return -EINVAL;
-
-       pdev->sriov->drivers_autoprobe = drivers_autoprobe;
-
-       return count;
-}
-
-static struct device_attribute sriov_totalvfs_attr = __ATTR_RO(sriov_totalvfs);
-static struct device_attribute sriov_numvfs_attr =
-               __ATTR(sriov_numvfs, (S_IRUGO|S_IWUSR|S_IWGRP),
-                      sriov_numvfs_show, sriov_numvfs_store);
-static struct device_attribute sriov_offset_attr = __ATTR_RO(sriov_offset);
-static struct device_attribute sriov_stride_attr = __ATTR_RO(sriov_stride);
-static struct device_attribute sriov_vf_device_attr = __ATTR_RO(sriov_vf_device);
-static struct device_attribute sriov_drivers_autoprobe_attr =
-               __ATTR(sriov_drivers_autoprobe, (S_IRUGO|S_IWUSR|S_IWGRP),
-                      sriov_drivers_autoprobe_show, sriov_drivers_autoprobe_store);
-#endif /* CONFIG_PCI_IOV */
-
 static ssize_t driver_override_store(struct device *dev,
                                     struct device_attribute *attr,
                                     const char *buf, size_t count)
@@ -792,7 +641,7 @@ static struct attribute *pcie_dev_attrs[] = {
 };
 
 static struct attribute *pcibus_attrs[] = {
-       &dev_attr_rescan.attr,
+       &dev_attr_bus_rescan.attr,
        &dev_attr_cpuaffinity.attr,
        &dev_attr_cpulistaffinity.attr,
        NULL,
@@ -820,7 +669,7 @@ static ssize_t boot_vga_show(struct device *dev, struct device_attribute *attr,
                !!(pdev->resource[PCI_ROM_RESOURCE].flags &
                   IORESOURCE_ROM_SHADOW));
 }
-static struct device_attribute vga_attr = __ATTR_RO(boot_vga);
+static DEVICE_ATTR_RO(boot_vga);
 
 static ssize_t pci_read_config(struct file *filp, struct kobject *kobj,
                               struct bin_attribute *bin_attr, char *buf,
@@ -906,6 +755,11 @@ static ssize_t pci_write_config(struct file *filp, struct kobject *kobj,
        unsigned int size = count;
        loff_t init_off = off;
        u8 *data = (u8 *) buf;
+       int ret;
+
+       ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+       if (ret)
+               return ret;
 
        if (off > dev->cfg_size)
                return 0;
@@ -1085,7 +939,7 @@ void pci_create_legacy_files(struct pci_bus *b)
        sysfs_bin_attr_init(b->legacy_io);
        b->legacy_io->attr.name = "legacy_io";
        b->legacy_io->size = 0xffff;
-       b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
+       b->legacy_io->attr.mode = 0600;
        b->legacy_io->read = pci_read_legacy_io;
        b->legacy_io->write = pci_write_legacy_io;
        b->legacy_io->mmap = pci_mmap_legacy_io;
@@ -1099,7 +953,7 @@ void pci_create_legacy_files(struct pci_bus *b)
        sysfs_bin_attr_init(b->legacy_mem);
        b->legacy_mem->attr.name = "legacy_mem";
        b->legacy_mem->size = 1024*1024;
-       b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
+       b->legacy_mem->attr.mode = 0600;
        b->legacy_mem->mmap = pci_mmap_legacy_mem;
        pci_adjust_legacy_attr(b, pci_mmap_mem);
        error = device_create_bin_file(&b->dev, b->legacy_mem);
@@ -1167,6 +1021,11 @@ static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
        int bar = (unsigned long)attr->private;
        enum pci_mmap_state mmap_type;
        struct resource *res = &pdev->resource[bar];
+       int ret;
+
+       ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+       if (ret)
+               return ret;
 
        if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(res->start))
                return -EINVAL;
@@ -1243,6 +1102,12 @@ static ssize_t pci_write_resource_io(struct file *filp, struct kobject *kobj,
                                     struct bin_attribute *attr, char *buf,
                                     loff_t off, size_t count)
 {
+       int ret;
+
+       ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+       if (ret)
+               return ret;
+
        return pci_resource_io(filp, kobj, attr, buf, off, count, true);
 }
 
@@ -1306,7 +1171,7 @@ static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine)
                }
        }
        res_attr->attr.name = res_attr_name;
-       res_attr->attr.mode = S_IRUSR | S_IWUSR;
+       res_attr->attr.mode = 0600;
        res_attr->size = pci_resource_len(pdev, num);
        res_attr->private = (void *)(unsigned long)num;
        retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
@@ -1419,7 +1284,7 @@ static ssize_t pci_read_rom(struct file *filp, struct kobject *kobj,
 static const struct bin_attribute pci_config_attr = {
        .attr = {
                .name = "config",
-               .mode = S_IRUGO | S_IWUSR,
+               .mode = 0644,
        },
        .size = PCI_CFG_SPACE_SIZE,
        .read = pci_read_config,
@@ -1429,7 +1294,7 @@ static const struct bin_attribute pci_config_attr = {
 static const struct bin_attribute pcie_config_attr = {
        .attr = {
                .name = "config",
-               .mode = S_IRUGO | S_IWUSR,
+               .mode = 0644,
        },
        .size = PCI_CFG_SPACE_EXP_SIZE,
        .read = pci_read_config,
@@ -1458,7 +1323,7 @@ static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
        return count;
 }
 
-static struct device_attribute reset_attr = __ATTR(reset, 0200, NULL, reset_store);
+static DEVICE_ATTR(reset, 0200, NULL, reset_store);
 
 static int pci_create_capabilities_sysfs(struct pci_dev *dev)
 {
@@ -1468,7 +1333,7 @@ static int pci_create_capabilities_sysfs(struct pci_dev *dev)
        pcie_aspm_create_sysfs_dev_files(dev);
 
        if (dev->reset_fn) {
-               retval = device_create_file(&dev->dev, &reset_attr);
+               retval = device_create_file(&dev->dev, &dev_attr_reset);
                if (retval)
                        goto error;
        }
@@ -1511,7 +1376,7 @@ int __must_check pci_create_sysfs_dev_files(struct pci_dev *pdev)
                sysfs_bin_attr_init(attr);
                attr->size = rom_size;
                attr->attr.name = "rom";
-               attr->attr.mode = S_IRUSR | S_IWUSR;
+               attr->attr.mode = 0600;
                attr->read = pci_read_rom;
                attr->write = pci_write_rom;
                retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
@@ -1553,7 +1418,7 @@ static void pci_remove_capabilities_sysfs(struct pci_dev *dev)
        pcie_vpd_remove_sysfs_dev_files(dev);
        pcie_aspm_remove_sysfs_dev_files(dev);
        if (dev->reset_fn) {
-               device_remove_file(&dev->dev, &reset_attr);
+               device_remove_file(&dev->dev, &dev_attr_reset);
                dev->reset_fn = 0;
        }
 }
@@ -1606,7 +1471,7 @@ static int __init pci_sysfs_init(void)
 late_initcall(pci_sysfs_init);
 
 static struct attribute *pci_dev_dev_attrs[] = {
-       &vga_attr.attr,
+       &dev_attr_boot_vga.attr,
        NULL,
 };
 
@@ -1616,7 +1481,7 @@ static umode_t pci_dev_attrs_are_visible(struct kobject *kobj,
        struct device *dev = kobj_to_dev(kobj);
        struct pci_dev *pdev = to_pci_dev(dev);
 
-       if (a == &vga_attr.attr)
+       if (a == &dev_attr_boot_vga.attr)
                if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
                        return 0;
 
@@ -1624,8 +1489,8 @@ static umode_t pci_dev_attrs_are_visible(struct kobject *kobj,
 }
 
 static struct attribute *pci_dev_hp_attrs[] = {
-       &dev_remove_attr.attr,
-       &dev_rescan_attr.attr,
+       &dev_attr_remove.attr,
+       &dev_attr_dev_rescan.attr,
        NULL,
 };
 
@@ -1697,34 +1562,6 @@ static const struct attribute_group pci_dev_hp_attr_group = {
        .is_visible = pci_dev_hp_attrs_are_visible,
 };
 
-#ifdef CONFIG_PCI_IOV
-static struct attribute *sriov_dev_attrs[] = {
-       &sriov_totalvfs_attr.attr,
-       &sriov_numvfs_attr.attr,
-       &sriov_offset_attr.attr,
-       &sriov_stride_attr.attr,
-       &sriov_vf_device_attr.attr,
-       &sriov_drivers_autoprobe_attr.attr,
-       NULL,
-};
-
-static umode_t sriov_attrs_are_visible(struct kobject *kobj,
-                                      struct attribute *a, int n)
-{
-       struct device *dev = kobj_to_dev(kobj);
-
-       if (!dev_is_pf(dev))
-               return 0;
-
-       return a->mode;
-}
-
-static const struct attribute_group sriov_dev_attr_group = {
-       .attrs = sriov_dev_attrs,
-       .is_visible = sriov_attrs_are_visible,
-};
-#endif /* CONFIG_PCI_IOV */
-
 static const struct attribute_group pci_dev_attr_group = {
        .attrs = pci_dev_dev_attrs,
        .is_visible = pci_dev_attrs_are_visible,
index 1b27b5a..e7982af 100644 (file)
@@ -890,8 +890,8 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state)
 
        pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
        dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
-       if (dev->current_state != state && printk_ratelimit())
-               pci_info(dev, "Refused to change power state, currently in D%d\n",
+       if (dev->current_state != state)
+               pci_info_ratelimited(dev, "Refused to change power state, currently in D%d\n",
                         dev->current_state);
 
        /*
@@ -1443,7 +1443,7 @@ static void pci_restore_rebar_state(struct pci_dev *pdev)
                pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
                bar_idx = ctrl & PCI_REBAR_CTRL_BAR_IDX;
                res = pdev->resource + bar_idx;
-               size = order_base_2((resource_size(res) >> 20) | 1) - 1;
+               size = ilog2(resource_size(res)) - 20;
                ctrl &= ~PCI_REBAR_CTRL_BAR_SIZE;
                ctrl |= size << PCI_REBAR_CTRL_BAR_SHIFT;
                pci_write_config_dword(pdev, pos + PCI_REBAR_CTRL, ctrl);
@@ -3581,7 +3581,7 @@ int pci_enable_atomic_ops_to_root(struct pci_dev *dev, u32 cap_mask)
                }
 
                /* Ensure upstream ports don't block AtomicOps on egress */
-               if (!bridge->has_secondary_link) {
+               if (pci_pcie_type(bridge) == PCI_EXP_TYPE_UPSTREAM) {
                        pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2,
                                                   &ctl2);
                        if (ctl2 & PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK)
@@ -5923,8 +5923,19 @@ resource_size_t __weak pcibios_default_alignment(void)
        return 0;
 }
 
-#define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE
-static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0};
+/*
+ * Arches that don't want to expose struct resource to userland as-is in
+ * sysfs and /proc can implement their own pci_resource_to_user().
+ */
+void __weak pci_resource_to_user(const struct pci_dev *dev, int bar,
+                                const struct resource *rsrc,
+                                resource_size_t *start, resource_size_t *end)
+{
+       *start = rsrc->start;
+       *end = rsrc->end;
+}
+
+static char *resource_alignment_param;
 static DEFINE_SPINLOCK(resource_alignment_lock);
 
 /**
@@ -5945,7 +5956,7 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev,
 
        spin_lock(&resource_alignment_lock);
        p = resource_alignment_param;
-       if (!*p && !align)
+       if (!p || !*p)
                goto out;
        if (pci_has_flag(PCI_PROBE_ONLY)) {
                align = 0;
@@ -6109,35 +6120,41 @@ void pci_reassigndev_resource_alignment(struct pci_dev *dev)
        }
 }
 
-static ssize_t pci_set_resource_alignment_param(const char *buf, size_t count)
+static ssize_t resource_alignment_show(struct bus_type *bus, char *buf)
 {
-       if (count > RESOURCE_ALIGNMENT_PARAM_SIZE - 1)
-               count = RESOURCE_ALIGNMENT_PARAM_SIZE - 1;
-       spin_lock(&resource_alignment_lock);
-       strncpy(resource_alignment_param, buf, count);
-       resource_alignment_param[count] = '\0';
-       spin_unlock(&resource_alignment_lock);
-       return count;
-}
+       size_t count = 0;
 
-static ssize_t pci_get_resource_alignment_param(char *buf, size_t size)
-{
-       size_t count;
        spin_lock(&resource_alignment_lock);
-       count = snprintf(buf, size, "%s", resource_alignment_param);
+       if (resource_alignment_param)
+               count = scnprintf(buf, PAGE_SIZE, "%s", resource_alignment_param);
        spin_unlock(&resource_alignment_lock);
-       return count;
-}
 
-static ssize_t resource_alignment_show(struct bus_type *bus, char *buf)
-{
-       return pci_get_resource_alignment_param(buf, PAGE_SIZE);
+       /*
+        * A value set on the command line has no trailing line feed, so
+        * append one (plus the NUL terminator) after the last character
+        * when it is missing and there is room left in the page.
+        */
+       if (count >= 1 && buf[count - 1] != '\n' && count < PAGE_SIZE - 1) {
+               buf[count++] = '\n';
+               buf[count] = 0;
+       }
+
+       return count;
 }
 
 static ssize_t resource_alignment_store(struct bus_type *bus,
                                        const char *buf, size_t count)
 {
-       return pci_set_resource_alignment_param(buf, count);
+       char *param = kstrndup(buf, count, GFP_KERNEL);
+
+       if (!param)
+               return -ENOMEM;
+
+       spin_lock(&resource_alignment_lock);
+       kfree(resource_alignment_param);
+       resource_alignment_param = param;
+       spin_unlock(&resource_alignment_lock);
+       return count;
 }
 
 static BUS_ATTR_RW(resource_alignment);
@@ -6266,8 +6283,7 @@ static int __init pci_setup(char *str)
                        } else if (!strncmp(str, "cbmemsize=", 10)) {
                                pci_cardbus_mem_size = memparse(str + 10, &str);
                        } else if (!strncmp(str, "resource_alignment=", 19)) {
-                               pci_set_resource_alignment_param(str + 19,
-                                                       strlen(str + 19));
+                               resource_alignment_param = str + 19;
                        } else if (!strncmp(str, "ecrc=", 5)) {
                                pcie_ecrc_get_policy(str + 5);
                        } else if (!strncmp(str, "hpiosize=", 9)) {
@@ -6302,15 +6318,18 @@ static int __init pci_setup(char *str)
 early_param("pci", pci_setup);
 
 /*
- * 'disable_acs_redir_param' is initialized in pci_setup(), above, to point
- * to data in the __initdata section which will be freed after the init
- * sequence is complete. We can't allocate memory in pci_setup() because some
- * architectures do not have any memory allocation service available during
- * an early_param() call. So we allocate memory and copy the variable here
- * before the init section is freed.
+ * 'resource_alignment_param' and 'disable_acs_redir_param' are initialized
+ * in pci_setup(), above, to point to data in the __initdata section which
+ * will be freed after the init sequence is complete. We can't allocate memory
+ * in pci_setup() because some architectures do not have any memory allocation
+ * service available during an early_param() call. So we allocate memory and
+ * copy both variables here before the init section is freed.
+ *
  */
 static int __init pci_realloc_setup_params(void)
 {
+       resource_alignment_param = kstrdup(resource_alignment_param,
+                                          GFP_KERNEL);
        disable_acs_redir_param = kstrdup(disable_acs_redir_param, GFP_KERNEL);
 
        return 0;
index d22d1b8..3f6947e 100644 (file)
@@ -39,6 +39,11 @@ int pci_probe_reset_function(struct pci_dev *dev);
 int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
 int pci_bus_error_reset(struct pci_dev *dev);
 
+#define PCI_PM_D2_DELAY         200
+#define PCI_PM_D3_WAIT          10
+#define PCI_PM_D3COLD_WAIT      100
+#define PCI_PM_BUS_WAIT         50
+
 /**
  * struct pci_platform_pm_ops - Firmware PM callbacks
  *
@@ -84,6 +89,8 @@ void pci_power_up(struct pci_dev *dev);
 void pci_disable_enabled_device(struct pci_dev *dev);
 int pci_finish_runtime_suspend(struct pci_dev *dev);
 void pcie_clear_root_pme_status(struct pci_dev *dev);
+bool pci_check_pme_status(struct pci_dev *dev);
+void pci_pme_wakeup_bus(struct pci_bus *bus);
 int __pci_pme_wakeup(struct pci_dev *dev, void *ign);
 void pci_pme_restore(struct pci_dev *dev);
 bool pci_dev_need_resume(struct pci_dev *dev);
@@ -118,11 +125,25 @@ static inline bool pci_power_manageable(struct pci_dev *pci_dev)
        return !pci_has_subordinate(pci_dev) || pci_dev->bridge_d3;
 }
 
+static inline bool pcie_downstream_port(const struct pci_dev *dev)
+{
+       int type = pci_pcie_type(dev);
+
+       return type == PCI_EXP_TYPE_ROOT_PORT ||
+              type == PCI_EXP_TYPE_DOWNSTREAM ||
+              type == PCI_EXP_TYPE_PCIE_BRIDGE;
+}
+
 int pci_vpd_init(struct pci_dev *dev);
 void pci_vpd_release(struct pci_dev *dev);
 void pcie_vpd_create_sysfs_dev_files(struct pci_dev *dev);
 void pcie_vpd_remove_sysfs_dev_files(struct pci_dev *dev);
 
+/* PCI Virtual Channel */
+int pci_save_vc_state(struct pci_dev *dev);
+void pci_restore_vc_state(struct pci_dev *dev);
+void pci_allocate_vc_save_buffers(struct pci_dev *dev);
+
 /* PCI /proc functions */
 #ifdef CONFIG_PROC_FS
 int pci_proc_attach_device(struct pci_dev *dev);
@@ -196,6 +217,9 @@ extern const struct attribute_group *pcibus_groups[];
 extern const struct device_type pci_dev_type;
 extern const struct attribute_group *pci_bus_groups[];
 
+extern unsigned long pci_hotplug_io_size;
+extern unsigned long pci_hotplug_mem_size;
+extern unsigned long pci_hotplug_bus_size;
 
 /**
  * pci_match_one_device - Tell if a PCI device structure has a matching
@@ -236,6 +260,9 @@ enum pci_bar_type {
        pci_bar_mem64,          /* A 64-bit memory BAR */
 };
 
+struct device *pci_get_host_bridge_device(struct pci_dev *dev);
+void pci_put_host_bridge_device(struct device *dev);
+
 int pci_configure_extended_tags(struct pci_dev *dev, void *ign);
 bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl,
                                int crs_timeout);
@@ -256,6 +283,8 @@ bool pci_bus_clip_resource(struct pci_dev *dev, int idx);
 
 void pci_reassigndev_resource_alignment(struct pci_dev *dev);
 void pci_disable_bridge_window(struct pci_dev *dev);
+struct pci_bus *pci_bus_get(struct pci_bus *bus);
+void pci_bus_put(struct pci_bus *bus);
 
 /* PCIe link information */
 #define PCIE_SPEED2STR(speed) \
@@ -279,6 +308,7 @@ u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed *speed,
                           enum pcie_link_width *width);
 void __pcie_print_link_status(struct pci_dev *dev, bool verbose);
 void pcie_report_downtraining(struct pci_dev *dev);
+void pcie_update_link_speed(struct pci_bus *bus, u16 link_status);
 
 /* Single Root I/O Virtualization */
 struct pci_sriov {
@@ -418,11 +448,12 @@ static inline void pci_restore_dpc_state(struct pci_dev *dev) {}
 #endif
 
 #ifdef CONFIG_PCI_ATS
+/* Address Translation Service */
+void pci_ats_init(struct pci_dev *dev);
 void pci_restore_ats_state(struct pci_dev *dev);
 #else
-static inline void pci_restore_ats_state(struct pci_dev *dev)
-{
-}
+static inline void pci_ats_init(struct pci_dev *d) { }
+static inline void pci_restore_ats_state(struct pci_dev *dev) { }
 #endif /* CONFIG_PCI_ATS */
 
 #ifdef CONFIG_PCI_IOV
@@ -433,7 +464,7 @@ void pci_iov_update_resource(struct pci_dev *dev, int resno);
 resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno);
 void pci_restore_iov_state(struct pci_dev *dev);
 int pci_iov_bus_range(struct pci_bus *bus);
-
+extern const struct attribute_group sriov_dev_attr_group;
 #else
 static inline int pci_iov_init(struct pci_dev *dev)
 {
@@ -518,10 +549,21 @@ static inline void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev) { }
 static inline void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev) { }
 #endif
 
+#ifdef CONFIG_PCIE_ECRC
+void pcie_set_ecrc_checking(struct pci_dev *dev);
+void pcie_ecrc_get_policy(char *str);
+#else
+static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { }
+static inline void pcie_ecrc_get_policy(char *str) { }
+#endif
+
 #ifdef CONFIG_PCIE_PTM
 void pci_ptm_init(struct pci_dev *dev);
+int pci_enable_ptm(struct pci_dev *dev, u8 *granularity);
 #else
 static inline void pci_ptm_init(struct pci_dev *dev) { }
+static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
+{ return -EINVAL; }
 #endif
 
 struct pci_dev_reset_methods {
@@ -558,6 +600,10 @@ struct device_node;
 int of_pci_parse_bus_range(struct device_node *node, struct resource *res);
 int of_get_pci_domain_nr(struct device_node *node);
 int of_pci_get_max_link_speed(struct device_node *node);
+void pci_set_of_node(struct pci_dev *dev);
+void pci_release_of_node(struct pci_dev *dev);
+void pci_set_bus_of_node(struct pci_bus *bus);
+void pci_release_bus_of_node(struct pci_bus *bus);
 
 #else
 static inline int
@@ -577,6 +623,11 @@ of_pci_get_max_link_speed(struct device_node *node)
 {
        return -EINVAL;
 }
+
+static inline void pci_set_of_node(struct pci_dev *dev) { }
+static inline void pci_release_of_node(struct pci_dev *dev) { }
+static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
+static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
 #endif /* CONFIG_OF */
 
 #if defined(CONFIG_OF_ADDRESS)
@@ -607,4 +658,13 @@ static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { }
 static inline void pci_aer_clear_device_status(struct pci_dev *dev) { }
 #endif
 
+#ifdef CONFIG_ACPI
+int pci_acpi_program_hp_params(struct pci_dev *dev);
+#else
+static inline int pci_acpi_program_hp_params(struct pci_dev *dev)
+{
+       return -ENODEV;
+}
+#endif
+
 #endif /* DRIVERS_PCI_H */
index 464f8f9..652ef23 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/slab.h>
 #include <linux/jiffies.h>
 #include <linux/delay.h>
-#include <linux/pci-aspm.h>
 #include "../pci.h"
 
 #ifdef MODULE_PARAM_PREFIX
@@ -913,10 +912,10 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
 
        /*
         * We allocate pcie_link_state for the component on the upstream
-        * end of a Link, so there's nothing to do unless this device has a
-        * Link on its secondary side.
+        * end of a Link, so there's nothing to do unless this device is a
+        * downstream port.
         */
-       if (!pdev->has_secondary_link)
+       if (!pcie_downstream_port(pdev))
                return;
 
        /* VIA has a strange chipset, root port is under a bridge */
@@ -1070,7 +1069,7 @@ static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
        if (!pci_is_pcie(pdev))
                return 0;
 
-       if (pdev->has_secondary_link)
+       if (pcie_downstream_port(pdev))
                parent = pdev;
        if (!parent || !parent->link_state)
                return -EINVAL;
index 773197a..b0e6048 100644 (file)
@@ -166,7 +166,7 @@ static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
        driver = pcie_port_find_service(dev, service);
        if (driver && driver->reset_link) {
                status = driver->reset_link(dev);
-       } else if (dev->has_secondary_link) {
+       } else if (pcie_downstream_port(dev)) {
                status = default_reset_link(dev);
        } else {
                pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
index dbeeb38..3d5271a 100644 (file)
@@ -1426,26 +1426,38 @@ void set_pcie_port_type(struct pci_dev *pdev)
        pci_read_config_word(pdev, pos + PCI_EXP_DEVCAP, &reg16);
        pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD;
 
+       parent = pci_upstream_bridge(pdev);
+       if (!parent)
+               return;
+
        /*
-        * A Root Port or a PCI-to-PCIe bridge is always the upstream end
-        * of a Link.  No PCIe component has two Links.  Two Links are
-        * connected by a Switch that has a Port on each Link and internal
-        * logic to connect the two Ports.
+        * Some systems do not identify their upstream/downstream ports
+        * correctly so detect impossible configurations here and correct
+        * the port type accordingly.
         */
        type = pci_pcie_type(pdev);
-       if (type == PCI_EXP_TYPE_ROOT_PORT ||
-           type == PCI_EXP_TYPE_PCIE_BRIDGE)
-               pdev->has_secondary_link = 1;
-       else if (type == PCI_EXP_TYPE_UPSTREAM ||
-                type == PCI_EXP_TYPE_DOWNSTREAM) {
-               parent = pci_upstream_bridge(pdev);
-
+       if (type == PCI_EXP_TYPE_DOWNSTREAM) {
                /*
-                * Usually there's an upstream device (Root Port or Switch
-                * Downstream Port), but we can't assume one exists.
+                * If pdev claims to be a downstream port but the parent
+                * device is also a downstream port, assume pdev is
+                * actually an upstream port.
                 */
-               if (parent && !parent->has_secondary_link)
-                       pdev->has_secondary_link = 1;
+               if (pcie_downstream_port(parent)) {
+                       pci_info(pdev, "claims to be downstream port but is acting as upstream port, correcting type\n");
+                       pdev->pcie_flags_reg &= ~PCI_EXP_FLAGS_TYPE;
+                       pdev->pcie_flags_reg |= PCI_EXP_TYPE_UPSTREAM;
+               }
+       } else if (type == PCI_EXP_TYPE_UPSTREAM) {
+               /*
+                * If pdev claims to be an upstream port but the parent
+                * device is also an upstream port, assume pdev is
+                * actually a downstream port.
+                */
+               if (pci_pcie_type(parent) == PCI_EXP_TYPE_UPSTREAM) {
+                       pci_info(pdev, "claims to be upstream port but is acting as downstream port, correcting type\n");
+                       pdev->pcie_flags_reg &= ~PCI_EXP_FLAGS_TYPE;
+                       pdev->pcie_flags_reg |= PCI_EXP_TYPE_DOWNSTREAM;
+               }
        }
 }
 
@@ -1915,275 +1927,6 @@ static void pci_configure_mps(struct pci_dev *dev)
                 p_mps, mps, mpss);
 }
 
-static struct hpp_type0 pci_default_type0 = {
-       .revision = 1,
-       .cache_line_size = 8,
-       .latency_timer = 0x40,
-       .enable_serr = 0,
-       .enable_perr = 0,
-};
-
-static void program_hpp_type0(struct pci_dev *dev, struct hpp_type0 *hpp)
-{
-       u16 pci_cmd, pci_bctl;
-
-       if (!hpp)
-               hpp = &pci_default_type0;
-
-       if (hpp->revision > 1) {
-               pci_warn(dev, "PCI settings rev %d not supported; using defaults\n",
-                        hpp->revision);
-               hpp = &pci_default_type0;
-       }
-
-       pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, hpp->cache_line_size);
-       pci_write_config_byte(dev, PCI_LATENCY_TIMER, hpp->latency_timer);
-       pci_read_config_word(dev, PCI_COMMAND, &pci_cmd);
-       if (hpp->enable_serr)
-               pci_cmd |= PCI_COMMAND_SERR;
-       if (hpp->enable_perr)
-               pci_cmd |= PCI_COMMAND_PARITY;
-       pci_write_config_word(dev, PCI_COMMAND, pci_cmd);
-
-       /* Program bridge control value */
-       if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
-               pci_write_config_byte(dev, PCI_SEC_LATENCY_TIMER,
-                                     hpp->latency_timer);
-               pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &pci_bctl);
-               if (hpp->enable_perr)
-                       pci_bctl |= PCI_BRIDGE_CTL_PARITY;
-               pci_write_config_word(dev, PCI_BRIDGE_CONTROL, pci_bctl);
-       }
-}
-
-static void program_hpp_type1(struct pci_dev *dev, struct hpp_type1 *hpp)
-{
-       int pos;
-
-       if (!hpp)
-               return;
-
-       pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
-       if (!pos)
-               return;
-
-       pci_warn(dev, "PCI-X settings not supported\n");
-}
-
-static bool pcie_root_rcb_set(struct pci_dev *dev)
-{
-       struct pci_dev *rp = pcie_find_root_port(dev);
-       u16 lnkctl;
-
-       if (!rp)
-               return false;
-
-       pcie_capability_read_word(rp, PCI_EXP_LNKCTL, &lnkctl);
-       if (lnkctl & PCI_EXP_LNKCTL_RCB)
-               return true;
-
-       return false;
-}
-
-static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp)
-{
-       int pos;
-       u32 reg32;
-
-       if (!hpp)
-               return;
-
-       if (!pci_is_pcie(dev))
-               return;
-
-       if (hpp->revision > 1) {
-               pci_warn(dev, "PCIe settings rev %d not supported\n",
-                        hpp->revision);
-               return;
-       }
-
-       /*
-        * Don't allow _HPX to change MPS or MRRS settings.  We manage
-        * those to make sure they're consistent with the rest of the
-        * platform.
-        */
-       hpp->pci_exp_devctl_and |= PCI_EXP_DEVCTL_PAYLOAD |
-                                   PCI_EXP_DEVCTL_READRQ;
-       hpp->pci_exp_devctl_or &= ~(PCI_EXP_DEVCTL_PAYLOAD |
-                                   PCI_EXP_DEVCTL_READRQ);
-
-       /* Initialize Device Control Register */
-       pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
-                       ~hpp->pci_exp_devctl_and, hpp->pci_exp_devctl_or);
-
-       /* Initialize Link Control Register */
-       if (pcie_cap_has_lnkctl(dev)) {
-
-               /*
-                * If the Root Port supports Read Completion Boundary of
-                * 128, set RCB to 128.  Otherwise, clear it.
-                */
-               hpp->pci_exp_lnkctl_and |= PCI_EXP_LNKCTL_RCB;
-               hpp->pci_exp_lnkctl_or &= ~PCI_EXP_LNKCTL_RCB;
-               if (pcie_root_rcb_set(dev))
-                       hpp->pci_exp_lnkctl_or |= PCI_EXP_LNKCTL_RCB;
-
-               pcie_capability_clear_and_set_word(dev, PCI_EXP_LNKCTL,
-                       ~hpp->pci_exp_lnkctl_and, hpp->pci_exp_lnkctl_or);
-       }
-
-       /* Find Advanced Error Reporting Enhanced Capability */
-       pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
-       if (!pos)
-               return;
-
-       /* Initialize Uncorrectable Error Mask Register */
-       pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &reg32);
-       reg32 = (reg32 & hpp->unc_err_mask_and) | hpp->unc_err_mask_or;
-       pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, reg32);
-
-       /* Initialize Uncorrectable Error Severity Register */
-       pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &reg32);
-       reg32 = (reg32 & hpp->unc_err_sever_and) | hpp->unc_err_sever_or;
-       pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, reg32);
-
-       /* Initialize Correctable Error Mask Register */
-       pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &reg32);
-       reg32 = (reg32 & hpp->cor_err_mask_and) | hpp->cor_err_mask_or;
-       pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg32);
-
-       /* Initialize Advanced Error Capabilities and Control Register */
-       pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32);
-       reg32 = (reg32 & hpp->adv_err_cap_and) | hpp->adv_err_cap_or;
-
-       /* Don't enable ECRC generation or checking if unsupported */
-       if (!(reg32 & PCI_ERR_CAP_ECRC_GENC))
-               reg32 &= ~PCI_ERR_CAP_ECRC_GENE;
-       if (!(reg32 & PCI_ERR_CAP_ECRC_CHKC))
-               reg32 &= ~PCI_ERR_CAP_ECRC_CHKE;
-       pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32);
-
-       /*
-        * FIXME: The following two registers are not supported yet.
-        *
-        *   o Secondary Uncorrectable Error Severity Register
-        *   o Secondary Uncorrectable Error Mask Register
-        */
-}
-
-static u16 hpx3_device_type(struct pci_dev *dev)
-{
-       u16 pcie_type = pci_pcie_type(dev);
-       const int pcie_to_hpx3_type[] = {
-               [PCI_EXP_TYPE_ENDPOINT]    = HPX_TYPE_ENDPOINT,
-               [PCI_EXP_TYPE_LEG_END]     = HPX_TYPE_LEG_END,
-               [PCI_EXP_TYPE_RC_END]      = HPX_TYPE_RC_END,
-               [PCI_EXP_TYPE_RC_EC]       = HPX_TYPE_RC_EC,
-               [PCI_EXP_TYPE_ROOT_PORT]   = HPX_TYPE_ROOT_PORT,
-               [PCI_EXP_TYPE_UPSTREAM]    = HPX_TYPE_UPSTREAM,
-               [PCI_EXP_TYPE_DOWNSTREAM]  = HPX_TYPE_DOWNSTREAM,
-               [PCI_EXP_TYPE_PCI_BRIDGE]  = HPX_TYPE_PCI_BRIDGE,
-               [PCI_EXP_TYPE_PCIE_BRIDGE] = HPX_TYPE_PCIE_BRIDGE,
-       };
-
-       if (pcie_type >= ARRAY_SIZE(pcie_to_hpx3_type))
-               return 0;
-
-       return pcie_to_hpx3_type[pcie_type];
-}
-
-static u8 hpx3_function_type(struct pci_dev *dev)
-{
-       if (dev->is_virtfn)
-               return HPX_FN_SRIOV_VIRT;
-       else if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV) > 0)
-               return HPX_FN_SRIOV_PHYS;
-       else
-               return HPX_FN_NORMAL;
-}
-
-static bool hpx3_cap_ver_matches(u8 pcie_cap_id, u8 hpx3_cap_id)
-{
-       u8 cap_ver = hpx3_cap_id & 0xf;
-
-       if ((hpx3_cap_id & BIT(4)) && cap_ver >= pcie_cap_id)
-               return true;
-       else if (cap_ver == pcie_cap_id)
-               return true;
-
-       return false;
-}
-
-static void program_hpx_type3_register(struct pci_dev *dev,
-                                      const struct hpx_type3 *reg)
-{
-       u32 match_reg, write_reg, header, orig_value;
-       u16 pos;
-
-       if (!(hpx3_device_type(dev) & reg->device_type))
-               return;
-
-       if (!(hpx3_function_type(dev) & reg->function_type))
-               return;
-
-       switch (reg->config_space_location) {
-       case HPX_CFG_PCICFG:
-               pos = 0;
-               break;
-       case HPX_CFG_PCIE_CAP:
-               pos = pci_find_capability(dev, reg->pci_exp_cap_id);
-               if (pos == 0)
-                       return;
-
-               break;
-       case HPX_CFG_PCIE_CAP_EXT:
-               pos = pci_find_ext_capability(dev, reg->pci_exp_cap_id);
-               if (pos == 0)
-                       return;
-
-               pci_read_config_dword(dev, pos, &header);
-               if (!hpx3_cap_ver_matches(PCI_EXT_CAP_VER(header),
-                                         reg->pci_exp_cap_ver))
-                       return;
-
-               break;
-       case HPX_CFG_VEND_CAP:  /* Fall through */
-       case HPX_CFG_DVSEC:     /* Fall through */
-       default:
-               pci_warn(dev, "Encountered _HPX type 3 with unsupported config space location");
-               return;
-       }
-
-       pci_read_config_dword(dev, pos + reg->match_offset, &match_reg);
-
-       if ((match_reg & reg->match_mask_and) != reg->match_value)
-               return;
-
-       pci_read_config_dword(dev, pos + reg->reg_offset, &write_reg);
-       orig_value = write_reg;
-       write_reg &= reg->reg_mask_and;
-       write_reg |= reg->reg_mask_or;
-
-       if (orig_value == write_reg)
-               return;
-
-       pci_write_config_dword(dev, pos + reg->reg_offset, write_reg);
-
-       pci_dbg(dev, "Applied _HPX3 at [0x%x]: 0x%08x -> 0x%08x",
-               pos, orig_value, write_reg);
-}
-
-static void program_hpx_type3(struct pci_dev *dev, struct hpx_type3 *hpx3)
-{
-       if (!hpx3)
-               return;
-
-       if (!pci_is_pcie(dev))
-               return;
-
-       program_hpx_type3_register(dev, hpx3);
-}
-
 int pci_configure_extended_tags(struct pci_dev *dev, void *ign)
 {
        struct pci_host_bridge *host;
@@ -2364,13 +2107,6 @@ static void pci_configure_serr(struct pci_dev *dev)
 
 static void pci_configure_device(struct pci_dev *dev)
 {
-       static const struct hotplug_program_ops hp_ops = {
-               .program_type0 = program_hpp_type0,
-               .program_type1 = program_hpp_type1,
-               .program_type2 = program_hpp_type2,
-               .program_type3 = program_hpx_type3,
-       };
-
        pci_configure_mps(dev);
        pci_configure_extended_tags(dev, NULL);
        pci_configure_relaxed_ordering(dev);
@@ -2378,7 +2114,7 @@ static void pci_configure_device(struct pci_dev *dev)
        pci_configure_eetlp_prefix(dev);
        pci_configure_serr(dev);
 
-       pci_acpi_program_hp_params(dev, &hp_ops);
+       pci_acpi_program_hp_params(dev);
 }
 
 static void pci_release_capabilities(struct pci_dev *dev)
@@ -2759,12 +2495,8 @@ static int only_one_child(struct pci_bus *bus)
         * A PCIe Downstream Port normally leads to a Link with only Device
         * 0 on it (PCIe spec r3.1, sec 7.3.1).  As an optimization, scan
         * only for Device 0 in that situation.
-        *
-        * Checking has_secondary_link is a hack to identify Downstream
-        * Ports because sometimes Switches are configured such that the
-        * PCIe Port Type labels are backwards.
         */
-       if (bridge && pci_is_pcie(bridge) && bridge->has_secondary_link)
+       if (bridge && pci_is_pcie(bridge) && pcie_downstream_port(bridge))
                return 1;
 
        return 0;
index fe7fe67..5495537 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/seq_file.h>
 #include <linux/capability.h>
 #include <linux/uaccess.h>
+#include <linux/security.h>
 #include <asm/byteorder.h>
 #include "pci.h"
 
@@ -115,7 +116,11 @@ static ssize_t proc_bus_pci_write(struct file *file, const char __user *buf,
        struct pci_dev *dev = PDE_DATA(ino);
        int pos = *ppos;
        int size = dev->cfg_size;
-       int cnt;
+       int cnt, ret;
+
+       ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+       if (ret)
+               return ret;
 
        if (pos >= size)
                return 0;
@@ -196,6 +201,10 @@ static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd,
 #endif /* HAVE_PCI_MMAP */
        int ret = 0;
 
+       ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+       if (ret)
+               return ret;
+
        switch (cmd) {
        case PCIIOC_CONTROLLER:
                ret = pci_domain_nr(dev->bus);
@@ -238,7 +247,8 @@ static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma)
        struct pci_filp_private *fpriv = file->private_data;
        int i, ret, write_combine = 0, res_bit = IORESOURCE_MEM;
 
-       if (!capable(CAP_SYS_RAWIO))
+       if (!capable(CAP_SYS_RAWIO) ||
+           security_locked_down(LOCKDOWN_PCI_ACCESS))
                return -EPERM;
 
        if (fpriv->mmap_state == pci_mmap_io) {
index 44c4ae1..320255e 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/delay.h>
 #include <linux/acpi.h>
 #include <linux/dmi.h>
-#include <linux/pci-aspm.h>
 #include <linux/ioport.h>
 #include <linux/sched.h>
 #include <linux/ktime.h>
@@ -2593,6 +2592,59 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA,
                        nvenet_msi_disable);
 
 /*
+ * PCIe spec r4.0 sec 7.7.1.2 and sec 7.7.2.2 say that if MSI/MSI-X is enabled,
+ * then the device can't use INTx interrupts. Tegra's PCIe root ports don't
+ * generate MSI interrupts for PME and AER events; instead only INTx interrupts
+ * are generated. Though Tegra's PCIe root ports can generate MSI interrupts
+ * for other events, since the PCIe specification doesn't support using a mix of
+ * INTx and MSI/MSI-X, it is required to disable MSI interrupts to avoid port
+ * service drivers registering their respective ISRs for MSIs.
+ */
+static void pci_quirk_nvidia_tegra_disable_rp_msi(struct pci_dev *dev)
+{
+       dev->no_msi = 1;
+}
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x1ad0,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x1ad1,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x1ad2,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0bf0,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0bf1,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1c,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1d,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e12,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e13,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0fae,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0faf,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x10e5,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x10e6,
+                             PCI_CLASS_BRIDGE_PCI, 8,
+                             pci_quirk_nvidia_tegra_disable_rp_msi);
+
+/*
  * Some versions of the MCP55 bridge from Nvidia have a legacy IRQ routing
  * config register.  This register controls the routing of legacy
  * interrupts from devices that route through the MCP55.  If this register
@@ -2925,6 +2977,24 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0x10a1,
                        quirk_msi_intx_disable_qca_bug);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0xe091,
                        quirk_msi_intx_disable_qca_bug);
+
+/*
+ * Amazon's Annapurna Labs 1c36:0031 Root Ports don't support MSI-X, so it
+ * should be disabled on platforms where the device (mistakenly) advertises it.
+ *
+ * Notice that this quirk also disables MSI (which may work, but hasn't been
+ * tested), since currently there is no standard way to disable only MSI-X.
+ *
+ * The 0031 device id is reused for other non Root Port device types,
+ * therefore the quirk is registered for the PCI_CLASS_BRIDGE_PCI class.
+ */
+static void quirk_al_msi_disable(struct pci_dev *dev)
+{
+       dev->no_msi = 1;
+       pci_warn(dev, "Disabling MSI/MSI-X\n");
+}
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031,
+                             PCI_CLASS_BRIDGE_PCI, 8, quirk_al_msi_disable);
 #endif /* CONFIG_PCI_MSI */
 
 /*
@@ -4366,6 +4436,24 @@ static int pci_quirk_qcom_rp_acs(struct pci_dev *dev, u16 acs_flags)
        return ret;
 }
 
+static int pci_quirk_al_acs(struct pci_dev *dev, u16 acs_flags)
+{
+       if (pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT)
+               return -ENOTTY;
+
+       /*
+        * Amazon's Annapurna Labs root ports don't include an ACS capability,
+        * but do include ACS-like functionality. The hardware doesn't support
+        * peer-to-peer transactions via the root port and each has a unique
+        * segment number.
+        *
+        * Additionally, the root ports cannot send traffic to each other.
+        */
+       acs_flags &= ~(PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+
+       return acs_flags ? 0 : 1;
+}
+
 /*
  * Sunrise Point PCH root ports implement ACS, but unfortunately as shown in
  * the datasheet (Intel 100 Series Chipset Family PCH Datasheet, Vol. 2,
@@ -4466,6 +4554,19 @@ static int pci_quirk_mf_endpoint_acs(struct pci_dev *dev, u16 acs_flags)
        return acs_flags ? 0 : 1;
 }
 
+static int pci_quirk_brcm_acs(struct pci_dev *dev, u16 acs_flags)
+{
+       /*
+        * iProc PAXB Root Ports don't advertise an ACS capability, but
+        * they do not allow peer-to-peer transactions between Root Ports.
+        * Allow each Root Port to be in a separate IOMMU group by masking
+        * SV/RR/CR/UF bits.
+        */
+       acs_flags &= ~(PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+
+       return acs_flags ? 0 : 1;
+}
+
 static const struct pci_dev_acs_enabled {
        u16 vendor;
        u16 device;
@@ -4559,6 +4660,9 @@ static const struct pci_dev_acs_enabled {
        { PCI_VENDOR_ID_AMPERE, 0xE00A, pci_quirk_xgene_acs },
        { PCI_VENDOR_ID_AMPERE, 0xE00B, pci_quirk_xgene_acs },
        { PCI_VENDOR_ID_AMPERE, 0xE00C, pci_quirk_xgene_acs },
+       { PCI_VENDOR_ID_BROADCOM, 0xD714, pci_quirk_brcm_acs },
+       /* Amazon Annapurna Labs */
+       { PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031, pci_quirk_al_acs },
        { 0 }
 };
 
index 7f4e658..bade140 100644 (file)
@@ -15,7 +15,6 @@
 #include "pci.h"
 
 DECLARE_RWSEM(pci_bus_sem);
-EXPORT_SYMBOL_GPL(pci_bus_sem);
 
 /*
  * pci_for_each_dma_alias - Iterate over DMA aliases for a device
index 79b1fa6..e7dbe21 100644 (file)
@@ -1662,8 +1662,8 @@ static int iov_resources_unassigned(struct pci_dev *dev, void *data)
        int i;
        bool *unassigned = data;
 
-       for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++) {
-               struct resource *r = &dev->resource[i];
+       for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+               struct resource *r = &dev->resource[i + PCI_IOV_RESOURCES];
                struct pci_bus_region region;
 
                /* Not assigned or rejected by kernel? */
index d96626c..31e3955 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <linux/errno.h>
 #include <linux/pci.h>
+#include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
 #include "pci.h"
@@ -90,7 +91,8 @@ SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn,
        u32 dword;
        int err = 0;
 
-       if (!capable(CAP_SYS_ADMIN))
+       if (!capable(CAP_SYS_ADMIN) ||
+           security_locked_down(LOCKDOWN_PCI_ACCESS))
                return -EPERM;
 
        dev = pci_get_domain_bus_and_slot(0, bus, dfn);
index 5acd9c0..5486f87 100644 (file)
@@ -13,6 +13,8 @@
 #include <linux/pci_regs.h>
 #include <linux/types.h>
 
+#include "pci.h"
+
 /**
  * pci_vc_save_restore_dwords - Save or restore a series of dwords
  * @dev: device
@@ -105,7 +107,7 @@ static void pci_vc_enable(struct pci_dev *dev, int pos, int res)
        struct pci_dev *link = NULL;
 
        /* Enable VCs from the downstream device */
-       if (!dev->has_secondary_link)
+       if (!pci_is_pcie(dev) || !pcie_downstream_port(dev))
                return;
 
        ctrl_pos = pos + PCI_VC_RES_CTRL + (res * PCI_CAP_VC_PER_VC_SIZEOF);
@@ -409,7 +411,6 @@ void pci_restore_vc_state(struct pci_dev *dev)
  * For each type of VC capability, VC/VC9/MFVC, find the capability, size
  * it, and allocate a buffer for save/restore.
  */
-
 void pci_allocate_vc_save_buffers(struct pci_dev *dev)
 {
        int i;
index 4963c2e..7915d10 100644 (file)
@@ -571,6 +571,12 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, quirk_blacklist_vpd);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, PCI_ANY_ID,
                quirk_blacklist_vpd);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_QLOGIC, 0x2261, quirk_blacklist_vpd);
+/*
+ * The Amazon Annapurna Labs 0x0031 device id is reused for other non Root Port
+ * device types, so the quirk is registered for the PCI_CLASS_BRIDGE_PCI class.
+ */
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031,
+                             PCI_CLASS_BRIDGE_PCI, 8, quirk_blacklist_vpd);
 
 /*
  * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the
index abd0299..629359f 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/pci.h>
 #include <linux/ioport.h>
 #include <linux/io.h>
+#include <linux/security.h>
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 
@@ -1575,6 +1576,10 @@ static ssize_t pccard_store_cis(struct file *filp, struct kobject *kobj,
        struct pcmcia_socket *s;
        int error;
 
+       error = security_locked_down(LOCKDOWN_PCMCIA_CIS);
+       if (error)
+               return error;
+
        s = to_socket(container_of(kobj, struct device, kobj));
 
        if (off)
index 6601ad0..ead06c6 100644 (file)
@@ -231,8 +231,9 @@ static void cpcap_usb_detect(struct work_struct *work)
                        goto out_err;
 
                error = regmap_update_bits(ddata->reg, CPCAP_REG_USBC3,
-                                          CPCAP_BIT_VBUSSTBY_EN,
-                                          CPCAP_BIT_VBUSSTBY_EN);
+                                          CPCAP_BIT_VBUSSTBY_EN |
+                                          CPCAP_BIT_VBUSEN_SPI,
+                                          CPCAP_BIT_VBUSEN_SPI);
                if (error)
                        goto out_err;
 
@@ -240,7 +241,8 @@ static void cpcap_usb_detect(struct work_struct *work)
        }
 
        error = regmap_update_bits(ddata->reg, CPCAP_REG_USBC3,
-                                  CPCAP_BIT_VBUSSTBY_EN, 0);
+                                  CPCAP_BIT_VBUSSTBY_EN |
+                                  CPCAP_BIT_VBUSEN_SPI, 0);
        if (error)
                goto out_err;
 
index e516967..f9817c3 100644 (file)
@@ -7,3 +7,10 @@ config PHY_TEGRA_XUSB
 
          To compile this driver as a module, choose M here: the module will
          be called phy-tegra-xusb.
+
+config PHY_TEGRA194_P2U
+       tristate "NVIDIA Tegra194 PIPE2UPHY PHY driver"
+       depends on ARCH_TEGRA_194_SOC || COMPILE_TEST
+       select GENERIC_PHY
+       help
+         Enable this to support the P2U (PIPE to UPHY) that is part of Tegra 19x SOCs.
index 64ccaea..320dd38 100644 (file)
@@ -6,3 +6,4 @@ phy-tegra-xusb-$(CONFIG_ARCH_TEGRA_124_SOC) += xusb-tegra124.o
 phy-tegra-xusb-$(CONFIG_ARCH_TEGRA_132_SOC) += xusb-tegra124.o
 phy-tegra-xusb-$(CONFIG_ARCH_TEGRA_210_SOC) += xusb-tegra210.o
 phy-tegra-xusb-$(CONFIG_ARCH_TEGRA_186_SOC) += xusb-tegra186.o
+obj-$(CONFIG_PHY_TEGRA194_P2U) += phy-tegra194-p2u.o
diff --git a/drivers/phy/tegra/phy-tegra194-p2u.c b/drivers/phy/tegra/phy-tegra194-p2u.c
new file mode 100644 (file)
index 0000000..7042bed
--- /dev/null
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * P2U (PIPE to UPHY) driver for Tegra T194 SoC
+ *
+ * Copyright (C) 2019 NVIDIA Corporation.
+ *
+ * Author: Vidya Sagar <vidyas@nvidia.com>
+ */
+
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/phy/phy.h>
+
+#define P2U_PERIODIC_EQ_CTRL_GEN3      0xc0
+#define P2U_PERIODIC_EQ_CTRL_GEN3_PERIODIC_EQ_EN               BIT(0)
+#define P2U_PERIODIC_EQ_CTRL_GEN3_INIT_PRESET_EQ_TRAIN_EN      BIT(1)
+#define P2U_PERIODIC_EQ_CTRL_GEN4      0xc4
+#define P2U_PERIODIC_EQ_CTRL_GEN4_INIT_PRESET_EQ_TRAIN_EN      BIT(1)
+
+#define P2U_RX_DEBOUNCE_TIME                           0xa4
+#define P2U_RX_DEBOUNCE_TIME_DEBOUNCE_TIMER_MASK       0xffff
+#define P2U_RX_DEBOUNCE_TIME_DEBOUNCE_TIMER_VAL                160
+
+struct tegra_p2u {
+       void __iomem *base;
+};
+
+static inline void p2u_writel(struct tegra_p2u *phy, const u32 value,
+                             const u32 reg)
+{
+       writel_relaxed(value, phy->base + reg);
+}
+
+static inline u32 p2u_readl(struct tegra_p2u *phy, const u32 reg)
+{
+       return readl_relaxed(phy->base + reg);
+}
+
+static int tegra_p2u_power_on(struct phy *x)
+{
+       struct tegra_p2u *phy = phy_get_drvdata(x);
+       u32 val;
+
+       val = p2u_readl(phy, P2U_PERIODIC_EQ_CTRL_GEN3);
+       val &= ~P2U_PERIODIC_EQ_CTRL_GEN3_PERIODIC_EQ_EN;
+       val |= P2U_PERIODIC_EQ_CTRL_GEN3_INIT_PRESET_EQ_TRAIN_EN;
+       p2u_writel(phy, val, P2U_PERIODIC_EQ_CTRL_GEN3);
+
+       val = p2u_readl(phy, P2U_PERIODIC_EQ_CTRL_GEN4);
+       val |= P2U_PERIODIC_EQ_CTRL_GEN4_INIT_PRESET_EQ_TRAIN_EN;
+       p2u_writel(phy, val, P2U_PERIODIC_EQ_CTRL_GEN4);
+
+       val = p2u_readl(phy, P2U_RX_DEBOUNCE_TIME);
+       val &= ~P2U_RX_DEBOUNCE_TIME_DEBOUNCE_TIMER_MASK;
+       val |= P2U_RX_DEBOUNCE_TIME_DEBOUNCE_TIMER_VAL;
+       p2u_writel(phy, val, P2U_RX_DEBOUNCE_TIME);
+
+       return 0;
+}
+
+static const struct phy_ops ops = {
+       .power_on = tegra_p2u_power_on,
+       .owner = THIS_MODULE,
+};
+
+static int tegra_p2u_probe(struct platform_device *pdev)
+{
+       struct phy_provider *phy_provider;
+       struct device *dev = &pdev->dev;
+       struct phy *generic_phy;
+       struct tegra_p2u *phy;
+       struct resource *res;
+
+       phy = devm_kzalloc(dev, sizeof(*phy), GFP_KERNEL);
+       if (!phy)
+               return -ENOMEM;
+
+       res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ctl");
+       phy->base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(phy->base))
+               return PTR_ERR(phy->base);
+
+       platform_set_drvdata(pdev, phy);
+
+       generic_phy = devm_phy_create(dev, NULL, &ops);
+       if (IS_ERR(generic_phy))
+               return PTR_ERR(generic_phy);
+
+       phy_set_drvdata(generic_phy, phy);
+
+       phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+       if (IS_ERR(phy_provider))
+               return PTR_ERR(phy_provider);
+
+       return 0;
+}
+
+static const struct of_device_id tegra_p2u_id_table[] = {
+       {
+               .compatible = "nvidia,tegra194-p2u",
+       },
+       {}
+};
+MODULE_DEVICE_TABLE(of, tegra_p2u_id_table);
+
+static struct platform_driver tegra_p2u_driver = {
+       .probe = tegra_p2u_probe,
+       .driver = {
+               .name = "tegra194-p2u",
+               .of_match_table = tegra_p2u_id_table,
+       },
+};
+module_platform_driver(tegra_p2u_driver);
+
+MODULE_AUTHOR("Vidya Sagar <vidyas@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA Tegra194 PIPE2UPHY PHY driver");
+MODULE_LICENSE("GPL v2");
index f14f1f0..88a047b 100644 (file)
@@ -335,6 +335,7 @@ static int serdes_am654_clk_mux_set_parent(struct clk_hw *hw, u8 index)
 {
        struct serdes_am654_clk_mux *mux = to_serdes_am654_clk_mux(hw);
        struct regmap *regmap = mux->regmap;
+       const char *name = clk_hw_get_name(hw);
        unsigned int reg = mux->reg;
        int clk_id = mux->clk_id;
        int parents[SERDES_NUM_CLOCKS];
@@ -374,8 +375,7 @@ static int serdes_am654_clk_mux_set_parent(struct clk_hw *hw, u8 index)
                 * This can never happen, unless we missed
                 * a valid combination in serdes_am654_mux_table.
                 */
-               WARN(1, "Failed to find the parent of %s clock\n",
-                    hw->init->name);
+               WARN(1, "Failed to find the parent of %s clock\n", name);
                return -EINVAL;
        }
 
index 1b67bb5..ae21d08 100644 (file)
@@ -674,6 +674,7 @@ config EEEPC_LAPTOP
 config ASUS_WMI
        tristate "ASUS WMI Driver"
        depends on ACPI_WMI
+       depends on ACPI_BATTERY
        depends on INPUT
        depends on HWMON
        depends on BACKLIGHT_CLASS_DEVICE
index 61fe341..ea68f6e 100644 (file)
@@ -90,7 +90,7 @@ static int i2c_multi_inst_probe(struct platform_device *pdev)
        for (i = 0; i < multi->num_clients && inst_data[i].type; i++) {
                memset(&board_info, 0, sizeof(board_info));
                strlcpy(board_info.type, inst_data[i].type, I2C_NAME_SIZE);
-               snprintf(name, sizeof(name), "%s-%s.%d", match->id,
+               snprintf(name, sizeof(name), "%s-%s.%d", dev_name(dev),
                         inst_data[i].type, i);
                board_info.dev_name = name;
                switch (inst_data[i].flags & IRQ_RESOURCE_TYPE) {
index 9aca5e7..07d1b91 100644 (file)
@@ -422,6 +422,13 @@ static const struct dmi_system_id critclk_systems[] = {
                        DMI_MATCH(DMI_PRODUCT_VERSION, "6ES7647-8B"),
                },
        },
+       {
+               .ident = "SIMATIC IPC277E",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "SIEMENS AG"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "6AV7882-0"),
+               },
+       },
        { /*sentinel*/ }
 };
 
index 2880cd5..308ca9d 100644 (file)
@@ -65,7 +65,7 @@ static int gpio_restart_probe(struct platform_device *pdev)
        gpio_restart->reset_gpio = devm_gpiod_get(&pdev->dev, NULL,
                        open_source ? GPIOD_IN : GPIOD_OUT_LOW);
        if (IS_ERR(gpio_restart->reset_gpio)) {
-               dev_err(&pdev->dev, "Could net get reset GPIO\n");
+               dev_err(&pdev->dev, "Could not get reset GPIO\n");
                return PTR_ERR(gpio_restart->reset_gpio);
        }
 
index 06ff035..b4076b1 100644 (file)
@@ -190,6 +190,6 @@ void devm_reboot_mode_unregister(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(devm_reboot_mode_unregister);
 
-MODULE_AUTHOR("Andy Yan <andy.yan@rock-chips.com");
+MODULE_AUTHOR("Andy Yan <andy.yan@rock-chips.com>");
 MODULE_DESCRIPTION("System reboot mode core library");
 MODULE_LICENSE("GPL v2");
index 5e448b6..c84a7b1 100644 (file)
@@ -417,17 +417,6 @@ config CHARGER_PCF50633
        help
         Say Y to include support for NXP PCF50633 Main Battery Charger.
 
-config BATTERY_JZ4740
-       tristate "Ingenic JZ4740 battery"
-       depends on MACH_JZ4740
-       depends on MFD_JZ4740_ADC
-       help
-         Say Y to enable support for the battery on Ingenic JZ4740 based
-         boards.
-
-         This driver can be build as a module. If so, the module will be
-         called jz4740-battery.
-
 config BATTERY_RX51
        tristate "Nokia RX-51 (N900) battery driver"
        depends on TWL4030_MADC
index 96c2b74..6c7da92 100644 (file)
@@ -58,7 +58,6 @@ obj-$(CONFIG_BATTERY_S3C_ADC) += s3c_adc_battery.o
 obj-$(CONFIG_BATTERY_TWL4030_MADC)     += twl4030_madc_battery.o
 obj-$(CONFIG_CHARGER_88PM860X) += 88pm860x_charger.o
 obj-$(CONFIG_CHARGER_PCF50633) += pcf50633-charger.o
-obj-$(CONFIG_BATTERY_JZ4740)   += jz4740-battery.o
 obj-$(CONFIG_BATTERY_RX51)     += rx51_battery.o
 obj-$(CONFIG_AB8500_BM)                += ab8500_bmdata.o ab8500_charger.o ab8500_fg.o ab8500_btemp.o abx500_chargalg.o pm2301_charger.o
 obj-$(CONFIG_CHARGER_CPCAP)    += cpcap-charger.o
index 86d88ae..e51d0e7 100644 (file)
@@ -742,7 +742,7 @@ static int ab8500_charger_max_usb_curr(struct ab8500_charger *di,
                                                USB_CH_IP_CUR_LVL_1P5;
                        break;
                }
-               /* Else, fall through */
+               /* else, fall through */
        case USB_STAT_HM_IDGND:
                dev_err(di->dev, "USB Type - Charging not allowed\n");
                di->max_usb_in_curr.usb_type_max = USB_CH_IP_CUR_LVL_0P05;
@@ -3011,7 +3011,6 @@ static int ab8500_charger_usb_get_property(struct power_supply *psy,
 static int ab8500_charger_init_hw_registers(struct ab8500_charger *di)
 {
        int ret = 0;
-       u8 bup_vch_range = 0, vbup33_vrtcn = 0;
 
        /* Setup maximum charger current and voltage for ABB cut2.0 */
        if (!is_ab8500_1p1_or_earlier(di->parent)) {
@@ -3112,12 +3111,6 @@ static int ab8500_charger_init_hw_registers(struct ab8500_charger *di)
                goto out;
        }
 
-       /* Backup battery voltage and current */
-       if (di->bm->bkup_bat_v > BUP_VCH_SEL_3P1V)
-               bup_vch_range = BUP_VCH_RANGE;
-       if (di->bm->bkup_bat_v == BUP_VCH_SEL_3P3V)
-               vbup33_vrtcn = VBUP33_VRTCN;
-
        ret = abx500_set_register_interruptible(di->dev,
                AB8500_RTC,
                AB8500_RTC_BACKUP_CHG_REG,
index 44169da..e1bc4e6 100644 (file)
@@ -674,6 +674,7 @@ intr_failed:
 /*
  * Some devices have no battery (HDMI sticks) and the axp288 battery's
  * detection reports one despite it not being there.
+ * Please keep this list sorted alphabetically.
  */
 static const struct dmi_system_id axp288_fuel_gauge_blacklist[] = {
        {
@@ -697,6 +698,12 @@ static const struct dmi_system_id axp288_fuel_gauge_blacklist[] = {
                },
        },
        {
+               /* ECS EF20EA */
+               .matches = {
+                       DMI_MATCH(DMI_PRODUCT_NAME, "EF20EA"),
+               },
+       },
+       {
                /* Intel Cherry Trail Compute Stick, Windows version */
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
@@ -720,10 +727,11 @@ static const struct dmi_system_id axp288_fuel_gauge_blacklist[] = {
                },
        },
        {
-               /* ECS EF20EA */
+               /* Minix Neo Z83-4 mini PC */
                .matches = {
-                       DMI_MATCH(DMI_PRODUCT_NAME, "EF20EA"),
-               },
+                       DMI_MATCH(DMI_SYS_VENDOR, "MINIX"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"),
+               }
        },
        {}
 };
index d333f2b..9d1ec8d 100644 (file)
@@ -22,6 +22,7 @@
 #define BQ25890_IRQ_PIN                        "bq25890_irq"
 
 #define BQ25890_ID                     3
+#define BQ25895_ID                     7
 #define BQ25896_ID                     0
 
 enum bq25890_fields {
@@ -171,7 +172,7 @@ static const struct reg_field bq25890_reg_fields[] = {
        [F_WD]                  = REG_FIELD(0x07, 4, 5),
        [F_TMR_EN]              = REG_FIELD(0x07, 3, 3),
        [F_CHG_TMR]             = REG_FIELD(0x07, 1, 2),
-       [F_JEITA_ISET]          = REG_FIELD(0x07, 0, 0),
+       [F_JEITA_ISET]          = REG_FIELD(0x07, 0, 0), // reserved on BQ25895
        /* REG08 */
        [F_BATCMP]              = REG_FIELD(0x08, 5, 7),
        [F_VCLAMP]              = REG_FIELD(0x08, 2, 4),
@@ -180,7 +181,7 @@ static const struct reg_field bq25890_reg_fields[] = {
        [F_FORCE_ICO]           = REG_FIELD(0x09, 7, 7),
        [F_TMR2X_EN]            = REG_FIELD(0x09, 6, 6),
        [F_BATFET_DIS]          = REG_FIELD(0x09, 5, 5),
-       [F_JEITA_VSET]          = REG_FIELD(0x09, 4, 4),
+       [F_JEITA_VSET]          = REG_FIELD(0x09, 4, 4), // reserved on BQ25895
        [F_BATFET_DLY]          = REG_FIELD(0x09, 3, 3),
        [F_BATFET_RST_EN]       = REG_FIELD(0x09, 2, 2),
        [F_PUMPX_UP]            = REG_FIELD(0x09, 1, 1),
@@ -188,7 +189,7 @@ static const struct reg_field bq25890_reg_fields[] = {
        /* REG0A */
        [F_BOOSTV]              = REG_FIELD(0x0A, 4, 7),
        /* PFM_OTG_DIS 3 on BQ25896 */
-       [F_BOOSTI]              = REG_FIELD(0x0A, 0, 2),
+       [F_BOOSTI]              = REG_FIELD(0x0A, 0, 2), // reserved on BQ25895
        /* REG0B */
        [F_VBUS_STAT]           = REG_FIELD(0x0B, 5, 7),
        [F_CHG_STAT]            = REG_FIELD(0x0B, 3, 4),
@@ -392,6 +393,8 @@ static int bq25890_power_supply_get_property(struct power_supply *psy,
        case POWER_SUPPLY_PROP_MODEL_NAME:
                if (bq->chip_id == BQ25890_ID)
                        val->strval = "BQ25890";
+               else if (bq->chip_id == BQ25895_ID)
+                       val->strval = "BQ25895";
                else if (bq->chip_id == BQ25896_ID)
                        val->strval = "BQ25896";
                else
@@ -862,7 +865,8 @@ static int bq25890_probe(struct i2c_client *client,
                return bq->chip_id;
        }
 
-       if ((bq->chip_id != BQ25890_ID) && (bq->chip_id != BQ25896_ID)) {
+       if ((bq->chip_id != BQ25890_ID) && (bq->chip_id != BQ25895_ID)
+                       && (bq->chip_id != BQ25896_ID)) {
                dev_err(dev, "Chip with ID=%d, not supported!\n", bq->chip_id);
                return -ENODEV;
        }
index cc546bc..74258c7 100644 (file)
 #define CPCAP_REG_CRM_ICHRG_1A596      CPCAP_REG_CRM_ICHRG(0xe)
 #define CPCAP_REG_CRM_ICHRG_NO_LIMIT   CPCAP_REG_CRM_ICHRG(0xf)
 
+/* CPCAP_REG_VUSBC register bits needed for VBUS */
+#define CPCAP_BIT_VBUS_SWITCH          BIT(0)  /* VBUS boost to 5V */
+
 enum {
        CPCAP_CHARGER_IIO_BATTDET,
        CPCAP_CHARGER_IIO_VOLTAGE,
@@ -130,7 +133,8 @@ struct cpcap_charger_ddata {
        struct power_supply *usb;
 
        struct phy_companion comparator;        /* For USB VBUS */
-       bool vbus_enabled;
+       unsigned int vbus_enabled:1;
+       unsigned int feeding_vbus:1;
        atomic_t active;
 
        int status;
@@ -325,7 +329,6 @@ static bool cpcap_charger_vbus_valid(struct cpcap_charger_ddata *ddata)
 }
 
 /* VBUS control functions for the USB PHY companion */
-
 static void cpcap_charger_vbus_work(struct work_struct *work)
 {
        struct cpcap_charger_ddata *ddata;
@@ -343,6 +346,7 @@ static void cpcap_charger_vbus_work(struct work_struct *work)
                        return;
                }
 
+               ddata->feeding_vbus = true;
                cpcap_charger_set_cable_path(ddata, false);
                cpcap_charger_set_inductive_path(ddata, false);
 
@@ -350,12 +354,23 @@ static void cpcap_charger_vbus_work(struct work_struct *work)
                if (error)
                        goto out_err;
 
+               error = regmap_update_bits(ddata->reg, CPCAP_REG_VUSBC,
+                                          CPCAP_BIT_VBUS_SWITCH,
+                                          CPCAP_BIT_VBUS_SWITCH);
+               if (error)
+                       goto out_err;
+
                error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM,
                                           CPCAP_REG_CRM_RVRSMODE,
                                           CPCAP_REG_CRM_RVRSMODE);
                if (error)
                        goto out_err;
        } else {
+               error = regmap_update_bits(ddata->reg, CPCAP_REG_VUSBC,
+                                          CPCAP_BIT_VBUS_SWITCH, 0);
+               if (error)
+                       goto out_err;
+
                error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM,
                                           CPCAP_REG_CRM_RVRSMODE, 0);
                if (error)
@@ -363,6 +378,7 @@ static void cpcap_charger_vbus_work(struct work_struct *work)
 
                cpcap_charger_set_cable_path(ddata, true);
                cpcap_charger_set_inductive_path(ddata, true);
+               ddata->feeding_vbus = false;
        }
 
        return;
@@ -431,7 +447,8 @@ static void cpcap_usb_detect(struct work_struct *work)
        if (error)
                return;
 
-       if (cpcap_charger_vbus_valid(ddata) && s.chrgcurr1) {
+       if (!ddata->feeding_vbus && cpcap_charger_vbus_valid(ddata) &&
+           s.chrgcurr1) {
                int max_current;
 
                if (cpcap_charger_battery_found(ddata))
index b48cb7a..4812ac1 100644 (file)
@@ -342,7 +342,7 @@ static inline int isp1704_test_ulpi(struct isp1704_charger *isp)
        int vendor;
        int product;
        int i;
-       int ret = -ENODEV;
+       int ret;
 
        /* Test ULPI interface */
        ret = isp1704_write(isp, ULPI_SCRATCH, 0xaa);
diff --git a/drivers/power/supply/jz4740-battery.c b/drivers/power/supply/jz4740-battery.c
deleted file mode 100644 (file)
index 6366bd6..0000000
+++ /dev/null
@@ -1,421 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Battery measurement code for Ingenic JZ SOC.
- *
- * Copyright (C) 2009 Jiejing Zhang <kzjeef@gmail.com>
- * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de>
- *
- * based on tosa_battery.c
- *
- * Copyright (C) 2008 Marek Vasut <marek.vasut@gmail.com>
- */
-
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/io.h>
-
-#include <linux/delay.h>
-#include <linux/err.h>
-#include <linux/gpio.h>
-#include <linux/mfd/core.h>
-#include <linux/power_supply.h>
-
-#include <linux/power/jz4740-battery.h>
-#include <linux/jz4740-adc.h>
-
-struct jz_battery {
-       struct jz_battery_platform_data *pdata;
-       struct platform_device *pdev;
-
-       void __iomem *base;
-
-       int irq;
-       int charge_irq;
-
-       const struct mfd_cell *cell;
-
-       int status;
-       long voltage;
-
-       struct completion read_completion;
-
-       struct power_supply *battery;
-       struct power_supply_desc battery_desc;
-       struct delayed_work work;
-
-       struct mutex lock;
-};
-
-static inline struct jz_battery *psy_to_jz_battery(struct power_supply *psy)
-{
-       return power_supply_get_drvdata(psy);
-}
-
-static irqreturn_t jz_battery_irq_handler(int irq, void *devid)
-{
-       struct jz_battery *battery = devid;
-
-       complete(&battery->read_completion);
-       return IRQ_HANDLED;
-}
-
-static long jz_battery_read_voltage(struct jz_battery *battery)
-{
-       long t;
-       unsigned long val;
-       long voltage;
-
-       mutex_lock(&battery->lock);
-
-       reinit_completion(&battery->read_completion);
-
-       enable_irq(battery->irq);
-       battery->cell->enable(battery->pdev);
-
-       t = wait_for_completion_interruptible_timeout(&battery->read_completion,
-               HZ);
-
-       if (t > 0) {
-               val = readw(battery->base) & 0xfff;
-
-               if (battery->pdata->info.voltage_max_design <= 2500000)
-                       val = (val * 78125UL) >> 7UL;
-               else
-                       val = ((val * 924375UL) >> 9UL) + 33000;
-               voltage = (long)val;
-       } else {
-               voltage = t ? t : -ETIMEDOUT;
-       }
-
-       battery->cell->disable(battery->pdev);
-       disable_irq(battery->irq);
-
-       mutex_unlock(&battery->lock);
-
-       return voltage;
-}
-
-static int jz_battery_get_capacity(struct power_supply *psy)
-{
-       struct jz_battery *jz_battery = psy_to_jz_battery(psy);
-       struct power_supply_info *info = &jz_battery->pdata->info;
-       long voltage;
-       int ret;
-       int voltage_span;
-
-       voltage = jz_battery_read_voltage(jz_battery);
-
-       if (voltage < 0)
-               return voltage;
-
-       voltage_span = info->voltage_max_design - info->voltage_min_design;
-       ret = ((voltage - info->voltage_min_design) * 100) / voltage_span;
-
-       if (ret > 100)
-               ret = 100;
-       else if (ret < 0)
-               ret = 0;
-
-       return ret;
-}
-
-static int jz_battery_get_property(struct power_supply *psy,
-       enum power_supply_property psp, union power_supply_propval *val)
-{
-       struct jz_battery *jz_battery = psy_to_jz_battery(psy);
-       struct power_supply_info *info = &jz_battery->pdata->info;
-       long voltage;
-
-       switch (psp) {
-       case POWER_SUPPLY_PROP_STATUS:
-               val->intval = jz_battery->status;
-               break;
-       case POWER_SUPPLY_PROP_TECHNOLOGY:
-               val->intval = jz_battery->pdata->info.technology;
-               break;
-       case POWER_SUPPLY_PROP_HEALTH:
-               voltage = jz_battery_read_voltage(jz_battery);
-               if (voltage < info->voltage_min_design)
-                       val->intval = POWER_SUPPLY_HEALTH_DEAD;
-               else
-                       val->intval = POWER_SUPPLY_HEALTH_GOOD;
-               break;
-       case POWER_SUPPLY_PROP_CAPACITY:
-               val->intval = jz_battery_get_capacity(psy);
-               break;
-       case POWER_SUPPLY_PROP_VOLTAGE_NOW:
-               val->intval = jz_battery_read_voltage(jz_battery);
-               if (val->intval < 0)
-                       return val->intval;
-               break;
-       case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
-               val->intval = info->voltage_max_design;
-               break;
-       case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
-               val->intval = info->voltage_min_design;
-               break;
-       case POWER_SUPPLY_PROP_PRESENT:
-               val->intval = 1;
-               break;
-       default:
-               return -EINVAL;
-       }
-       return 0;
-}
-
-static void jz_battery_external_power_changed(struct power_supply *psy)
-{
-       struct jz_battery *jz_battery = psy_to_jz_battery(psy);
-
-       mod_delayed_work(system_wq, &jz_battery->work, 0);
-}
-
-static irqreturn_t jz_battery_charge_irq(int irq, void *data)
-{
-       struct jz_battery *jz_battery = data;
-
-       mod_delayed_work(system_wq, &jz_battery->work, 0);
-
-       return IRQ_HANDLED;
-}
-
-static void jz_battery_update(struct jz_battery *jz_battery)
-{
-       int status;
-       long voltage;
-       bool has_changed = false;
-       int is_charging;
-
-       if (gpio_is_valid(jz_battery->pdata->gpio_charge)) {
-               is_charging = gpio_get_value(jz_battery->pdata->gpio_charge);
-               is_charging ^= jz_battery->pdata->gpio_charge_active_low;
-               if (is_charging)
-                       status = POWER_SUPPLY_STATUS_CHARGING;
-               else
-                       status = POWER_SUPPLY_STATUS_NOT_CHARGING;
-
-               if (status != jz_battery->status) {
-                       jz_battery->status = status;
-                       has_changed = true;
-               }
-       }
-
-       voltage = jz_battery_read_voltage(jz_battery);
-       if (voltage >= 0 && abs(voltage - jz_battery->voltage) > 50000) {
-               jz_battery->voltage = voltage;
-               has_changed = true;
-       }
-
-       if (has_changed)
-               power_supply_changed(jz_battery->battery);
-}
-
-static enum power_supply_property jz_battery_properties[] = {
-       POWER_SUPPLY_PROP_STATUS,
-       POWER_SUPPLY_PROP_TECHNOLOGY,
-       POWER_SUPPLY_PROP_HEALTH,
-       POWER_SUPPLY_PROP_CAPACITY,
-       POWER_SUPPLY_PROP_VOLTAGE_NOW,
-       POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
-       POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
-       POWER_SUPPLY_PROP_PRESENT,
-};
-
-static void jz_battery_work(struct work_struct *work)
-{
-       /* Too small interval will increase system workload */
-       const int interval = HZ * 30;
-       struct jz_battery *jz_battery = container_of(work, struct jz_battery,
-                                           work.work);
-
-       jz_battery_update(jz_battery);
-       schedule_delayed_work(&jz_battery->work, interval);
-}
-
-static int jz_battery_probe(struct platform_device *pdev)
-{
-       int ret = 0;
-       struct jz_battery_platform_data *pdata = pdev->dev.parent->platform_data;
-       struct power_supply_config psy_cfg = {};
-       struct jz_battery *jz_battery;
-       struct power_supply_desc *battery_desc;
-       struct resource *mem;
-
-       if (!pdata) {
-               dev_err(&pdev->dev, "No platform_data supplied\n");
-               return -ENXIO;
-       }
-
-       jz_battery = devm_kzalloc(&pdev->dev, sizeof(*jz_battery), GFP_KERNEL);
-       if (!jz_battery) {
-               dev_err(&pdev->dev, "Failed to allocate driver structure\n");
-               return -ENOMEM;
-       }
-
-       jz_battery->cell = mfd_get_cell(pdev);
-
-       jz_battery->irq = platform_get_irq(pdev, 0);
-       if (jz_battery->irq < 0) {
-               dev_err(&pdev->dev, "Failed to get platform irq: %d\n", ret);
-               return jz_battery->irq;
-       }
-
-       mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-       jz_battery->base = devm_ioremap_resource(&pdev->dev, mem);
-       if (IS_ERR(jz_battery->base))
-               return PTR_ERR(jz_battery->base);
-
-       battery_desc = &jz_battery->battery_desc;
-       battery_desc->name = pdata->info.name;
-       battery_desc->type = POWER_SUPPLY_TYPE_BATTERY;
-       battery_desc->properties        = jz_battery_properties;
-       battery_desc->num_properties    = ARRAY_SIZE(jz_battery_properties);
-       battery_desc->get_property      = jz_battery_get_property;
-       battery_desc->external_power_changed =
-                                       jz_battery_external_power_changed;
-       battery_desc->use_for_apm       = 1;
-
-       psy_cfg.drv_data = jz_battery;
-
-       jz_battery->pdata = pdata;
-       jz_battery->pdev = pdev;
-
-       init_completion(&jz_battery->read_completion);
-       mutex_init(&jz_battery->lock);
-
-       INIT_DELAYED_WORK(&jz_battery->work, jz_battery_work);
-
-       ret = request_irq(jz_battery->irq, jz_battery_irq_handler, 0, pdev->name,
-                       jz_battery);
-       if (ret) {
-               dev_err(&pdev->dev, "Failed to request irq %d\n", ret);
-               return ret;
-       }
-       disable_irq(jz_battery->irq);
-
-       if (gpio_is_valid(pdata->gpio_charge)) {
-               ret = gpio_request(pdata->gpio_charge, dev_name(&pdev->dev));
-               if (ret) {
-                       dev_err(&pdev->dev, "charger state gpio request failed.\n");
-                       goto err_free_irq;
-               }
-               ret = gpio_direction_input(pdata->gpio_charge);
-               if (ret) {
-                       dev_err(&pdev->dev, "charger state gpio set direction failed.\n");
-                       goto err_free_gpio;
-               }
-
-               jz_battery->charge_irq = gpio_to_irq(pdata->gpio_charge);
-
-               if (jz_battery->charge_irq >= 0) {
-                       ret = request_irq(jz_battery->charge_irq,
-                                   jz_battery_charge_irq,
-                                   IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-                                   dev_name(&pdev->dev), jz_battery);
-                       if (ret) {
-                               dev_err(&pdev->dev, "Failed to request charge irq: %d\n", ret);
-                               goto err_free_gpio;
-                       }
-               }
-       } else {
-               jz_battery->charge_irq = -1;
-       }
-
-       if (jz_battery->pdata->info.voltage_max_design <= 2500000)
-               jz4740_adc_set_config(pdev->dev.parent, JZ_ADC_CONFIG_BAT_MB,
-                       JZ_ADC_CONFIG_BAT_MB);
-       else
-               jz4740_adc_set_config(pdev->dev.parent, JZ_ADC_CONFIG_BAT_MB, 0);
-
-       jz_battery->battery = power_supply_register(&pdev->dev, battery_desc,
-                                                       &psy_cfg);
-       if (IS_ERR(jz_battery->battery)) {
-               dev_err(&pdev->dev, "power supply battery register failed.\n");
-               ret = PTR_ERR(jz_battery->battery);
-               goto err_free_charge_irq;
-       }
-
-       platform_set_drvdata(pdev, jz_battery);
-       schedule_delayed_work(&jz_battery->work, 0);
-
-       return 0;
-
-err_free_charge_irq:
-       if (jz_battery->charge_irq >= 0)
-               free_irq(jz_battery->charge_irq, jz_battery);
-err_free_gpio:
-       if (gpio_is_valid(pdata->gpio_charge))
-               gpio_free(jz_battery->pdata->gpio_charge);
-err_free_irq:
-       free_irq(jz_battery->irq, jz_battery);
-       return ret;
-}
-
-static int jz_battery_remove(struct platform_device *pdev)
-{
-       struct jz_battery *jz_battery = platform_get_drvdata(pdev);
-
-       cancel_delayed_work_sync(&jz_battery->work);
-
-       if (gpio_is_valid(jz_battery->pdata->gpio_charge)) {
-               if (jz_battery->charge_irq >= 0)
-                       free_irq(jz_battery->charge_irq, jz_battery);
-               gpio_free(jz_battery->pdata->gpio_charge);
-       }
-
-       power_supply_unregister(jz_battery->battery);
-
-       free_irq(jz_battery->irq, jz_battery);
-
-       return 0;
-}
-
-#ifdef CONFIG_PM
-static int jz_battery_suspend(struct device *dev)
-{
-       struct jz_battery *jz_battery = dev_get_drvdata(dev);
-
-       cancel_delayed_work_sync(&jz_battery->work);
-       jz_battery->status = POWER_SUPPLY_STATUS_UNKNOWN;
-
-       return 0;
-}
-
-static int jz_battery_resume(struct device *dev)
-{
-       struct jz_battery *jz_battery = dev_get_drvdata(dev);
-
-       schedule_delayed_work(&jz_battery->work, 0);
-
-       return 0;
-}
-
-static const struct dev_pm_ops jz_battery_pm_ops = {
-       .suspend        = jz_battery_suspend,
-       .resume         = jz_battery_resume,
-};
-
-#define JZ_BATTERY_PM_OPS (&jz_battery_pm_ops)
-#else
-#define JZ_BATTERY_PM_OPS NULL
-#endif
-
-static struct platform_driver jz_battery_driver = {
-       .probe          = jz_battery_probe,
-       .remove         = jz_battery_remove,
-       .driver = {
-               .name = "jz4740-battery",
-               .pm = JZ_BATTERY_PM_OPS,
-       },
-};
-
-module_platform_driver(jz_battery_driver);
-
-MODULE_ALIAS("platform:jz4740-battery");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
-MODULE_DESCRIPTION("JZ4740 SoC battery driver");
index 64f3358..0dfad2c 100644 (file)
@@ -511,7 +511,7 @@ static inline void max17042_override_por(struct regmap *map,
                regmap_write(map, reg, value);
 }
 
-static inline void max10742_unlock_model(struct max17042_chip *chip)
+static inline void max17042_unlock_model(struct max17042_chip *chip)
 {
        struct regmap *map = chip->regmap;
 
@@ -519,7 +519,7 @@ static inline void max10742_unlock_model(struct max17042_chip *chip)
        regmap_write(map, MAX17042_MLOCKReg2, MODEL_UNLOCK2);
 }
 
-static inline void max10742_lock_model(struct max17042_chip *chip)
+static inline void max17042_lock_model(struct max17042_chip *chip)
 {
        struct regmap *map = chip->regmap;
 
@@ -577,7 +577,7 @@ static int max17042_init_model(struct max17042_chip *chip)
        if (!temp_data)
                return -ENOMEM;
 
-       max10742_unlock_model(chip);
+       max17042_unlock_model(chip);
        max17042_write_model_data(chip, MAX17042_MODELChrTbl,
                                table_size);
        max17042_read_model_data(chip, MAX17042_MODELChrTbl, temp_data,
@@ -589,7 +589,7 @@ static int max17042_init_model(struct max17042_chip *chip)
                temp_data,
                table_size);
 
-       max10742_lock_model(chip);
+       max17042_lock_model(chip);
        kfree(temp_data);
 
        return ret;
index e34714c..5f9477c 100644 (file)
@@ -366,3 +366,4 @@ module_platform_driver(max77650_charger_driver);
 MODULE_DESCRIPTION("MAXIM 77650/77651 charger driver");
 MODULE_AUTHOR("Bartosz Golaszewski <bgolaszewski@baylibre.com>");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:max77650-charger");
index 82e8480..5c36c43 100644 (file)
@@ -1051,14 +1051,14 @@ __power_supply_register(struct device *parent,
        }
 
        spin_lock_init(&psy->changed_lock);
-       rc = device_init_wakeup(dev, ws);
-       if (rc)
-               goto wakeup_init_failed;
-
        rc = device_add(dev);
        if (rc)
                goto device_add_failed;
 
+       rc = device_init_wakeup(dev, ws);
+       if (rc)
+               goto wakeup_init_failed;
+
        rc = psy_register_thermal(psy);
        if (rc)
                goto register_thermal_failed;
@@ -1101,8 +1101,8 @@ register_cooler_failed:
        psy_unregister_thermal(psy);
 register_thermal_failed:
        device_del(dev);
-device_add_failed:
 wakeup_init_failed:
+device_add_failed:
 check_supplies_failed:
 dev_set_name_failed:
        put_device(dev);
index 51fe604..75cf861 100644 (file)
@@ -284,6 +284,7 @@ int power_supply_add_hwmon_sysfs(struct power_supply *psy)
        struct device *dev = &psy->dev;
        struct device *hwmon;
        int ret, i;
+       const char *name;
 
        if (!devres_open_group(dev, power_supply_add_hwmon_sysfs,
                               GFP_KERNEL))
@@ -334,7 +335,19 @@ int power_supply_add_hwmon_sysfs(struct power_supply *psy)
                }
        }
 
-       hwmon = devm_hwmon_device_register_with_info(dev, psy->desc->name,
+       name = psy->desc->name;
+       if (strchr(name, '-')) {
+               char *new_name;
+
+               new_name = devm_kstrdup(dev, name, GFP_KERNEL);
+               if (!new_name) {
+                       ret = -ENOMEM;
+                       goto error;
+               }
+               strreplace(new_name, '-', '_');
+               name = new_name;
+       }
+       hwmon = devm_hwmon_device_register_with_info(dev, name,
                                                psyhw,
                                                &power_supply_hwmon_chip_info,
                                                NULL);
index 048d205..f8d74e9 100644 (file)
@@ -314,17 +314,22 @@ static int sbs_get_battery_presence_and_health(
 {
        int ret;
 
-       if (psp == POWER_SUPPLY_PROP_PRESENT) {
-               /* Dummy command; if it succeeds, battery is present. */
-               ret = sbs_read_word_data(client, sbs_data[REG_STATUS].addr);
-               if (ret < 0)
-                       val->intval = 0; /* battery disconnected */
-               else
-                       val->intval = 1; /* battery present */
-       } else { /* POWER_SUPPLY_PROP_HEALTH */
+       /* Dummy command; if it succeeds, battery is present. */
+       ret = sbs_read_word_data(client, sbs_data[REG_STATUS].addr);
+
+       if (ret < 0) { /* battery not present*/
+               if (psp == POWER_SUPPLY_PROP_PRESENT) {
+                       val->intval = 0;
+                       return 0;
+               }
+               return ret;
+       }
+
+       if (psp == POWER_SUPPLY_PROP_PRESENT)
+               val->intval = 1; /* battery present */
+       else /* POWER_SUPPLY_PROP_HEALTH */
                /* SBS spec doesn't have a general health command. */
                val->intval = POWER_SUPPLY_HEALTH_UNKNOWN;
-       }
 
        return 0;
 }
@@ -620,12 +625,14 @@ static int sbs_get_property(struct power_supply *psy,
        switch (psp) {
        case POWER_SUPPLY_PROP_PRESENT:
        case POWER_SUPPLY_PROP_HEALTH:
-               if (client->flags & SBS_FLAGS_TI_BQ20Z75)
+               if (chip->flags & SBS_FLAGS_TI_BQ20Z75)
                        ret = sbs_get_ti_battery_presence_and_health(client,
                                                                     psp, val);
                else
                        ret = sbs_get_battery_presence_and_health(client, psp,
                                                                  val);
+
+               /* this can only be true if no gpio is used */
                if (psp == POWER_SUPPLY_PROP_PRESENT)
                        return 0;
                break;
index 24895cc..bc8f5bd 100644 (file)
@@ -109,6 +109,9 @@ struct sc27xx_fgu_data {
 };
 
 static int sc27xx_fgu_cap_to_clbcnt(struct sc27xx_fgu_data *data, int capacity);
+static void sc27xx_fgu_capacity_calibration(struct sc27xx_fgu_data *data,
+                                           int cap, bool int_mode);
+static void sc27xx_fgu_adjust_cap(struct sc27xx_fgu_data *data, int cap);
 
 static const char * const sc27xx_charger_supply_name[] = {
        "sc2731_charger",
@@ -326,8 +329,6 @@ static int sc27xx_fgu_set_clbcnt(struct sc27xx_fgu_data *data, int clbcnt)
 {
        int ret;
 
-       clbcnt *= SC27XX_FGU_SAMPLE_HZ;
-
        ret = regmap_update_bits(data->regmap,
                                 data->base + SC27XX_FGU_CLBCNT_SETL,
                                 SC27XX_FGU_CLBCNT_MASK, clbcnt);
@@ -362,7 +363,6 @@ static int sc27xx_fgu_get_clbcnt(struct sc27xx_fgu_data *data, int *clb_cnt)
 
        *clb_cnt = ccl & SC27XX_FGU_CLBCNT_MASK;
        *clb_cnt |= (cch & SC27XX_FGU_CLBCNT_MASK) << SC27XX_FGU_CLBCNT_SHIFT;
-       *clb_cnt /= SC27XX_FGU_SAMPLE_HZ;
 
        return 0;
 }
@@ -380,10 +380,10 @@ static int sc27xx_fgu_get_capacity(struct sc27xx_fgu_data *data, int *cap)
 
        /*
         * Convert coulomb counter to delta capacity (mAh), and set multiplier
-        * as 100 to improve the precision.
+        * as 10 to improve the precision.
         */
-       temp = DIV_ROUND_CLOSEST(delta_clbcnt, 360);
-       temp = sc27xx_fgu_adc_to_current(data, temp);
+       temp = DIV_ROUND_CLOSEST(delta_clbcnt * 10, 36 * SC27XX_FGU_SAMPLE_HZ);
+       temp = sc27xx_fgu_adc_to_current(data, temp / 1000);
 
        /*
         * Convert to capacity percent of the battery total capacity,
@@ -392,6 +392,9 @@ static int sc27xx_fgu_get_capacity(struct sc27xx_fgu_data *data, int *cap)
        delta_cap = DIV_ROUND_CLOSEST(temp * 100, data->total_cap);
        *cap = delta_cap + data->init_cap;
 
+       /* Calibrate the battery capacity in a normal range. */
+       sc27xx_fgu_capacity_calibration(data, *cap, false);
+
        return 0;
 }
 
@@ -587,6 +590,10 @@ static int sc27xx_fgu_get_property(struct power_supply *psy,
                val->intval = value * 1000;
                break;
 
+       case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
+               val->intval = data->total_cap * 1000;
+               break;
+
        default:
                ret = -EINVAL;
                break;
@@ -604,17 +611,25 @@ static int sc27xx_fgu_set_property(struct power_supply *psy,
        struct sc27xx_fgu_data *data = power_supply_get_drvdata(psy);
        int ret;
 
-       if (psp != POWER_SUPPLY_PROP_CAPACITY)
-               return -EINVAL;
-
        mutex_lock(&data->lock);
 
-       ret = sc27xx_fgu_save_last_cap(data, val->intval);
+       switch (psp) {
+       case POWER_SUPPLY_PROP_CAPACITY:
+               ret = sc27xx_fgu_save_last_cap(data, val->intval);
+               if (ret < 0)
+                       dev_err(data->dev, "failed to save battery capacity\n");
+               break;
 
-       mutex_unlock(&data->lock);
+       case POWER_SUPPLY_PROP_CALIBRATE:
+               sc27xx_fgu_adjust_cap(data, val->intval);
+               ret = 0;
+               break;
 
-       if (ret < 0)
-               dev_err(data->dev, "failed to save battery capacity\n");
+       default:
+               ret = -EINVAL;
+       }
+
+       mutex_unlock(&data->lock);
 
        return ret;
 }
@@ -629,7 +644,8 @@ static void sc27xx_fgu_external_power_changed(struct power_supply *psy)
 static int sc27xx_fgu_property_is_writeable(struct power_supply *psy,
                                            enum power_supply_property psp)
 {
-       return psp == POWER_SUPPLY_PROP_CAPACITY;
+       return psp == POWER_SUPPLY_PROP_CAPACITY ||
+               psp == POWER_SUPPLY_PROP_CALIBRATE;
 }
 
 static enum power_supply_property sc27xx_fgu_props[] = {
@@ -644,6 +660,8 @@ static enum power_supply_property sc27xx_fgu_props[] = {
        POWER_SUPPLY_PROP_CURRENT_NOW,
        POWER_SUPPLY_PROP_CURRENT_AVG,
        POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE,
+       POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN,
+       POWER_SUPPLY_PROP_CALIBRATE,
 };
 
 static const struct power_supply_desc sc27xx_fgu_desc = {
@@ -659,50 +677,62 @@ static const struct power_supply_desc sc27xx_fgu_desc = {
 
 static void sc27xx_fgu_adjust_cap(struct sc27xx_fgu_data *data, int cap)
 {
+       int ret;
+
        data->init_cap = cap;
-       data->init_clbcnt = sc27xx_fgu_cap_to_clbcnt(data, data->init_cap);
+       ret = sc27xx_fgu_get_clbcnt(data, &data->init_clbcnt);
+       if (ret)
+               dev_err(data->dev, "failed to get init coulomb counter\n");
 }
 
-static irqreturn_t sc27xx_fgu_interrupt(int irq, void *dev_id)
+static void sc27xx_fgu_capacity_calibration(struct sc27xx_fgu_data *data,
+                                           int cap, bool int_mode)
 {
-       struct sc27xx_fgu_data *data = dev_id;
-       int ret, cap, ocv, adc;
-       u32 status;
+       int ret, ocv, chg_sts, adc;
 
-       mutex_lock(&data->lock);
-
-       ret = regmap_read(data->regmap, data->base + SC27XX_FGU_INT_STS,
-                         &status);
-       if (ret)
-               goto out;
+       ret = sc27xx_fgu_get_vbat_ocv(data, &ocv);
+       if (ret) {
+               dev_err(data->dev, "get battery ocv error.\n");
+               return;
+       }
 
-       ret = regmap_update_bits(data->regmap, data->base + SC27XX_FGU_INT_CLR,
-                                status, status);
-       if (ret)
-               goto out;
+       ret = sc27xx_fgu_get_status(data, &chg_sts);
+       if (ret) {
+               dev_err(data->dev, "get charger status error.\n");
+               return;
+       }
 
        /*
-        * When low overload voltage interrupt happens, we should calibrate the
-        * battery capacity in lower voltage stage.
+        * If we are in charging mode, then we do not need to calibrate the
+        * lower capacity.
         */
-       if (!(status & SC27XX_FGU_LOW_OVERLOAD_INT))
-               goto out;
-
-       ret = sc27xx_fgu_get_capacity(data, &cap);
-       if (ret)
-               goto out;
+       if (chg_sts == POWER_SUPPLY_STATUS_CHARGING)
+               return;
 
-       ret = sc27xx_fgu_get_vbat_ocv(data, &ocv);
-       if (ret)
-               goto out;
-
-       /*
-        * If current OCV value is less than the minimum OCV value in OCV table,
-        * which means now battery capacity is 0%, and we should adjust the
-        * inititial capacity to 0.
-        */
-       if (ocv <= data->cap_table[data->table_len - 1].ocv) {
+       if ((ocv > data->cap_table[0].ocv && cap < 100) || cap > 100) {
+               /*
+                * If current OCV value is larger than the max OCV value in
+                * OCV table, or the current capacity is larger than 100,
+                * we should force the initial capacity to 100.
+                */
+               sc27xx_fgu_adjust_cap(data, 100);
+       } else if (ocv <= data->cap_table[data->table_len - 1].ocv) {
+               /*
+                * If current OCV value is less than the minimum OCV value in
+                * OCV table, we should force the initial capacity to 0.
+                */
                sc27xx_fgu_adjust_cap(data, 0);
+       } else if ((ocv > data->cap_table[data->table_len - 1].ocv && cap <= 0) ||
+                  (ocv > data->min_volt && cap <= data->alarm_cap)) {
+               /*
+                * If current OCV value does not match the current capacity,
+                * we should re-calculate current capacity by looking up the
+                * OCV table.
+                */
+               int cur_cap = power_supply_ocv2cap_simple(data->cap_table,
+                                                         data->table_len, ocv);
+
+               sc27xx_fgu_adjust_cap(data, cur_cap);
        } else if (ocv <= data->min_volt) {
                /*
                 * If current OCV value is less than the low alarm voltage, but
@@ -711,7 +741,7 @@ static irqreturn_t sc27xx_fgu_interrupt(int irq, void *dev_id)
                 */
                if (cap > data->alarm_cap) {
                        sc27xx_fgu_adjust_cap(data, data->alarm_cap);
-               } else if (cap <= 0) {
+               } else {
                        int cur_cap;
 
                        /*
@@ -726,15 +756,55 @@ static irqreturn_t sc27xx_fgu_interrupt(int irq, void *dev_id)
                        sc27xx_fgu_adjust_cap(data, cur_cap);
                }
 
+               if (!int_mode)
+                       return;
+
                /*
                 * After adjusting the battery capacity, we should set the
                 * lowest alarm voltage instead.
                 */
                data->min_volt = data->cap_table[data->table_len - 1].ocv;
+               data->alarm_cap = power_supply_ocv2cap_simple(data->cap_table,
+                                                             data->table_len,
+                                                             data->min_volt);
+
                adc = sc27xx_fgu_voltage_to_adc(data, data->min_volt / 1000);
-               regmap_update_bits(data->regmap, data->base + SC27XX_FGU_LOW_OVERLOAD,
+               regmap_update_bits(data->regmap,
+                                  data->base + SC27XX_FGU_LOW_OVERLOAD,
                                   SC27XX_FGU_LOW_OVERLOAD_MASK, adc);
        }
+}
+
+static irqreturn_t sc27xx_fgu_interrupt(int irq, void *dev_id)
+{
+       struct sc27xx_fgu_data *data = dev_id;
+       int ret, cap;
+       u32 status;
+
+       mutex_lock(&data->lock);
+
+       ret = regmap_read(data->regmap, data->base + SC27XX_FGU_INT_STS,
+                         &status);
+       if (ret)
+               goto out;
+
+       ret = regmap_update_bits(data->regmap, data->base + SC27XX_FGU_INT_CLR,
+                                status, status);
+       if (ret)
+               goto out;
+
+       /*
+        * When low overload voltage interrupt happens, we should calibrate the
+        * battery capacity in lower voltage stage.
+        */
+       if (!(status & SC27XX_FGU_LOW_OVERLOAD_INT))
+               goto out;
+
+       ret = sc27xx_fgu_get_capacity(data, &cap);
+       if (ret)
+               goto out;
+
+       sc27xx_fgu_capacity_calibration(data, cap, true);
 
 out:
        mutex_unlock(&data->lock);
@@ -785,7 +855,7 @@ static int sc27xx_fgu_cap_to_clbcnt(struct sc27xx_fgu_data *data, int capacity)
         * Convert current capacity (mAh) to coulomb counter according to the
         * formula: 1 mAh =3.6 coulomb.
         */
-       return DIV_ROUND_CLOSEST(cur_cap * 36 * data->cur_1000ma_adc, 10);
+       return DIV_ROUND_CLOSEST(cur_cap * 36 * data->cur_1000ma_adc * SC27XX_FGU_SAMPLE_HZ, 10);
 }
 
 static int sc27xx_fgu_calibration(struct sc27xx_fgu_data *data)
@@ -856,6 +926,8 @@ static int sc27xx_fgu_hw_init(struct sc27xx_fgu_data *data)
        data->alarm_cap = power_supply_ocv2cap_simple(data->cap_table,
                                                      data->table_len,
                                                      data->min_volt);
+       if (!data->alarm_cap)
+               data->alarm_cap += 1;
 
        power_supply_put_battery_info(data->battery, &info);
 
@@ -957,81 +1029,81 @@ disable_fgu:
 
 static int sc27xx_fgu_probe(struct platform_device *pdev)
 {
-       struct device_node *np = pdev->dev.of_node;
+       struct device *dev = &pdev->dev;
+       struct device_node *np = dev->of_node;
        struct power_supply_config fgu_cfg = { };
        struct sc27xx_fgu_data *data;
        int ret, irq;
 
-       data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+       data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
        if (!data)
                return -ENOMEM;
 
-       data->regmap = dev_get_regmap(pdev->dev.parent, NULL);
+       data->regmap = dev_get_regmap(dev->parent, NULL);
        if (!data->regmap) {
-               dev_err(&pdev->dev, "failed to get regmap\n");
+               dev_err(dev, "failed to get regmap\n");
                return -ENODEV;
        }
 
-       ret = device_property_read_u32(&pdev->dev, "reg", &data->base);
+       ret = device_property_read_u32(dev, "reg", &data->base);
        if (ret) {
-               dev_err(&pdev->dev, "failed to get fgu address\n");
+               dev_err(dev, "failed to get fgu address\n");
                return ret;
        }
 
-       data->channel = devm_iio_channel_get(&pdev->dev, "bat-temp");
+       data->channel = devm_iio_channel_get(dev, "bat-temp");
        if (IS_ERR(data->channel)) {
-               dev_err(&pdev->dev, "failed to get IIO channel\n");
+               dev_err(dev, "failed to get IIO channel\n");
                return PTR_ERR(data->channel);
        }
 
-       data->charge_chan = devm_iio_channel_get(&pdev->dev, "charge-vol");
+       data->charge_chan = devm_iio_channel_get(dev, "charge-vol");
        if (IS_ERR(data->charge_chan)) {
-               dev_err(&pdev->dev, "failed to get charge IIO channel\n");
+               dev_err(dev, "failed to get charge IIO channel\n");
                return PTR_ERR(data->charge_chan);
        }
 
-       data->gpiod = devm_gpiod_get(&pdev->dev, "bat-detect", GPIOD_IN);
+       data->gpiod = devm_gpiod_get(dev, "bat-detect", GPIOD_IN);
        if (IS_ERR(data->gpiod)) {
-               dev_err(&pdev->dev, "failed to get battery detection GPIO\n");
+               dev_err(dev, "failed to get battery detection GPIO\n");
                return PTR_ERR(data->gpiod);
        }
 
        ret = gpiod_get_value_cansleep(data->gpiod);
        if (ret < 0) {
-               dev_err(&pdev->dev, "failed to get gpio state\n");
+               dev_err(dev, "failed to get gpio state\n");
                return ret;
        }
 
        data->bat_present = !!ret;
        mutex_init(&data->lock);
-       data->dev = &pdev->dev;
+       data->dev = dev;
        platform_set_drvdata(pdev, data);
 
        fgu_cfg.drv_data = data;
        fgu_cfg.of_node = np;
-       data->battery = devm_power_supply_register(&pdev->dev, &sc27xx_fgu_desc,
+       data->battery = devm_power_supply_register(dev, &sc27xx_fgu_desc,
                                                   &fgu_cfg);
        if (IS_ERR(data->battery)) {
-               dev_err(&pdev->dev, "failed to register power supply\n");
+               dev_err(dev, "failed to register power supply\n");
                return PTR_ERR(data->battery);
        }
 
        ret = sc27xx_fgu_hw_init(data);
        if (ret) {
-               dev_err(&pdev->dev, "failed to initialize fgu hardware\n");
+               dev_err(dev, "failed to initialize fgu hardware\n");
                return ret;
        }
 
-       ret = devm_add_action(&pdev->dev, sc27xx_fgu_disable, data);
+       ret = devm_add_action_or_reset(dev, sc27xx_fgu_disable, data);
        if (ret) {
-               sc27xx_fgu_disable(data);
-               dev_err(&pdev->dev, "failed to add fgu disable action\n");
+               dev_err(dev, "failed to add fgu disable action\n");
                return ret;
        }
 
        irq = platform_get_irq(pdev, 0);
        if (irq < 0) {
-               dev_err(&pdev->dev, "no irq resource specified\n");
+               dev_err(dev, "no irq resource specified\n");
                return irq;
        }
 
@@ -1046,17 +1118,17 @@ static int sc27xx_fgu_probe(struct platform_device *pdev)
 
        irq = gpiod_to_irq(data->gpiod);
        if (irq < 0) {
-               dev_err(&pdev->dev, "failed to translate GPIO to IRQ\n");
+               dev_err(dev, "failed to translate GPIO to IRQ\n");
                return irq;
        }
 
-       ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
+       ret = devm_request_threaded_irq(dev, irq, NULL,
                                        sc27xx_fgu_bat_detection,
                                        IRQF_ONESHOT | IRQF_TRIGGER_RISING |
                                        IRQF_TRIGGER_FALLING,
                                        pdev->name, data);
        if (ret) {
-               dev_err(&pdev->dev, "failed to request IRQ\n");
+               dev_err(dev, "failed to request IRQ\n");
                return ret;
        }
 
@@ -1093,7 +1165,8 @@ static int sc27xx_fgu_suspend(struct device *dev)
         * If we are charging, then no need to enable the FGU interrupts to
         * adjust the battery capacity.
         */
-       if (status != POWER_SUPPLY_STATUS_NOT_CHARGING)
+       if (status != POWER_SUPPLY_STATUS_NOT_CHARGING &&
+           status != POWER_SUPPLY_STATUS_DISCHARGING)
                return 0;
 
        ret = regmap_update_bits(data->regmap, data->base + SC27XX_FGU_INT_EN,
index b0e632b..e3a2518 100644 (file)
@@ -44,7 +44,7 @@ config PWM_AB8500
 
 config PWM_ATMEL
        tristate "Atmel PWM support"
-       depends on ARCH_AT91
+       depends on ARCH_AT91 && OF
        help
          Generic PWM framework driver for Atmel SoC.
 
@@ -423,6 +423,17 @@ config PWM_SPEAR
          To compile this driver as a module, choose M here: the module
          will be called pwm-spear.
 
+config PWM_SPRD
+       tristate "Spreadtrum PWM support"
+       depends on ARCH_SPRD || COMPILE_TEST
+       depends on HAS_IOMEM
+       help
+         Generic PWM framework driver for the PWM controller on
+         Spreadtrum SoCs.
+
+         To compile this driver as a module, choose M here: the module
+         will be called pwm-sprd.
+
 config PWM_STI
        tristate "STiH4xx PWM support"
        depends on ARCH_STI
index 76b555b..26326ad 100644 (file)
@@ -41,6 +41,7 @@ obj-$(CONFIG_PWM_ROCKCHIP)    += pwm-rockchip.o
 obj-$(CONFIG_PWM_SAMSUNG)      += pwm-samsung.o
 obj-$(CONFIG_PWM_SIFIVE)       += pwm-sifive.o
 obj-$(CONFIG_PWM_SPEAR)                += pwm-spear.o
+obj-$(CONFIG_PWM_SPRD)         += pwm-sprd.o
 obj-$(CONFIG_PWM_STI)          += pwm-sti.o
 obj-$(CONFIG_PWM_STM32)                += pwm-stm32.o
 obj-$(CONFIG_PWM_STM32_LP)     += pwm-stm32-lp.o
index 8edfac1..6ad51aa 100644 (file)
@@ -448,36 +448,44 @@ EXPORT_SYMBOL_GPL(pwm_free);
 /**
  * pwm_apply_state() - atomically apply a new state to a PWM device
  * @pwm: PWM device
- * @state: new state to apply. This can be adjusted by the PWM driver
- *        if the requested config is not achievable, for example,
- *        ->duty_cycle and ->period might be approximated.
+ * @state: new state to apply
  */
-int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state)
+int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state)
 {
+       struct pwm_chip *chip;
        int err;
 
        if (!pwm || !state || !state->period ||
            state->duty_cycle > state->period)
                return -EINVAL;
 
+       chip = pwm->chip;
+
        if (state->period == pwm->state.period &&
            state->duty_cycle == pwm->state.duty_cycle &&
            state->polarity == pwm->state.polarity &&
            state->enabled == pwm->state.enabled)
                return 0;
 
-       if (pwm->chip->ops->apply) {
-               err = pwm->chip->ops->apply(pwm->chip, pwm, state);
+       if (chip->ops->apply) {
+               err = chip->ops->apply(chip, pwm, state);
                if (err)
                        return err;
 
-               pwm->state = *state;
+               /*
+                * .apply might have to round some values in *state, if possible
+                * read the actually implemented value back.
+                */
+               if (chip->ops->get_state)
+                       chip->ops->get_state(chip, pwm, &pwm->state);
+               else
+                       pwm->state = *state;
        } else {
                /*
                 * FIXME: restore the initial state in case of error.
                 */
                if (state->polarity != pwm->state.polarity) {
-                       if (!pwm->chip->ops->set_polarity)
+                       if (!chip->ops->set_polarity)
                                return -ENOTSUPP;
 
                        /*
@@ -486,12 +494,12 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state)
                         * ->apply().
                         */
                        if (pwm->state.enabled) {
-                               pwm->chip->ops->disable(pwm->chip, pwm);
+                               chip->ops->disable(chip, pwm);
                                pwm->state.enabled = false;
                        }
 
-                       err = pwm->chip->ops->set_polarity(pwm->chip, pwm,
-                                                          state->polarity);
+                       err = chip->ops->set_polarity(chip, pwm,
+                                                     state->polarity);
                        if (err)
                                return err;
 
@@ -500,9 +508,9 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state)
 
                if (state->period != pwm->state.period ||
                    state->duty_cycle != pwm->state.duty_cycle) {
-                       err = pwm->chip->ops->config(pwm->chip, pwm,
-                                                    state->duty_cycle,
-                                                    state->period);
+                       err = chip->ops->config(pwm->chip, pwm,
+                                               state->duty_cycle,
+                                               state->period);
                        if (err)
                                return err;
 
@@ -512,11 +520,11 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state)
 
                if (state->enabled != pwm->state.enabled) {
                        if (state->enabled) {
-                               err = pwm->chip->ops->enable(pwm->chip, pwm);
+                               err = chip->ops->enable(chip, pwm);
                                if (err)
                                        return err;
                        } else {
-                               pwm->chip->ops->disable(pwm->chip, pwm);
+                               chip->ops->disable(chip, pwm);
                        }
 
                        pwm->state.enabled = state->enabled;
index d13a83f..dcbc048 100644 (file)
@@ -39,7 +39,7 @@ static inline struct atmel_hlcdc_pwm *to_atmel_hlcdc_pwm(struct pwm_chip *chip)
 }
 
 static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
-                                struct pwm_state *state)
+                                const struct pwm_state *state)
 {
        struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c);
        struct atmel_hlcdc *hlcdc = chip->hlcdc;
index e5e1eaf..9ba7334 100644 (file)
@@ -209,7 +209,7 @@ static void atmel_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm,
 }
 
 static int atmel_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                          struct pwm_state *state)
+                          const struct pwm_state *state)
 {
        struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip);
        struct pwm_state cstate;
@@ -318,19 +318,6 @@ static const struct atmel_pwm_data mchp_sam9x60_pwm_data = {
        },
 };
 
-static const struct platform_device_id atmel_pwm_devtypes[] = {
-       {
-               .name = "at91sam9rl-pwm",
-               .driver_data = (kernel_ulong_t)&atmel_sam9rl_pwm_data,
-       }, {
-               .name = "sama5d3-pwm",
-               .driver_data = (kernel_ulong_t)&atmel_sama5_pwm_data,
-       }, {
-               /* sentinel */
-       },
-};
-MODULE_DEVICE_TABLE(platform, atmel_pwm_devtypes);
-
 static const struct of_device_id atmel_pwm_dt_ids[] = {
        {
                .compatible = "atmel,at91sam9rl-pwm",
@@ -350,34 +337,20 @@ static const struct of_device_id atmel_pwm_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, atmel_pwm_dt_ids);
 
-static inline const struct atmel_pwm_data *
-atmel_pwm_get_driver_data(struct platform_device *pdev)
-{
-       const struct platform_device_id *id;
-
-       if (pdev->dev.of_node)
-               return of_device_get_match_data(&pdev->dev);
-
-       id = platform_get_device_id(pdev);
-
-       return (struct atmel_pwm_data *)id->driver_data;
-}
-
 static int atmel_pwm_probe(struct platform_device *pdev)
 {
-       const struct atmel_pwm_data *data;
        struct atmel_pwm_chip *atmel_pwm;
        struct resource *res;
        int ret;
 
-       data = atmel_pwm_get_driver_data(pdev);
-       if (!data)
-               return -ENODEV;
-
        atmel_pwm = devm_kzalloc(&pdev->dev, sizeof(*atmel_pwm), GFP_KERNEL);
        if (!atmel_pwm)
                return -ENOMEM;
 
+       mutex_init(&atmel_pwm->isr_lock);
+       atmel_pwm->data = of_device_get_match_data(&pdev->dev);
+       atmel_pwm->updated_pwms = 0;
+
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        atmel_pwm->base = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(atmel_pwm->base))
@@ -395,17 +368,10 @@ static int atmel_pwm_probe(struct platform_device *pdev)
 
        atmel_pwm->chip.dev = &pdev->dev;
        atmel_pwm->chip.ops = &atmel_pwm_ops;
-
-       if (pdev->dev.of_node) {
-               atmel_pwm->chip.of_xlate = of_pwm_xlate_with_flags;
-               atmel_pwm->chip.of_pwm_n_cells = 3;
-       }
-
+       atmel_pwm->chip.of_xlate = of_pwm_xlate_with_flags;
+       atmel_pwm->chip.of_pwm_n_cells = 3;
        atmel_pwm->chip.base = -1;
        atmel_pwm->chip.npwm = 4;
-       atmel_pwm->data = data;
-       atmel_pwm->updated_pwms = 0;
-       mutex_init(&atmel_pwm->isr_lock);
 
        ret = pwmchip_add(&atmel_pwm->chip);
        if (ret < 0) {
@@ -437,7 +403,6 @@ static struct platform_driver atmel_pwm_driver = {
                .name = "atmel-pwm",
                .of_match_table = of_match_ptr(atmel_pwm_dt_ids),
        },
-       .id_table = atmel_pwm_devtypes,
        .probe = atmel_pwm_probe,
        .remove = atmel_pwm_remove,
 };
index d961a82..56c38cf 100644 (file)
@@ -115,7 +115,7 @@ static void iproc_pwmc_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 }
 
 static int iproc_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                           struct pwm_state *state)
+                           const struct pwm_state *state)
 {
        unsigned long prescale = IPROC_PWM_PRESCALE_MIN;
        struct iproc_pwmc *ip = to_iproc_pwmc(chip);
index f6fe0b9..91e24f0 100644 (file)
@@ -21,7 +21,7 @@
 #define PERIOD(x)              (((x) * 0x10) + 0x10)
 #define DUTY(x)                        (((x) * 0x10) + 0x14)
 
-#define MIN_PERIOD             108             /* 9.2 MHz max. PWM clock */
+#define PERIOD_MIN             0x2
 
 struct bcm2835_pwm {
        struct pwm_chip chip;
@@ -64,6 +64,7 @@ static int bcm2835_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
        struct bcm2835_pwm *pc = to_bcm2835_pwm(chip);
        unsigned long rate = clk_get_rate(pc->clk);
        unsigned long scaler;
+       u32 period;
 
        if (!rate) {
                dev_err(pc->dev, "failed to get clock rate\n");
@@ -71,17 +72,14 @@ static int bcm2835_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
        }
 
        scaler = DIV_ROUND_CLOSEST(NSEC_PER_SEC, rate);
+       period = DIV_ROUND_CLOSEST(period_ns, scaler);
 
-       if (period_ns <= MIN_PERIOD) {
-               dev_err(pc->dev, "period %d not supported, minimum %d\n",
-                       period_ns, MIN_PERIOD);
+       if (period < PERIOD_MIN)
                return -EINVAL;
-       }
 
        writel(DIV_ROUND_CLOSEST(duty_ns, scaler),
               pc->base + DUTY(pwm->hwpwm));
-       writel(DIV_ROUND_CLOSEST(period_ns, scaler),
-              pc->base + PERIOD(pwm->hwpwm));
+       writel(period, pc->base + PERIOD(pwm->hwpwm));
 
        return 0;
 }
@@ -155,8 +153,11 @@ static int bcm2835_pwm_probe(struct platform_device *pdev)
 
        pc->clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(pc->clk)) {
-               dev_err(&pdev->dev, "clock not found: %ld\n", PTR_ERR(pc->clk));
-               return PTR_ERR(pc->clk);
+               ret = PTR_ERR(pc->clk);
+               if (ret != -EPROBE_DEFER)
+                       dev_err(&pdev->dev, "clock not found: %d\n", ret);
+
+               return ret;
        }
 
        ret = clk_prepare_enable(pc->clk);
index 85bea2d..8949744 100644 (file)
@@ -93,7 +93,7 @@ static int cros_ec_pwm_get_duty(struct cros_ec_device *ec, u8 index)
 }
 
 static int cros_ec_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                            struct pwm_state *state)
+                            const struct pwm_state *state)
 {
        struct cros_ec_pwm_device *ec_pwm = pwm_to_cros_ec_pwm(chip);
        int duty_cycle;
index 9d31a21..59272a9 100644 (file)
@@ -227,7 +227,7 @@ static bool fsl_pwm_is_other_pwm_enabled(struct fsl_pwm_chip *fpc,
 
 static int fsl_pwm_apply_config(struct fsl_pwm_chip *fpc,
                                struct pwm_device *pwm,
-                               struct pwm_state *newstate)
+                               const struct pwm_state *newstate)
 {
        unsigned int duty;
        u32 reg_polarity;
@@ -292,17 +292,13 @@ static int fsl_pwm_apply_config(struct fsl_pwm_chip *fpc,
 
        regmap_update_bits(fpc->regmap, FTM_POL, BIT(pwm->hwpwm), reg_polarity);
 
-       newstate->period = fsl_pwm_ticks_to_ns(fpc,
-                                              fpc->period.mod_period + 1);
-       newstate->duty_cycle = fsl_pwm_ticks_to_ns(fpc, duty);
-
        ftm_set_write_protection(fpc);
 
        return 0;
 }
 
 static int fsl_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                        struct pwm_state *newstate)
+                        const struct pwm_state *newstate)
 {
        struct fsl_pwm_chip *fpc = to_fsl_chip(chip);
        struct pwm_state *oldstate = &pwm->state;
index 753bd58..ad205fd 100644 (file)
@@ -149,7 +149,7 @@ static void hibvt_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 }
 
 static int hibvt_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                               struct pwm_state *state)
+                          const struct pwm_state *state)
 {
        struct hibvt_pwm_chip *hi_pwm_chip = to_hibvt_pwm_chip(chip);
 
index e8385c1..9145f61 100644 (file)
@@ -89,7 +89,7 @@ to_imx_tpm_pwm_chip(struct pwm_chip *chip)
 static int pwm_imx_tpm_round_state(struct pwm_chip *chip,
                                   struct imx_tpm_pwm_param *p,
                                   struct pwm_state *real_state,
-                                  struct pwm_state *state)
+                                  const struct pwm_state *state)
 {
        struct imx_tpm_pwm_chip *tpm = to_imx_tpm_pwm_chip(chip);
        u32 rate, prescale, period_count, clock_unit;
@@ -289,7 +289,7 @@ static int pwm_imx_tpm_apply_hw(struct pwm_chip *chip,
 
 static int pwm_imx_tpm_apply(struct pwm_chip *chip,
                             struct pwm_device *pwm,
-                            struct pwm_state *state)
+                            const struct pwm_state *state)
 {
        struct imx_tpm_pwm_chip *tpm = to_imx_tpm_pwm_chip(chip);
        struct imx_tpm_pwm_param param;
index 434a351..ae11d85 100644 (file)
@@ -3,6 +3,10 @@
  * simple driver for PWM (Pulse Width Modulator) controller
  *
  * Derived from pxa PWM driver by eric miao <eric.miao@marvell.com>
+ *
+ * Limitations:
+ * - When disabled the output is driven to 0 independent of the configured
+ *   polarity.
  */
 
 #include <linux/bitfield.h>
@@ -205,7 +209,7 @@ static void pwm_imx27_wait_fifo_slot(struct pwm_chip *chip,
 }
 
 static int pwm_imx27_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                          struct pwm_state *state)
+                          const struct pwm_state *state)
 {
        unsigned long period_cycles, duty_cycles, prescale;
        struct pwm_imx27_chip *imx = to_pwm_imx27_chip(chip);
index f901e8a..9d78cc2 100644 (file)
@@ -2,6 +2,11 @@
 /*
  *  Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de>
  *  JZ4740 platform PWM support
+ *
+ * Limitations:
+ * - The .apply callback doesn't complete the currently running period before
+ *   reconfiguring the hardware.
+ * - Each period starts with the inactive part.
  */
 
 #include <linux/clk.h>
@@ -83,7 +88,7 @@ static void jz4740_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
 }
 
 static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                           struct pwm_state *state)
+                           const struct pwm_state *state)
 {
        struct jz4740_pwm_chip *jz4740 = to_jz4740(pwm->chip);
        unsigned long long tmp;
index 4098a46..75bbfe5 100644 (file)
@@ -122,7 +122,7 @@ static inline void pwm_lpss_cond_enable(struct pwm_device *pwm, bool cond)
 }
 
 static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                         struct pwm_state *state)
+                         const struct pwm_state *state)
 {
        struct pwm_lpss_chip *lpwm = to_lpwm(chip);
        int ret;
index eb6674c..b94e0d0 100644 (file)
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * Mediatek Pulse Width Modulator driver
+ * MediaTek Pulse Width Modulator driver
  *
  * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
  * Copyright (C) 2017 Zhi Mao <zhi.mao@mediatek.com>
  *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
  */
 
 #include <linux/err.h>
 
 #define PWM_CLK_DIV_MAX                7
 
-enum {
-       MTK_CLK_MAIN = 0,
-       MTK_CLK_TOP,
-       MTK_CLK_PWM1,
-       MTK_CLK_PWM2,
-       MTK_CLK_PWM3,
-       MTK_CLK_PWM4,
-       MTK_CLK_PWM5,
-       MTK_CLK_PWM6,
-       MTK_CLK_PWM7,
-       MTK_CLK_PWM8,
-       MTK_CLK_MAX,
-};
-
-static const char * const mtk_pwm_clk_name[MTK_CLK_MAX] = {
-       "main", "top", "pwm1", "pwm2", "pwm3", "pwm4", "pwm5", "pwm6", "pwm7",
-       "pwm8"
-};
-
-struct mtk_pwm_platform_data {
+struct pwm_mediatek_of_data {
        unsigned int num_pwms;
        bool pwm45_fixup;
-       bool has_clks;
 };
 
 /**
- * struct mtk_pwm_chip - struct representing PWM chip
+ * struct pwm_mediatek_chip - struct representing PWM chip
  * @chip: linux PWM chip representation
  * @regs: base address of PWM chip
- * @clks: list of clocks
+ * @clk_top: the top clock generator
+ * @clk_main: the clock used by PWM core
+ * @clk_pwms: the clock used by each PWM channel
+ * @soc: pointer to the SoC-specific match data (struct pwm_mediatek_of_data)
  */
-struct mtk_pwm_chip {
+struct pwm_mediatek_chip {
        struct pwm_chip chip;
        void __iomem *regs;
-       struct clk *clks[MTK_CLK_MAX];
-       const struct mtk_pwm_platform_data *soc;
+       struct clk *clk_top;
+       struct clk *clk_main;
+       struct clk **clk_pwms;
+       const struct pwm_mediatek_of_data *soc;
 };
 
-static const unsigned int mtk_pwm_reg_offset[] = {
+static const unsigned int pwm_mediatek_reg_offset[] = {
        0x0010, 0x0050, 0x0090, 0x00d0, 0x0110, 0x0150, 0x0190, 0x0220
 };
 
-static inline struct mtk_pwm_chip *to_mtk_pwm_chip(struct pwm_chip *chip)
+static inline struct pwm_mediatek_chip *
+to_pwm_mediatek_chip(struct pwm_chip *chip)
 {
-       return container_of(chip, struct mtk_pwm_chip, chip);
+       return container_of(chip, struct pwm_mediatek_chip, chip);
 }
 
-static int mtk_pwm_clk_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+static int pwm_mediatek_clk_enable(struct pwm_chip *chip,
+                                  struct pwm_device *pwm)
 {
-       struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip);
+       struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip);
        int ret;
 
-       if (!pc->soc->has_clks)
-               return 0;
-
-       ret = clk_prepare_enable(pc->clks[MTK_CLK_TOP]);
+       ret = clk_prepare_enable(pc->clk_top);
        if (ret < 0)
                return ret;
 
-       ret = clk_prepare_enable(pc->clks[MTK_CLK_MAIN]);
+       ret = clk_prepare_enable(pc->clk_main);
        if (ret < 0)
                goto disable_clk_top;
 
-       ret = clk_prepare_enable(pc->clks[MTK_CLK_PWM1 + pwm->hwpwm]);
+       ret = clk_prepare_enable(pc->clk_pwms[pwm->hwpwm]);
        if (ret < 0)
                goto disable_clk_main;
 
        return 0;
 
 disable_clk_main:
-       clk_disable_unprepare(pc->clks[MTK_CLK_MAIN]);
+       clk_disable_unprepare(pc->clk_main);
 disable_clk_top:
-       clk_disable_unprepare(pc->clks[MTK_CLK_TOP]);
+       clk_disable_unprepare(pc->clk_top);
 
        return ret;
 }
 
-static void mtk_pwm_clk_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+static void pwm_mediatek_clk_disable(struct pwm_chip *chip,
+                                    struct pwm_device *pwm)
 {
-       struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip);
-
-       if (!pc->soc->has_clks)
-               return;
+       struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip);
 
-       clk_disable_unprepare(pc->clks[MTK_CLK_PWM1 + pwm->hwpwm]);
-       clk_disable_unprepare(pc->clks[MTK_CLK_MAIN]);
-       clk_disable_unprepare(pc->clks[MTK_CLK_TOP]);
+       clk_disable_unprepare(pc->clk_pwms[pwm->hwpwm]);
+       clk_disable_unprepare(pc->clk_main);
+       clk_disable_unprepare(pc->clk_top);
 }
 
-static inline u32 mtk_pwm_readl(struct mtk_pwm_chip *chip, unsigned int num,
-                               unsigned int offset)
+static inline u32 pwm_mediatek_readl(struct pwm_mediatek_chip *chip,
+                                    unsigned int num, unsigned int offset)
 {
-       return readl(chip->regs + mtk_pwm_reg_offset[num] + offset);
+       return readl(chip->regs + pwm_mediatek_reg_offset[num] + offset);
 }
 
-static inline void mtk_pwm_writel(struct mtk_pwm_chip *chip,
-                                 unsigned int num, unsigned int offset,
-                                 u32 value)
+static inline void pwm_mediatek_writel(struct pwm_mediatek_chip *chip,
+                                      unsigned int num, unsigned int offset,
+                                      u32 value)
 {
-       writel(value, chip->regs + mtk_pwm_reg_offset[num] + offset);
+       writel(value, chip->regs + pwm_mediatek_reg_offset[num] + offset);
 }
 
-static int mtk_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
-                         int duty_ns, int period_ns)
+static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm,
+                              int duty_ns, int period_ns)
 {
-       struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip);
-       struct clk *clk = pc->clks[MTK_CLK_PWM1 + pwm->hwpwm];
+       struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip);
        u32 clkdiv = 0, cnt_period, cnt_duty, reg_width = PWMDWIDTH,
            reg_thres = PWMTHRES;
        u64 resolution;
        int ret;
 
-       ret = mtk_pwm_clk_enable(chip, pwm);
+       ret = pwm_mediatek_clk_enable(chip, pwm);
+
        if (ret < 0)
                return ret;
 
        /* Using resolution in picosecond gets accuracy higher */
        resolution = (u64)NSEC_PER_SEC * 1000;
-       do_div(resolution, clk_get_rate(clk));
+       do_div(resolution, clk_get_rate(pc->clk_pwms[pwm->hwpwm]));
 
        cnt_period = DIV_ROUND_CLOSEST_ULL((u64)period_ns * 1000, resolution);
        while (cnt_period > 8191) {
@@ -164,7 +144,7 @@ static int mtk_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
        }
 
        if (clkdiv > PWM_CLK_DIV_MAX) {
-               mtk_pwm_clk_disable(chip, pwm);
+               pwm_mediatek_clk_disable(chip, pwm);
                dev_err(chip->dev, "period %d not supported\n", period_ns);
                return -EINVAL;
        }
@@ -179,22 +159,22 @@ static int mtk_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
        }
 
        cnt_duty = DIV_ROUND_CLOSEST_ULL((u64)duty_ns * 1000, resolution);
-       mtk_pwm_writel(pc, pwm->hwpwm, PWMCON, BIT(15) | clkdiv);
-       mtk_pwm_writel(pc, pwm->hwpwm, reg_width, cnt_period);
-       mtk_pwm_writel(pc, pwm->hwpwm, reg_thres, cnt_duty);
+       pwm_mediatek_writel(pc, pwm->hwpwm, PWMCON, BIT(15) | clkdiv);
+       pwm_mediatek_writel(pc, pwm->hwpwm, reg_width, cnt_period);
+       pwm_mediatek_writel(pc, pwm->hwpwm, reg_thres, cnt_duty);
 
-       mtk_pwm_clk_disable(chip, pwm);
+       pwm_mediatek_clk_disable(chip, pwm);
 
        return 0;
 }
 
-static int mtk_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+static int pwm_mediatek_enable(struct pwm_chip *chip, struct pwm_device *pwm)
 {
-       struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip);
+       struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip);
        u32 value;
        int ret;
 
-       ret = mtk_pwm_clk_enable(chip, pwm);
+       ret = pwm_mediatek_clk_enable(chip, pwm);
        if (ret < 0)
                return ret;
 
@@ -205,29 +185,28 @@ static int mtk_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
        return 0;
 }
 
-static void mtk_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+static void pwm_mediatek_disable(struct pwm_chip *chip, struct pwm_device *pwm)
 {
-       struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip);
+       struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip);
        u32 value;
 
        value = readl(pc->regs);
        value &= ~BIT(pwm->hwpwm);
        writel(value, pc->regs);
 
-       mtk_pwm_clk_disable(chip, pwm);
+       pwm_mediatek_clk_disable(chip, pwm);
 }
 
-static const struct pwm_ops mtk_pwm_ops = {
-       .config = mtk_pwm_config,
-       .enable = mtk_pwm_enable,
-       .disable = mtk_pwm_disable,
+static const struct pwm_ops pwm_mediatek_ops = {
+       .config = pwm_mediatek_config,
+       .enable = pwm_mediatek_enable,
+       .disable = pwm_mediatek_disable,
        .owner = THIS_MODULE,
 };
 
-static int mtk_pwm_probe(struct platform_device *pdev)
+static int pwm_mediatek_probe(struct platform_device *pdev)
 {
-       const struct mtk_pwm_platform_data *data;
-       struct mtk_pwm_chip *pc;
+       struct pwm_mediatek_chip *pc;
        struct resource *res;
        unsigned int i;
        int ret;
@@ -236,31 +215,51 @@ static int mtk_pwm_probe(struct platform_device *pdev)
        if (!pc)
                return -ENOMEM;
 
-       data = of_device_get_match_data(&pdev->dev);
-       if (data == NULL)
-               return -EINVAL;
-       pc->soc = data;
+       pc->soc = of_device_get_match_data(&pdev->dev);
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        pc->regs = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(pc->regs))
                return PTR_ERR(pc->regs);
 
-       for (i = 0; i < data->num_pwms + 2 && pc->soc->has_clks; i++) {
-               pc->clks[i] = devm_clk_get(&pdev->dev, mtk_pwm_clk_name[i]);
-               if (IS_ERR(pc->clks[i])) {
+       pc->clk_pwms = devm_kcalloc(&pdev->dev, pc->soc->num_pwms,
+                                   sizeof(*pc->clk_pwms), GFP_KERNEL);
+       if (!pc->clk_pwms)
+               return -ENOMEM;
+
+       pc->clk_top = devm_clk_get(&pdev->dev, "top");
+       if (IS_ERR(pc->clk_top)) {
+               dev_err(&pdev->dev, "clock: top fail: %ld\n",
+                       PTR_ERR(pc->clk_top));
+               return PTR_ERR(pc->clk_top);
+       }
+
+       pc->clk_main = devm_clk_get(&pdev->dev, "main");
+       if (IS_ERR(pc->clk_main)) {
+               dev_err(&pdev->dev, "clock: main fail: %ld\n",
+                       PTR_ERR(pc->clk_main));
+               return PTR_ERR(pc->clk_main);
+       }
+
+       for (i = 0; i < pc->soc->num_pwms; i++) {
+               char name[8];
+
+               snprintf(name, sizeof(name), "pwm%d", i + 1);
+
+               pc->clk_pwms[i] = devm_clk_get(&pdev->dev, name);
+               if (IS_ERR(pc->clk_pwms[i])) {
                        dev_err(&pdev->dev, "clock: %s fail: %ld\n",
-                               mtk_pwm_clk_name[i], PTR_ERR(pc->clks[i]));
-                       return PTR_ERR(pc->clks[i]);
+                               name, PTR_ERR(pc->clk_pwms[i]));
+                       return PTR_ERR(pc->clk_pwms[i]);
                }
        }
 
        platform_set_drvdata(pdev, pc);
 
        pc->chip.dev = &pdev->dev;
-       pc->chip.ops = &mtk_pwm_ops;
+       pc->chip.ops = &pwm_mediatek_ops;
        pc->chip.base = -1;
-       pc->chip.npwm = data->num_pwms;
+       pc->chip.npwm = pc->soc->num_pwms;
 
        ret = pwmchip_add(&pc->chip);
        if (ret < 0) {
@@ -271,55 +270,63 @@ static int mtk_pwm_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int mtk_pwm_remove(struct platform_device *pdev)
+static int pwm_mediatek_remove(struct platform_device *pdev)
 {
-       struct mtk_pwm_chip *pc = platform_get_drvdata(pdev);
+       struct pwm_mediatek_chip *pc = platform_get_drvdata(pdev);
 
        return pwmchip_remove(&pc->chip);
 }
 
-static const struct mtk_pwm_platform_data mt2712_pwm_data = {
+static const struct pwm_mediatek_of_data mt2712_pwm_data = {
        .num_pwms = 8,
        .pwm45_fixup = false,
-       .has_clks = true,
 };
 
-static const struct mtk_pwm_platform_data mt7622_pwm_data = {
+static const struct pwm_mediatek_of_data mt7622_pwm_data = {
        .num_pwms = 6,
        .pwm45_fixup = false,
-       .has_clks = true,
 };
 
-static const struct mtk_pwm_platform_data mt7623_pwm_data = {
+static const struct pwm_mediatek_of_data mt7623_pwm_data = {
        .num_pwms = 5,
        .pwm45_fixup = true,
-       .has_clks = true,
 };
 
-static const struct mtk_pwm_platform_data mt7628_pwm_data = {
+static const struct pwm_mediatek_of_data mt7628_pwm_data = {
        .num_pwms = 4,
        .pwm45_fixup = true,
-       .has_clks = false,
 };
 
-static const struct of_device_id mtk_pwm_of_match[] = {
+static const struct pwm_mediatek_of_data mt7629_pwm_data = {
+       .num_pwms = 1,
+       .pwm45_fixup = false,
+};
+
+static const struct pwm_mediatek_of_data mt8516_pwm_data = {
+       .num_pwms = 5,
+       .pwm45_fixup = false,
+};
+
+static const struct of_device_id pwm_mediatek_of_match[] = {
        { .compatible = "mediatek,mt2712-pwm", .data = &mt2712_pwm_data },
        { .compatible = "mediatek,mt7622-pwm", .data = &mt7622_pwm_data },
        { .compatible = "mediatek,mt7623-pwm", .data = &mt7623_pwm_data },
        { .compatible = "mediatek,mt7628-pwm", .data = &mt7628_pwm_data },
+       { .compatible = "mediatek,mt7629-pwm", .data = &mt7629_pwm_data },
+       { .compatible = "mediatek,mt8516-pwm", .data = &mt8516_pwm_data },
        { },
 };
-MODULE_DEVICE_TABLE(of, mtk_pwm_of_match);
+MODULE_DEVICE_TABLE(of, pwm_mediatek_of_match);
 
-static struct platform_driver mtk_pwm_driver = {
+static struct platform_driver pwm_mediatek_driver = {
        .driver = {
-               .name = "mtk-pwm",
-               .of_match_table = mtk_pwm_of_match,
+               .name = "pwm-mediatek",
+               .of_match_table = pwm_mediatek_of_match,
        },
-       .probe = mtk_pwm_probe,
-       .remove = mtk_pwm_remove,
+       .probe = pwm_mediatek_probe,
+       .remove = pwm_mediatek_remove,
 };
-module_platform_driver(mtk_pwm_driver);
+module_platform_driver(pwm_mediatek_driver);
 
 MODULE_AUTHOR("John Crispin <blogic@openwrt.org>");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
index 3cbff5c..6245bbd 100644 (file)
@@ -159,7 +159,7 @@ static void meson_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
 }
 
 static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm,
-                         struct pwm_state *state)
+                         const struct pwm_state *state)
 {
        struct meson_pwm_channel *channel = pwm_get_chip_data(pwm);
        unsigned int duty, period, pre_div, cnt, duty_cnt;
@@ -265,7 +265,7 @@ static void meson_pwm_disable(struct meson_pwm *meson, struct pwm_device *pwm)
 }
 
 static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                          struct pwm_state *state)
+                          const struct pwm_state *state)
 {
        struct meson_pwm_channel *channel = pwm_get_chip_data(pwm);
        struct meson_pwm *meson = to_meson_pwm(chip);
index 04c0f6b..b14376b 100644 (file)
@@ -126,15 +126,13 @@ static int mxs_pwm_probe(struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
        struct mxs_pwm_chip *mxs;
-       struct resource *res;
        int ret;
 
        mxs = devm_kzalloc(&pdev->dev, sizeof(*mxs), GFP_KERNEL);
        if (!mxs)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       mxs->base = devm_ioremap_resource(&pdev->dev, res);
+       mxs->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(mxs->base))
                return PTR_ERR(mxs->base);
 
index 5b2b8ec..852eb23 100644 (file)
@@ -158,7 +158,7 @@ static void rcar_pwm_disable(struct rcar_pwm_chip *rp)
 }
 
 static int rcar_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                         struct pwm_state *state)
+                         const struct pwm_state *state)
 {
        struct rcar_pwm_chip *rp = to_rcar_pwm_chip(chip);
        struct pwm_state cur_state;
@@ -187,7 +187,7 @@ static int rcar_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
        /* The SYNC should be set to 0 even if rcar_pwm_set_counter failed */
        rcar_pwm_update(rp, RCAR_PWMCR_SYNC, 0, RCAR_PWMCR);
 
-       if (!ret && state->enabled)
+       if (!ret)
                ret = rcar_pwm_enable(rp);
 
        return ret;
index 51b96cb..73352e6 100644 (file)
@@ -90,16 +90,16 @@ static void rockchip_pwm_get_state(struct pwm_chip *chip,
                state->enabled = ((val & enable_conf) == enable_conf) ?
                                 true : false;
 
-       if (pc->data->supports_polarity) {
-               if (!(val & PWM_DUTY_POSITIVE))
-                       state->polarity = PWM_POLARITY_INVERSED;
-       }
+       if (pc->data->supports_polarity && !(val & PWM_DUTY_POSITIVE))
+               state->polarity = PWM_POLARITY_INVERSED;
+       else
+               state->polarity = PWM_POLARITY_NORMAL;
 
        clk_disable(pc->pclk);
 }
 
 static void rockchip_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
-                              struct pwm_state *state)
+                              const struct pwm_state *state)
 {
        struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip);
        unsigned long period, duty;
@@ -183,7 +183,7 @@ static int rockchip_pwm_enable(struct pwm_chip *chip,
 }
 
 static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                             struct pwm_state *state)
+                             const struct pwm_state *state)
 {
        struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip);
        struct pwm_state curstate;
@@ -212,12 +212,6 @@ static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
                        goto out;
        }
 
-       /*
-        * Update the state with the real hardware, which can differ a bit
-        * because of period/duty_cycle approximation.
-        */
-       rockchip_pwm_get_state(chip, pwm, state);
-
 out:
        clk_disable(pc->pclk);
 
index a7c107f..cc63f9b 100644 (file)
@@ -147,7 +147,7 @@ static int pwm_sifive_enable(struct pwm_chip *chip, bool enable)
 }
 
 static int pwm_sifive_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                           struct pwm_state *state)
+                           const struct pwm_state *state)
 {
        struct pwm_sifive_ddata *ddata = pwm_sifive_chip_to_ddata(chip);
        struct pwm_state cur_state;
@@ -250,10 +250,8 @@ static int pwm_sifive_probe(struct platform_device *pdev)
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        ddata->regs = devm_ioremap_resource(dev, res);
-       if (IS_ERR(ddata->regs)) {
-               dev_err(dev, "Unable to map IO resources\n");
+       if (IS_ERR(ddata->regs))
                return PTR_ERR(ddata->regs);
-       }
 
        ddata->clk = devm_clk_get(dev, NULL);
        if (IS_ERR(ddata->clk)) {
diff --git a/drivers/pwm/pwm-sprd.c b/drivers/pwm/pwm-sprd.c
new file mode 100644 (file)
index 0000000..be23942
--- /dev/null
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Spreadtrum Communications Inc.
+ */
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+
+#define SPRD_PWM_PRESCALE      0x0
+#define SPRD_PWM_MOD           0x4
+#define SPRD_PWM_DUTY          0x8
+#define SPRD_PWM_ENABLE                0x18
+
+#define SPRD_PWM_MOD_MAX       GENMASK(7, 0)
+#define SPRD_PWM_DUTY_MSK      GENMASK(15, 0)
+#define SPRD_PWM_PRESCALE_MSK  GENMASK(7, 0)
+#define SPRD_PWM_ENABLE_BIT    BIT(0)
+
+#define SPRD_PWM_CHN_NUM       4
+#define SPRD_PWM_REGS_SHIFT    5
+#define SPRD_PWM_CHN_CLKS_NUM  2
+#define SPRD_PWM_CHN_OUTPUT_CLK        1
+
+struct sprd_pwm_chn {
+       struct clk_bulk_data clks[SPRD_PWM_CHN_CLKS_NUM];
+       u32 clk_rate;
+};
+
+struct sprd_pwm_chip {
+       void __iomem *base;
+       struct device *dev;
+       struct pwm_chip chip;
+       int num_pwms;
+       struct sprd_pwm_chn chn[SPRD_PWM_CHN_NUM];
+};
+
+/*
+ * The list of clocks required by PWM channels, and each channel has 2 clocks:
+ * enable clock and pwm clock.
+ */
+static const char * const sprd_pwm_clks[] = {
+       "enable0", "pwm0",
+       "enable1", "pwm1",
+       "enable2", "pwm2",
+       "enable3", "pwm3",
+};
+
+static u32 sprd_pwm_read(struct sprd_pwm_chip *spc, u32 hwid, u32 reg)
+{
+       u32 offset = reg + (hwid << SPRD_PWM_REGS_SHIFT);
+
+       return readl_relaxed(spc->base + offset);
+}
+
+static void sprd_pwm_write(struct sprd_pwm_chip *spc, u32 hwid,
+                          u32 reg, u32 val)
+{
+       u32 offset = reg + (hwid << SPRD_PWM_REGS_SHIFT);
+
+       writel_relaxed(val, spc->base + offset);
+}
+
+static void sprd_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+                              struct pwm_state *state)
+{
+       struct sprd_pwm_chip *spc =
+               container_of(chip, struct sprd_pwm_chip, chip);
+       struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm];
+       u32 val, duty, prescale;
+       u64 tmp;
+       int ret;
+
+       /*
+        * The clocks of the PWM channel have to be enabled before
+        * reading the registers.
+        */
+       ret = clk_bulk_prepare_enable(SPRD_PWM_CHN_CLKS_NUM, chn->clks);
+       if (ret) {
+               dev_err(spc->dev, "failed to enable pwm%u clocks\n",
+                       pwm->hwpwm);
+               return;
+       }
+
+       val = sprd_pwm_read(spc, pwm->hwpwm, SPRD_PWM_ENABLE);
+       if (val & SPRD_PWM_ENABLE_BIT)
+               state->enabled = true;
+       else
+               state->enabled = false;
+
+       /*
+        * The hardware provides a counter that is fed by the source clock.
+        * The period length is (PRESCALE + 1) * MOD counter steps.
+        * The duty cycle length is (PRESCALE + 1) * DUTY counter steps.
+        * Thus the period_ns and duty_ns calculation formula should be:
+        * period_ns = NSEC_PER_SEC * (prescale + 1) * mod / clk_rate
+        * duty_ns = NSEC_PER_SEC * (prescale + 1) * duty / clk_rate
+        */
+       val = sprd_pwm_read(spc, pwm->hwpwm, SPRD_PWM_PRESCALE);
+       prescale = val & SPRD_PWM_PRESCALE_MSK;
+       tmp = (prescale + 1) * NSEC_PER_SEC * SPRD_PWM_MOD_MAX;
+       state->period = DIV_ROUND_CLOSEST_ULL(tmp, chn->clk_rate);
+
+       val = sprd_pwm_read(spc, pwm->hwpwm, SPRD_PWM_DUTY);
+       duty = val & SPRD_PWM_DUTY_MSK;
+       tmp = (prescale + 1) * NSEC_PER_SEC * duty;
+       state->duty_cycle = DIV_ROUND_CLOSEST_ULL(tmp, chn->clk_rate);
+
+       /* Disable PWM clocks if the PWM channel is not in enable state. */
+       if (!state->enabled)
+               clk_bulk_disable_unprepare(SPRD_PWM_CHN_CLKS_NUM, chn->clks);
+}
+
+static int sprd_pwm_config(struct sprd_pwm_chip *spc, struct pwm_device *pwm,
+                          int duty_ns, int period_ns)
+{
+       struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm];
+       u32 prescale, duty;
+       u64 tmp;
+
+       /*
+        * The hardware provides a counter that is fed by the source clock.
+        * The period length is (PRESCALE + 1) * MOD counter steps.
+        * The duty cycle length is (PRESCALE + 1) * DUTY counter steps.
+        *
+        * To keep the maths simple we're always using MOD = SPRD_PWM_MOD_MAX.
+        * The value for PRESCALE is selected such that the resulting period
+        * gets the maximal length not bigger than the requested one with the
+        * given settings (MOD = SPRD_PWM_MOD_MAX and input clock).
+        */
+       duty = duty_ns * SPRD_PWM_MOD_MAX / period_ns;
+
+       tmp = (u64)chn->clk_rate * period_ns;
+       do_div(tmp, NSEC_PER_SEC);
+       prescale = DIV_ROUND_CLOSEST_ULL(tmp, SPRD_PWM_MOD_MAX) - 1;
+       if (prescale > SPRD_PWM_PRESCALE_MSK)
+               prescale = SPRD_PWM_PRESCALE_MSK;
+
+       /*
+        * Note: Writing DUTY triggers the hardware to actually apply the
+        * values written to MOD and DUTY to the output, so must keep writing
+        * DUTY last.
+        *
+        * The hardware ensures that the currently running period is completed
+        * before a new configuration is applied, to avoid mixed settings.
+        */
+       sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_PRESCALE, prescale);
+       sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_MOD, SPRD_PWM_MOD_MAX);
+       sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_DUTY, duty);
+
+       return 0;
+}
+
+static int sprd_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+                         const struct pwm_state *state)
+{
+       struct sprd_pwm_chip *spc =
+               container_of(chip, struct sprd_pwm_chip, chip);
+       struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm];
+       struct pwm_state *cstate = &pwm->state;
+       int ret;
+
+       if (state->enabled) {
+               if (!cstate->enabled) {
+                       /*
+                        * The clocks of the PWM channel have to be enabled
+                        * before writing to the registers.
+                        */
+                       ret = clk_bulk_prepare_enable(SPRD_PWM_CHN_CLKS_NUM,
+                                                     chn->clks);
+                       if (ret) {
+                               dev_err(spc->dev,
+                                       "failed to enable pwm%u clocks\n",
+                                       pwm->hwpwm);
+                               return ret;
+                       }
+               }
+
+               if (state->period != cstate->period ||
+                   state->duty_cycle != cstate->duty_cycle) {
+                       ret = sprd_pwm_config(spc, pwm, state->duty_cycle,
+                                             state->period);
+                       if (ret)
+                               return ret;
+               }
+
+               sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_ENABLE, 1);
+       } else if (cstate->enabled) {
+               /*
+                * Note: After setting SPRD_PWM_ENABLE to zero, the controller
+                * will not wait for current period to be completed, instead it
+                * will stop the PWM channel immediately.
+                */
+               sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_ENABLE, 0);
+
+               clk_bulk_disable_unprepare(SPRD_PWM_CHN_CLKS_NUM, chn->clks);
+       }
+
+       return 0;
+}
+
+static const struct pwm_ops sprd_pwm_ops = {
+       .apply = sprd_pwm_apply,
+       .get_state = sprd_pwm_get_state,
+       .owner = THIS_MODULE,
+};
+
+static int sprd_pwm_clk_init(struct sprd_pwm_chip *spc)
+{
+       struct clk *clk_pwm;
+       int ret, i;
+
+       for (i = 0; i < SPRD_PWM_CHN_NUM; i++) {
+               struct sprd_pwm_chn *chn = &spc->chn[i];
+               int j;
+
+               for (j = 0; j < SPRD_PWM_CHN_CLKS_NUM; ++j)
+                       chn->clks[j].id =
+                               sprd_pwm_clks[i * SPRD_PWM_CHN_CLKS_NUM + j];
+
+               ret = devm_clk_bulk_get(spc->dev, SPRD_PWM_CHN_CLKS_NUM,
+                                       chn->clks);
+               if (ret) {
+                       if (ret == -ENOENT)
+                               break;
+
+                       if (ret != -EPROBE_DEFER)
+                               dev_err(spc->dev,
+                                       "failed to get channel clocks\n");
+
+                       return ret;
+               }
+
+               clk_pwm = chn->clks[SPRD_PWM_CHN_OUTPUT_CLK].clk;
+               chn->clk_rate = clk_get_rate(clk_pwm);
+       }
+
+       if (!i) {
+               dev_err(spc->dev, "no available PWM channels\n");
+               return -ENODEV;
+       }
+
+       spc->num_pwms = i;
+
+       return 0;
+}
+
+static int sprd_pwm_probe(struct platform_device *pdev)
+{
+       struct sprd_pwm_chip *spc;
+       int ret;
+
+       spc = devm_kzalloc(&pdev->dev, sizeof(*spc), GFP_KERNEL);
+       if (!spc)
+               return -ENOMEM;
+
+       spc->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(spc->base))
+               return PTR_ERR(spc->base);
+
+       spc->dev = &pdev->dev;
+       platform_set_drvdata(pdev, spc);
+
+       ret = sprd_pwm_clk_init(spc);
+       if (ret)
+               return ret;
+
+       spc->chip.dev = &pdev->dev;
+       spc->chip.ops = &sprd_pwm_ops;
+       spc->chip.base = -1;
+       spc->chip.npwm = spc->num_pwms;
+
+       ret = pwmchip_add(&spc->chip);
+       if (ret)
+               dev_err(&pdev->dev, "failed to add PWM chip\n");
+
+       return ret;
+}
+
+static int sprd_pwm_remove(struct platform_device *pdev)
+{
+       struct sprd_pwm_chip *spc = platform_get_drvdata(pdev);
+
+       return pwmchip_remove(&spc->chip);
+}
+
+static const struct of_device_id sprd_pwm_of_match[] = {
+       { .compatible = "sprd,ums512-pwm", },
+       { },
+};
+MODULE_DEVICE_TABLE(of, sprd_pwm_of_match);
+
+static struct platform_driver sprd_pwm_driver = {
+       .driver = {
+               .name = "sprd-pwm",
+               .of_match_table = sprd_pwm_of_match,
+       },
+       .probe = sprd_pwm_probe,
+       .remove = sprd_pwm_remove,
+};
+
+module_platform_driver(sprd_pwm_driver);
+
+MODULE_DESCRIPTION("Spreadtrum PWM Driver");
+MODULE_LICENSE("GPL v2");
index 20450e3..1508616 100644 (file)
@@ -564,10 +564,8 @@ static int sti_pwm_probe(struct platform_device *pdev)
                return PTR_ERR(pc->regmap);
 
        irq = platform_get_irq(pdev, 0);
-       if (irq < 0) {
-               dev_err(&pdev->dev, "Failed to obtain IRQ\n");
+       if (irq < 0)
                return irq;
-       }
 
        ret = devm_request_irq(&pdev->dev, irq, sti_pwm_interrupt, 0,
                               pdev->name, pc);
index 2211a64..67fca62 100644 (file)
@@ -32,7 +32,7 @@ static inline struct stm32_pwm_lp *to_stm32_pwm_lp(struct pwm_chip *chip)
 #define STM32_LPTIM_MAX_PRESCALER      128
 
 static int stm32_pwm_lp_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                             struct pwm_state *state)
+                             const struct pwm_state *state)
 {
        struct stm32_pwm_lp *priv = to_stm32_pwm_lp(chip);
        unsigned long long prd, div, dty;
@@ -59,6 +59,12 @@ static int stm32_pwm_lp_apply(struct pwm_chip *chip, struct pwm_device *pwm,
        /* Calculate the period and prescaler value */
        div = (unsigned long long)clk_get_rate(priv->clk) * state->period;
        do_div(div, NSEC_PER_SEC);
+       if (!div) {
+               /* Clock is too slow to achieve requested period. */
+               dev_dbg(priv->chip.dev, "Can't reach %u ns\n",  state->period);
+               return -EINVAL;
+       }
+
        prd = div;
        while (div > STM32_LPTIM_MAX_ARR) {
                presc++;
index 740e2de..359b085 100644 (file)
@@ -440,7 +440,7 @@ static void stm32_pwm_disable(struct stm32_pwm *priv, int ch)
 }
 
 static int stm32_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                          struct pwm_state *state)
+                          const struct pwm_state *state)
 {
        bool enabled;
        struct stm32_pwm *priv = to_stm32_pwm_dev(chip);
@@ -468,7 +468,7 @@ static int stm32_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 }
 
 static int stm32_pwm_apply_locked(struct pwm_chip *chip, struct pwm_device *pwm,
-                                 struct pwm_state *state)
+                                 const struct pwm_state *state)
 {
        struct stm32_pwm *priv = to_stm32_pwm_dev(chip);
        int ret;
index de78c82..6f5840a 100644 (file)
@@ -145,7 +145,7 @@ static void sun4i_pwm_get_state(struct pwm_chip *chip,
 }
 
 static int sun4i_pwm_calculate(struct sun4i_pwm_chip *sun4i_pwm,
-                              struct pwm_state *state,
+                              const struct pwm_state *state,
                               u32 *dty, u32 *prd, unsigned int *prsclr)
 {
        u64 clk_rate, div = 0;
@@ -192,17 +192,11 @@ static int sun4i_pwm_calculate(struct sun4i_pwm_chip *sun4i_pwm,
        *dty = div;
        *prsclr = prescaler;
 
-       div = (u64)pval * NSEC_PER_SEC * *prd;
-       state->period = DIV_ROUND_CLOSEST_ULL(div, clk_rate);
-
-       div = (u64)pval * NSEC_PER_SEC * *dty;
-       state->duty_cycle = DIV_ROUND_CLOSEST_ULL(div, clk_rate);
-
        return 0;
 }
 
 static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                          struct pwm_state *state)
+                          const struct pwm_state *state)
 {
        struct sun4i_pwm_chip *sun4i_pwm = to_sun4i_pwm_chip(chip);
        struct pwm_state cstate;
index e24f4be..e2c21cc 100644 (file)
@@ -148,7 +148,7 @@ static int zx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 }
 
 static int zx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                       struct pwm_state *state)
+                       const struct pwm_state *state)
 {
        struct zx_pwm_chip *zpc = to_zx_pwm_chip(chip);
        struct pwm_state cstate;
index b2c7af3..98e0be9 100644 (file)
@@ -249,10 +249,8 @@ static int da8xx_rproc_probe(struct platform_device *pdev)
        int ret;
 
        irq = platform_get_irq(pdev, 0);
-       if (irq < 0) {
-               dev_err(dev, "platform_get_irq(pdev, 0) error: %d\n", irq);
+       if (irq < 0)
                return irq;
-       }
 
        irq_data = irq_get_irq_data(irq);
        if (!irq_data) {
index 4cb4401..5c4658f 100644 (file)
@@ -424,16 +424,12 @@ static int keystone_rproc_probe(struct platform_device *pdev)
        ksproc->irq_ring = platform_get_irq_byname(pdev, "vring");
        if (ksproc->irq_ring < 0) {
                ret = ksproc->irq_ring;
-               dev_err(dev, "failed to get vring interrupt, status = %d\n",
-                       ret);
                goto disable_clk;
        }
 
        ksproc->irq_fault = platform_get_irq_byname(pdev, "exception");
        if (ksproc->irq_fault < 0) {
                ret = ksproc->irq_fault;
-               dev_err(dev, "failed to get exception interrupt, status = %d\n",
-                       ret);
                goto disable_clk;
        }
 
index 6a44842..60650bc 100644 (file)
@@ -200,7 +200,7 @@ void qcom_unregister_ssr_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(qcom_unregister_ssr_notifier);
 
-static void ssr_notify_stop(struct rproc_subdev *subdev, bool crashed)
+static void ssr_notify_unprepare(struct rproc_subdev *subdev)
 {
        struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
 
@@ -220,7 +220,7 @@ void qcom_add_ssr_subdev(struct rproc *rproc, struct qcom_rproc_ssr *ssr,
                         const char *ssr_name)
 {
        ssr->name = ssr_name;
-       ssr->subdev.stop = ssr_notify_stop;
+       ssr->subdev.unprepare = ssr_notify_unprepare;
 
        rproc_add_subdev(rproc, &ssr->subdev);
 }
index 0d33e30..cb0f4a0 100644 (file)
@@ -187,13 +187,8 @@ int qcom_q6v5_init(struct qcom_q6v5 *q6v5, struct platform_device *pdev,
        init_completion(&q6v5->stop_done);
 
        q6v5->wdog_irq = platform_get_irq_byname(pdev, "wdog");
-       if (q6v5->wdog_irq < 0) {
-               if (q6v5->wdog_irq != -EPROBE_DEFER)
-                       dev_err(&pdev->dev,
-                               "failed to retrieve wdog IRQ: %d\n",
-                               q6v5->wdog_irq);
+       if (q6v5->wdog_irq < 0)
                return q6v5->wdog_irq;
-       }
 
        ret = devm_request_threaded_irq(&pdev->dev, q6v5->wdog_irq,
                                        NULL, q6v5_wdog_interrupt,
@@ -205,13 +200,8 @@ int qcom_q6v5_init(struct qcom_q6v5 *q6v5, struct platform_device *pdev,
        }
 
        q6v5->fatal_irq = platform_get_irq_byname(pdev, "fatal");
-       if (q6v5->fatal_irq < 0) {
-               if (q6v5->fatal_irq != -EPROBE_DEFER)
-                       dev_err(&pdev->dev,
-                               "failed to retrieve fatal IRQ: %d\n",
-                               q6v5->fatal_irq);
+       if (q6v5->fatal_irq < 0)
                return q6v5->fatal_irq;
-       }
 
        ret = devm_request_threaded_irq(&pdev->dev, q6v5->fatal_irq,
                                        NULL, q6v5_fatal_interrupt,
@@ -223,13 +213,8 @@ int qcom_q6v5_init(struct qcom_q6v5 *q6v5, struct platform_device *pdev,
        }
 
        q6v5->ready_irq = platform_get_irq_byname(pdev, "ready");
-       if (q6v5->ready_irq < 0) {
-               if (q6v5->ready_irq != -EPROBE_DEFER)
-                       dev_err(&pdev->dev,
-                               "failed to retrieve ready IRQ: %d\n",
-                               q6v5->ready_irq);
+       if (q6v5->ready_irq < 0)
                return q6v5->ready_irq;
-       }
 
        ret = devm_request_threaded_irq(&pdev->dev, q6v5->ready_irq,
                                        NULL, q6v5_ready_interrupt,
@@ -241,13 +226,8 @@ int qcom_q6v5_init(struct qcom_q6v5 *q6v5, struct platform_device *pdev,
        }
 
        q6v5->handover_irq = platform_get_irq_byname(pdev, "handover");
-       if (q6v5->handover_irq < 0) {
-               if (q6v5->handover_irq != -EPROBE_DEFER)
-                       dev_err(&pdev->dev,
-                               "failed to retrieve handover IRQ: %d\n",
-                               q6v5->handover_irq);
+       if (q6v5->handover_irq < 0)
                return q6v5->handover_irq;
-       }
 
        ret = devm_request_threaded_irq(&pdev->dev, q6v5->handover_irq,
                                        NULL, q6v5_handover_interrupt,
@@ -260,13 +240,8 @@ int qcom_q6v5_init(struct qcom_q6v5 *q6v5, struct platform_device *pdev,
        disable_irq(q6v5->handover_irq);
 
        q6v5->stop_irq = platform_get_irq_byname(pdev, "stop-ack");
-       if (q6v5->stop_irq < 0) {
-               if (q6v5->stop_irq != -EPROBE_DEFER)
-                       dev_err(&pdev->dev,
-                               "failed to retrieve stop-ack IRQ: %d\n",
-                               q6v5->stop_irq);
+       if (q6v5->stop_irq < 0)
                return q6v5->stop_irq;
-       }
 
        ret = devm_request_threaded_irq(&pdev->dev, q6v5->stop_irq,
                                        NULL, q6v5_stop_interrupt,
index 8fcf9d2..de919f2 100644 (file)
@@ -1282,8 +1282,8 @@ static int q6v5_pds_attach(struct device *dev, struct device **devs,
 
        for (i = 0; i < num_pds; i++) {
                devs[i] = dev_pm_domain_attach_by_name(dev, pd_names[i]);
-               if (IS_ERR(devs[i])) {
-                       ret = PTR_ERR(devs[i]);
+               if (IS_ERR_OR_NULL(devs[i])) {
+                       ret = PTR_ERR(devs[i]) ? : -ENODATA;
                        goto unroll_attach;
                }
        }
index fa41319..7f8536b 100644 (file)
@@ -113,9 +113,20 @@ static ssize_t state_store(struct device *dev,
 }
 static DEVICE_ATTR_RW(state);
 
+/* Expose the name of the remote processor via sysfs */
+static ssize_t name_show(struct device *dev, struct device_attribute *attr,
+                        char *buf)
+{
+       struct rproc *rproc = to_rproc(dev);
+
+       return sprintf(buf, "%s\n", rproc->name);
+}
+static DEVICE_ATTR_RO(name);
+
 static struct attribute *rproc_attrs[] = {
        &dev_attr_firmware.attr,
        &dev_attr_state.attr,
+       &dev_attr_name.attr,
        NULL
 };
 
index e2da719..2cf4b29 100644 (file)
@@ -383,10 +383,21 @@ static void stm32_rproc_add_coredump_trace(struct rproc *rproc)
 
 static int stm32_rproc_start(struct rproc *rproc)
 {
+       struct stm32_rproc *ddata = rproc->priv;
        int err;
 
        stm32_rproc_add_coredump_trace(rproc);
 
+       /* clear remote proc Deep Sleep */
+       if (ddata->pdds.map) {
+               err = regmap_update_bits(ddata->pdds.map, ddata->pdds.reg,
+                                        ddata->pdds.mask, 0);
+               if (err) {
+                       dev_err(&rproc->dev, "failed to clear pdds\n");
+                       return err;
+               }
+       }
+
        err = stm32_rproc_set_hold_boot(rproc, false);
        if (err)
                return err;
@@ -506,6 +517,9 @@ static int stm32_rproc_parse_dt(struct platform_device *pdev)
        int err, irq;
 
        irq = platform_get_irq(pdev, 0);
+       if (irq == -EPROBE_DEFER)
+               return -EPROBE_DEFER;
+
        if (irq > 0) {
                err = devm_request_irq(dev, irq, stm32_rproc_wdg, 0,
                                       dev_name(dev), rproc);
index f46c787..621f1af 100644 (file)
@@ -892,7 +892,7 @@ static void qcom_glink_handle_intent(struct qcom_glink *glink,
                struct intent_pair intents[];
        } __packed * msg;
 
-       const size_t msglen = sizeof(*msg) + sizeof(struct intent_pair) * count;
+       const size_t msglen = struct_size(msg, intents, count);
        int ret;
        int i;
        unsigned long flags;
index 64a5ce3..4238383 100644 (file)
@@ -201,7 +201,7 @@ struct qcom_glink *qcom_glink_smem_register(struct device *parent,
        dev->parent = parent;
        dev->of_node = node;
        dev->release = qcom_glink_smem_release;
-       dev_set_name(dev, "%pOFn:%pOFn", node->parent, node);
+       dev_set_name(dev, "%s:%pOFn", dev_name(parent->parent), node);
        ret = device_register(dev);
        if (ret) {
                pr_err("failed to register glink edge\n");
index ea88fd4..e330ec4 100644 (file)
@@ -46,7 +46,7 @@
  * equals to the src address of their rpmsg channel), the driver's handler
  * is invoked to process it.
  *
- * That said, more complicated drivers might do need to allocate
+ * That said, more complicated drivers might need to allocate
  * additional rpmsg addresses, and bind them to different rx callbacks.
  * To accomplish that, those drivers need to call this function.
  *
@@ -177,7 +177,7 @@ int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst,
 EXPORT_SYMBOL(rpmsg_send_offchannel);
 
 /**
- * rpmsg_send() - send a message across to the remote processor
+ * rpmsg_trysend() - send a message across to the remote processor
  * @ept: the rpmsg endpoint
  * @data: payload of message
  * @len: length of payload
@@ -205,7 +205,7 @@ int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len)
 EXPORT_SYMBOL(rpmsg_trysend);
 
 /**
- * rpmsg_sendto() - send a message across to the remote processor, specify dst
+ * rpmsg_trysendto() - send a message across to the remote processor, specify dst
  * @ept: the rpmsg endpoint
  * @data: payload of message
  * @len: length of payload
@@ -253,7 +253,7 @@ __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp,
 EXPORT_SYMBOL(rpmsg_poll);
 
 /**
- * rpmsg_send_offchannel() - send a message using explicit src/dst addresses
+ * rpmsg_trysend_offchannel() - send a message using explicit src/dst addresses
  * @ept: the rpmsg endpoint
  * @src: source address
  * @dst: destination address
index 0d791c3..3fc83cd 100644 (file)
@@ -20,7 +20,7 @@
 
 /**
  * struct rpmsg_device_ops - indirection table for the rpmsg_device operations
- * @create_ept:                create backend-specific endpoint, requried
+ * @create_ept:                create backend-specific endpoint, required
  * @announce_create:   announce presence of new channel, optional
  * @announce_destroy:  announce destruction of channel, optional
  *
@@ -39,13 +39,14 @@ struct rpmsg_device_ops {
 
 /**
  * struct rpmsg_endpoint_ops - indirection table for rpmsg_endpoint operations
- * @destroy_ept:       destroy the given endpoint, required
+ * @destroy_ept:       see @rpmsg_destroy_ept(), required
  * @send:              see @rpmsg_send(), required
  * @sendto:            see @rpmsg_sendto(), optional
  * @send_offchannel:   see @rpmsg_send_offchannel(), optional
  * @trysend:           see @rpmsg_trysend(), required
  * @trysendto:         see @rpmsg_trysendto(), optional
  * @trysend_offchannel:        see @rpmsg_trysend_offchannel(), optional
+ * @poll:              see @rpmsg_poll(), optional
  *
  * Indirection table for the operations that a rpmsg backend should implement.
  * In addition to @destroy_ept, the backend must at least implement @send and
index 5d3685b..376ebbf 100644 (file)
@@ -920,7 +920,7 @@ static int rpmsg_probe(struct virtio_device *vdev)
                goto vqs_del;
        }
 
-       dev_dbg(&vdev->dev, "buffers: va %p, dma %pad\n",
+       dev_dbg(&vdev->dev, "buffers: va %pK, dma %pad\n",
                bufs_va, &vrp->bufs_dma);
 
        /* half of the buffers is dedicated for RX */
index a45175f..1adf9f8 100644 (file)
@@ -373,6 +373,17 @@ config RTC_DRV_MAX77686
          This driver can also be built as a module. If so, the module
          will be called rtc-max77686.
 
+config RTC_DRV_MESON_VRTC
+       tristate "Amlogic Meson Virtual RTC"
+       depends on ARCH_MESON || COMPILE_TEST
+       default m if ARCH_MESON
+       help
+         If you say yes here you will get support for the
+         Virtual RTC of Amlogic SoCs.
+
+         This driver can also be built as a module. If so, the module
+         will be called rtc-meson-vrtc.
+
 config RTC_DRV_RK808
        tristate "Rockchip RK805/RK808/RK809/RK817/RK818 RTC"
        depends on MFD_RK808
@@ -500,6 +511,7 @@ config RTC_DRV_M41T80_WDT
          watchdog timer in the ST M41T60 and M41T80 RTC chips series.
 config RTC_DRV_BD70528
        tristate "ROHM BD70528 PMIC RTC"
+       depends on MFD_ROHM_BD70528 && (BD70528_WATCHDOG || !BD70528_WATCHDOG)
        help
          If you say Y here you will get support for the RTC
          on ROHM BD70528 Power Management IC.
@@ -874,9 +886,15 @@ config RTC_DRV_DS3232_HWMON
 config RTC_DRV_PCF2127
        tristate "NXP PCF2127"
        depends on RTC_I2C_AND_SPI
+       select WATCHDOG_CORE if WATCHDOG
        help
          If you say yes here you get support for the NXP PCF2127/29 RTC
-         chips.
+         chips with integrated quartz crystal for industrial applications.
+         Both chips also have watchdog timer and tamper switch detection
+         features.
+
+         PCF2127 has an additional feature of 512 bytes battery backed
+         memory that's accessible using nvmem interface.
 
          This driver can also be built as a module. If so, the module
          will be called rtc-pcf2127.
@@ -1247,13 +1265,6 @@ config RTC_DRV_AB8500
          Select this to enable the ST-Ericsson AB8500 power management IC RTC
          support. This chip contains a battery- and capacitor-backed RTC.
 
-config RTC_DRV_NUC900
-       tristate "NUC910/NUC920 RTC driver"
-       depends on ARCH_W90X900 || COMPILE_TEST
-       help
-         If you say yes here you get support for the RTC subsystem of the
-         NUC910/NUC920 used in embedded systems.
-
 config RTC_DRV_OPAL
        tristate "IBM OPAL RTC driver"
        depends on PPC_POWERNV
@@ -1323,6 +1334,21 @@ config RTC_DRV_IMXDI
           This driver can also be built as a module, if so, the module
           will be called "rtc-imxdi".
 
+config RTC_DRV_FSL_FTM_ALARM
+       tristate "Freescale FlexTimer alarm timer"
+       depends on ARCH_LAYERSCAPE || SOC_LS1021A
+       select FSL_RCPM
+       default y
+       help
+          For the FlexTimer in LS1012A, LS1021A, LS1028A, LS1043A, LS1046A,
+          LS1088A, LS208xA, we can use FTM as the wakeup source.
+
+          Say y here to enable FTM alarm support. The FTM alarm provides
+          alarm functions for wakeup system from deep sleep.
+
+          This driver can also be built as a module, if so, the module
+          will be called "rtc-fsl-ftm-alarm".
+
 config RTC_DRV_MESON
        tristate "Amlogic Meson RTC"
        depends on (ARM && ARCH_MESON) || COMPILE_TEST
index 6b09c21..4ac8f19 100644 (file)
@@ -73,6 +73,7 @@ obj-$(CONFIG_RTC_DRV_EFI)     += rtc-efi.o
 obj-$(CONFIG_RTC_DRV_EM3027)   += rtc-em3027.o
 obj-$(CONFIG_RTC_DRV_EP93XX)   += rtc-ep93xx.o
 obj-$(CONFIG_RTC_DRV_FM3130)   += rtc-fm3130.o
+obj-$(CONFIG_RTC_DRV_FSL_FTM_ALARM)    += rtc-fsl-ftm-alarm.o
 obj-$(CONFIG_RTC_DRV_FTRTC010) += rtc-ftrtc010.o
 obj-$(CONFIG_RTC_DRV_GENERIC)  += rtc-generic.o
 obj-$(CONFIG_RTC_DRV_GOLDFISH) += rtc-goldfish.o
@@ -102,6 +103,7 @@ obj-$(CONFIG_RTC_DRV_MAX8907)       += rtc-max8907.o
 obj-$(CONFIG_RTC_DRV_MAX8925)  += rtc-max8925.o
 obj-$(CONFIG_RTC_DRV_MAX8997)  += rtc-max8997.o
 obj-$(CONFIG_RTC_DRV_MAX8998)  += rtc-max8998.o
+obj-$(CONFIG_RTC_DRV_MESON_VRTC)+= rtc-meson-vrtc.o
 obj-$(CONFIG_RTC_DRV_MC13XXX)  += rtc-mc13xxx.o
 obj-$(CONFIG_RTC_DRV_MCP795)   += rtc-mcp795.o
 obj-$(CONFIG_RTC_DRV_MESON)    += rtc-meson.o
@@ -113,7 +115,6 @@ obj-$(CONFIG_RTC_DRV_MT7622)        += rtc-mt7622.o
 obj-$(CONFIG_RTC_DRV_MV)       += rtc-mv.o
 obj-$(CONFIG_RTC_DRV_MXC)      += rtc-mxc.o
 obj-$(CONFIG_RTC_DRV_MXC_V2)   += rtc-mxc_v2.o
-obj-$(CONFIG_RTC_DRV_NUC900)   += rtc-nuc900.o
 obj-$(CONFIG_RTC_DRV_OMAP)     += rtc-omap.o
 obj-$(CONFIG_RTC_DRV_OPAL)     += rtc-opal.o
 obj-$(CONFIG_RTC_DRV_PALMAS)   += rtc-palmas.o
index 0f492b0..9458e6d 100644 (file)
@@ -346,8 +346,10 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc)
        struct rtc_wkalrm alrm;
        int err;
 
-       if (!rtc->ops)
+       if (!rtc->ops) {
+               dev_dbg(&rtc->dev, "no ops set\n");
                return -EINVAL;
+       }
 
        rtc->owner = owner;
        rtc_device_get_offset(rtc);
index e4d5a19..75779e8 100644 (file)
@@ -264,7 +264,6 @@ static int pm80x_rtc_probe(struct platform_device *pdev)
                return -ENOMEM;
        info->irq = platform_get_irq(pdev, 0);
        if (info->irq < 0) {
-               dev_err(&pdev->dev, "No IRQ resource!\n");
                ret = -EINVAL;
                goto out;
        }
@@ -296,10 +295,9 @@ static int pm80x_rtc_probe(struct platform_device *pdev)
        info->rtc_dev->range_max = U32_MAX;
 
        ret = rtc_register_device(info->rtc_dev);
-       if (ret) {
-               dev_err(&pdev->dev, "Failed to register RTC device: %d\n", ret);
+       if (ret)
                goto out_rtc;
-       }
+
        /*
         * enable internal XO instead of internal 3.25MHz clock since it can
         * free running in PMIC power-down state.
index 434285f..4743b16 100644 (file)
@@ -328,10 +328,8 @@ static int pm860x_rtc_probe(struct platform_device *pdev)
        if (!info)
                return -ENOMEM;
        info->irq = platform_get_irq(pdev, 0);
-       if (info->irq < 0) {
-               dev_err(&pdev->dev, "No IRQ resource!\n");
+       if (info->irq < 0)
                return info->irq;
-       }
 
        info->chip = chip;
        info->i2c = (chip->id == CHIP_PM8607) ? chip->client : chip->companion;
index e4f6e00..d690985 100644 (file)
@@ -390,35 +390,31 @@ static int abeoz9_probe(struct i2c_client *client,
 
        if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C |
                                     I2C_FUNC_SMBUS_BYTE_DATA |
-                                    I2C_FUNC_SMBUS_I2C_BLOCK)) {
-               ret = -ENODEV;
-               goto err;
-       }
+                                    I2C_FUNC_SMBUS_I2C_BLOCK))
+               return -ENODEV;
 
        regmap = devm_regmap_init_i2c(client, &abeoz9_rtc_regmap_config);
        if (IS_ERR(regmap)) {
                ret = PTR_ERR(regmap);
                dev_err(dev, "regmap allocation failed: %d\n", ret);
-               goto err;
+               return ret;
        }
 
        data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
-       if (!data) {
-               ret = -ENOMEM;
-               goto err;
-       }
+       if (!data)
+               return -ENOMEM;
 
        data->regmap = regmap;
        dev_set_drvdata(dev, data);
 
        ret = abeoz9_rtc_setup(dev, client->dev.of_node);
        if (ret)
-               goto err;
+               return ret;
 
        data->rtc = devm_rtc_allocate_device(dev);
        ret = PTR_ERR_OR_ZERO(data->rtc);
        if (ret)
-               goto err;
+               return ret;
 
        data->rtc->ops = &rtc_ops;
        data->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
@@ -426,14 +422,10 @@ static int abeoz9_probe(struct i2c_client *client,
 
        ret = rtc_register_device(data->rtc);
        if (ret)
-               goto err;
+               return ret;
 
        abeoz9_hwmon_register(dev, data);
        return 0;
-
-err:
-       dev_err(dev, "unable to register RTC device (%d)\n", ret);
-       return ret;
 }
 
 #ifdef CONFIG_OF
index 2e5a8b1..2922393 100644 (file)
@@ -578,10 +578,8 @@ static int ac100_rtc_probe(struct platform_device *pdev)
        chip->regmap = ac100->regmap;
 
        chip->irq = platform_get_irq(pdev, 0);
-       if (chip->irq < 0) {
-               dev_err(&pdev->dev, "No IRQ resource\n");
+       if (chip->irq < 0)
                return chip->irq;
-       }
 
        chip->rtc = devm_rtc_allocate_device(&pdev->dev);
        if (IS_ERR(chip->rtc))
@@ -612,15 +610,7 @@ static int ac100_rtc_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       ret = rtc_register_device(chip->rtc);
-       if (ret) {
-               dev_err(&pdev->dev, "unable to register device\n");
-               return ret;
-       }
-
-       dev_info(&pdev->dev, "RTC enabled\n");
-
-       return 0;
+       return rtc_register_device(chip->rtc);
 }
 
 static int ac100_rtc_remove(struct platform_device *pdev)
index 19d6980..9351bd5 100644 (file)
@@ -502,7 +502,6 @@ static __init int armada38x_rtc_probe(struct platform_device *pdev)
        struct resource *res;
        struct armada38x_rtc *rtc;
        const struct of_device_id *match;
-       int ret;
 
        match = of_match_device(armada38x_rtc_of_match_table, &pdev->dev);
        if (!match)
@@ -530,11 +529,8 @@ static __init int armada38x_rtc_probe(struct platform_device *pdev)
                return PTR_ERR(rtc->regs_soc);
 
        rtc->irq = platform_get_irq(pdev, 0);
-
-       if (rtc->irq < 0) {
-               dev_err(&pdev->dev, "no irq\n");
+       if (rtc->irq < 0)
                return rtc->irq;
-       }
 
        rtc->rtc_dev = devm_rtc_allocate_device(&pdev->dev);
        if (IS_ERR(rtc->rtc_dev))
@@ -564,11 +560,7 @@ static __init int armada38x_rtc_probe(struct platform_device *pdev)
 
        rtc->rtc_dev->range_max = U32_MAX;
 
-       ret = rtc_register_device(rtc->rtc_dev);
-       if (ret)
-               dev_err(&pdev->dev, "Failed to register RTC device: %d\n", ret);
-
-       return ret;
+       return rtc_register_device(rtc->rtc_dev);
 }
 
 #ifdef CONFIG_PM_SLEEP
index d45a449..10413d8 100644 (file)
@@ -257,10 +257,8 @@ static int asm9260_rtc_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, priv);
 
        irq_alarm = platform_get_irq(pdev, 0);
-       if (irq_alarm < 0) {
-               dev_err(dev, "No alarm IRQ resource defined\n");
+       if (irq_alarm < 0)
                return irq_alarm;
-       }
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        priv->iobase = devm_ioremap_resource(dev, res);
index af3eb67..e351d35 100644 (file)
@@ -86,7 +86,6 @@ static int aspeed_rtc_probe(struct platform_device *pdev)
 {
        struct aspeed_rtc *rtc;
        struct resource *res;
-       int ret;
 
        rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
        if (!rtc)
@@ -107,11 +106,7 @@ static int aspeed_rtc_probe(struct platform_device *pdev)
        rtc->rtc_dev->range_min = RTC_TIMESTAMP_BEGIN_1900;
        rtc->rtc_dev->range_max = 38814989399LL; /* 3199-12-31 23:59:59 */
 
-       ret = rtc_register_device(rtc->rtc_dev);
-       if (ret)
-               return ret;
-
-       return 0;
+       return rtc_register_device(rtc->rtc_dev);
 }
 
 static const struct of_device_id aspeed_rtc_match[] = {
index 82a54e9..d119c6e 100644 (file)
@@ -378,10 +378,8 @@ static int __init at91_rtc_probe(struct platform_device *pdev)
        }
 
        irq = platform_get_irq(pdev, 0);
-       if (irq < 0) {
-               dev_err(&pdev->dev, "no irq resource defined\n");
+       if (irq < 0)
                return -ENXIO;
-       }
 
        at91_rtc_regs = devm_ioremap(&pdev->dev, regs->start,
                                     resource_size(regs));
index 4daf378..bb3ba7b 100644 (file)
@@ -342,10 +342,8 @@ static int at91_rtc_probe(struct platform_device *pdev)
        struct of_phandle_args args;
 
        irq = platform_get_irq(pdev, 0);
-       if (irq < 0) {
-               dev_err(&pdev->dev, "failed to get interrupt resource\n");
+       if (irq < 0)
                return irq;
-       }
 
        rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
        if (!rtc)
index f9bdd55..7744333 100644 (file)
@@ -416,11 +416,8 @@ static int bd70528_probe(struct platform_device *pdev)
        bd_rtc->dev = &pdev->dev;
 
        irq = platform_get_irq_byname(pdev, "bd70528-rtc-alm");
-
-       if (irq < 0) {
-               dev_err(&pdev->dev, "Failed to get irq\n");
+       if (irq < 0)
                return irq;
-       }
 
        platform_set_drvdata(pdev, bd_rtc);
 
@@ -479,11 +476,7 @@ static int bd70528_probe(struct platform_device *pdev)
                return ret;
        }
 
-       ret = rtc_register_device(rtc);
-       if (ret)
-               dev_err(&pdev->dev, "Registering RTC failed\n");
-
-       return ret;
+       return rtc_register_device(rtc);
 }
 
 static struct platform_driver bd70528_rtc = {
index 2f65943..3e9800f 100644 (file)
@@ -255,10 +255,8 @@ static int brcmstb_waketmr_probe(struct platform_device *pdev)
        timer->rtc->range_max = U32_MAX;
 
        ret = rtc_register_device(timer->rtc);
-       if (ret) {
-               dev_err(dev, "unable to register device\n");
+       if (ret)
                goto err_notifier;
-       }
 
        dev_info(dev, "registered, with irq %d\n", timer->irq);
 
index 3b7d643..592aae2 100644 (file)
@@ -289,12 +289,8 @@ static int cdns_rtc_probe(struct platform_device *pdev)
        }
 
        crtc->rtc_dev = devm_rtc_allocate_device(&pdev->dev);
-       if (IS_ERR(crtc->rtc_dev)) {
-               ret = PTR_ERR(crtc->rtc_dev);
-               dev_err(&pdev->dev,
-                       "Failed to allocate the RTC device, %d\n", ret);
-               return ret;
-       }
+       if (IS_ERR(crtc->rtc_dev))
+               return PTR_ERR(crtc->rtc_dev);
 
        platform_set_drvdata(pdev, crtc);
 
@@ -343,11 +339,8 @@ static int cdns_rtc_probe(struct platform_device *pdev)
        writel(CDNS_RTC_KRTCR_KRTC, crtc->regs + CDNS_RTC_KRTCR);
 
        ret = rtc_register_device(crtc->rtc_dev);
-       if (ret) {
-               dev_err(&pdev->dev,
-                       "Failed to register the RTC device, %d\n", ret);
+       if (ret)
                goto err_disable_wakeup;
-       }
 
        return 0;
 
index fcb71bf..d8e0db2 100644 (file)
@@ -477,10 +477,8 @@ static int __init davinci_rtc_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        davinci_rtc->irq = platform_get_irq(pdev, 0);
-       if (davinci_rtc->irq < 0) {
-               dev_err(dev, "no RTC irq\n");
+       if (davinci_rtc->irq < 0)
                return davinci_rtc->irq;
-       }
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        davinci_rtc->base = devm_ioremap_resource(dev, res);
index e04d6e8..4420fbf 100644 (file)
@@ -690,19 +690,16 @@ static int ds1305_probe(struct spi_device *spi)
 
        /* register RTC ... from here on, ds1305->ctrl needs locking */
        ds1305->rtc = devm_rtc_allocate_device(&spi->dev);
-       if (IS_ERR(ds1305->rtc)) {
+       if (IS_ERR(ds1305->rtc))
                return PTR_ERR(ds1305->rtc);
-       }
 
        ds1305->rtc->ops = &ds1305_ops;
 
        ds1305_nvmem_cfg.priv = ds1305;
        ds1305->rtc->nvram_old_abi = true;
        status = rtc_register_device(ds1305->rtc);
-       if (status) {
-               dev_dbg(&spi->dev, "register rtc --> %d\n", status);
+       if (status)
                return status;
-       }
 
        rtc_nvmem_register(ds1305->rtc, &ds1305_nvmem_cfg);
 
index e9e8d02..9da84df 100644 (file)
@@ -128,9 +128,6 @@ static int ds1672_probe(struct i2c_client *client,
        if (err)
                return err;
 
-       if (IS_ERR(rtc))
-               return PTR_ERR(rtc);
-
        i2c_set_clientdata(client, rtc);
 
        return 0;
diff --git a/drivers/rtc/rtc-fsl-ftm-alarm.c b/drivers/rtc/rtc-fsl-ftm-alarm.c
new file mode 100644 (file)
index 0000000..8df2075
--- /dev/null
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Freescale FlexTimer Module (FTM) alarm device driver.
+ *
+ * Copyright 2014 Freescale Semiconductor, Inc.
+ * Copyright 2019 NXP
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/module.h>
+#include <linux/fsl/ftm.h>
+#include <linux/rtc.h>
+#include <linux/time.h>
+
+#define FTM_SC_CLK(c)          ((c) << FTM_SC_CLK_MASK_SHIFT)
+
+/*
+ * Select Fixed frequency clock (32KHz) as clock source
+ * of FlexTimer Module
+ */
+#define FTM_SC_CLKS_FIXED_FREQ 0x02
+#define FIXED_FREQ_CLK         32000
+
+/* Select 128 (2^7) as divider factor */
+#define MAX_FREQ_DIV           (1 << FTM_SC_PS_MASK)
+
+/* Maximum counter value in FlexTimer's CNT registers */
+#define MAX_COUNT_VAL          0xffff
+
+/* Per-device state of the FlexTimer alarm. */
+struct ftm_rtc {
+       struct rtc_device *rtc_dev;     /* from devm_rtc_allocate_device() in probe */
+       void __iomem *base;             /* mapped FTM register window */
+       bool big_endian;                /* DT node carries "big-endian" */
+       u32 alarm_freq;                 /* FIXED_FREQ_CLK / MAX_FREQ_DIV, ticks per second */
+};
+
+/*
+ * Read the 32-bit FTM register at offset @reg, through the big-endian
+ * accessor when the device is flagged big_endian.
+ */
+static inline u32 rtc_readl(struct ftm_rtc *dev, u32 reg)
+{
+       void __iomem *addr = dev->base + reg;
+
+       return dev->big_endian ? ioread32be(addr) : ioread32(addr);
+}
+
+/*
+ * Write @val to the 32-bit FTM register at offset @reg, through the
+ * big-endian accessor when the device is flagged big_endian.
+ */
+static inline void rtc_writel(struct ftm_rtc *dev, u32 reg, u32 val)
+{
+       void __iomem *addr = dev->base + reg;
+
+       if (!dev->big_endian) {
+               iowrite32(val, addr);
+               return;
+       }
+
+       iowrite32be(val, addr);
+}
+
+/*
+ * Start the counter: set every prescaler bit in FTM_SC and select the
+ * fixed frequency clock as its source.
+ */
+static inline void ftm_counter_enable(struct ftm_rtc *rtc)
+{
+       u32 sc = rtc_readl(rtc, FTM_SC);
+
+       sc &= ~(FTM_SC_PS_MASK | FTM_SC_CLK_MASK);
+       sc |= FTM_SC_PS_MASK | FTM_SC_CLK(FTM_SC_CLKS_FIXED_FREQ);
+       rtc_writel(rtc, FTM_SC, sc);
+}
+
+/*
+ * Stop the counter by clearing the prescaler and clock-source fields
+ * of FTM_SC.
+ */
+static inline void ftm_counter_disable(struct ftm_rtc *rtc)
+{
+       const u32 clear = FTM_SC_PS_MASK | FTM_SC_CLK_MASK;
+
+       rtc_writel(rtc, FTM_SC, rtc_readl(rtc, FTM_SC) & ~clear);
+}
+
+/*
+ * Clear the timer overflow flag (TOF) in FTM_SC, retrying a bounded
+ * number of times because a single clear may not stick (see below).
+ */
+static inline void ftm_irq_acknowledge(struct ftm_rtc *rtc)
+{
+       unsigned int timeout = 100;
+
+       /*
+        * Workaround for FlexTimer erratum A-007728.
+        *
+        * Erratum description:
+        *      If the FTM counter reaches the FTM_MOD value between
+        *      the reading of the TOF bit and the writing of 0 to
+        *      the TOF bit, the process of clearing the TOF bit
+        *      does not work as expected when FTMx_CONF[NUMTOF] != 0
+        *      and the current TOF count is less than FTMx_CONF[NUMTOF].
+        *      If the above condition is met, the TOF bit remains set.
+        *      If the TOF interrupt is enabled (FTMx_SC[TOIE] = 1), the
+        *      TOF interrupt also remains asserted.
+        *
+        * In short: clearing the TOF bit from software does not work
+        * when FTMx_CONF[NUMTOF] is set to a nonzero value and the FTM
+        * counter reaches the FTM_MOD value at that moment.
+        *
+        * The workaround is to keep clearing the TOF bit until it
+        * sticks (the FTM counter does not always reach FTM_MOD anyway),
+        * which may cost some cycles. The loop gives up after at most
+        * 100 attempts.
+        */
+       while ((FTM_SC_TOF & rtc_readl(rtc, FTM_SC)) && timeout--)
+               rtc_writel(rtc, FTM_SC, rtc_readl(rtc, FTM_SC) & (~FTM_SC_TOF));
+}
+
+/* Unmask the timer overflow interrupt by setting TOIE in FTM_SC. */
+static inline void ftm_irq_enable(struct ftm_rtc *rtc)
+{
+       rtc_writel(rtc, FTM_SC, rtc_readl(rtc, FTM_SC) | FTM_SC_TOIE);
+}
+
+/* Mask the timer overflow interrupt by clearing TOIE in FTM_SC. */
+static inline void ftm_irq_disable(struct ftm_rtc *rtc)
+{
+       rtc_writel(rtc, FTM_SC, rtc_readl(rtc, FTM_SC) & ~FTM_SC_TOIE);
+}
+
+/* Restart the count by writing to the FTM_CNT register. */
+static inline void ftm_reset_counter(struct ftm_rtc *rtc)
+{
+       /*
+        * The CNT register contains the FTM counter value.
+        * Reset clears the CNT register. Writing any value to CNT
+        * updates the counter with its initial value, CNTIN.
+        */
+       rtc_writel(rtc, FTM_CNT, 0x00);
+}
+
+/*
+ * Cancel any programmed alarm: stop the counter, set CNTIN to 0 and
+ * MOD to all ones, then write CNT to restart the count.
+ */
+static void ftm_clean_alarm(struct ftm_rtc *rtc)
+{
+       /* Stop counting before the period registers are rewritten. */
+       ftm_counter_disable(rtc);
+
+       rtc_writel(rtc, FTM_CNTIN, 0x00);
+       rtc_writel(rtc, FTM_MOD, ~0U);
+
+       ftm_reset_counter(rtc);
+}
+
+/*
+ * Timer overflow interrupt handler: the programmed alarm period has
+ * elapsed.
+ *
+ * @irq: interrupt number (unused).
+ * @dev: the struct ftm_rtc passed to devm_request_irq() in probe.
+ *
+ * Reports the alarm to the RTC core, then acknowledges and masks the
+ * interrupt and cancels the alarm programming. Always returns
+ * IRQ_HANDLED.
+ */
+static irqreturn_t ftm_rtc_alarm_interrupt(int irq, void *dev)
+{
+       struct ftm_rtc *rtc = dev;
+
+       /* Without this the RTC core is never told that the alarm fired. */
+       rtc_update_irq(rtc->rtc_dev, 1, RTC_IRQF | RTC_AF);
+
+       ftm_irq_acknowledge(rtc);
+       ftm_irq_disable(rtc);
+       ftm_clean_alarm(rtc);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * rtc_class_ops.alarm_irq_enable: unmask the overflow interrupt when
+ * @enabled is nonzero, mask it otherwise. Always returns 0.
+ */
+static int ftm_rtc_alarm_irq_enable(struct device *dev,
+               unsigned int enabled)
+{
+       struct ftm_rtc *rtc = dev_get_drvdata(dev);
+
+       if (!enabled) {
+               ftm_irq_disable(rtc);
+               return 0;
+       }
+
+       ftm_irq_enable(rtc);
+       return 0;
+}
+
+/*
+ * rtc_class_ops.read_time: fill @tm with the current system time.
+ *
+ * Note:
+ *     The function is not really getting time from the RTC
+ *     since FlexTimer is not an RTC device, but we need to
+ *     get time to set up an alarm, so we are using system time
+ *     for now.
+ *
+ * @dev is unused. Always returns 0.
+ */
+static int ftm_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+       /*
+        * Keep the seconds value 64-bit all the way through: the older
+        * rtc_time_to_tm() takes an unsigned long, which truncates on
+        * 32-bit SoCs.
+        */
+       rtc_time64_to_tm(ktime_get_real_seconds(), tm);
+
+       return 0;
+}
+
+/*
+ * rtc_class_ops.read_alarm stub: reports success without writing
+ * anything to @alm.
+ *
+ * NOTE(review): the programmed alarm is not stored anywhere in
+ * struct ftm_rtc, so there is nothing to report back here; confirm
+ * that callers cope with @alm being left untouched.
+ */
+static int ftm_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
+{
+       return 0;
+}
+
+/*
+ * rtc_class_ops.set_alarm: program a one-shot alarm relative to the
+ * current system time.
+ *
+ * 1. The fixed frequency clock (32KHz) is the clock source;
+ * 2. 128 (2^7) is the divider factor;
+ * so the counter runs at 250 Hz (32KHz/128).
+ *
+ * 3. FlexTimer's CNT register is a 32-bit register, but only 16 bits
+ * hold the counter value; the other 16 bits are reserved. The minimum
+ * counter value is therefore 0x0 and the maximum is 0xffff,
+ * so the longest alarm is 262 (65536 / 250) seconds.
+ *
+ * Returns 0 on success, or -ERANGE when @alm is not in the future or is
+ * further away than the counter can represent. Any previously programmed
+ * alarm is cancelled in both cases.
+ */
+static int ftm_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
+{
+       struct ftm_rtc *rtc = dev_get_drvdata(dev);
+       time64_t delta;
+       u64 cycle = 0;
+
+       /* 64-bit seconds; the unsigned long helpers truncate on 32-bit. */
+       delta = rtc_tm_to_time64(&alm->time) - ktime_get_real_seconds();
+
+       ftm_clean_alarm(rtc);
+
+       /*
+        * Only multiply once delta is known to be positive and small, so
+        * neither a past alarm nor a far-future one can wrap. A delta of
+        * zero is rejected too: cycle - 1 below would write all ones to
+        * MOD instead of firing at once.
+        */
+       if (delta > 0 && delta <= MAX_COUNT_VAL)
+               cycle = (u64)delta * rtc->alarm_freq;
+
+       if (!cycle || cycle > MAX_COUNT_VAL) {
+               pr_err("Out of alarm range {0~262} seconds.\n");
+               return -ERANGE;
+       }
+
+       ftm_irq_disable(rtc);
+
+       /*
+        * The counter increments until the value of MOD is reached,
+        * at which point the counter is reloaded with the value of CNTIN.
+        * The TOF (the overflow flag) bit is set when the FTM counter
+        * changes from MOD to CNTIN. So we should use cycle - 1.
+        */
+       rtc_writel(rtc, FTM_MOD, cycle - 1);
+
+       ftm_counter_enable(rtc);
+       ftm_irq_enable(rtc);
+
+       return 0;
+
+}
+
+/* RTC class callbacks; no set_time is provided. */
+static const struct rtc_class_ops ftm_rtc_ops = {
+       .read_time              = ftm_rtc_read_time,
+       .read_alarm             = ftm_rtc_read_alarm,
+       .set_alarm              = ftm_rtc_set_alarm,
+       .alarm_irq_enable       = ftm_rtc_alarm_irq_enable,
+};
+
+/*
+ * Bind to a FlexTimer alarm platform device.
+ *
+ * Allocates the driver state and the rtc device, maps memory resource 0,
+ * reads the "big-endian" property, installs the overflow interrupt
+ * handler for DT interrupt 0, marks the device wakeup capable and
+ * registers the rtc device.
+ *
+ * Returns 0 on success or a negative errno. All resources are
+ * device-managed, so there is no explicit unwind.
+ */
+static int ftm_rtc_probe(struct platform_device *pdev)
+{
+       struct device_node *np = pdev->dev.of_node;
+       struct resource *r;
+       int irq;
+       int ret;
+       struct ftm_rtc *rtc;
+
+       rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
+       if (unlikely(!rtc)) {
+               dev_err(&pdev->dev, "cannot alloc memory for rtc\n");
+               return -ENOMEM;
+       }
+
+       platform_set_drvdata(pdev, rtc);
+
+       rtc->rtc_dev = devm_rtc_allocate_device(&pdev->dev);
+       if (IS_ERR(rtc->rtc_dev))
+               return PTR_ERR(rtc->rtc_dev);
+
+       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!r) {
+               dev_err(&pdev->dev, "cannot get resource for rtc\n");
+               return -ENODEV;
+       }
+
+       rtc->base = devm_ioremap_resource(&pdev->dev, r);
+       if (IS_ERR(rtc->base)) {
+               dev_err(&pdev->dev, "cannot ioremap resource for rtc\n");
+               return PTR_ERR(rtc->base);
+       }
+
+       /*
+        * Finish describing the device before the handler is installed:
+        * the interrupt path goes through rtc_readl()/rtc_writel(), which
+        * choose their accessor from big_endian.
+        */
+       rtc->big_endian = of_property_read_bool(np, "big-endian");
+       rtc->alarm_freq = (u32)FIXED_FREQ_CLK / (u32)MAX_FREQ_DIV;
+       rtc->rtc_dev->ops = &ftm_rtc_ops;
+
+       irq = irq_of_parse_and_map(np, 0);
+       if (irq <= 0) {
+               dev_err(&pdev->dev, "unable to get IRQ from DT, %d\n", irq);
+               return -EINVAL;
+       }
+
+       ret = devm_request_irq(&pdev->dev, irq, ftm_rtc_alarm_interrupt,
+                              IRQF_NO_SUSPEND, dev_name(&pdev->dev), rtc);
+       if (ret < 0) {
+               dev_err(&pdev->dev, "failed to request irq\n");
+               return ret;
+       }
+
+       device_init_wakeup(&pdev->dev, true);
+
+       ret = rtc_register_device(rtc->rtc_dev);
+       if (ret) {
+               dev_err(&pdev->dev, "can't register rtc device\n");
+               return ret;
+       }
+
+       return 0;
+}
+
+/* Device tree compatibles handled by this driver. */
+static const struct of_device_id ftm_rtc_match[] = {
+       { .compatible = "fsl,ls1012a-ftm-alarm", },
+       { .compatible = "fsl,ls1021a-ftm-alarm", },
+       { .compatible = "fsl,ls1028a-ftm-alarm", },
+       { .compatible = "fsl,ls1043a-ftm-alarm", },
+       { .compatible = "fsl,ls1046a-ftm-alarm", },
+       { .compatible = "fsl,ls1088a-ftm-alarm", },
+       { .compatible = "fsl,ls208xa-ftm-alarm", },
+       { .compatible = "fsl,lx2160a-ftm-alarm", },
+       { },    /* sentinel */
+};
+
+/* Platform driver glue; no remove callback is provided. */
+static struct platform_driver ftm_rtc_driver = {
+       .probe          = ftm_rtc_probe,
+       .driver         = {
+               .name   = "ftm-alarm",
+               .of_match_table = ftm_rtc_match,
+       },
+};
+
+/* Register the platform driver; invoked through device_initcall(). */
+static int __init ftm_alarm_init(void)
+{
+       return platform_driver_register(&ftm_rtc_driver);
+}
+
+device_initcall(ftm_alarm_init);
+
+MODULE_DESCRIPTION("NXP/Freescale FlexTimer alarm driver");
+MODULE_AUTHOR("Biwen Li <biwen.li@nxp.com>");
+MODULE_LICENSE("GPL");
index c933045..cf2c121 100644 (file)
@@ -167,10 +167,8 @@ static int imx_sc_rtc_probe(struct platform_device *pdev)
        imx_sc_rtc->range_max = U32_MAX;
 
        ret = rtc_register_device(imx_sc_rtc);
-       if (ret) {
-               dev_err(&pdev->dev, "failed to register rtc: %d\n", ret);
+       if (ret)
                return ret;
-       }
 
        imx_scu_irq_register_notifier(&imx_sc_rtc_alarm_sc_notifier);
 
index 3f3d652..f21dc6b 100644 (file)
@@ -740,7 +740,6 @@ static void dryice_work(struct work_struct *work)
  */
 static int __init dryice_rtc_probe(struct platform_device *pdev)
 {
-       struct resource *res;
        struct imxdi_dev *imxdi;
        int norm_irq, sec_irq;
        int rc;
@@ -751,8 +750,7 @@ static int __init dryice_rtc_probe(struct platform_device *pdev)
 
        imxdi->pdev = pdev;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       imxdi->ioaddr = devm_ioremap_resource(&pdev->dev, res);
+       imxdi->ioaddr = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(imxdi->ioaddr))
                return PTR_ERR(imxdi->ioaddr);
 
index 97f594f..5b6b17f 100644 (file)
@@ -454,9 +454,9 @@ static int isl12026_probe_new(struct i2c_client *client)
 
        isl12026_force_power_modes(client);
 
-       priv->nvm_client = i2c_new_dummy(client->adapter, ISL12026_EEPROM_ADDR);
-       if (!priv->nvm_client)
-               return -ENOMEM;
+       priv->nvm_client = i2c_new_dummy_device(client->adapter, ISL12026_EEPROM_ADDR);
+       if (IS_ERR(priv->nvm_client))
+               return PTR_ERR(priv->nvm_client);
 
        priv->rtc = devm_rtc_allocate_device(&client->dev);
        ret = PTR_ERR_OR_ZERO(priv->rtc);
index 9e7b3a0..3089645 100644 (file)
@@ -323,10 +323,8 @@ static int jz4740_rtc_probe(struct platform_device *pdev)
                rtc->type = id->driver_data;
 
        rtc->irq = platform_get_irq(pdev, 0);
-       if (rtc->irq < 0) {
-               dev_err(&pdev->dev, "Failed to get platform irq\n");
+       if (rtc->irq < 0)
                return -ENOENT;
-       }
 
        mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        rtc->base = devm_ioremap_resource(&pdev->dev, mem);
@@ -362,10 +360,8 @@ static int jz4740_rtc_probe(struct platform_device *pdev)
        rtc->rtc->range_max = U32_MAX;
 
        ret = rtc_register_device(rtc->rtc);
-       if (ret) {
-               dev_err(&pdev->dev, "Failed to register rtc device: %d\n", ret);
+       if (ret)
                return ret;
-       }
 
        ret = devm_request_irq(&pdev->dev, rtc->irq, jz4740_rtc_irq, 0,
                                pdev->name, rtc);
index 4aff349..d5a0e27 100644 (file)
@@ -673,11 +673,8 @@ static int max77686_init_rtc_regmap(struct max77686_rtc_info *info)
                struct platform_device *pdev = to_platform_device(info->dev);
 
                info->rtc_irq = platform_get_irq(pdev, 0);
-               if (info->rtc_irq < 0) {
-                       dev_err(info->dev, "Failed to get rtc interrupts: %d\n",
-                               info->rtc_irq);
+               if (info->rtc_irq < 0)
                        return info->rtc_irq;
-               }
        } else {
                info->rtc_irq =  parent_i2c->irq;
        }
@@ -693,11 +690,11 @@ static int max77686_init_rtc_regmap(struct max77686_rtc_info *info)
                goto add_rtc_irq;
        }
 
-       info->rtc = i2c_new_dummy(parent_i2c->adapter,
-                                 info->drv_data->rtc_i2c_addr);
-       if (!info->rtc) {
+       info->rtc = devm_i2c_new_dummy_device(info->dev, parent_i2c->adapter,
+                                             info->drv_data->rtc_i2c_addr);
+       if (IS_ERR(info->rtc)) {
                dev_err(info->dev, "Failed to allocate I2C device for RTC\n");
-               return -ENODEV;
+               return PTR_ERR(info->rtc);
        }
 
        info->rtc_regmap = devm_regmap_init_i2c(info->rtc,
@@ -705,7 +702,7 @@ static int max77686_init_rtc_regmap(struct max77686_rtc_info *info)
        if (IS_ERR(info->rtc_regmap)) {
                ret = PTR_ERR(info->rtc_regmap);
                dev_err(info->dev, "Failed to allocate RTC regmap: %d\n", ret);
-               goto err_unregister_i2c;
+               return ret;
        }
 
 add_rtc_irq:
@@ -715,15 +712,10 @@ add_rtc_irq:
                                  &info->rtc_irq_data);
        if (ret < 0) {
                dev_err(info->dev, "Failed to add RTC irq chip: %d\n", ret);
-               goto err_unregister_i2c;
+               return ret;
        }
 
        return 0;
-
-err_unregister_i2c:
-       if (info->rtc)
-               i2c_unregister_device(info->rtc);
-       return ret;
 }
 
 static int max77686_rtc_probe(struct platform_device *pdev)
@@ -786,8 +778,6 @@ static int max77686_rtc_probe(struct platform_device *pdev)
 
 err_rtc:
        regmap_del_irq_chip(info->rtc_irq, info->rtc_irq_data);
-       if (info->rtc)
-               i2c_unregister_device(info->rtc);
 
        return ret;
 }
@@ -798,8 +788,6 @@ static int max77686_rtc_remove(struct platform_device *pdev)
 
        free_irq(info->virq, info);
        regmap_del_irq_chip(info->rtc_irq, info->rtc_irq_data);
-       if (info->rtc)
-               i2c_unregister_device(info->rtc);
 
        return 0;
 }
diff --git a/drivers/rtc/rtc-meson-vrtc.c b/drivers/rtc/rtc-meson-vrtc.c
new file mode 100644 (file)
index 0000000..89e5ba0
--- /dev/null
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 BayLibre, SAS
+ * Author: Neil Armstrong <narmstrong@baylibre.com>
+ * Copyright (C) 2015 Amlogic, Inc. All rights reserved.
+ */
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/time64.h>
+
+/* Per-device state of the virtual wakeup RTC. */
+struct meson_vrtc_data {
+       void __iomem *io_alarm;         /* register written with the wakeup delay */
+       struct rtc_device *rtc;         /* from devm_rtc_allocate_device() in probe */
+       unsigned long alarm_time;       /* alarm in seconds, 0 when none is set */
+       bool enabled;                   /* last value given to alarm_irq_enable */
+};
+
+/*
+ * rtc_class_ops.read_time: fill @tm from ktime_get_raw_ts64(); no
+ * hardware register is read. Always returns 0.
+ *
+ * NOTE(review): the suspend path compares alarm_time against this same
+ * raw clock, so alarms presumably have to be expressed relative to the
+ * time returned here rather than wall-clock time - confirm against
+ * callers before changing either side.
+ */
+static int meson_vrtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+       struct timespec64 time;
+
+       dev_dbg(dev, "%s\n", __func__);
+       ktime_get_raw_ts64(&time);
+       rtc_time64_to_tm(time.tv_sec, tm);
+
+       return 0;
+}
+
+/*
+ * Write @time to the alarm register. Callers in this file pass the
+ * number of seconds until wakeup, or 0 to clear it.
+ */
+static void meson_vrtc_set_wakeup_time(struct meson_vrtc_data *vrtc,
+                                      unsigned long time)
+{
+       writel_relaxed(time, vrtc->io_alarm);
+}
+
+/*
+ * rtc_class_ops.set_alarm: remember the requested alarm, in seconds,
+ * for the suspend path; a disabled alarm is stored as 0. Nothing is
+ * written to the hardware here. Always returns 0.
+ */
+static int meson_vrtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
+{
+       struct meson_vrtc_data *vrtc = dev_get_drvdata(dev);
+       unsigned long when = 0;
+
+       dev_dbg(dev, "%s: alarm->enabled=%d\n", __func__, alarm->enabled);
+       if (alarm->enabled)
+               when = rtc_tm_to_time64(&alarm->time);
+
+       vrtc->alarm_time = when;
+
+       return 0;
+}
+
+/*
+ * rtc_class_ops.alarm_irq_enable: record @enabled in the driver state.
+ * No register is touched. Always returns 0.
+ */
+static int meson_vrtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+       struct meson_vrtc_data *vrtc = dev_get_drvdata(dev);
+
+       vrtc->enabled = enabled;
+       return 0;
+}
+
+/* RTC class callbacks; no set_time or read_alarm is provided. */
+static const struct rtc_class_ops meson_vrtc_ops = {
+       .read_time = meson_vrtc_read_time,
+       .set_alarm = meson_vrtc_set_alarm,
+       .alarm_irq_enable = meson_vrtc_alarm_irq_enable,
+};
+
+/*
+ * Bind to the virtual RTC platform device: allocate the driver state,
+ * map memory resource 0 as the alarm register, mark the device wakeup
+ * capable and register the rtc device.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int meson_vrtc_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct meson_vrtc_data *priv;
+
+       priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       priv->io_alarm = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(priv->io_alarm))
+               return PTR_ERR(priv->io_alarm);
+
+       device_init_wakeup(dev, 1);
+
+       platform_set_drvdata(pdev, priv);
+
+       priv->rtc = devm_rtc_allocate_device(dev);
+       if (IS_ERR(priv->rtc))
+               return PTR_ERR(priv->rtc);
+
+       priv->rtc->ops = &meson_vrtc_ops;
+
+       return rtc_register_device(priv->rtc);
+}
+
+/*
+ * Suspend callback: when an alarm is pending, write the number of
+ * seconds left until it to the alarm register. An alarm that is not in
+ * the future is only reported with dev_err(). Always returns 0.
+ */
+static int __maybe_unused meson_vrtc_suspend(struct device *dev)
+{
+       struct meson_vrtc_data *vrtc = dev_get_drvdata(dev);
+       struct timespec64 now;
+       unsigned long local_time;
+       long alarm_secs;
+
+       dev_dbg(dev, "%s\n", __func__);
+       if (!vrtc->alarm_time)
+               return 0;
+
+       ktime_get_raw_ts64(&now);
+       local_time = now.tv_sec;
+
+       dev_dbg(dev, "alarm_time = %lus, local_time=%lus\n",
+               vrtc->alarm_time, local_time);
+       alarm_secs = vrtc->alarm_time - local_time;
+       if (alarm_secs <= 0) {
+               dev_err(dev, "alarm time already passed: %lds.\n",
+                       alarm_secs);
+               return 0;
+       }
+
+       meson_vrtc_set_wakeup_time(vrtc, alarm_secs);
+       dev_dbg(dev, "system will wakeup in %lds.\n",
+               alarm_secs);
+
+       return 0;
+}
+
+/*
+ * Resume callback: forget the stored alarm and write 0 to the alarm
+ * register. Always returns 0.
+ */
+static int __maybe_unused meson_vrtc_resume(struct device *dev)
+{
+       struct meson_vrtc_data *vrtc = dev_get_drvdata(dev);
+
+       dev_dbg(dev, "%s\n", __func__);
+
+       vrtc->alarm_time = 0;
+       meson_vrtc_set_wakeup_time(vrtc, 0);
+       return 0;
+}
+
+/* System sleep hooks: arm the wakeup on suspend, clear it on resume. */
+static SIMPLE_DEV_PM_OPS(meson_vrtc_pm_ops,
+                        meson_vrtc_suspend, meson_vrtc_resume);
+
+/* Device tree compatibles handled by this driver. */
+static const struct of_device_id meson_vrtc_dt_match[] = {
+       { .compatible = "amlogic,meson-vrtc"},
+       {},     /* sentinel */
+};
+MODULE_DEVICE_TABLE(of, meson_vrtc_dt_match);
+
+/* Platform driver glue; no remove callback is provided. */
+static struct platform_driver meson_vrtc_driver = {
+       .probe = meson_vrtc_probe,
+       .driver = {
+               .name = "meson-vrtc",
+               .of_match_table = meson_vrtc_dt_match,
+               .pm = &meson_vrtc_pm_ops,
+       },
+};
+
+module_platform_driver(meson_vrtc_driver);
+
+MODULE_DESCRIPTION("Amlogic Virtual Wakeup RTC Timer driver");
+MODULE_LICENSE("GPL");
index b46ed4d..704229e 100644 (file)
@@ -343,10 +343,8 @@ static int mtk_rtc_probe(struct platform_device *pdev)
        rtc->rtc_dev->ops = &mtk_rtc_ops;
 
        ret = rtc_register_device(rtc->rtc_dev);
-       if (ret) {
-               dev_err(&pdev->dev, "register rtc device failed\n");
+       if (ret)
                goto out_free_irq;
-       }
 
        return 0;
 
index 82b0816..16bd26b 100644 (file)
@@ -329,7 +329,6 @@ static int mtk_rtc_probe(struct platform_device *pdev)
 
        hw->irq = platform_get_irq(pdev, 0);
        if (hw->irq < 0) {
-               dev_err(&pdev->dev, "No IRQ resource\n");
                ret = hw->irq;
                goto err;
        }
index e697e96..902d57d 100644 (file)
@@ -184,8 +184,9 @@ static void mxc_rtc_irq_enable(struct device *dev, unsigned int bit,
        struct rtc_plat_data *pdata = dev_get_drvdata(dev);
        void __iomem *ioaddr = pdata->ioaddr;
        u32 reg;
+       unsigned long flags;
 
-       spin_lock_irq(&pdata->rtc->irq_lock);
+       spin_lock_irqsave(&pdata->rtc->irq_lock, flags);
        reg = readw(ioaddr + RTC_RTCIENR);
 
        if (enabled)
@@ -194,7 +195,7 @@ static void mxc_rtc_irq_enable(struct device *dev, unsigned int bit,
                reg &= ~bit;
 
        writew(reg, ioaddr + RTC_RTCIENR);
-       spin_unlock_irq(&pdata->rtc->irq_lock);
+       spin_unlock_irqrestore(&pdata->rtc->irq_lock, flags);
 }
 
 /* This function is the RTC interrupt service routine. */
index 5b970a8..9153456 100644 (file)
@@ -279,7 +279,6 @@ static int mxc_rtc_wait_for_flag(void __iomem *ioaddr, int flag)
 static int mxc_rtc_probe(struct platform_device *pdev)
 {
        struct mxc_rtc_data *pdata;
-       struct resource *res;
        void __iomem *ioaddr;
        int ret = 0;
 
@@ -287,8 +286,7 @@ static int mxc_rtc_probe(struct platform_device *pdev)
        if (!pdata)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       pdata->ioaddr = devm_ioremap_resource(&pdev->dev, res);
+       pdata->ioaddr = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(pdata->ioaddr))
                return PTR_ERR(pdata->ioaddr);
 
diff --git a/drivers/rtc/rtc-nuc900.c b/drivers/rtc/rtc-nuc900.c
deleted file mode 100644 (file)
index 49cc405..0000000
+++ /dev/null
@@ -1,271 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (c) 2008-2009 Nuvoton technology corporation.
- *
- * Wan ZongShun <mcuos.com@gmail.com>
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/rtc.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <linux/bcd.h>
-
-/* RTC Control Registers */
-#define REG_RTC_INIR           0x00
-#define REG_RTC_AER            0x04
-#define REG_RTC_FCR            0x08
-#define REG_RTC_TLR            0x0C
-#define REG_RTC_CLR            0x10
-#define REG_RTC_TSSR           0x14
-#define REG_RTC_DWR            0x18
-#define REG_RTC_TAR            0x1C
-#define REG_RTC_CAR            0x20
-#define REG_RTC_LIR            0x24
-#define REG_RTC_RIER           0x28
-#define REG_RTC_RIIR           0x2C
-#define REG_RTC_TTR            0x30
-
-#define RTCSET                 0x01
-#define AERRWENB               0x10000
-#define INIRRESET              0xa5eb1357
-#define AERPOWERON             0xA965
-#define AERPOWEROFF            0x0000
-#define LEAPYEAR               0x0001
-#define TICKENB                        0x80
-#define TICKINTENB             0x0002
-#define ALARMINTENB            0x0001
-#define MODE24                 0x0001
-
-struct nuc900_rtc {
-       int                     irq_num;
-       void __iomem            *rtc_reg;
-       struct rtc_device       *rtcdev;
-};
-
-struct nuc900_bcd_time {
-       int bcd_sec;
-       int bcd_min;
-       int bcd_hour;
-       int bcd_mday;
-       int bcd_mon;
-       int bcd_year;
-};
-
-static irqreturn_t nuc900_rtc_interrupt(int irq, void *_rtc)
-{
-       struct nuc900_rtc *rtc = _rtc;
-       unsigned long events = 0, rtc_irq;
-
-       rtc_irq = __raw_readl(rtc->rtc_reg + REG_RTC_RIIR);
-
-       if (rtc_irq & ALARMINTENB) {
-               rtc_irq &= ~ALARMINTENB;
-               __raw_writel(rtc_irq, rtc->rtc_reg + REG_RTC_RIIR);
-               events |= RTC_AF | RTC_IRQF;
-       }
-
-       if (rtc_irq & TICKINTENB) {
-               rtc_irq &= ~TICKINTENB;
-               __raw_writel(rtc_irq, rtc->rtc_reg + REG_RTC_RIIR);
-               events |= RTC_UF | RTC_IRQF;
-       }
-
-       rtc_update_irq(rtc->rtcdev, 1, events);
-
-       return IRQ_HANDLED;
-}
-
-static int *check_rtc_access_enable(struct nuc900_rtc *nuc900_rtc)
-{
-       unsigned int timeout = 0x1000;
-       __raw_writel(INIRRESET, nuc900_rtc->rtc_reg + REG_RTC_INIR);
-
-       mdelay(10);
-
-       __raw_writel(AERPOWERON, nuc900_rtc->rtc_reg + REG_RTC_AER);
-
-       while (!(__raw_readl(nuc900_rtc->rtc_reg + REG_RTC_AER) & AERRWENB)
-                                                               && --timeout)
-               mdelay(1);
-
-       if (!timeout)
-               return ERR_PTR(-EPERM);
-
-       return NULL;
-}
-
-static void nuc900_rtc_bcd2bin(unsigned int timereg,
-                              unsigned int calreg, struct rtc_time *tm)
-{
-       tm->tm_mday     = bcd2bin(calreg >> 0);
-       tm->tm_mon      = bcd2bin(calreg >> 8);
-       tm->tm_year     = bcd2bin(calreg >> 16) + 100;
-
-       tm->tm_sec      = bcd2bin(timereg >> 0);
-       tm->tm_min      = bcd2bin(timereg >> 8);
-       tm->tm_hour     = bcd2bin(timereg >> 16);
-}
-
-static void nuc900_rtc_bin2bcd(struct device *dev, struct rtc_time *settm,
-                                               struct nuc900_bcd_time *gettm)
-{
-       gettm->bcd_mday = bin2bcd(settm->tm_mday) << 0;
-       gettm->bcd_mon  = bin2bcd(settm->tm_mon) << 8;
-
-       if (settm->tm_year < 100) {
-               dev_warn(dev, "The year will be between 1970-1999, right?\n");
-               gettm->bcd_year = bin2bcd(settm->tm_year) << 16;
-       } else {
-               gettm->bcd_year = bin2bcd(settm->tm_year - 100) << 16;
-       }
-
-       gettm->bcd_sec  = bin2bcd(settm->tm_sec) << 0;
-       gettm->bcd_min  = bin2bcd(settm->tm_min) << 8;
-       gettm->bcd_hour = bin2bcd(settm->tm_hour) << 16;
-}
-
-static int nuc900_alarm_irq_enable(struct device *dev, unsigned int enabled)
-{
-       struct nuc900_rtc *rtc = dev_get_drvdata(dev);
-
-       if (enabled)
-               __raw_writel(__raw_readl(rtc->rtc_reg + REG_RTC_RIER)|
-                               (ALARMINTENB), rtc->rtc_reg + REG_RTC_RIER);
-       else
-               __raw_writel(__raw_readl(rtc->rtc_reg + REG_RTC_RIER)&
-                               (~ALARMINTENB), rtc->rtc_reg + REG_RTC_RIER);
-
-       return 0;
-}
-
-static int nuc900_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-       struct nuc900_rtc *rtc = dev_get_drvdata(dev);
-       unsigned int timeval, clrval;
-
-       timeval = __raw_readl(rtc->rtc_reg + REG_RTC_TLR);
-       clrval  = __raw_readl(rtc->rtc_reg + REG_RTC_CLR);
-
-       nuc900_rtc_bcd2bin(timeval, clrval, tm);
-
-       return 0;
-}
-
-static int nuc900_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-       struct nuc900_rtc *rtc = dev_get_drvdata(dev);
-       struct nuc900_bcd_time gettm;
-       unsigned long val;
-       int *err;
-
-       nuc900_rtc_bin2bcd(dev, tm, &gettm);
-
-       err = check_rtc_access_enable(rtc);
-       if (IS_ERR(err))
-               return PTR_ERR(err);
-
-       val = gettm.bcd_mday | gettm.bcd_mon | gettm.bcd_year;
-       __raw_writel(val, rtc->rtc_reg + REG_RTC_CLR);
-
-       val = gettm.bcd_sec | gettm.bcd_min | gettm.bcd_hour;
-       __raw_writel(val, rtc->rtc_reg + REG_RTC_TLR);
-
-       return 0;
-}
-
-static int nuc900_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
-{
-       struct nuc900_rtc *rtc = dev_get_drvdata(dev);
-       unsigned int timeval, carval;
-
-       timeval = __raw_readl(rtc->rtc_reg + REG_RTC_TAR);
-       carval  = __raw_readl(rtc->rtc_reg + REG_RTC_CAR);
-
-       nuc900_rtc_bcd2bin(timeval, carval, &alrm->time);
-
-       return rtc_valid_tm(&alrm->time);
-}
-
-static int nuc900_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
-{
-       struct nuc900_rtc *rtc = dev_get_drvdata(dev);
-       struct nuc900_bcd_time tm;
-       unsigned long val;
-       int *err;
-
-       nuc900_rtc_bin2bcd(dev, &alrm->time, &tm);
-
-       err = check_rtc_access_enable(rtc);
-       if (IS_ERR(err))
-               return PTR_ERR(err);
-
-       val = tm.bcd_mday | tm.bcd_mon | tm.bcd_year;
-       __raw_writel(val, rtc->rtc_reg + REG_RTC_CAR);
-
-       val = tm.bcd_sec | tm.bcd_min | tm.bcd_hour;
-       __raw_writel(val, rtc->rtc_reg + REG_RTC_TAR);
-
-       return 0;
-}
-
-static const struct rtc_class_ops nuc900_rtc_ops = {
-       .read_time = nuc900_rtc_read_time,
-       .set_time = nuc900_rtc_set_time,
-       .read_alarm = nuc900_rtc_read_alarm,
-       .set_alarm = nuc900_rtc_set_alarm,
-       .alarm_irq_enable = nuc900_alarm_irq_enable,
-};
-
-static int __init nuc900_rtc_probe(struct platform_device *pdev)
-{
-       struct resource *res;
-       struct nuc900_rtc *nuc900_rtc;
-
-       nuc900_rtc = devm_kzalloc(&pdev->dev, sizeof(struct nuc900_rtc),
-                               GFP_KERNEL);
-       if (!nuc900_rtc)
-               return -ENOMEM;
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       nuc900_rtc->rtc_reg = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(nuc900_rtc->rtc_reg))
-               return PTR_ERR(nuc900_rtc->rtc_reg);
-
-       platform_set_drvdata(pdev, nuc900_rtc);
-
-       nuc900_rtc->rtcdev = devm_rtc_device_register(&pdev->dev, pdev->name,
-                                               &nuc900_rtc_ops, THIS_MODULE);
-       if (IS_ERR(nuc900_rtc->rtcdev)) {
-               dev_err(&pdev->dev, "rtc device register failed\n");
-               return PTR_ERR(nuc900_rtc->rtcdev);
-       }
-
-       __raw_writel(__raw_readl(nuc900_rtc->rtc_reg + REG_RTC_TSSR) | MODE24,
-                                       nuc900_rtc->rtc_reg + REG_RTC_TSSR);
-
-       nuc900_rtc->irq_num = platform_get_irq(pdev, 0);
-       if (devm_request_irq(&pdev->dev, nuc900_rtc->irq_num,
-                       nuc900_rtc_interrupt, 0, "nuc900rtc", nuc900_rtc)) {
-               dev_err(&pdev->dev, "NUC900 RTC request irq failed\n");
-               return -EBUSY;
-       }
-
-       return 0;
-}
-
-static struct platform_driver nuc900_rtc_driver = {
-       .driver         = {
-               .name   = "nuc900-rtc",
-       },
-};
-
-module_platform_driver_probe(nuc900_rtc_driver, nuc900_rtc_probe);
-
-MODULE_AUTHOR("Wan ZongShun <mcuos.com@gmail.com>");
-MODULE_DESCRIPTION("nuc910/nuc920 RTC driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:nuc900-rtc");
index fb542a9..c3691fa 100644 (file)
@@ -82,7 +82,7 @@
 #define OSC_HAS_STOPPED                BIT(7)  /* Clock has been stopped */
 
 /* PCF2123_REG_ALRM_XX BITS */
-#define ALRM_ENABLE            BIT(7)  /* MN, HR, DM, or DW alarm enable */
+#define ALRM_DISABLE           BIT(7)  /* MN, HR, DM, or DW alarm matching */
 
 /* PCF2123_REG_TMR_CLKOUT BITS */
 #define CD_TMR_4096KHZ         (0)     /* 4096 KHz countdown timer */
 
 static struct spi_driver pcf2123_driver;
 
-struct pcf2123_plat_data {
+struct pcf2123_data {
        struct rtc_device *rtc;
        struct regmap *map;
 };
@@ -119,11 +119,11 @@ static const struct regmap_config pcf2123_regmap_config = {
 
 static int pcf2123_read_offset(struct device *dev, long *offset)
 {
-       struct pcf2123_plat_data *pdata = dev_get_platdata(dev);
+       struct pcf2123_data *pcf2123 = dev_get_drvdata(dev);
        int ret, val;
        unsigned int reg;
 
-       ret = regmap_read(pdata->map, PCF2123_REG_OFFSET, &reg);
+       ret = regmap_read(pcf2123->map, PCF2123_REG_OFFSET, &reg);
        if (ret)
                return ret;
 
@@ -149,7 +149,7 @@ static int pcf2123_read_offset(struct device *dev, long *offset)
  */
 static int pcf2123_set_offset(struct device *dev, long offset)
 {
-       struct pcf2123_plat_data *pdata = dev_get_platdata(dev);
+       struct pcf2123_data *pcf2123 = dev_get_drvdata(dev);
        s8 reg;
 
        if (offset > OFFSET_STEP * 127)
@@ -169,16 +169,16 @@ static int pcf2123_set_offset(struct device *dev, long offset)
                reg |= OFFSET_COARSE;
        }
 
-       return regmap_write(pdata->map, PCF2123_REG_OFFSET, (unsigned int)reg);
+       return regmap_write(pcf2123->map, PCF2123_REG_OFFSET, (unsigned int)reg);
 }
 
 static int pcf2123_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
-       struct pcf2123_plat_data *pdata = dev_get_platdata(dev);
+       struct pcf2123_data *pcf2123 = dev_get_drvdata(dev);
        u8 rxbuf[7];
        int ret;
 
-       ret = regmap_bulk_read(pdata->map, PCF2123_REG_SC, rxbuf,
+       ret = regmap_bulk_read(pcf2123->map, PCF2123_REG_SC, rxbuf,
                                sizeof(rxbuf));
        if (ret)
                return ret;
@@ -194,9 +194,7 @@ static int pcf2123_rtc_read_time(struct device *dev, struct rtc_time *tm)
        tm->tm_mday = bcd2bin(rxbuf[3] & 0x3F);
        tm->tm_wday = rxbuf[4] & 0x07;
        tm->tm_mon = bcd2bin(rxbuf[5] & 0x1F) - 1; /* rtc mn 1-12 */
-       tm->tm_year = bcd2bin(rxbuf[6]);
-       if (tm->tm_year < 70)
-               tm->tm_year += 100;     /* assume we are in 1970...2069 */
+       tm->tm_year = bcd2bin(rxbuf[6]) + 100;
 
        dev_dbg(dev, "%s: tm is %ptR\n", __func__, tm);
 
@@ -205,14 +203,14 @@ static int pcf2123_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 static int pcf2123_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
-       struct pcf2123_plat_data *pdata = dev_get_platdata(dev);
+       struct pcf2123_data *pcf2123 = dev_get_drvdata(dev);
        u8 txbuf[7];
        int ret;
 
        dev_dbg(dev, "%s: tm is %ptR\n", __func__, tm);
 
        /* Stop the counter first */
-       ret = regmap_write(pdata->map, PCF2123_REG_CTRL1, CTRL1_STOP);
+       ret = regmap_write(pcf2123->map, PCF2123_REG_CTRL1, CTRL1_STOP);
        if (ret)
                return ret;
 
@@ -223,29 +221,37 @@ static int pcf2123_rtc_set_time(struct device *dev, struct rtc_time *tm)
        txbuf[3] = bin2bcd(tm->tm_mday & 0x3F);
        txbuf[4] = tm->tm_wday & 0x07;
        txbuf[5] = bin2bcd((tm->tm_mon + 1) & 0x1F); /* rtc mn 1-12 */
-       txbuf[6] = bin2bcd(tm->tm_year < 100 ? tm->tm_year : tm->tm_year - 100);
+       txbuf[6] = bin2bcd(tm->tm_year - 100);
 
-       ret = regmap_bulk_write(pdata->map, PCF2123_REG_SC, txbuf,
+       ret = regmap_bulk_write(pcf2123->map, PCF2123_REG_SC, txbuf,
                                sizeof(txbuf));
        if (ret)
                return ret;
 
        /* Start the counter */
-       ret = regmap_write(pdata->map, PCF2123_REG_CTRL1, CTRL1_CLEAR);
+       ret = regmap_write(pcf2123->map, PCF2123_REG_CTRL1, CTRL1_CLEAR);
        if (ret)
                return ret;
 
        return 0;
 }
 
+/*
+ * Enable or disable the alarm interrupt.
+ *
+ * Sets the CTRL2_AIE bit in PCF2123_REG_CTRL2 when @en is non-zero and
+ * clears it otherwise. Returns the result of regmap_update_bits().
+ */
+static int pcf2123_rtc_alarm_irq_enable(struct device *dev, unsigned int en)
+{
+       struct pcf2123_data *pcf2123 = dev_get_drvdata(dev);
+
+       return regmap_update_bits(pcf2123->map, PCF2123_REG_CTRL2, CTRL2_AIE,
+                                 en ? CTRL2_AIE : 0);
+}
+
 static int pcf2123_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
 {
-       struct pcf2123_plat_data *pdata = dev_get_platdata(dev);
+       struct pcf2123_data *pcf2123 = dev_get_drvdata(dev);
        u8 rxbuf[4];
        int ret;
        unsigned int val = 0;
 
-       ret = regmap_bulk_read(pdata->map, PCF2123_REG_ALRM_MN, rxbuf,
+       ret = regmap_bulk_read(pcf2123->map, PCF2123_REG_ALRM_MN, rxbuf,
                                sizeof(rxbuf));
        if (ret)
                return ret;
@@ -257,7 +263,7 @@ static int pcf2123_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
 
        dev_dbg(dev, "%s: alm is %ptR\n", __func__, &alm->time);
 
-       ret = regmap_read(pdata->map, PCF2123_REG_CTRL2, &val);
+       ret = regmap_read(pcf2123->map, PCF2123_REG_CTRL2, &val);
        if (ret)
                return ret;
 
@@ -268,19 +274,19 @@ static int pcf2123_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
 
 static int pcf2123_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
 {
-       struct pcf2123_plat_data *pdata = dev_get_platdata(dev);
+       struct pcf2123_data *pcf2123 = dev_get_drvdata(dev);
        u8 txbuf[4];
        int ret;
 
        dev_dbg(dev, "%s: alm is %ptR\n", __func__, &alm->time);
 
-       /* Ensure alarm flag is clear */
-       ret = regmap_update_bits(pdata->map, PCF2123_REG_CTRL2, CTRL2_AF, 0);
+       /* Disable alarm interrupt */
+       ret = regmap_update_bits(pcf2123->map, PCF2123_REG_CTRL2, CTRL2_AIE, 0);
        if (ret)
                return ret;
 
-       /* Disable alarm interrupt */
-       ret = regmap_update_bits(pdata->map, PCF2123_REG_CTRL2, CTRL2_AIE, 0);
+       /* Ensure alarm flag is clear */
+       ret = regmap_update_bits(pcf2123->map, PCF2123_REG_CTRL2, CTRL2_AF, 0);
        if (ret)
                return ret;
 
@@ -288,42 +294,34 @@ static int pcf2123_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
        txbuf[0] = bin2bcd(alm->time.tm_min & 0x7F);
        txbuf[1] = bin2bcd(alm->time.tm_hour & 0x3F);
        txbuf[2] = bin2bcd(alm->time.tm_mday & 0x3F);
-       txbuf[3] = bin2bcd(alm->time.tm_wday & 0x07);
+       txbuf[3] = ALRM_DISABLE;
 
-       ret = regmap_bulk_write(pdata->map, PCF2123_REG_ALRM_MN, txbuf,
+       ret = regmap_bulk_write(pcf2123->map, PCF2123_REG_ALRM_MN, txbuf,
                                sizeof(txbuf));
        if (ret)
                return ret;
 
-       /* Enable alarm interrupt */
-       if (alm->enabled)       {
-               ret = regmap_update_bits(pdata->map, PCF2123_REG_CTRL2,
-                                               CTRL2_AIE, CTRL2_AIE);
-               if (ret)
-                       return ret;
-       }
-
-       return 0;
+       return pcf2123_rtc_alarm_irq_enable(dev, alm->enabled);
 }
 
 static irqreturn_t pcf2123_rtc_irq(int irq, void *dev)
 {
-       struct pcf2123_plat_data *pdata = dev_get_platdata(dev);
-       struct mutex *lock = &pdata->rtc->ops_lock;
+       struct pcf2123_data *pcf2123 = dev_get_drvdata(dev);
+       struct mutex *lock = &pcf2123->rtc->ops_lock;
        unsigned int val = 0;
        int ret = IRQ_NONE;
 
        mutex_lock(lock);
-       regmap_read(pdata->map, PCF2123_REG_CTRL2, &val);
+       regmap_read(pcf2123->map, PCF2123_REG_CTRL2, &val);
 
        /* Alarm? */
        if (val & CTRL2_AF) {
                ret = IRQ_HANDLED;
 
                /* Clear alarm flag */
-               regmap_update_bits(pdata->map, PCF2123_REG_CTRL2, CTRL2_AF, 0);
+               regmap_update_bits(pcf2123->map, PCF2123_REG_CTRL2, CTRL2_AF, 0);
 
-               rtc_update_irq(pdata->rtc, 1, RTC_IRQF | RTC_AF);
+               rtc_update_irq(pcf2123->rtc, 1, RTC_IRQF | RTC_AF);
        }
 
        mutex_unlock(lock);
@@ -333,23 +331,23 @@ static irqreturn_t pcf2123_rtc_irq(int irq, void *dev)
 
 static int pcf2123_reset(struct device *dev)
 {
-       struct pcf2123_plat_data *pdata = dev_get_platdata(dev);
+       struct pcf2123_data *pcf2123 = dev_get_drvdata(dev);
        int ret;
        unsigned int val = 0;
 
-       ret = regmap_write(pdata->map, PCF2123_REG_CTRL1, CTRL1_SW_RESET);
+       ret = regmap_write(pcf2123->map, PCF2123_REG_CTRL1, CTRL1_SW_RESET);
        if (ret)
                return ret;
 
        /* Stop the counter */
        dev_dbg(dev, "stopping RTC\n");
-       ret = regmap_write(pdata->map, PCF2123_REG_CTRL1, CTRL1_STOP);
+       ret = regmap_write(pcf2123->map, PCF2123_REG_CTRL1, CTRL1_STOP);
        if (ret)
                return ret;
 
        /* See if the counter was actually stopped */
        dev_dbg(dev, "checking for presence of RTC\n");
-       ret = regmap_read(pdata->map, PCF2123_REG_CTRL1, &val);
+       ret = regmap_read(pcf2123->map, PCF2123_REG_CTRL1, &val);
        if (ret)
                return ret;
 
@@ -358,7 +356,7 @@ static int pcf2123_reset(struct device *dev)
                return -ENODEV;
 
        /* Start the counter */
-       ret = regmap_write(pdata->map, PCF2123_REG_CTRL1, CTRL1_CLEAR);
+       ret = regmap_write(pcf2123->map, PCF2123_REG_CTRL1, CTRL1_CLEAR);
        if (ret)
                return ret;
 
@@ -372,26 +370,27 @@ static const struct rtc_class_ops pcf2123_rtc_ops = {
        .set_offset     = pcf2123_set_offset,
        .read_alarm     = pcf2123_rtc_read_alarm,
        .set_alarm      = pcf2123_rtc_set_alarm,
+       .alarm_irq_enable = pcf2123_rtc_alarm_irq_enable,
 };
 
 static int pcf2123_probe(struct spi_device *spi)
 {
        struct rtc_device *rtc;
        struct rtc_time tm;
-       struct pcf2123_plat_data *pdata;
+       struct pcf2123_data *pcf2123;
        int ret = 0;
 
-       pdata = devm_kzalloc(&spi->dev, sizeof(struct pcf2123_plat_data),
+       pcf2123 = devm_kzalloc(&spi->dev, sizeof(struct pcf2123_data),
                                GFP_KERNEL);
-       if (!pdata)
+       if (!pcf2123)
                return -ENOMEM;
-       spi->dev.platform_data = pdata;
 
-       pdata->map = devm_regmap_init_spi(spi, &pcf2123_regmap_config);
+       dev_set_drvdata(&spi->dev, pcf2123);
 
-       if (IS_ERR(pdata->map)) {
+       pcf2123->map = devm_regmap_init_spi(spi, &pcf2123_regmap_config);
+       if (IS_ERR(pcf2123->map)) {
                dev_err(&spi->dev, "regmap init failed.\n");
-               goto kfree_exit;
+               return PTR_ERR(pcf2123->map);
        }
 
        ret = pcf2123_rtc_read_time(&spi->dev, &tm);
@@ -399,7 +398,7 @@ static int pcf2123_probe(struct spi_device *spi)
                ret = pcf2123_reset(&spi->dev);
                if (ret < 0) {
                        dev_err(&spi->dev, "chip not found\n");
-                       goto kfree_exit;
+                       return ret;
                }
        }
 
@@ -407,16 +406,11 @@ static int pcf2123_probe(struct spi_device *spi)
                        (spi->max_speed_hz + 500) / 1000);
 
        /* Finalize the initialization */
-       rtc = devm_rtc_device_register(&spi->dev, pcf2123_driver.driver.name,
-                       &pcf2123_rtc_ops, THIS_MODULE);
-
-       if (IS_ERR(rtc)) {
-               dev_err(&spi->dev, "failed to register.\n");
-               ret = PTR_ERR(rtc);
-               goto kfree_exit;
-       }
+       rtc = devm_rtc_allocate_device(&spi->dev);
+       if (IS_ERR(rtc))
+               return PTR_ERR(rtc);
 
-       pdata->rtc = rtc;
+       pcf2123->rtc = rtc;
 
        /* Register alarm irq */
        if (spi->irq > 0) {
@@ -434,19 +428,25 @@ static int pcf2123_probe(struct spi_device *spi)
         * support to this driver to generate interrupts more than once
         * per minute.
         */
-       pdata->rtc->uie_unsupported = 1;
+       rtc->uie_unsupported = 1;
+       rtc->ops = &pcf2123_rtc_ops;
+       rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
+       rtc->range_max = RTC_TIMESTAMP_END_2099;
+       rtc->set_start_time = true;
 
-       return 0;
+       ret = rtc_register_device(rtc);
+       if (ret)
+               return ret;
 
-kfree_exit:
-       spi->dev.platform_data = NULL;
-       return ret;
+       return 0;
 }
 
 #ifdef CONFIG_OF
 static const struct of_device_id pcf2123_dt_ids[] = {
-       { .compatible = "nxp,rtc-pcf2123", },
+       { .compatible = "nxp,pcf2123", },
        { .compatible = "microcrystal,rv2123", },
+       /* Deprecated, do not use */
+       { .compatible = "nxp,rtc-pcf2123", },
        { /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, pcf2123_dt_ids);
index 8632f58..02b069c 100644 (file)
@@ -5,6 +5,9 @@
  *
  * Author: Renaud Cerrato <r.cerrato@til-technologies.fr>
  *
+ * Watchdog and tamper functions
+ * Author: Bruno Thomsen <bruno.thomsen@gmail.com>
+ *
  * based on the other drivers in this same directory.
  *
  * Datasheet: http://cache.nxp.com/documents/data_sheet/PCF2127.pdf
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/regmap.h>
+#include <linux/watchdog.h>
+
+/* Control register 1 */
+#define PCF2127_REG_CTRL1              0x00
+#define PCF2127_BIT_CTRL1_TSF1                 BIT(4)
+/* Control register 2 */
+#define PCF2127_REG_CTRL2              0x01
+#define PCF2127_BIT_CTRL2_TSIE                 BIT(2)
+#define PCF2127_BIT_CTRL2_TSF2                 BIT(5)
+/* Control register 3 */
+#define PCF2127_REG_CTRL3              0x02
+#define PCF2127_BIT_CTRL3_BLIE                 BIT(0)
+#define PCF2127_BIT_CTRL3_BIE                  BIT(1)
+#define PCF2127_BIT_CTRL3_BLF                  BIT(2)
+#define PCF2127_BIT_CTRL3_BF                   BIT(3)
+#define PCF2127_BIT_CTRL3_BTSE                 BIT(4)
+/* Time and date registers */
+#define PCF2127_REG_SC                 0x03
+#define PCF2127_BIT_SC_OSF                     BIT(7)
+#define PCF2127_REG_MN                 0x04
+#define PCF2127_REG_HR                 0x05
+#define PCF2127_REG_DM                 0x06
+#define PCF2127_REG_DW                 0x07
+#define PCF2127_REG_MO                 0x08
+#define PCF2127_REG_YR                 0x09
+/* Watchdog registers */
+#define PCF2127_REG_WD_CTL             0x10
+#define PCF2127_BIT_WD_CTL_TF0                 BIT(0)
+#define PCF2127_BIT_WD_CTL_TF1                 BIT(1)
+#define PCF2127_BIT_WD_CTL_CD0                 BIT(6)
+#define PCF2127_BIT_WD_CTL_CD1                 BIT(7)
+#define PCF2127_REG_WD_VAL             0x11
+/* Tamper timestamp registers */
+#define PCF2127_REG_TS_CTRL            0x12
+#define PCF2127_BIT_TS_CTRL_TSOFF              BIT(6)
+#define PCF2127_BIT_TS_CTRL_TSM                        BIT(7)
+#define PCF2127_REG_TS_SC              0x13
+#define PCF2127_REG_TS_MN              0x14
+#define PCF2127_REG_TS_HR              0x15
+#define PCF2127_REG_TS_DM              0x16
+#define PCF2127_REG_TS_MO              0x17
+#define PCF2127_REG_TS_YR              0x18
+/*
+ * RAM registers
+ * PCF2127 has 512 bytes general-purpose static RAM (SRAM) that is
+ * battery backed and can survive a power outage.
+ * PCF2129 doesn't have this feature.
+ */
+#define PCF2127_REG_RAM_ADDR_MSB       0x1A
+#define PCF2127_REG_RAM_WRT_CMD                0x1C
+#define PCF2127_REG_RAM_RD_CMD         0x1D
 
-#define PCF2127_REG_CTRL1       (0x00)  /* Control Register 1 */
-#define PCF2127_REG_CTRL2       (0x01)  /* Control Register 2 */
-
-#define PCF2127_REG_CTRL3       (0x02)  /* Control Register 3 */
-#define PCF2127_REG_CTRL3_BLF          BIT(2)
-
-#define PCF2127_REG_SC          (0x03)  /* datetime */
-#define PCF2127_REG_MN          (0x04)
-#define PCF2127_REG_HR          (0x05)
-#define PCF2127_REG_DM          (0x06)
-#define PCF2127_REG_DW          (0x07)
-#define PCF2127_REG_MO          (0x08)
-#define PCF2127_REG_YR          (0x09)
-
-/* the pcf2127 has 512 bytes nvmem, pcf2129 doesn't */
-#define PCF2127_REG_RAM_addr_MSB       0x1a
-#define PCF2127_REG_RAM_wrt_cmd        0x1c
-#define PCF2127_REG_RAM_rd_cmd         0x1d
-
-#define PCF2127_OSF             BIT(7)  /* Oscillator Fail flag */
+/* Watchdog timer value constants */
+#define PCF2127_WD_VAL_STOP            0
+#define PCF2127_WD_VAL_MIN             2
+#define PCF2127_WD_VAL_MAX             255
+#define PCF2127_WD_VAL_DEFAULT         60
 
 struct pcf2127 {
        struct rtc_device *rtc;
+       struct watchdog_device wdd;
        struct regmap *regmap;
 };
 
@@ -54,30 +94,25 @@ static int pcf2127_rtc_read_time(struct device *dev, struct rtc_time *tm)
        struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
        unsigned char buf[10];
        int ret;
-       int i;
 
-       for (i = 0; i <= PCF2127_REG_CTRL3; i++) {
-               ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL1 + i,
-                                 (unsigned int *)(buf + i));
-               if (ret) {
-                       dev_err(dev, "%s: read error\n", __func__);
-                       return ret;
-               }
-       }
-
-       ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_SC,
-                              (buf + PCF2127_REG_SC),
-                              ARRAY_SIZE(buf) - PCF2127_REG_SC);
+       /*
+        * Avoid reading CTRL2 register as it causes WD_VAL register
+        * value to reset to 0 which means watchdog is stopped.
+        */
+       ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_CTRL3,
+                              (buf + PCF2127_REG_CTRL3),
+                              ARRAY_SIZE(buf) - PCF2127_REG_CTRL3);
        if (ret) {
                dev_err(dev, "%s: read error\n", __func__);
                return ret;
        }
 
-       if (buf[PCF2127_REG_CTRL3] & PCF2127_REG_CTRL3_BLF)
+       if (buf[PCF2127_REG_CTRL3] & PCF2127_BIT_CTRL3_BLF)
                dev_info(dev,
                        "low voltage detected, check/replace RTC battery.\n");
 
-       if (buf[PCF2127_REG_SC] & PCF2127_OSF) {
+       /* Clock integrity is not guaranteed when OSF flag is set. */
+       if (buf[PCF2127_REG_SC] & PCF2127_BIT_SC_OSF) {
                /*
                 * no need clear the flag here,
                 * it will be cleared once the new date is saved
@@ -88,14 +123,12 @@ static int pcf2127_rtc_read_time(struct device *dev, struct rtc_time *tm)
        }
 
        dev_dbg(dev,
-               "%s: raw data is cr1=%02x, cr2=%02x, cr3=%02x, "
-               "sec=%02x, min=%02x, hr=%02x, "
+               "%s: raw data is cr3=%02x, sec=%02x, min=%02x, hr=%02x, "
                "mday=%02x, wday=%02x, mon=%02x, year=%02x\n",
-               __func__,
-               buf[0], buf[1], buf[2],
-               buf[3], buf[4], buf[5],
-               buf[6], buf[7], buf[8], buf[9]);
-
+               __func__, buf[PCF2127_REG_CTRL3], buf[PCF2127_REG_SC],
+               buf[PCF2127_REG_MN], buf[PCF2127_REG_HR],
+               buf[PCF2127_REG_DM], buf[PCF2127_REG_DW],
+               buf[PCF2127_REG_MO], buf[PCF2127_REG_YR]);
 
        tm->tm_sec = bcd2bin(buf[PCF2127_REG_SC] & 0x7F);
        tm->tm_min = bcd2bin(buf[PCF2127_REG_MN] & 0x7F);
@@ -166,7 +199,7 @@ static int pcf2127_rtc_ioctl(struct device *dev,
                if (ret)
                        return ret;
 
-               touser = touser & PCF2127_REG_CTRL3_BLF ? 1 : 0;
+               touser = touser & PCF2127_BIT_CTRL3_BLF ? 1 : 0;
 
                if (copy_to_user((void __user *)arg, &touser, sizeof(int)))
                        return -EFAULT;
@@ -192,12 +225,12 @@ static int pcf2127_nvmem_read(void *priv, unsigned int offset,
        int ret;
        unsigned char offsetbuf[] = { offset >> 8, offset };
 
-       ret = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_RAM_addr_MSB,
+       ret = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_RAM_ADDR_MSB,
                                offsetbuf, 2);
        if (ret)
                return ret;
 
-       ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_RAM_rd_cmd,
+       ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_RAM_RD_CMD,
                               val, bytes);
 
        return ret ?: bytes;
@@ -210,17 +243,176 @@ static int pcf2127_nvmem_write(void *priv, unsigned int offset,
        int ret;
        unsigned char offsetbuf[] = { offset >> 8, offset };
 
-       ret = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_RAM_addr_MSB,
+       ret = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_RAM_ADDR_MSB,
                                offsetbuf, 2);
        if (ret)
                return ret;
 
-       ret = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_RAM_wrt_cmd,
+       ret = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_RAM_WRT_CMD,
                                val, bytes);
 
        return ret ?: bytes;
 }
 
+/* watchdog driver */
+
+/*
+ * Write the currently configured timeout (wdd->timeout) to the WD_VAL
+ * register. Returns the result of regmap_write().
+ */
+static int pcf2127_wdt_ping(struct watchdog_device *wdd)
+{
+       struct pcf2127 *pcf2127 = watchdog_get_drvdata(wdd);
+
+       return regmap_write(pcf2127->regmap, PCF2127_REG_WD_VAL, wdd->timeout);
+}
+
+/*
+ * Restart watchdog timer if feature is active.
+ *
+ * Note: Reading CTRL2 register causes watchdog to stop which is unfortunate,
+ * since the register also contains control/status flags for other features.
+ * Always call this function after reading CTRL2 register.
+ *
+ * Returns 0 when the watchdog is not active; otherwise returns the result
+ * of pcf2127_wdt_ping(). A failed ping is also logged with dev_err().
+ */
+static int pcf2127_wdt_active_ping(struct watchdog_device *wdd)
+{
+       int ret = 0;
+
+       if (watchdog_active(wdd)) {
+               ret = pcf2127_wdt_ping(wdd);
+               if (ret)
+                       dev_err(wdd->parent,
+                               "%s: watchdog restart failed, ret=%d\n",
+                               __func__, ret);
+       }
+
+       return ret;
+}
+
+/*
+ * Watchdog start callback. Delegates to pcf2127_wdt_ping(), i.e. writes
+ * wdd->timeout to the WD_VAL register, and returns its result.
+ */
+static int pcf2127_wdt_start(struct watchdog_device *wdd)
+{
+       return pcf2127_wdt_ping(wdd);
+}
+
+/*
+ * Watchdog stop callback. Writes PCF2127_WD_VAL_STOP (0) to the WD_VAL
+ * register and returns the result of regmap_write().
+ */
+static int pcf2127_wdt_stop(struct watchdog_device *wdd)
+{
+       struct pcf2127 *pcf2127 = watchdog_get_drvdata(wdd);
+
+       return regmap_write(pcf2127->regmap, PCF2127_REG_WD_VAL,
+                           PCF2127_WD_VAL_STOP);
+}
+
+/*
+ * Watchdog set_timeout callback.
+ *
+ * Stores @new_timeout in wdd->timeout and, if the watchdog is currently
+ * active, writes it to the hardware through pcf2127_wdt_active_ping().
+ * Returns the result of pcf2127_wdt_active_ping().
+ */
+static int pcf2127_wdt_set_timeout(struct watchdog_device *wdd,
+                                  unsigned int new_timeout)
+{
+       /*
+        * new_timeout is unsigned int, so print with %u rather than %i.
+        * wdd->timeout is assumed to be unsigned as well (struct
+        * watchdog_device is declared outside this file).
+        */
+       dev_dbg(wdd->parent, "new watchdog timeout: %us (old: %us)\n",
+               new_timeout, wdd->timeout);
+
+       wdd->timeout = new_timeout;
+
+       return pcf2127_wdt_active_ping(wdd);
+}
+
+/*
+ * Identity string and option flags (WDIOF_KEEPALIVEPING, WDIOF_SETTIMEOUT)
+ * reported for this watchdog.
+ */
+static const struct watchdog_info pcf2127_wdt_info = {
+       .identity = "NXP PCF2127/PCF2129 Watchdog",
+       .options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT,
+};
+
+/*
+ * Watchdog callbacks. Both .start and .ping end up writing wdd->timeout to
+ * the WD_VAL register; .stop writes PCF2127_WD_VAL_STOP to it.
+ */
+static const struct watchdog_ops pcf2127_watchdog_ops = {
+       .owner = THIS_MODULE,
+       .start = pcf2127_wdt_start,
+       .stop = pcf2127_wdt_stop,
+       .ping = pcf2127_wdt_ping,
+       .set_timeout = pcf2127_wdt_set_timeout,
+};
+
+/* sysfs interface */
+
+/*
+ * sysfs store handler for the timestamp0 attribute.
+ *
+ * The written data (@buf) is ignored: any write clears the TSF1 flag in
+ * CTRL1 and the TSF2 flag in CTRL2, then restarts the watchdog if it is
+ * active.
+ *
+ * Returns @count on success, or the error returned by the failing regmap
+ * update or watchdog ping.
+ */
+static ssize_t timestamp0_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       struct pcf2127 *pcf2127 = dev_get_drvdata(dev->parent);
+       int ret;
+
+       ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL1,
+                                PCF2127_BIT_CTRL1_TSF1, 0);
+       if (ret) {
+               dev_err(dev, "%s: update ctrl1 ret=%d\n", __func__, ret);
+               return ret;
+       }
+
+       ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL2,
+                                PCF2127_BIT_CTRL2_TSF2, 0);
+       if (ret) {
+               dev_err(dev, "%s: update ctrl2 ret=%d\n", __func__, ret);
+               return ret;
+       }
+
+       /*
+        * Updating CTRL2 presumably involves reading it, which stops the
+        * watchdog; restart it if it was active.
+        */
+       ret = pcf2127_wdt_active_ping(&pcf2127->wdd);
+       if (ret)
+               return ret;
+
+       return count;
+}
+
+/*
+ * sysfs show handler for the timestamp0 attribute.
+ *
+ * Reads registers CTRL1 (0x00) through TS_YR (0x18) in one bulk read,
+ * restarts the watchdog if it is active (CTRL2 is part of the read), and
+ * converts the BCD timestamp registers to seconds via rtc_tm_to_time64().
+ *
+ * Returns 0 (empty output) when neither the TSF1 nor the TSF2 flag is set,
+ * the number of characters written to @buf on success, or an error from
+ * the register read, the watchdog ping or rtc_valid_tm().
+ */
+static ssize_t timestamp0_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       struct pcf2127 *pcf2127 = dev_get_drvdata(dev->parent);
+       struct rtc_time tm;
+       int ret;
+       /* One byte per register, indexed by register address 0x00..0x18. */
+       unsigned char data[25];
+
+       ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_CTRL1, data,
+                              sizeof(data));
+       if (ret) {
+               dev_err(dev, "%s: read error ret=%d\n", __func__, ret);
+               return ret;
+       }
+
+       dev_dbg(dev,
+               "%s: raw data is cr1=%02x, cr2=%02x, cr3=%02x, ts_sc=%02x, "
+               "ts_mn=%02x, ts_hr=%02x, ts_dm=%02x, ts_mo=%02x, ts_yr=%02x\n",
+               __func__, data[PCF2127_REG_CTRL1], data[PCF2127_REG_CTRL2],
+               data[PCF2127_REG_CTRL3], data[PCF2127_REG_TS_SC],
+               data[PCF2127_REG_TS_MN], data[PCF2127_REG_TS_HR],
+               data[PCF2127_REG_TS_DM], data[PCF2127_REG_TS_MO],
+               data[PCF2127_REG_TS_YR]);
+
+       /* The bulk read covered CTRL2, so restart the watchdog if active. */
+       ret = pcf2127_wdt_active_ping(&pcf2127->wdd);
+       if (ret)
+               return ret;
+
+       /* No timestamp flag set: report nothing. */
+       if (!(data[PCF2127_REG_CTRL1] & PCF2127_BIT_CTRL1_TSF1) &&
+           !(data[PCF2127_REG_CTRL2] & PCF2127_BIT_CTRL2_TSF2))
+               return 0;
+
+       tm.tm_sec = bcd2bin(data[PCF2127_REG_TS_SC] & 0x7F);
+       tm.tm_min = bcd2bin(data[PCF2127_REG_TS_MN] & 0x7F);
+       tm.tm_hour = bcd2bin(data[PCF2127_REG_TS_HR] & 0x3F);
+       tm.tm_mday = bcd2bin(data[PCF2127_REG_TS_DM] & 0x3F);
+       /* TS_MO register (month) value range: 1-12 */
+       tm.tm_mon = bcd2bin(data[PCF2127_REG_TS_MO] & 0x1F) - 1;
+       tm.tm_year = bcd2bin(data[PCF2127_REG_TS_YR]);
+       if (tm.tm_year < 70)
+               tm.tm_year += 100; /* assume we are in 1970...2069 */
+
+       ret = rtc_valid_tm(&tm);
+       if (ret)
+               return ret;
+
+       return sprintf(buf, "%llu\n",
+                      (unsigned long long)rtc_tm_to_time64(&tm));
+}
+
+/*
+ * Read/write "timestamp0" attribute; presumably expands to
+ * dev_attr_timestamp0 backed by timestamp0_show()/timestamp0_store().
+ */
+static DEVICE_ATTR_RW(timestamp0);
+
+/* NULL-terminated list of the driver's sysfs attributes. */
+static struct attribute *pcf2127_attrs[] = {
+       &dev_attr_timestamp0.attr,
+       NULL
+};
+
+/* Attribute group wrapping pcf2127_attrs. */
+static const struct attribute_group pcf2127_attr_group = {
+       .attrs  = pcf2127_attrs,
+};
+
 static int pcf2127_probe(struct device *dev, struct regmap *regmap,
                        const char *name, bool has_nvmem)
 {
@@ -237,11 +429,22 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
 
        dev_set_drvdata(dev, pcf2127);
 
-       pcf2127->rtc = devm_rtc_device_register(dev, name, &pcf2127_rtc_ops,
-                                               THIS_MODULE);
+       pcf2127->rtc = devm_rtc_allocate_device(dev);
        if (IS_ERR(pcf2127->rtc))
                return PTR_ERR(pcf2127->rtc);
 
+       pcf2127->rtc->ops = &pcf2127_rtc_ops;
+
+       pcf2127->wdd.parent = dev;
+       pcf2127->wdd.info = &pcf2127_wdt_info;
+       pcf2127->wdd.ops = &pcf2127_watchdog_ops;
+       pcf2127->wdd.min_timeout = PCF2127_WD_VAL_MIN;
+       pcf2127->wdd.max_timeout = PCF2127_WD_VAL_MAX;
+       pcf2127->wdd.timeout = PCF2127_WD_VAL_DEFAULT;
+       pcf2127->wdd.min_hw_heartbeat_ms = 500;
+
+       watchdog_set_drvdata(&pcf2127->wdd, pcf2127);
+
        if (has_nvmem) {
                struct nvmem_config nvmem_cfg = {
                        .priv = pcf2127,
@@ -253,7 +456,84 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
                ret = rtc_nvmem_register(pcf2127->rtc, &nvmem_cfg);
        }
 
-       return ret;
+       /*
+        * Watchdog timer enabled and reset pin /RST activated when timed out.
+        * Select 1Hz clock source for watchdog timer.
+        * Timer is not started until WD_VAL is loaded with a valid value.
+        * Note: Countdown timer disabled and not available.
+        */
+       ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_WD_CTL,
+                                PCF2127_BIT_WD_CTL_CD1 |
+                                PCF2127_BIT_WD_CTL_CD0 |
+                                PCF2127_BIT_WD_CTL_TF1 |
+                                PCF2127_BIT_WD_CTL_TF0,
+                                PCF2127_BIT_WD_CTL_CD1 |
+                                PCF2127_BIT_WD_CTL_CD0 |
+                                PCF2127_BIT_WD_CTL_TF1);
+       if (ret) {
+               dev_err(dev, "%s: watchdog config (wd_ctl) failed\n", __func__);
+               return ret;
+       }
+
+#ifdef CONFIG_WATCHDOG
+       ret = devm_watchdog_register_device(dev, &pcf2127->wdd);
+       if (ret)
+               return ret;
+#endif /* CONFIG_WATCHDOG */
+
+       /*
+        * Disable battery low/switch-over timestamp and interrupts.
+        * Clear battery interrupt flags which can block new trigger events.
+        * Note: This is the default chip behaviour but added to ensure
+        * correct tamper timestamp and interrupt function.
+        */
+       ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL3,
+                                PCF2127_BIT_CTRL3_BTSE |
+                                PCF2127_BIT_CTRL3_BF |
+                                PCF2127_BIT_CTRL3_BIE |
+                                PCF2127_BIT_CTRL3_BLIE, 0);
+       if (ret) {
+               dev_err(dev, "%s: interrupt config (ctrl3) failed\n",
+                       __func__);
+               return ret;
+       }
+
+       /*
+        * Enable timestamp function and store timestamp of first trigger
+        * event until TSF1 and TSF2 interrupt flags are cleared.
+        */
+       ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_TS_CTRL,
+                                PCF2127_BIT_TS_CTRL_TSOFF |
+                                PCF2127_BIT_TS_CTRL_TSM,
+                                PCF2127_BIT_TS_CTRL_TSM);
+       if (ret) {
+               dev_err(dev, "%s: tamper detection config (ts_ctrl) failed\n",
+                       __func__);
+               return ret;
+       }
+
+       /*
+        * Enable interrupt generation when TSF1 or TSF2 timestamp flags
+        * are set. Interrupt signal is an open-drain output and can be
+        * left floating if unused.
+        */
+       ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL2,
+                                PCF2127_BIT_CTRL2_TSIE,
+                                PCF2127_BIT_CTRL2_TSIE);
+       if (ret) {
+               dev_err(dev, "%s: tamper detection config (ctrl2) failed\n",
+                       __func__);
+               return ret;
+       }
+
+       ret = rtc_add_group(pcf2127->rtc, &pcf2127_attr_group);
+       if (ret) {
+               dev_err(dev, "%s: tamper sysfs registering failed\n",
+                       __func__);
+               return ret;
+       }
+
+       return rtc_register_device(pcf2127->rtc);
 }
 
 #ifdef CONFIG_OF
index a075e77..3450d61 100644 (file)
@@ -166,7 +166,12 @@ static int pcf85363_rtc_set_time(struct device *dev, struct rtc_time *tm)
        buf[DT_YEARS] = bin2bcd(tm->tm_year % 100);
 
        ret = regmap_bulk_write(pcf85363->regmap, CTRL_STOP_EN,
-                               tmp, sizeof(tmp));
+                               tmp, 2);
+       if (ret)
+               return ret;
+
+       ret = regmap_bulk_write(pcf85363->regmap, DT_100THS,
+                               buf, sizeof(tmp) - 2);
        if (ret)
                return ret;
 
index ac159d2..24baa47 100644 (file)
@@ -196,8 +196,9 @@ static irqreturn_t pcf8563_irq(int irq, void *dev_id)
  * In the routines that deal directly with the pcf8563 hardware, we use
  * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch.
  */
-static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int pcf8563_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+       struct i2c_client *client = to_i2c_client(dev);
        struct pcf8563 *pcf8563 = i2c_get_clientdata(client);
        unsigned char buf[9];
        int err;
@@ -228,9 +229,7 @@ static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
        tm->tm_mday = bcd2bin(buf[PCF8563_REG_DM] & 0x3F);
        tm->tm_wday = buf[PCF8563_REG_DW] & 0x07;
        tm->tm_mon = bcd2bin(buf[PCF8563_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
-       tm->tm_year = bcd2bin(buf[PCF8563_REG_YR]);
-       if (tm->tm_year < 70)
-               tm->tm_year += 100;     /* assume we are in 1970...2069 */
+       tm->tm_year = bcd2bin(buf[PCF8563_REG_YR]) + 100;
        /* detect the polarity heuristically. see note above. */
        pcf8563->c_polarity = (buf[PCF8563_REG_MO] & PCF8563_MO_C) ?
                (tm->tm_year >= 100) : (tm->tm_year < 100);
@@ -244,8 +243,9 @@ static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
        return 0;
 }
 
-static int pcf8563_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int pcf8563_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+       struct i2c_client *client = to_i2c_client(dev);
        struct pcf8563 *pcf8563 = i2c_get_clientdata(client);
        unsigned char buf[9];
 
@@ -266,7 +266,7 @@ static int pcf8563_set_datetime(struct i2c_client *client, struct rtc_time *tm)
        buf[PCF8563_REG_MO] = bin2bcd(tm->tm_mon + 1);
 
        /* year and century */
-       buf[PCF8563_REG_YR] = bin2bcd(tm->tm_year % 100);
+       buf[PCF8563_REG_YR] = bin2bcd(tm->tm_year - 100);
        if (pcf8563->c_polarity ? (tm->tm_year >= 100) : (tm->tm_year < 100))
                buf[PCF8563_REG_MO] |= PCF8563_MO_C;
 
@@ -299,8 +299,8 @@ static int pcf8563_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long
                 * because of the cached voltage_low value but do it
                 * anyway for consistency.
                 */
-               if (pcf8563_get_datetime(to_i2c_client(dev), &tm))
-                       pcf8563_set_datetime(to_i2c_client(dev), &tm);
+               if (pcf8563_rtc_read_time(dev, &tm))
+                       pcf8563_rtc_set_time(dev, &tm);
 
                /* Clear the cached value. */
                pcf8563->voltage_low = 0;
@@ -314,16 +314,6 @@ static int pcf8563_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long
 #define pcf8563_rtc_ioctl NULL
 #endif
 
-static int pcf8563_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-       return pcf8563_get_datetime(to_i2c_client(dev), tm);
-}
-
-static int pcf8563_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-       return pcf8563_set_datetime(to_i2c_client(dev), tm);
-}
-
 static int pcf8563_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *tm)
 {
        struct i2c_client *client = to_i2c_client(dev);
@@ -591,13 +581,17 @@ static int pcf8563_probe(struct i2c_client *client,
                return err;
        }
 
-       pcf8563->rtc = devm_rtc_device_register(&client->dev,
-                               pcf8563_driver.driver.name,
-                               &pcf8563_rtc_ops, THIS_MODULE);
-
+       pcf8563->rtc = devm_rtc_allocate_device(&client->dev);
        if (IS_ERR(pcf8563->rtc))
                return PTR_ERR(pcf8563->rtc);
 
+       pcf8563->rtc->ops = &pcf8563_rtc_ops;
+       /* the pcf8563 alarm only supports a minute accuracy */
+       pcf8563->rtc->uie_unsupported = 1;
+       pcf8563->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
+       pcf8563->rtc->range_max = RTC_TIMESTAMP_END_2099;
+       pcf8563->rtc->set_start_time = true;
+
        if (client->irq > 0) {
                err = devm_request_threaded_irq(&client->dev, client->irq,
                                NULL, pcf8563_irq,
@@ -608,17 +602,17 @@ static int pcf8563_probe(struct i2c_client *client,
                                                                client->irq);
                        return err;
                }
-
        }
 
+       err = rtc_register_device(pcf8563->rtc);
+       if (err)
+               return err;
+
 #ifdef CONFIG_COMMON_CLK
        /* register clk in common clk framework */
        pcf8563_clkout_register_clk(pcf8563);
 #endif
 
-       /* the pcf8563 alarm only supports a minute accuracy */
-       pcf8563->rtc->uie_unsupported = 1;
-
        return 0;
 }
 
@@ -632,6 +626,8 @@ MODULE_DEVICE_TABLE(i2c, pcf8563_id);
 #ifdef CONFIG_OF
 static const struct of_device_id pcf8563_of_match[] = {
        { .compatible = "nxp,pcf8563" },
+       { .compatible = "epson,rtc8564" },
+       { .compatible = "microcrystal,rv8564" },
        {}
 };
 MODULE_DEVICE_TABLE(of, pcf8563_of_match);
index 1c4de6e..17653ed 100644 (file)
@@ -308,10 +308,8 @@ static int pic32_rtc_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, pdata);
 
        pdata->alarm_irq = platform_get_irq(pdev, 0);
-       if (pdata->alarm_irq < 0) {
-               dev_err(&pdev->dev, "no irq for alarm\n");
+       if (pdata->alarm_irq < 0)
                return pdata->alarm_irq;
-       }
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        pdata->reg_base = devm_ioremap_resource(&pdev->dev, res);
index 9f9839c..f5a30e0 100644 (file)
@@ -468,10 +468,8 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev)
        }
 
        rtc_dd->rtc_alarm_irq = platform_get_irq(pdev, 0);
-       if (rtc_dd->rtc_alarm_irq < 0) {
-               dev_err(&pdev->dev, "Alarm IRQ resource absent!\n");
+       if (rtc_dd->rtc_alarm_irq < 0)
                return -ENXIO;
-       }
 
        rtc_dd->allow_set_time = of_property_read_bool(pdev->dev.of_node,
                                                      "allow-set-time");
index 63b9e73..89ff713 100644 (file)
@@ -186,16 +186,12 @@ static int puv3_rtc_probe(struct platform_device *pdev)
 
        /* find the IRQs */
        puv3_rtc_tickno = platform_get_irq(pdev, 1);
-       if (puv3_rtc_tickno < 0) {
-               dev_err(&pdev->dev, "no irq for rtc tick\n");
+       if (puv3_rtc_tickno < 0)
                return -ENOENT;
-       }
 
        puv3_rtc_alarmno = platform_get_irq(pdev, 0);
-       if (puv3_rtc_alarmno < 0) {
-               dev_err(&pdev->dev, "no irq for alarm\n");
+       if (puv3_rtc_alarmno < 0)
                return -ENOENT;
-       }
 
        dev_dbg(&pdev->dev, "PKUnity_rtc: tick irq %d, alarm irq %d\n",
                 puv3_rtc_tickno, puv3_rtc_alarmno);
@@ -239,10 +235,8 @@ static int puv3_rtc_probe(struct platform_device *pdev)
        /* register RTC and exit */
        rtc->ops = &puv3_rtcops;
        ret = rtc_register_device(rtc);
-       if (ret) {
-               dev_err(&pdev->dev, "cannot attach rtc\n");
+       if (ret)
                goto err_nortc;
-       }
 
        /* platform setup code should have handled this; sigh */
        if (!device_can_wakeup(&pdev->dev))
index a7827fe..d2f1d8f 100644 (file)
@@ -324,15 +324,11 @@ static int __init pxa_rtc_probe(struct platform_device *pdev)
        }
 
        sa1100_rtc->irq_1hz = platform_get_irq(pdev, 0);
-       if (sa1100_rtc->irq_1hz < 0) {
-               dev_err(dev, "No 1Hz IRQ resource defined\n");
+       if (sa1100_rtc->irq_1hz < 0)
                return -ENXIO;
-       }
        sa1100_rtc->irq_alarm = platform_get_irq(pdev, 1);
-       if (sa1100_rtc->irq_alarm < 0) {
-               dev_err(dev, "No alarm IRQ resource defined\n");
+       if (sa1100_rtc->irq_alarm < 0)
                return -ENXIO;
-       }
 
        pxa_rtc->base = devm_ioremap(dev, pxa_rtc->ress->start,
                                resource_size(pxa_rtc->ress));
index c34540b..c0334c6 100644 (file)
@@ -434,12 +434,8 @@ static int rk808_rtc_probe(struct platform_device *pdev)
        rk808_rtc->rtc->ops = &rk808_rtc_ops;
 
        rk808_rtc->irq = platform_get_irq(pdev, 0);
-       if (rk808_rtc->irq < 0) {
-               if (rk808_rtc->irq != -EPROBE_DEFER)
-                       dev_err(&pdev->dev, "Wake up is not possible as irq = %d\n",
-                               rk808_rtc->irq);
+       if (rk808_rtc->irq < 0)
                return rk808_rtc->irq;
-       }
 
        /* request alarm irq of rk808 */
        ret = devm_request_threaded_irq(&pdev->dev, rk808_rtc->irq, NULL,
index 06884eb..2b31666 100644 (file)
@@ -639,9 +639,8 @@ static int rv3028_probe(struct i2c_client *client)
                dev_warn(&client->dev, "An alarm may have been missed.\n");
 
        rv3028->rtc = devm_rtc_allocate_device(&client->dev);
-       if (IS_ERR(rv3028->rtc)) {
+       if (IS_ERR(rv3028->rtc))
                return PTR_ERR(rv3028->rtc);
-       }
 
        if (client->irq > 0) {
                ret = devm_request_threaded_irq(&client->dev, client->irq,
index 4a0e8ec..4cdf658 100644 (file)
@@ -278,13 +278,13 @@ static int rv3029_eeprom_read(struct device *dev, u8 reg,
 static int rv3029_eeprom_write(struct device *dev, u8 reg,
                               u8 const buf[], size_t len)
 {
-       int ret;
+       int ret, err;
        size_t i;
        u8 tmp;
 
-       ret = rv3029_eeprom_enter(dev);
-       if (ret < 0)
-               return ret;
+       err = rv3029_eeprom_enter(dev);
+       if (err < 0)
+               return err;
 
        for (i = 0; i < len; i++, reg++) {
                ret = rv3029_read_regs(dev, reg, &tmp, 1);
@@ -300,11 +300,11 @@ static int rv3029_eeprom_write(struct device *dev, u8 reg,
                        break;
        }
 
-       ret = rv3029_eeprom_exit(dev);
-       if (ret < 0)
-               return ret;
+       err = rv3029_eeprom_exit(dev);
+       if (err < 0)
+               return err;
 
-       return 0;
+       return ret;
 }
 
 static int rv3029_eeprom_update_bits(struct device *dev,
index fc52434..4960f0a 100644 (file)
@@ -564,9 +564,8 @@ static int rv8803_probe(struct i2c_client *client,
                dev_warn(&client->dev, "An alarm maybe have been missed.\n");
 
        rv8803->rtc = devm_rtc_allocate_device(&client->dev);
-       if (IS_ERR(rv8803->rtc)) {
+       if (IS_ERR(rv8803->rtc))
                return PTR_ERR(rv8803->rtc);
-       }
 
        if (client->irq > 0) {
                err = devm_request_threaded_irq(&client->dev, client->irq,
index 84806ff..da34cfd 100644 (file)
@@ -434,37 +434,32 @@ static int s35390a_probe(struct i2c_client *client,
        char buf, status1;
        struct device *dev = &client->dev;
 
-       if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
-               err = -ENODEV;
-               goto exit;
-       }
+       if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+               return -ENODEV;
 
        s35390a = devm_kzalloc(dev, sizeof(struct s35390a), GFP_KERNEL);
-       if (!s35390a) {
-               err = -ENOMEM;
-               goto exit;
-       }
+       if (!s35390a)
+               return -ENOMEM;
 
        s35390a->client[0] = client;
        i2c_set_clientdata(client, s35390a);
 
        /* This chip uses multiple addresses, use dummy devices for them */
        for (i = 1; i < 8; ++i) {
-               s35390a->client[i] = i2c_new_dummy(client->adapter,
-                                       client->addr + i);
-               if (!s35390a->client[i]) {
+               s35390a->client[i] = devm_i2c_new_dummy_device(dev,
+                                                              client->adapter,
+                                                              client->addr + i);
+               if (IS_ERR(s35390a->client[i])) {
                        dev_err(dev, "Address %02x unavailable\n",
                                client->addr + i);
-                       err = -EBUSY;
-                       goto exit_dummy;
+                       return PTR_ERR(s35390a->client[i]);
                }
        }
 
        err_read = s35390a_read_status(s35390a, &status1);
        if (err_read < 0) {
-               err = err_read;
                dev_err(dev, "error resetting chip\n");
-               goto exit_dummy;
+               return err_read;
        }
 
        if (status1 & S35390A_FLAG_24H)
@@ -478,13 +473,13 @@ static int s35390a_probe(struct i2c_client *client,
                err = s35390a_set_reg(s35390a, S35390A_CMD_STATUS2, &buf, 1);
                if (err < 0) {
                        dev_err(dev, "error disabling alarm");
-                       goto exit_dummy;
+                       return err;
                }
        } else {
                err = s35390a_disable_test_mode(s35390a);
                if (err < 0) {
                        dev_err(dev, "error disabling test mode\n");
-                       goto exit_dummy;
+                       return err;
                }
        }
 
@@ -493,10 +488,8 @@ static int s35390a_probe(struct i2c_client *client,
        s35390a->rtc = devm_rtc_device_register(dev, s35390a_driver.driver.name,
                                                &s35390a_rtc_ops, THIS_MODULE);
 
-       if (IS_ERR(s35390a->rtc)) {
-               err = PTR_ERR(s35390a->rtc);
-               goto exit_dummy;
-       }
+       if (IS_ERR(s35390a->rtc))
+               return PTR_ERR(s35390a->rtc);
 
        /* supports per-minute alarms only, therefore set uie_unsupported */
        s35390a->rtc->uie_unsupported = 1;
@@ -505,26 +498,6 @@ static int s35390a_probe(struct i2c_client *client,
                rtc_update_irq(s35390a->rtc, 1, RTC_AF);
 
        return 0;
-
-exit_dummy:
-       for (i = 1; i < 8; ++i)
-               if (s35390a->client[i])
-                       i2c_unregister_device(s35390a->client[i]);
-
-exit:
-       return err;
-}
-
-static int s35390a_remove(struct i2c_client *client)
-{
-       unsigned int i;
-       struct s35390a *s35390a = i2c_get_clientdata(client);
-
-       for (i = 1; i < 8; ++i)
-               if (s35390a->client[i])
-                       i2c_unregister_device(s35390a->client[i]);
-
-       return 0;
 }
 
 static struct i2c_driver s35390a_driver = {
@@ -533,7 +506,6 @@ static struct i2c_driver s35390a_driver = {
                .of_match_table = of_match_ptr(s35390a_of_match),
        },
        .probe          = s35390a_probe,
-       .remove         = s35390a_remove,
        .id_table       = s35390a_id,
 };
 
index 74bf647..7801249 100644 (file)
@@ -453,10 +453,8 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 
        /* find the IRQs */
        info->irq_tick = platform_get_irq(pdev, 1);
-       if (info->irq_tick < 0) {
-               dev_err(&pdev->dev, "no irq for rtc tick\n");
+       if (info->irq_tick < 0)
                return info->irq_tick;
-       }
 
        info->dev = &pdev->dev;
        info->data = of_device_get_match_data(&pdev->dev);
@@ -470,10 +468,8 @@ static int s3c_rtc_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, info);
 
        info->irq_alarm = platform_get_irq(pdev, 0);
-       if (info->irq_alarm < 0) {
-               dev_err(&pdev->dev, "no irq for alarm\n");
+       if (info->irq_alarm < 0)
                return info->irq_alarm;
-       }
 
        dev_dbg(&pdev->dev, "s3c2410_rtc: tick irq %d, alarm irq %d\n",
                info->irq_tick, info->irq_alarm);
index c7f1bf8..eb9dde4 100644 (file)
@@ -760,10 +760,10 @@ static int s5m_rtc_probe(struct platform_device *pdev)
                return -ENODEV;
        }
 
-       info->i2c = i2c_new_dummy(s5m87xx->i2c->adapter, RTC_I2C_ADDR);
-       if (!info->i2c) {
+       info->i2c = i2c_new_dummy_device(s5m87xx->i2c->adapter, RTC_I2C_ADDR);
+       if (IS_ERR(info->i2c)) {
                dev_err(&pdev->dev, "Failed to allocate I2C for RTC\n");
-               return -ENODEV;
+               return PTR_ERR(info->i2c);
        }
 
        info->regmap = devm_regmap_init_i2c(info->i2c, regmap_cfg);
index b4eb3b3..b956768 100644 (file)
@@ -138,7 +138,7 @@ static int sprd_rtc_lock_alarm(struct sprd_rtc *rtc, bool lock)
        if (ret)
                return ret;
 
-       val &= ~(SPRD_RTC_ALMLOCK_MASK | SPRD_RTC_POWEROFF_ALM_FLAG);
+       val &= ~SPRD_RTC_ALMLOCK_MASK;
        if (lock)
                val |= SPRD_RTC_ALM_LOCK;
        else
@@ -614,10 +614,8 @@ static int sprd_rtc_probe(struct platform_device *pdev)
        }
 
        rtc->irq = platform_get_irq(pdev, 0);
-       if (rtc->irq < 0) {
-               dev_err(&pdev->dev, "failed to get RTC irq number\n");
+       if (rtc->irq < 0)
                return rtc->irq;
-       }
 
        rtc->rtc = devm_rtc_allocate_device(&pdev->dev);
        if (IS_ERR(rtc->rtc))
@@ -656,7 +654,6 @@ static int sprd_rtc_probe(struct platform_device *pdev)
        rtc->rtc->range_max = 5662310399LL;
        ret = rtc_register_device(rtc->rtc);
        if (ret) {
-               dev_err(&pdev->dev, "failed to register rtc device\n");
                device_init_wakeup(&pdev->dev, 0);
                return ret;
        }
index 42cb90d..a7aa943 100644 (file)
@@ -193,10 +193,8 @@ static int sd3078_probe(struct i2c_client *client,
        sd3078->rtc->range_max = RTC_TIMESTAMP_END_2099;
 
        ret = rtc_register_device(sd3078->rtc);
-       if (ret) {
-               dev_err(&client->dev, "failed to register rtc device\n");
+       if (ret)
                return ret;
-       }
 
        sd3078_enable_reg_write(sd3078);
 
index 7ee673a..757f4da 100644 (file)
@@ -151,7 +151,7 @@ static int snvs_rtc_read_time(struct device *dev, struct rtc_time *tm)
        struct snvs_rtc_data *data = dev_get_drvdata(dev);
        unsigned long time = rtc_read_lp_counter(data);
 
-       rtc_time_to_tm(time, tm);
+       rtc_time64_to_tm(time, tm);
 
        return 0;
 }
@@ -159,11 +159,9 @@ static int snvs_rtc_read_time(struct device *dev, struct rtc_time *tm)
 static int snvs_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
        struct snvs_rtc_data *data = dev_get_drvdata(dev);
-       unsigned long time;
+       unsigned long time = rtc_tm_to_time64(tm);
        int ret;
 
-       rtc_tm_to_time(tm, &time);
-
        /* Disable RTC first */
        ret = snvs_rtc_enable(data, false);
        if (ret)
@@ -185,7 +183,7 @@ static int snvs_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        u32 lptar, lpsr;
 
        regmap_read(data->regmap, data->offset + SNVS_LPTAR, &lptar);
-       rtc_time_to_tm(lptar, &alrm->time);
+       rtc_time64_to_tm(lptar, &alrm->time);
 
        regmap_read(data->regmap, data->offset + SNVS_LPSR, &lpsr);
        alrm->pending = (lpsr & SNVS_LPSR_LPTA) ? 1 : 0;
@@ -207,12 +205,9 @@ static int snvs_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
 static int snvs_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
        struct snvs_rtc_data *data = dev_get_drvdata(dev);
-       struct rtc_time *alrm_tm = &alrm->time;
-       unsigned long time;
+       unsigned long time = rtc_tm_to_time64(&alrm->time);
        int ret;
 
-       rtc_tm_to_time(alrm_tm, &time);
-
        regmap_update_bits(data->regmap, data->offset + SNVS_LPCR, SNVS_LPCR_LPTA_EN, 0);
        ret = rtc_write_sync_lp(data);
        if (ret)
@@ -279,6 +274,10 @@ static int snvs_rtc_probe(struct platform_device *pdev)
        if (!data)
                return -ENOMEM;
 
+       data->rtc = devm_rtc_allocate_device(&pdev->dev);
+       if (IS_ERR(data->rtc))
+               return PTR_ERR(data->rtc);
+
        data->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "regmap");
 
        if (IS_ERR(data->regmap)) {
@@ -343,10 +342,10 @@ static int snvs_rtc_probe(struct platform_device *pdev)
                goto error_rtc_device_register;
        }
 
-       data->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-                                       &snvs_rtc_ops, THIS_MODULE);
-       if (IS_ERR(data->rtc)) {
-               ret = PTR_ERR(data->rtc);
+       data->rtc->ops = &snvs_rtc_ops;
+       data->rtc->range_max = U32_MAX;
+       ret = rtc_register_device(data->rtc);
+       if (ret) {
                dev_err(&pdev->dev, "failed to register rtc: %d\n", ret);
                goto error_rtc_device_register;
        }
index 0567944..9f23b24 100644 (file)
@@ -358,10 +358,8 @@ static int spear_rtc_probe(struct platform_device *pdev)
 
        /* alarm irqs */
        irq = platform_get_irq(pdev, 0);
-       if (irq < 0) {
-               dev_err(&pdev->dev, "no update irq?\n");
+       if (irq < 0)
                return irq;
-       }
 
        status = devm_request_irq(&pdev->dev, irq, spear_rtc_irq, 0, pdev->name,
                        config);
index 773a199..2999e33 100644 (file)
@@ -776,7 +776,6 @@ static int stm32_rtc_probe(struct platform_device *pdev)
 
        rtc->irq_alarm = platform_get_irq(pdev, 0);
        if (rtc->irq_alarm <= 0) {
-               dev_err(&pdev->dev, "no alarm irq\n");
                ret = rtc->irq_alarm;
                goto err;
        }
index c0e75c3..5e2bd9f 100644 (file)
 /* Control register */
 #define SUN6I_LOSC_CTRL                                0x0000
 #define SUN6I_LOSC_CTRL_KEY                    (0x16aa << 16)
+#define SUN6I_LOSC_CTRL_AUTO_SWT_BYPASS                BIT(15)
 #define SUN6I_LOSC_CTRL_ALM_DHMS_ACC           BIT(9)
 #define SUN6I_LOSC_CTRL_RTC_HMS_ACC            BIT(8)
 #define SUN6I_LOSC_CTRL_RTC_YMD_ACC            BIT(7)
+#define SUN6I_LOSC_CTRL_EXT_LOSC_EN            BIT(4)
 #define SUN6I_LOSC_CTRL_EXT_OSC                        BIT(0)
 #define SUN6I_LOSC_CTRL_ACC_MASK               GENMASK(9, 7)
 
@@ -128,6 +130,8 @@ struct sun6i_rtc_clk_data {
        unsigned int has_prescaler : 1;
        unsigned int has_out_clk : 1;
        unsigned int export_iosc : 1;
+       unsigned int has_losc_en : 1;
+       unsigned int has_auto_swt : 1;
 };
 
 struct sun6i_rtc_dev {
@@ -190,6 +194,10 @@ static int sun6i_rtc_osc_set_parent(struct clk_hw *hw, u8 index)
        val &= ~SUN6I_LOSC_CTRL_EXT_OSC;
        val |= SUN6I_LOSC_CTRL_KEY;
        val |= index ? SUN6I_LOSC_CTRL_EXT_OSC : 0;
+       if (rtc->data->has_losc_en) {
+               val &= ~SUN6I_LOSC_CTRL_EXT_LOSC_EN;
+               val |= index ? SUN6I_LOSC_CTRL_EXT_LOSC_EN : 0;
+       }
        writel(val, rtc->base + SUN6I_LOSC_CTRL);
        spin_unlock_irqrestore(&rtc->lock, flags);
 
@@ -215,6 +223,7 @@ static void __init sun6i_rtc_clk_init(struct device_node *node,
        const char *iosc_name = "rtc-int-osc";
        const char *clkout_name = "osc32k-out";
        const char *parents[2];
+       u32 reg;
 
        rtc = kzalloc(sizeof(*rtc), GFP_KERNEL);
        if (!rtc)
@@ -235,9 +244,18 @@ static void __init sun6i_rtc_clk_init(struct device_node *node,
                goto err;
        }
 
+       reg = SUN6I_LOSC_CTRL_KEY;
+       if (rtc->data->has_auto_swt) {
+               /* Bypass auto-switch to int osc, on ext losc failure */
+               reg |= SUN6I_LOSC_CTRL_AUTO_SWT_BYPASS;
+               writel(reg, rtc->base + SUN6I_LOSC_CTRL);
+       }
+
        /* Switch to the external, more precise, oscillator */
-       writel(SUN6I_LOSC_CTRL_KEY | SUN6I_LOSC_CTRL_EXT_OSC,
-              rtc->base + SUN6I_LOSC_CTRL);
+       reg |= SUN6I_LOSC_CTRL_EXT_OSC;
+       if (rtc->data->has_losc_en)
+               reg |= SUN6I_LOSC_CTRL_EXT_LOSC_EN;
+       writel(reg, rtc->base + SUN6I_LOSC_CTRL);
 
        /* Yes, I know, this is ugly. */
        sun6i_rtc = rtc;
@@ -279,7 +297,7 @@ static void __init sun6i_rtc_clk_init(struct device_node *node,
 
        of_property_read_string_index(node, "clock-output-names", 1,
                                      &clkout_name);
-       rtc->ext_losc = clk_register_gate(NULL, clkout_name, rtc->hw.init->name,
+       rtc->ext_losc = clk_register_gate(NULL, clkout_name, init.name,
                                          0, rtc->base + SUN6I_LOSC_OUT_GATING,
                                          SUN6I_LOSC_OUT_GATING_EN_OFFSET, 0,
                                          &rtc->lock);
@@ -345,6 +363,23 @@ CLK_OF_DECLARE_DRIVER(sun8i_h3_rtc_clk, "allwinner,sun8i-h3-rtc",
 CLK_OF_DECLARE_DRIVER(sun50i_h5_rtc_clk, "allwinner,sun50i-h5-rtc",
                      sun8i_h3_rtc_clk_init);
 
+static const struct sun6i_rtc_clk_data sun50i_h6_rtc_data = {
+       .rc_osc_rate = 16000000,
+       .fixed_prescaler = 32,
+       .has_prescaler = 1,
+       .has_out_clk = 1,
+       .export_iosc = 1,
+       .has_losc_en = 1,
+       .has_auto_swt = 1,
+};
+
+static void __init sun50i_h6_rtc_clk_init(struct device_node *node)
+{
+       sun6i_rtc_clk_init(node, &sun50i_h6_rtc_data);
+}
+CLK_OF_DECLARE_DRIVER(sun50i_h6_rtc_clk, "allwinner,sun50i-h6-rtc",
+                     sun50i_h6_rtc_clk_init);
+
 static const struct sun6i_rtc_clk_data sun8i_v3_rtc_data = {
        .rc_osc_rate = 32000,
        .has_out_clk = 1,
@@ -598,6 +633,33 @@ static const struct rtc_class_ops sun6i_rtc_ops = {
        .alarm_irq_enable       = sun6i_rtc_alarm_irq_enable
 };
 
+#ifdef CONFIG_PM_SLEEP
+/* Enable IRQ wake on suspend, to wake up from RTC. */
+static int sun6i_rtc_suspend(struct device *dev)
+{
+       struct sun6i_rtc_dev *chip = dev_get_drvdata(dev);
+
+       if (device_may_wakeup(dev))
+               enable_irq_wake(chip->irq);
+
+       return 0;
+}
+
+/* Disable IRQ wake on resume. */
+static int sun6i_rtc_resume(struct device *dev)
+{
+       struct sun6i_rtc_dev *chip = dev_get_drvdata(dev);
+
+       if (device_may_wakeup(dev))
+               disable_irq_wake(chip->irq);
+
+       return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(sun6i_rtc_pm_ops,
+       sun6i_rtc_suspend, sun6i_rtc_resume);
+
 static int sun6i_rtc_probe(struct platform_device *pdev)
 {
        struct sun6i_rtc_dev *chip = sun6i_rtc;
@@ -610,10 +672,8 @@ static int sun6i_rtc_probe(struct platform_device *pdev)
        chip->dev = &pdev->dev;
 
        chip->irq = platform_get_irq(pdev, 0);
-       if (chip->irq < 0) {
-               dev_err(&pdev->dev, "No IRQ resource\n");
+       if (chip->irq < 0)
                return chip->irq;
-       }
 
        ret = devm_request_irq(&pdev->dev, chip->irq, sun6i_rtc_alarmirq,
                               0, dev_name(&pdev->dev), chip);
@@ -650,6 +710,8 @@ static int sun6i_rtc_probe(struct platform_device *pdev)
 
        clk_prepare_enable(chip->losc);
 
+       device_init_wakeup(&pdev->dev, 1);
+
        chip->rtc = devm_rtc_device_register(&pdev->dev, "rtc-sun6i",
                                             &sun6i_rtc_ops, THIS_MODULE);
        if (IS_ERR(chip->rtc)) {
@@ -675,6 +737,7 @@ static const struct of_device_id sun6i_rtc_dt_ids[] = {
        { .compatible = "allwinner,sun8i-r40-rtc" },
        { .compatible = "allwinner,sun8i-v3-rtc" },
        { .compatible = "allwinner,sun50i-h5-rtc" },
+       { .compatible = "allwinner,sun50i-h6-rtc" },
        { /* sentinel */ },
 };
 MODULE_DEVICE_TABLE(of, sun6i_rtc_dt_ids);
@@ -684,6 +747,7 @@ static struct platform_driver sun6i_rtc_driver = {
        .driver         = {
                .name           = "sun6i-rtc",
                .of_match_table = sun6i_rtc_dt_ids,
+               .pm = &sun6i_rtc_pm_ops,
        },
 };
 builtin_platform_driver(sun6i_rtc_driver);
index 6eeabb8..9b6f248 100644 (file)
@@ -442,10 +442,8 @@ static int sunxi_rtc_probe(struct platform_device *pdev)
                return PTR_ERR(chip->base);
 
        chip->irq = platform_get_irq(pdev, 0);
-       if (chip->irq < 0) {
-               dev_err(&pdev->dev, "No IRQ resource\n");
+       if (chip->irq < 0)
                return chip->irq;
-       }
        ret = devm_request_irq(&pdev->dev, chip->irq, sunxi_rtc_alarmirq,
                        0, dev_name(&pdev->dev), chip);
        if (ret) {
@@ -474,15 +472,7 @@ static int sunxi_rtc_probe(struct platform_device *pdev)
 
        chip->rtc->ops = &sunxi_rtc_ops;
 
-       ret = rtc_register_device(chip->rtc);
-       if (ret) {
-               dev_err(&pdev->dev, "unable to register device\n");
-               return ret;
-       }
-
-       dev_info(&pdev->dev, "RTC enabled\n");
-
-       return 0;
+       return rtc_register_device(chip->rtc);
 }
 
 static struct platform_driver sunxi_rtc_driver = {
index 8fa1b3f..69d695b 100644 (file)
@@ -290,10 +290,8 @@ static int tegra_rtc_probe(struct platform_device *pdev)
                return PTR_ERR(info->base);
 
        ret = platform_get_irq(pdev, 0);
-       if (ret <= 0) {
-               dev_err(&pdev->dev, "failed to get platform IRQ: %d\n", ret);
+       if (ret <= 0)
                return ret;
-       }
 
        info->irq = ret;
 
@@ -334,10 +332,8 @@ static int tegra_rtc_probe(struct platform_device *pdev)
        }
 
        ret = rtc_register_device(info->rtc);
-       if (ret) {
-               dev_err(&pdev->dev, "failed to register device: %d\n", ret);
+       if (ret)
                goto disable_clk;
-       }
 
        dev_notice(&pdev->dev, "Tegra internal Real Time Clock\n");
 
index d6434e5..859d901 100644 (file)
@@ -259,7 +259,6 @@ static int tps6586x_rtc_probe(struct platform_device *pdev)
        rtc->rtc = devm_rtc_allocate_device(&pdev->dev);
        if (IS_ERR(rtc->rtc)) {
                ret = PTR_ERR(rtc->rtc);
-               dev_err(&pdev->dev, "RTC allocate device: ret %d\n", ret);
                goto fail_rtc_register;
        }
 
@@ -280,10 +279,8 @@ static int tps6586x_rtc_probe(struct platform_device *pdev)
        disable_irq(rtc->irq);
 
        ret = rtc_register_device(rtc->rtc);
-       if (ret) {
-               dev_err(&pdev->dev, "RTC device register: ret %d\n", ret);
+       if (ret)
                goto fail_rtc_register;
-       }
 
        return 0;
 
index 7078f6d..2c0467a 100644 (file)
@@ -425,13 +425,7 @@ static int tps65910_rtc_probe(struct platform_device *pdev)
        tps_rtc->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        tps_rtc->rtc->range_max = RTC_TIMESTAMP_END_2099;
 
-       ret = rtc_register_device(tps_rtc->rtc);
-       if (ret) {
-               dev_err(&pdev->dev, "RTC device register: err %d\n", ret);
-               return ret;
-       }
-
-       return 0;
+       return rtc_register_device(tps_rtc->rtc);
 }
 
 #ifdef CONFIG_PM_SLEEP
index f59d232..d5d14cf 100644 (file)
@@ -212,10 +212,8 @@ static int vt8500_rtc_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, vt8500_rtc);
 
        vt8500_rtc->irq_alarm = platform_get_irq(pdev, 0);
-       if (vt8500_rtc->irq_alarm < 0) {
-               dev_err(&pdev->dev, "No alarm IRQ resource defined\n");
+       if (vt8500_rtc->irq_alarm < 0)
                return vt8500_rtc->irq_alarm;
-       }
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        vt8500_rtc->regbase = devm_ioremap_resource(&pdev->dev, res);
index 9888383..9683fbf 100644 (file)
@@ -157,10 +157,8 @@ static int xgene_rtc_probe(struct platform_device *pdev)
                return PTR_ERR(pdata->rtc);
 
        irq = platform_get_irq(pdev, 0);
-       if (irq < 0) {
-               dev_err(&pdev->dev, "No IRQ resource\n");
+       if (irq < 0)
                return irq;
-       }
        ret = devm_request_irq(&pdev->dev, irq, xgene_rtc_interrupt, 0,
                               dev_name(&pdev->dev), pdata);
        if (ret) {
index 0063959..2c76275 100644 (file)
@@ -218,10 +218,8 @@ static int xlnx_rtc_probe(struct platform_device *pdev)
                return PTR_ERR(xrtcdev->reg_base);
 
        xrtcdev->alarm_irq = platform_get_irq_byname(pdev, "alarm");
-       if (xrtcdev->alarm_irq < 0) {
-               dev_err(&pdev->dev, "no irq resource\n");
+       if (xrtcdev->alarm_irq < 0)
                return xrtcdev->alarm_irq;
-       }
        ret = devm_request_irq(&pdev->dev, xrtcdev->alarm_irq,
                               xlnx_rtc_interrupt, 0,
                               dev_name(&pdev->dev), xrtcdev);
@@ -231,10 +229,8 @@ static int xlnx_rtc_probe(struct platform_device *pdev)
        }
 
        xrtcdev->sec_irq = platform_get_irq_byname(pdev, "sec");
-       if (xrtcdev->sec_irq < 0) {
-               dev_err(&pdev->dev, "no irq resource\n");
+       if (xrtcdev->sec_irq < 0)
                return xrtcdev->sec_irq;
-       }
        ret = devm_request_irq(&pdev->dev, xrtcdev->sec_irq,
                               xlnx_rtc_interrupt, 0,
                               dev_name(&pdev->dev), xrtcdev);
index 0005ec9..b42a937 100644 (file)
@@ -372,7 +372,7 @@ int ccwgroup_create_dev(struct device *parent, struct ccwgroup_driver *gdrv,
                goto error;
        }
        /* Check for trailing stuff. */
-       if (i == num_devices && strlen(buf) > 0) {
+       if (i == num_devices && buf && strlen(buf) > 0) {
                rc = -EINVAL;
                goto error;
        }
index 22c5581..1fbfb0a 100644 (file)
@@ -1388,6 +1388,8 @@ device_initcall(cio_settle_init);
 
 int sch_is_pseudo_sch(struct subchannel *sch)
 {
+       if (!sch->dev.parent)
+               return 0;
        return sch == to_css(sch->dev.parent)->pseudo_subchannel;
 }
 
index d722458..65841af 100644 (file)
@@ -124,9 +124,7 @@ EXPORT_SYMBOL(ccw_device_is_multipath);
 /**
  * ccw_device_clear() - terminate I/O request processing
  * @cdev: target ccw device
- * @intparm: interruption parameter; value is only used if no I/O is
- *          outstanding, otherwise the intparm associated with the I/O request
- *          is returned
+ * @intparm: interruption parameter to be returned upon conclusion of csch
  *
  * ccw_device_clear() calls csch on @cdev's subchannel.
  * Returns:
@@ -179,6 +177,9 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm)
  * completed during the time specified by @expires. If a timeout occurs, the
  * channel program is terminated via xsch, hsch or csch, and the device's
  * interrupt handler will be called with an irb containing ERR_PTR(-%ETIMEDOUT).
+ * The interruption handler will echo back the @intparm specified here, unless
+ * another interruption parameter is specified by a subsequent invocation of
+ * ccw_device_halt() or ccw_device_clear().
  * Returns:
  *  %0, if the operation was successful;
  *  -%EBUSY, if the device is busy, or status pending;
@@ -256,6 +257,9 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa,
  * Start a S/390 channel program. When the interrupt arrives, the
  * IRQ handler is called, either immediately, delayed (dev-end missing,
  * or sense required) or never (no IRQ handler registered).
+ * The interruption handler will echo back the @intparm specified here, unless
+ * another interruption parameter is specified by a subsequent invocation of
+ * ccw_device_halt() or ccw_device_clear().
  * Returns:
  *  %0, if the operation was successful;
  *  -%EBUSY, if the device is busy, or status pending;
@@ -287,6 +291,9 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
  * Start a S/390 channel program. When the interrupt arrives, the
  * IRQ handler is called, either immediately, delayed (dev-end missing,
  * or sense required) or never (no IRQ handler registered).
+ * The interruption handler will echo back the @intparm specified here, unless
+ * another interruption parameter is specified by a subsequent invocation of
+ * ccw_device_halt() or ccw_device_clear().
  * Returns:
  *  %0, if the operation was successful;
  *  -%EBUSY, if the device is busy, or status pending;
@@ -322,6 +329,9 @@ int ccw_device_start(struct ccw_device *cdev, struct ccw1 *cpa,
  * completed during the time specified by @expires. If a timeout occurs, the
  * channel program is terminated via xsch, hsch or csch, and the device's
  * interrupt handler will be called with an irb containing ERR_PTR(-%ETIMEDOUT).
+ * The interruption handler will echo back the @intparm specified here, unless
+ * another interruption parameter is specified by a subsequent invocation of
+ * ccw_device_halt() or ccw_device_clear().
  * Returns:
  *  %0, if the operation was successful;
  *  -%EBUSY, if the device is busy, or status pending;
@@ -343,11 +353,12 @@ int ccw_device_start_timeout(struct ccw_device *cdev, struct ccw1 *cpa,
 /**
  * ccw_device_halt() - halt I/O request processing
  * @cdev: target ccw device
- * @intparm: interruption parameter; value is only used if no I/O is
- *          outstanding, otherwise the intparm associated with the I/O request
- *          is returned
+ * @intparm: interruption parameter to be returned upon conclusion of hsch
  *
  * ccw_device_halt() calls hsch on @cdev's subchannel.
+ * The interruption handler will echo back the @intparm specified here, unless
+ * another interruption parameter is specified by a subsequent invocation of
+ * ccw_device_clear().
  * Returns:
  *  %0 on success,
  *  -%ENODEV on device not operational,
index a76b8a8..a191506 100644 (file)
@@ -1322,24 +1322,24 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func)
        /* < CEX2A is not supported */
        if (rawtype < AP_DEVICE_TYPE_CEX2A)
                return 0;
-       /* up to CEX6 known and fully supported */
-       if (rawtype <= AP_DEVICE_TYPE_CEX6)
+       /* up to CEX7 known and fully supported */
+       if (rawtype <= AP_DEVICE_TYPE_CEX7)
                return rawtype;
        /*
-        * unknown new type > CEX6, check for compatibility
+        * unknown new type > CEX7, check for compatibility
         * to the highest known and supported type which is
-        * currently CEX6 with the help of the QACT function.
+        * currently CEX7 with the help of the QACT function.
         */
        if (ap_qact_available()) {
                struct ap_queue_status status;
                union ap_qact_ap_info apinfo = {0};
 
                apinfo.mode = (func >> 26) & 0x07;
-               apinfo.cat = AP_DEVICE_TYPE_CEX6;
+               apinfo.cat = AP_DEVICE_TYPE_CEX7;
                status = ap_qact(qid, 0, &apinfo);
                if (status.response_code == AP_RESPONSE_NORMAL
                    && apinfo.cat >= AP_DEVICE_TYPE_CEX2A
-                   && apinfo.cat <= AP_DEVICE_TYPE_CEX6)
+                   && apinfo.cat <= AP_DEVICE_TYPE_CEX7)
                        comp_type = apinfo.cat;
        }
        if (!comp_type)
index 6f3cf37..433b7b6 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0+ */
 /*
- * Copyright IBM Corp. 2006, 2012
+ * Copyright IBM Corp. 2006, 2019
  * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
  *           Martin Schwidefsky <schwidefsky@de.ibm.com>
  *           Ralph Wuerthner <rwuerthn@de.ibm.com>
@@ -63,6 +63,7 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr)
 #define AP_DEVICE_TYPE_CEX4    10
 #define AP_DEVICE_TYPE_CEX5    11
 #define AP_DEVICE_TYPE_CEX6    12
+#define AP_DEVICE_TYPE_CEX7    13
 
 /*
  * Known function facilities
index f76a1d0..9de3d46 100644 (file)
@@ -1363,9 +1363,122 @@ static struct attribute_group ccadata_attr_group = {
        .bin_attrs = ccadata_attrs,
 };
 
+#define CCACIPHERTOKENSIZE     (sizeof(struct cipherkeytoken) + 80)
+
+/*
+ * Sysfs attribute read function for all secure key ccacipher binary attributes.
+ * The implementation can not deal with partial reads, because a new random
+ * secure key blob is generated with each read. In case of partial reads
+ * (i.e. off != 0 or count < key blob size) -EINVAL is returned.
+ */
+static ssize_t pkey_ccacipher_aes_attr_read(enum pkey_key_size keybits,
+                                           bool is_xts, char *buf, loff_t off,
+                                           size_t count)
+{
+       size_t keysize;
+       int rc;
+
+       if (off != 0 || count < CCACIPHERTOKENSIZE)
+               return -EINVAL;
+       if (is_xts)
+               if (count < 2 * CCACIPHERTOKENSIZE)
+                       return -EINVAL;
+
+       keysize = CCACIPHERTOKENSIZE;
+       rc = cca_gencipherkey(-1, -1, keybits, 0, buf, &keysize);
+       if (rc)
+               return rc;
+       memset(buf + keysize, 0, CCACIPHERTOKENSIZE - keysize);
+
+       if (is_xts) {
+               keysize = CCACIPHERTOKENSIZE;
+               rc = cca_gencipherkey(-1, -1, keybits, 0,
+                                     buf + CCACIPHERTOKENSIZE, &keysize);
+               if (rc)
+                       return rc;
+               memset(buf + CCACIPHERTOKENSIZE + keysize, 0,
+                      CCACIPHERTOKENSIZE - keysize);
+
+               return 2 * CCACIPHERTOKENSIZE;
+       }
+
+       return CCACIPHERTOKENSIZE;
+}
+
+static ssize_t ccacipher_aes_128_read(struct file *filp,
+                                     struct kobject *kobj,
+                                     struct bin_attribute *attr,
+                                     char *buf, loff_t off,
+                                     size_t count)
+{
+       return pkey_ccacipher_aes_attr_read(PKEY_SIZE_AES_128, false, buf,
+                                           off, count);
+}
+
+static ssize_t ccacipher_aes_192_read(struct file *filp,
+                                     struct kobject *kobj,
+                                     struct bin_attribute *attr,
+                                     char *buf, loff_t off,
+                                     size_t count)
+{
+       return pkey_ccacipher_aes_attr_read(PKEY_SIZE_AES_192, false, buf,
+                                           off, count);
+}
+
+static ssize_t ccacipher_aes_256_read(struct file *filp,
+                                     struct kobject *kobj,
+                                     struct bin_attribute *attr,
+                                     char *buf, loff_t off,
+                                     size_t count)
+{
+       return pkey_ccacipher_aes_attr_read(PKEY_SIZE_AES_256, false, buf,
+                                           off, count);
+}
+
+static ssize_t ccacipher_aes_128_xts_read(struct file *filp,
+                                         struct kobject *kobj,
+                                         struct bin_attribute *attr,
+                                         char *buf, loff_t off,
+                                         size_t count)
+{
+       return pkey_ccacipher_aes_attr_read(PKEY_SIZE_AES_128, true, buf,
+                                           off, count);
+}
+
+static ssize_t ccacipher_aes_256_xts_read(struct file *filp,
+                                         struct kobject *kobj,
+                                         struct bin_attribute *attr,
+                                         char *buf, loff_t off,
+                                         size_t count)
+{
+       return pkey_ccacipher_aes_attr_read(PKEY_SIZE_AES_256, true, buf,
+                                           off, count);
+}
+
+static BIN_ATTR_RO(ccacipher_aes_128, CCACIPHERTOKENSIZE);
+static BIN_ATTR_RO(ccacipher_aes_192, CCACIPHERTOKENSIZE);
+static BIN_ATTR_RO(ccacipher_aes_256, CCACIPHERTOKENSIZE);
+static BIN_ATTR_RO(ccacipher_aes_128_xts, 2 * CCACIPHERTOKENSIZE);
+static BIN_ATTR_RO(ccacipher_aes_256_xts, 2 * CCACIPHERTOKENSIZE);
+
+static struct bin_attribute *ccacipher_attrs[] = {
+       &bin_attr_ccacipher_aes_128,
+       &bin_attr_ccacipher_aes_192,
+       &bin_attr_ccacipher_aes_256,
+       &bin_attr_ccacipher_aes_128_xts,
+       &bin_attr_ccacipher_aes_256_xts,
+       NULL
+};
+
+static struct attribute_group ccacipher_attr_group = {
+       .name      = "ccacipher",
+       .bin_attrs = ccacipher_attrs,
+};
+
 static const struct attribute_group *pkey_attr_groups[] = {
        &protkey_attr_group,
        &ccadata_attr_group,
+       &ccacipher_attr_group,
        NULL,
 };
 
index 003662a..be2520c 100644 (file)
@@ -36,6 +36,8 @@ static struct ap_device_id ap_queue_ids[] = {
          .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
        { .dev_type = AP_DEVICE_TYPE_CEX6,
          .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
+       { .dev_type = AP_DEVICE_TYPE_CEX7,
+         .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
        { /* end of sibling */ },
 };
 
index 2d3f273..d464618 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0+ */
 /*
- *  Copyright IBM Corp. 2001, 2018
+ *  Copyright IBM Corp. 2001, 2019
  *  Author(s): Robert Burroughs
  *            Eric Rossman (edrossma@us.ibm.com)
  *            Cornelia Huck <cornelia.huck@de.ibm.com>
@@ -29,6 +29,7 @@
 #define ZCRYPT_CEX4           10
 #define ZCRYPT_CEX5           11
 #define ZCRYPT_CEX6           12
+#define ZCRYPT_CEX7           13
 
 /**
  * Large random numbers are pulled in 4096 byte chunks from the crypto cards
index f58d8de..442e3d6 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- *  Copyright IBM Corp. 2012
+ *  Copyright IBM Corp. 2012, 2019
  *  Author(s): Holger Dengler <hd@linux.vnet.ibm.com>
  */
 
@@ -38,8 +38,8 @@
 #define CEX4_CLEANUP_TIME      (900*HZ)
 
 MODULE_AUTHOR("IBM Corporation");
-MODULE_DESCRIPTION("CEX4/CEX5/CEX6 Cryptographic Card device driver, " \
-                  "Copyright IBM Corp. 2018");
+MODULE_DESCRIPTION("CEX4/CEX5/CEX6/CEX7 Cryptographic Card device driver, " \
+                  "Copyright IBM Corp. 2019");
 MODULE_LICENSE("GPL");
 
 static struct ap_device_id zcrypt_cex4_card_ids[] = {
@@ -49,6 +49,8 @@ static struct ap_device_id zcrypt_cex4_card_ids[] = {
          .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
        { .dev_type = AP_DEVICE_TYPE_CEX6,
          .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
+       { .dev_type = AP_DEVICE_TYPE_CEX7,
+         .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
        { /* end of list */ },
 };
 
@@ -61,6 +63,8 @@ static struct ap_device_id zcrypt_cex4_queue_ids[] = {
          .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
        { .dev_type = AP_DEVICE_TYPE_CEX6,
          .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
+       { .dev_type = AP_DEVICE_TYPE_CEX7,
+         .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
        { /* end of list */ },
 };
 
@@ -146,7 +150,7 @@ static const struct attribute_group cca_queue_attr_group = {
 };
 
 /**
- * Probe function for CEX4/CEX5/CEX6 card device. It always
+ * Probe function for CEX4/CEX5/CEX6/CEX7 card device. It always
  * accepts the AP device since the bus_match already checked
  * the hardware type.
  * @ap_dev: pointer to the AP device.
@@ -158,25 +162,31 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
         * MEX_1k, MEX_2k, MEX_4k, CRT_1k, CRT_2k, CRT_4k, RNG, SECKEY
         */
        static const int CEX4A_SPEED_IDX[] = {
-                14, 19, 249, 42, 228, 1458, 0, 0};
+                14,  19, 249, 42, 228, 1458, 0, 0};
        static const int CEX5A_SPEED_IDX[] = {
-                 8,  9,  20, 18,  66,  458, 0, 0};
+                 8,   9,  20, 18,  66,  458, 0, 0};
        static const int CEX6A_SPEED_IDX[] = {
-                 6,  9,  20, 17,  65,  438, 0, 0};
+                 6,   9,  20, 17,  65,  438, 0, 0};
+       static const int CEX7A_SPEED_IDX[] = {
+                 6,   8,  17, 15,  54,  362, 0, 0};
 
        static const int CEX4C_SPEED_IDX[] = {
                 59,  69, 308, 83, 278, 2204, 209, 40};
        static const int CEX5C_SPEED_IDX[] = {
-                24,  31,  50, 37,  90,  479,  27, 10};
+                24,  31,  50, 37,  90,  479,  27, 10};
        static const int CEX6C_SPEED_IDX[] = {
-                16,  20,  32, 27,  77,  455,  23,  9};
+                16,  20,  32, 27,  77,  455,  24,  9};
+       static const int CEX7C_SPEED_IDX[] = {
+                14,  16,  26, 23,  64,  376,  23,  8};
 
        static const int CEX4P_SPEED_IDX[] = {
-               224, 313, 3560, 359, 605, 2827, 0, 50};
+                 0,   0,   0,   0,   0,   0,   0,  50};
        static const int CEX5P_SPEED_IDX[] = {
-                63,  84,  156,  83, 142,  533, 0, 10};
+                 0,   0,   0,   0,   0,   0,   0,  10};
        static const int CEX6P_SPEED_IDX[] = {
-                55,  70,  121,  73, 129,  522, 0,  9};
+                 0,   0,   0,   0,   0,   0,   0,   9};
+       static const int CEX7P_SPEED_IDX[] = {
+                 0,   0,   0,   0,   0,   0,   0,   8};
 
        struct ap_card *ac = to_ap_card(&ap_dev->device);
        struct zcrypt_card *zc;
@@ -198,11 +208,19 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
                        zc->user_space_type = ZCRYPT_CEX5;
                        memcpy(zc->speed_rating, CEX5A_SPEED_IDX,
                               sizeof(CEX5A_SPEED_IDX));
-               } else {
+               } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX6) {
                        zc->type_string = "CEX6A";
                        zc->user_space_type = ZCRYPT_CEX6;
                        memcpy(zc->speed_rating, CEX6A_SPEED_IDX,
                               sizeof(CEX6A_SPEED_IDX));
+               } else {
+                       zc->type_string = "CEX7A";
+                       /* wrong user space type, just for compatibility
+                        * with the ZCRYPT_STATUS_MASK ioctl.
+                        */
+                       zc->user_space_type = ZCRYPT_CEX6;
+                       memcpy(zc->speed_rating, CEX7A_SPEED_IDX,
+                              sizeof(CEX7A_SPEED_IDX));
                }
                zc->min_mod_size = CEX4A_MIN_MOD_SIZE;
                if (ap_test_bit(&ac->functions, AP_FUNC_MEX4K) &&
@@ -232,7 +250,7 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
                        zc->user_space_type = ZCRYPT_CEX3C;
                        memcpy(zc->speed_rating, CEX5C_SPEED_IDX,
                               sizeof(CEX5C_SPEED_IDX));
-               } else {
+               } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX6) {
                        zc->type_string = "CEX6C";
                        /* wrong user space type, must be CEX6
                         * just keep it for cca compatibility
@@ -240,6 +258,14 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
                        zc->user_space_type = ZCRYPT_CEX3C;
                        memcpy(zc->speed_rating, CEX6C_SPEED_IDX,
                               sizeof(CEX6C_SPEED_IDX));
+               } else {
+                       zc->type_string = "CEX7C";
+                       /* wrong user space type, must be CEX7
+                        * just keep it for cca compatibility
+                        */
+                       zc->user_space_type = ZCRYPT_CEX3C;
+                       memcpy(zc->speed_rating, CEX7C_SPEED_IDX,
+                              sizeof(CEX7C_SPEED_IDX));
                }
                zc->min_mod_size = CEX4C_MIN_MOD_SIZE;
                zc->max_mod_size = CEX4C_MAX_MOD_SIZE;
@@ -255,11 +281,19 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
                        zc->user_space_type = ZCRYPT_CEX5;
                        memcpy(zc->speed_rating, CEX5P_SPEED_IDX,
                               sizeof(CEX5P_SPEED_IDX));
-               } else {
+               } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX6) {
                        zc->type_string = "CEX6P";
                        zc->user_space_type = ZCRYPT_CEX6;
                        memcpy(zc->speed_rating, CEX6P_SPEED_IDX,
                               sizeof(CEX6P_SPEED_IDX));
+               } else {
+                       zc->type_string = "CEX7P";
+                       /* wrong user space type, just for compatibility
+                        * with the ZCRYPT_STATUS_MASK ioctl.
+                        */
+                       zc->user_space_type = ZCRYPT_CEX6;
+                       memcpy(zc->speed_rating, CEX7P_SPEED_IDX,
+                              sizeof(CEX7P_SPEED_IDX));
                }
                zc->min_mod_size = CEX4C_MIN_MOD_SIZE;
                zc->max_mod_size = CEX4C_MAX_MOD_SIZE;
@@ -289,8 +323,8 @@ out:
 }
 
 /**
- * This is called to remove the CEX4/CEX5/CEX6 card driver information
- * if an AP card device is removed.
+ * This is called to remove the CEX4/CEX5/CEX6/CEX7 card driver
+ * information if an AP card device is removed.
  */
 static void zcrypt_cex4_card_remove(struct ap_device *ap_dev)
 {
@@ -311,7 +345,7 @@ static struct ap_driver zcrypt_cex4_card_driver = {
 };
 
 /**
- * Probe function for CEX4/CEX5/CEX6 queue device. It always
+ * Probe function for CEX4/CEX5/CEX6/CEX7 queue device. It always
  * accepts the AP device since the bus_match already checked
  * the hardware type.
  * @ap_dev: pointer to the AP device.
@@ -369,7 +403,7 @@ out:
 }
 
 /**
- * This is called to remove the CEX4/CEX5/CEX6 queue driver
+ * This is called to remove the CEX4/CEX5/CEX6/CEX7 queue driver
  * information if an AP queue device is removed.
  */
 static void zcrypt_cex4_queue_remove(struct ap_device *ap_dev)
index 644f7f5..4a85878 100644 (file)
@@ -27,7 +27,6 @@
 #include <linux/moduleparam.h>
 #include <linux/pci.h>
 #include <linux/aer.h>
-#include <linux/pci-aspm.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
index 261d8e4..f5781e3 100644 (file)
@@ -565,8 +565,7 @@ static void asd_destroy_ha_caches(struct asd_ha_struct *asd_ha)
        if (asd_ha->hw_prof.scb_ext)
                asd_free_coherent(asd_ha, asd_ha->hw_prof.scb_ext);
 
-       if (asd_ha->hw_prof.ddb_bitmap)
-               kfree(asd_ha->hw_prof.ddb_bitmap);
+       kfree(asd_ha->hw_prof.ddb_bitmap);
        asd_ha->hw_prof.ddb_bitmap = NULL;
 
        for (i = 0; i < ASD_MAX_PHYS; i++) {
@@ -641,12 +640,10 @@ Err:
 
 static void asd_destroy_global_caches(void)
 {
-       if (asd_dma_token_cache)
-               kmem_cache_destroy(asd_dma_token_cache);
+       kmem_cache_destroy(asd_dma_token_cache);
        asd_dma_token_cache = NULL;
 
-       if (asd_ascb_cache)
-               kmem_cache_destroy(asd_ascb_cache);
+       kmem_cache_destroy(asd_ascb_cache);
        asd_ascb_cache = NULL;
 }
 
index b2014cb..22f06be 100644 (file)
@@ -536,7 +536,7 @@ bfad_im_scsi_host_alloc(struct bfad_s *bfad, struct bfad_im_port_s *im_port,
                        struct device *dev)
 {
        struct bfad_im_port_pointer *im_portp;
-       int error = 1;
+       int error;
 
        mutex_lock(&bfad_mutex);
        error = idr_alloc(&bfad_im_port_index, im_port, 0, 0, GFP_KERNEL);
index 9ff9429..b4bfab5 100644 (file)
@@ -428,7 +428,6 @@ static int bnx2fc_rcv(struct sk_buff *skb, struct net_device *dev,
        struct fc_lport *lport;
        struct bnx2fc_interface *interface;
        struct fcoe_ctlr *ctlr;
-       struct fc_frame_header *fh;
        struct fcoe_rcv_info *fr;
        struct fcoe_percpu_s *bg;
        struct sk_buff *tmp_skb;
@@ -463,7 +462,6 @@ static int bnx2fc_rcv(struct sk_buff *skb, struct net_device *dev,
                goto err;
 
        skb_set_transport_header(skb, sizeof(struct fcoe_hdr));
-       fh = (struct fc_frame_header *) skb_transport_header(skb);
 
        fr = fcoe_dev_from_skb(skb);
        fr->fr_dev = lport;
index 747f019..f069e09 100644 (file)
@@ -633,7 +633,6 @@ static void bnx2fc_process_unsol_compl(struct bnx2fc_rport *tgt, u16 wqe)
        u16 xid;
        u32 frame_len, len;
        struct bnx2fc_cmd *io_req = NULL;
-       struct fcoe_task_ctx_entry *task, *task_page;
        struct bnx2fc_interface *interface = tgt->port->priv;
        struct bnx2fc_hba *hba = interface->hba;
        int task_idx, index;
@@ -711,9 +710,6 @@ static void bnx2fc_process_unsol_compl(struct bnx2fc_rport *tgt, u16 wqe)
 
                task_idx = xid / BNX2FC_TASKS_PER_PAGE;
                index = xid % BNX2FC_TASKS_PER_PAGE;
-               task_page = (struct fcoe_task_ctx_entry *)
-                                       hba->task_ctx[task_idx];
-               task = &(task_page[index]);
 
                io_req = (struct bnx2fc_cmd *)hba->cmd_mgr->cmds[xid];
                if (!io_req)
@@ -839,9 +835,6 @@ ret_err_rqe:
 
                task_idx = xid / BNX2FC_TASKS_PER_PAGE;
                index = xid % BNX2FC_TASKS_PER_PAGE;
-               task_page = (struct fcoe_task_ctx_entry *)
-                            interface->hba->task_ctx[task_idx];
-               task = &(task_page[index]);
                io_req = (struct bnx2fc_cmd *)hba->cmd_mgr->cmds[xid];
                if (!io_req)
                        goto ret_warn_rqe;
@@ -1122,7 +1115,6 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba,
                                        struct fcoe_kcqe *ofld_kcqe)
 {
        struct bnx2fc_rport             *tgt;
-       struct fcoe_port                *port;
        struct bnx2fc_interface         *interface;
        u32                             conn_id;
        u32                             context_id;
@@ -1136,7 +1128,6 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba,
        }
        BNX2FC_TGT_DBG(tgt, "Entered ofld compl - context_id = 0x%x\n",
                ofld_kcqe->fcoe_conn_context_id);
-       port = tgt->port;
        interface = tgt->port->priv;
        if (hba != interface->hba) {
                printk(KERN_ERR PFX "ERROR:ofld_cmpl: HBA mis-match\n");
@@ -1463,10 +1454,7 @@ void bnx2fc_init_seq_cleanup_task(struct bnx2fc_cmd *seq_clnp_req,
 {
        struct scsi_cmnd *sc_cmd = orig_io_req->sc_cmd;
        struct bnx2fc_rport *tgt = seq_clnp_req->tgt;
-       struct bnx2fc_interface *interface = tgt->port->priv;
        struct fcoe_bd_ctx *bd = orig_io_req->bd_tbl->bd_tbl;
-       struct fcoe_task_ctx_entry *orig_task;
-       struct fcoe_task_ctx_entry *task_page;
        struct fcoe_ext_mul_sges_ctx *sgl;
        u8 task_type = FCOE_TASK_TYPE_SEQUENCE_CLEANUP;
        u8 orig_task_type;
@@ -1528,10 +1516,6 @@ void bnx2fc_init_seq_cleanup_task(struct bnx2fc_cmd *seq_clnp_req,
                orig_task_idx = orig_xid / BNX2FC_TASKS_PER_PAGE;
                index = orig_xid % BNX2FC_TASKS_PER_PAGE;
 
-               task_page = (struct fcoe_task_ctx_entry *)
-                            interface->hba->task_ctx[orig_task_idx];
-               orig_task = &(task_page[index]);
-
                /* Multiple SGEs were used for this IO */
                sgl = &task->rxwr_only.union_ctx.read_info.sgl_ctx.sgl;
                sgl->mul_sgl.cur_sge_addr.lo = (u32)phys_addr;
index 9e50e5b..da00ca5 100644 (file)
@@ -930,7 +930,6 @@ abts_err:
 int bnx2fc_initiate_seq_cleanup(struct bnx2fc_cmd *orig_io_req, u32 offset,
                                enum fc_rctl r_ctl)
 {
-       struct fc_lport *lport;
        struct bnx2fc_rport *tgt = orig_io_req->tgt;
        struct bnx2fc_interface *interface;
        struct fcoe_port *port;
@@ -948,7 +947,6 @@ int bnx2fc_initiate_seq_cleanup(struct bnx2fc_cmd *orig_io_req, u32 offset,
 
        port = orig_io_req->port;
        interface = port->priv;
-       lport = port->lport;
 
        cb_arg = kzalloc(sizeof(struct bnx2fc_els_cb_arg), GFP_ATOMIC);
        if (!cb_arg) {
@@ -999,7 +997,6 @@ cleanup_err:
 
 int bnx2fc_initiate_cleanup(struct bnx2fc_cmd *io_req)
 {
-       struct fc_lport *lport;
        struct bnx2fc_rport *tgt = io_req->tgt;
        struct bnx2fc_interface *interface;
        struct fcoe_port *port;
@@ -1015,7 +1012,6 @@ int bnx2fc_initiate_cleanup(struct bnx2fc_cmd *io_req)
 
        port = io_req->port;
        interface = port->priv;
-       lport = port->lport;
 
        cleanup_io_req = bnx2fc_elstm_alloc(tgt, BNX2FC_CLEANUP);
        if (!cleanup_io_req) {
@@ -1927,8 +1923,6 @@ void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req,
        struct fcoe_fcp_rsp_payload *fcp_rsp;
        struct bnx2fc_rport *tgt = io_req->tgt;
        struct scsi_cmnd *sc_cmd;
-       struct Scsi_Host *host;
-
 
        /* scsi_cmd_cmpl is called with tgt lock held */
 
@@ -1957,7 +1951,6 @@ void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req,
        /* parse fcp_rsp and obtain sense data from RQ if available */
        bnx2fc_parse_fcp_rsp(io_req, fcp_rsp, num_rq);
 
-       host = sc_cmd->device->host;
        if (!sc_cmd->SCp.ptr) {
                printk(KERN_ERR PFX "SCp.ptr is NULL\n");
                return;
index 03bd896..0ca6951 100644 (file)
@@ -1316,7 +1316,6 @@ csio_wr_fixup_host_params(struct csio_hw *hw)
        u32 fl_align = clsz < 32 ? 32 : clsz;
        u32 pack_align;
        u32 ingpad, ingpack;
-       int pcie_cap;
 
        csio_wr_reg32(hw, HOSTPAGESIZEPF0_V(s_hps) | HOSTPAGESIZEPF1_V(s_hps) |
                      HOSTPAGESIZEPF2_V(s_hps) | HOSTPAGESIZEPF3_V(s_hps) |
@@ -1347,8 +1346,7 @@ csio_wr_fixup_host_params(struct csio_hw *hw)
         * multiple of the Maximum Payload Size.
         */
        pack_align = fl_align;
-       pcie_cap = pci_find_capability(hw->pdev, PCI_CAP_ID_EXP);
-       if (pcie_cap) {
+       if (pci_is_pcie(hw->pdev)) {
                u32 mps, mps_log;
                u16 devctl;
 
@@ -1356,9 +1354,7 @@ csio_wr_fixup_host_params(struct csio_hw *hw)
                 * [bits 7:5] encodes sizes as powers of 2 starting at
                 * 128 bytes.
                 */
-               pci_read_config_word(hw->pdev,
-                                    pcie_cap + PCI_EXP_DEVCTL,
-                                    &devctl);
+               pcie_capability_read_word(hw->pdev, PCI_EXP_DEVCTL, &devctl);
                mps_log = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5) + 7;
                mps = 1 << mps_log;
                if (mps > pack_align)
index b1f4724..93ef97a 100644 (file)
@@ -753,10 +753,13 @@ static void term_intr(struct cxlflash_cfg *cfg, enum undo_level level,
                /* SISL_MSI_ASYNC_ERROR is setup only for the primary HWQ */
                if (index == PRIMARY_HWQ)
                        cfg->ops->unmap_afu_irq(hwq->ctx_cookie, 3, hwq);
+               /* fall through */
        case UNMAP_TWO:
                cfg->ops->unmap_afu_irq(hwq->ctx_cookie, 2, hwq);
+               /* fall through */
        case UNMAP_ONE:
                cfg->ops->unmap_afu_irq(hwq->ctx_cookie, 1, hwq);
+               /* fall through */
        case FREE_IRQ:
                cfg->ops->free_afu_irqs(hwq->ctx_cookie);
                /* fall through */
@@ -973,14 +976,18 @@ static void cxlflash_remove(struct pci_dev *pdev)
        switch (cfg->init_state) {
        case INIT_STATE_CDEV:
                cxlflash_release_chrdev(cfg);
+               /* fall through */
        case INIT_STATE_SCSI:
                cxlflash_term_local_luns(cfg);
                scsi_remove_host(cfg->host);
+               /* fall through */
        case INIT_STATE_AFU:
                term_afu(cfg);
+               /* fall through */
        case INIT_STATE_PCI:
                cfg->ops->destroy_afu(cfg->afu_cookie);
                pci_disable_device(pdev);
+               /* fall through */
        case INIT_STATE_NONE:
                free_mem(cfg);
                scsi_host_put(cfg->host);
@@ -2353,11 +2360,11 @@ retry:
                        cxlflash_schedule_async_reset(cfg);
                        break;
                }
-               /* fall through to retry */
+               /* fall through to retry */
        case -EAGAIN:
                if (++nretry < 2)
                        goto retry;
-               /* fall through to exit */
+               /* fall through to exit */
        default:
                break;
        }
@@ -3017,6 +3024,7 @@ retry:
                wait_event(cfg->reset_waitq, cfg->state != STATE_RESET);
                if (cfg->state == STATE_NORMAL)
                        goto retry;
+               /* else, fall through */
        default:
                /* Ideally should not happen */
                dev_err(dev, "%s: Device is not ready, state=%d\n",
index 65f1fe3..5efc959 100644 (file)
@@ -546,6 +546,8 @@ static void send_mode_select(struct work_struct *work)
        spin_unlock(&ctlr->ms_lock);
 
  retry:
+       memset(cdb, 0, sizeof(cdb));
+
        data_size = rdac_failover_get(ctlr, &list, cdb);
 
        RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, "
index 950cd92..eb7d139 100644 (file)
@@ -762,14 +762,10 @@ u32 esas2r_get_uncached_size(struct esas2r_adapter *a)
 
 static void esas2r_init_pci_cfg_space(struct esas2r_adapter *a)
 {
-       int pcie_cap_reg;
-
-       pcie_cap_reg = pci_find_capability(a->pcid, PCI_CAP_ID_EXP);
-       if (pcie_cap_reg) {
+       if (pci_is_pcie(a->pcid)) {
                u16 devcontrol;
 
-               pci_read_config_word(a->pcid, pcie_cap_reg + PCI_EXP_DEVCTL,
-                                    &devcontrol);
+               pcie_capability_read_word(a->pcid, PCI_EXP_DEVCTL, &devcontrol);
 
                if ((devcontrol & PCI_EXP_DEVCTL_READRQ) >
                     PCI_EXP_DEVCTL_READRQ_512B) {
@@ -778,9 +774,8 @@ static void esas2r_init_pci_cfg_space(struct esas2r_adapter *a)
 
                        devcontrol &= ~PCI_EXP_DEVCTL_READRQ;
                        devcontrol |= PCI_EXP_DEVCTL_READRQ_512B;
-                       pci_write_config_word(a->pcid,
-                                             pcie_cap_reg + PCI_EXP_DEVCTL,
-                                             devcontrol);
+                       pcie_capability_write_word(a->pcid, PCI_EXP_DEVCTL,
+                                                  devcontrol);
                }
        }
 }
index 3d13052..442c5e7 100644 (file)
@@ -757,7 +757,6 @@ static int hba_ioctl_callback(struct esas2r_adapter *a,
 
                struct atto_hba_get_adapter_info *gai =
                        &hi->data.get_adap_info;
-               int pcie_cap_reg;
 
                if (hi->flags & HBAF_TUNNEL) {
                        hi->status = ATTO_STS_UNSUPPORTED;
@@ -784,17 +783,14 @@ static int hba_ioctl_callback(struct esas2r_adapter *a,
                gai->pci.dev_num = PCI_SLOT(a->pcid->devfn);
                gai->pci.func_num = PCI_FUNC(a->pcid->devfn);
 
-               pcie_cap_reg = pci_find_capability(a->pcid, PCI_CAP_ID_EXP);
-               if (pcie_cap_reg) {
+               if (pci_is_pcie(a->pcid)) {
                        u16 stat;
                        u32 caps;
 
-                       pci_read_config_word(a->pcid,
-                                            pcie_cap_reg + PCI_EXP_LNKSTA,
-                                            &stat);
-                       pci_read_config_dword(a->pcid,
-                                             pcie_cap_reg + PCI_EXP_LNKCAP,
-                                             &caps);
+                       pcie_capability_read_word(a->pcid, PCI_EXP_LNKSTA,
+                                                 &stat);
+                       pcie_capability_read_dword(a->pcid, PCI_EXP_LNKCAP,
+                                                  &caps);
 
                        gai->pci.link_speed_curr =
                                (u8)(stat & PCI_EXP_LNKSTA_CLS);
index 587d4bb..25dae9f 100644 (file)
@@ -1250,15 +1250,21 @@ static int __init fcoe_if_init(void)
        /* attach to scsi transport */
        fcoe_nport_scsi_transport =
                fc_attach_transport(&fcoe_nport_fc_functions);
+       if (!fcoe_nport_scsi_transport)
+               goto err;
+
        fcoe_vport_scsi_transport =
                fc_attach_transport(&fcoe_vport_fc_functions);
-
-       if (!fcoe_nport_scsi_transport) {
-               printk(KERN_ERR "fcoe: Failed to attach to the FC transport\n");
-               return -ENODEV;
-       }
+       if (!fcoe_vport_scsi_transport)
+               goto err_vport;
 
        return 0;
+
+err_vport:
+       fc_release_transport(fcoe_nport_scsi_transport);
+err:
+       printk(KERN_ERR "fcoe: Failed to attach to the FC transport\n");
+       return -ENODEV;
 }
 
 /**
@@ -1617,7 +1623,6 @@ static inline int fcoe_filter_frames(struct fc_lport *lport,
        else
                fr_flags(fp) |= FCPHF_CRC_UNCHECKED;
 
-       fh = (struct fc_frame_header *) skb_transport_header(skb);
        fh = fc_frame_header_get(fp);
        if (fh->fh_r_ctl == FC_RCTL_DD_SOL_DATA && fh->fh_type == FC_TYPE_FCP)
                return 0;
index b5e6697..772bdc9 100644 (file)
@@ -166,7 +166,7 @@ static int fdomain_test_loopback(int base)
 
 static void fdomain_reset(int base)
 {
-       outb(1, base + REG_BCTL);
+       outb(BCTL_RST, base + REG_BCTL);
        mdelay(20);
        outb(0, base + REG_BCTL);
        mdelay(1150);
@@ -306,7 +306,7 @@ static void fdomain_work(struct work_struct *work)
        status = inb(fd->base + REG_BSTAT);
 
        if (status & BSTAT_REQ) {
-               switch (status & 0x0e) {
+               switch (status & (BSTAT_MSG | BSTAT_CMD | BSTAT_IO)) {
                case BSTAT_CMD: /* COMMAND OUT */
                        outb(cmd->cmnd[cmd->SCp.sent_command++],
                             fd->base + REG_SCSI_DATA);
@@ -331,7 +331,7 @@ static void fdomain_work(struct work_struct *work)
                case BSTAT_MSG | BSTAT_CMD:     /* MESSAGE OUT */
                        outb(MESSAGE_REJECT, fd->base + REG_SCSI_DATA);
                        break;
-               case BSTAT_MSG | BSTAT_IO | BSTAT_CMD:  /* MESSAGE IN */
+               case BSTAT_MSG | BSTAT_CMD | BSTAT_IO:  /* MESSAGE IN */
                        cmd->SCp.Message = inb(fd->base + REG_SCSI_DATA);
                        if (!cmd->SCp.Message)
                                ++done;
index 28639ad..f2da4fa 100644 (file)
@@ -131,8 +131,7 @@ static int fdomain_isa_match(struct device *dev, unsigned int ndev)
        if (!request_region(base, FDOMAIN_REGION_SIZE, "fdomain_isa"))
                return 0;
 
-       irq = irqs[(inb(base + REG_CFG1) & 0x0e) >> 1];
-
+       irq = irqs[(inb(base + REG_CFG1) & CFG1_IRQ_MASK) >> 1];
 
        if (sig)
                this_id = sig->this_id;
@@ -164,7 +163,7 @@ static int fdomain_isa_param_match(struct device *dev, unsigned int ndev)
        }
 
        if (irq_ <= 0)
-               irq_ = irqs[(inb(io[ndev] + REG_CFG1) & 0x0e) >> 1];
+               irq_ = irqs[(inb(io[ndev] + REG_CFG1) & CFG1_IRQ_MASK) >> 1];
 
        sh = fdomain_create(io[ndev], irq_, scsi_id[ndev], dev);
        if (!sh) {
index 21991c9..13f7d88 100644 (file)
@@ -52,7 +52,6 @@ static struct fc_trace_flag_type *fc_trc_flag;
  */
 int fnic_debugfs_init(void)
 {
-       int rc = -1;
        fnic_trace_debugfs_root = debugfs_create_dir("fnic", NULL);
 
        fnic_stats_debugfs_root = debugfs_create_dir("statistics",
@@ -70,8 +69,7 @@ int fnic_debugfs_init(void)
                fc_trc_flag->fc_clear = 4;
        }
 
-       rc = 0;
-       return rc;
+       return 0;
 }
 
 /*
index 911a5ad..673887e 100644 (file)
@@ -52,6 +52,7 @@ void fnic_handle_link(struct work_struct *work)
        unsigned long flags;
        int old_link_status;
        u32 old_link_down_cnt;
+       u64 old_port_speed, new_port_speed;
 
        spin_lock_irqsave(&fnic->fnic_lock, flags);
 
@@ -62,14 +63,19 @@ void fnic_handle_link(struct work_struct *work)
 
        old_link_down_cnt = fnic->link_down_cnt;
        old_link_status = fnic->link_status;
+       old_port_speed = atomic64_read(
+                       &fnic->fnic_stats.misc_stats.current_port_speed);
+
        fnic->link_status = vnic_dev_link_status(fnic->vdev);
        fnic->link_down_cnt = vnic_dev_link_down_cnt(fnic->vdev);
 
+       new_port_speed = vnic_dev_port_speed(fnic->vdev);
        atomic64_set(&fnic->fnic_stats.misc_stats.current_port_speed,
-                       vnic_dev_port_speed(fnic->vdev));
-       shost_printk(KERN_INFO, fnic->lport->host, "Current vnic speed set to :  %llu\n",
-                       (u64)atomic64_read(
-                       &fnic->fnic_stats.misc_stats.current_port_speed));
+                       new_port_speed);
+       if (old_port_speed != new_port_speed)
+               shost_printk(KERN_INFO, fnic->lport->host,
+                               "Current vnic speed set to :  %llu\n",
+                               new_port_speed);
 
        switch (vnic_dev_port_speed(fnic->vdev)) {
        case DCEM_PORTSPEED_10G:
index da4602b..2fb2731 100644 (file)
@@ -254,7 +254,7 @@ int fnic_set_intr_mode(struct fnic *fnic)
                int vecs = n + m + o + 1;
 
                if (pci_alloc_irq_vectors(fnic->pdev, vecs, vecs,
-                               PCI_IRQ_MSIX) < 0) {
+                               PCI_IRQ_MSIX) == vecs) {
                        fnic->rq_count = n;
                        fnic->raw_wq_count = m;
                        fnic->wq_copy_count = o;
@@ -280,7 +280,7 @@ int fnic_set_intr_mode(struct fnic *fnic)
            fnic->wq_copy_count >= 1 &&
            fnic->cq_count >= 3 &&
            fnic->intr_count >= 1 &&
-           pci_alloc_irq_vectors(fnic->pdev, 1, 1, PCI_IRQ_MSI) < 0) {
+           pci_alloc_irq_vectors(fnic->pdev, 1, 1, PCI_IRQ_MSI) == 1) {
                fnic->rq_count = 1;
                fnic->raw_wq_count = 1;
                fnic->wq_copy_count = 1;
index 9621831..a0d01ae 100644 (file)
@@ -453,7 +453,7 @@ int fnic_get_stats_data(struct stats_debug_info *debug,
                  (u64)atomic64_read(&stats->misc_stats.frame_errors));
 
        len += snprintf(debug->debug_buffer + len, buf_size - len,
-                       "Firmware reported port seed: %llu\n",
+                       "Firmware reported port speed: %llu\n",
                        (u64)atomic64_read(
                                &stats->misc_stats.current_port_speed));
 
index 42a02cc..720c4d6 100644 (file)
 #define HISI_SAS_MAX_DEVICES HISI_SAS_MAX_ITCT_ENTRIES
 #define HISI_SAS_RESET_BIT     0
 #define HISI_SAS_REJECT_CMD_BIT        1
-#define HISI_SAS_RESERVED_IPTT_CNT  96
+#define HISI_SAS_MAX_COMMANDS (HISI_SAS_QUEUE_SLOTS)
+#define HISI_SAS_RESERVED_IPTT  96
+#define HISI_SAS_UNRESERVED_IPTT \
+       (HISI_SAS_MAX_COMMANDS - HISI_SAS_RESERVED_IPTT)
+
+#define HISI_SAS_IOST_ITCT_CACHE_NUM 64
+#define HISI_SAS_IOST_ITCT_CACHE_DW_SZ 10
 
 #define HISI_SAS_STATUS_BUF_SZ (sizeof(struct hisi_sas_status_buffer))
 #define HISI_SAS_COMMAND_TABLE_SZ (sizeof(union hisi_sas_command_table))
@@ -128,7 +134,6 @@ struct hisi_sas_rst {
 
 #define HISI_SAS_DECLARE_RST_WORK_ON_STACK(r) \
        DECLARE_COMPLETION_ONSTACK(c); \
-       DECLARE_WORK(w, hisi_sas_sync_rst_work_handler); \
        struct hisi_sas_rst r = HISI_SAS_RST_WORK_INIT(r, c)
 
 enum hisi_sas_bit_err_type {
@@ -249,6 +254,22 @@ struct hisi_sas_debugfs_reg {
        };
 };
 
+struct hisi_sas_iost_itct_cache {
+       u32 data[HISI_SAS_IOST_ITCT_CACHE_DW_SZ];
+};
+
+enum hisi_sas_debugfs_reg_array_member {
+       DEBUGFS_GLOBAL = 0,
+       DEBUGFS_AXI,
+       DEBUGFS_RAS,
+       DEBUGFS_REGS_NUM
+};
+
+enum hisi_sas_debugfs_cache_type {
+       HISI_SAS_ITCT_CACHE,
+       HISI_SAS_IOST_CACHE,
+};
+
 struct hisi_sas_hw {
        int (*hw_init)(struct hisi_hba *hisi_hba);
        void (*setup_itct)(struct hisi_hba *hisi_hba,
@@ -257,7 +278,6 @@ struct hisi_sas_hw {
                                struct domain_device *device);
        struct hisi_sas_device *(*alloc_dev)(struct domain_device *device);
        void (*sl_notify_ssp)(struct hisi_hba *hisi_hba, int phy_no);
-       int (*get_free_slot)(struct hisi_hba *hisi_hba, struct hisi_sas_dq *dq);
        void (*start_delivery)(struct hisi_sas_dq *dq);
        void (*prep_ssp)(struct hisi_hba *hisi_hba,
                        struct hisi_sas_slot *slot);
@@ -268,8 +288,6 @@ struct hisi_sas_hw {
        void (*prep_abort)(struct hisi_hba *hisi_hba,
                          struct hisi_sas_slot *slot,
                          int device_id, int abort_flag, int tag_to_abort);
-       int (*slot_complete)(struct hisi_hba *hisi_hba,
-                            struct hisi_sas_slot *slot);
        void (*phys_init)(struct hisi_hba *hisi_hba);
        void (*phy_start)(struct hisi_hba *hisi_hba, int phy_no);
        void (*phy_disable)(struct hisi_hba *hisi_hba, int phy_no);
@@ -288,15 +306,18 @@ struct hisi_sas_hw {
        u32 (*get_phys_state)(struct hisi_hba *hisi_hba);
        int (*write_gpio)(struct hisi_hba *hisi_hba, u8 reg_type,
                                u8 reg_index, u8 reg_count, u8 *write_data);
-       int (*wait_cmds_complete_timeout)(struct hisi_hba *hisi_hba,
-                                         int delay_ms, int timeout_ms);
+       void (*wait_cmds_complete_timeout)(struct hisi_hba *hisi_hba,
+                                          int delay_ms, int timeout_ms);
        void (*snapshot_prepare)(struct hisi_hba *hisi_hba);
        void (*snapshot_restore)(struct hisi_hba *hisi_hba);
-       int max_command_entries;
+       int (*set_bist)(struct hisi_hba *hisi_hba, bool enable);
+       void (*read_iost_itct_cache)(struct hisi_hba *hisi_hba,
+                                    enum hisi_sas_debugfs_cache_type type,
+                                    u32 *cache);
        int complete_hdr_size;
        struct scsi_host_template *sht;
 
-       const struct hisi_sas_debugfs_reg *debugfs_reg_global;
+       const struct hisi_sas_debugfs_reg *debugfs_reg_array[DEBUGFS_REGS_NUM];
        const struct hisi_sas_debugfs_reg *debugfs_reg_port;
 };
 
@@ -371,16 +392,28 @@ struct hisi_hba {
        int cq_nvecs;
        unsigned int *reply_map;
 
+       /* bist */
+       enum sas_linkrate debugfs_bist_linkrate;
+       int debugfs_bist_code_mode;
+       int debugfs_bist_phy_no;
+       int debugfs_bist_mode;
+       u32 debugfs_bist_cnt;
+       int debugfs_bist_enable;
+
        /* debugfs memories */
-       u32 *debugfs_global_reg;
+       /* Put Global AXI and RAS Register into register array */
+       u32 *debugfs_regs[DEBUGFS_REGS_NUM];
        u32 *debugfs_port_reg[HISI_SAS_MAX_PHYS];
        void *debugfs_complete_hdr[HISI_SAS_MAX_QUEUES];
        struct hisi_sas_cmd_hdr *debugfs_cmd_hdr[HISI_SAS_MAX_QUEUES];
        struct hisi_sas_iost *debugfs_iost;
        struct hisi_sas_itct *debugfs_itct;
+       u64 *debugfs_iost_cache;
+       u64 *debugfs_itct_cache;
 
        struct dentry *debugfs_dir;
        struct dentry *debugfs_dump_dentry;
+       struct dentry *debugfs_bist_dentry;
        bool debugfs_snapshot;
 };
 
@@ -533,7 +566,6 @@ extern u8 hisi_sas_get_ata_protocol(struct host_to_dev_fis *fis,
 extern struct hisi_sas_port *to_hisi_sas_port(struct asd_sas_port *sas_port);
 extern void hisi_sas_sata_done(struct sas_task *task,
                            struct hisi_sas_slot *slot);
-extern int hisi_sas_get_ncq_tag(struct sas_task *task, u32 *tag);
 extern int hisi_sas_get_fw_info(struct hisi_hba *hisi_hba);
 extern int hisi_sas_probe(struct platform_device *pdev,
                          const struct hisi_sas_hw *ops);
index cb746cf..d1513fd 100644 (file)
@@ -118,21 +118,6 @@ void hisi_sas_sata_done(struct sas_task *task,
 }
 EXPORT_SYMBOL_GPL(hisi_sas_sata_done);
 
-int hisi_sas_get_ncq_tag(struct sas_task *task, u32 *tag)
-{
-       struct ata_queued_cmd *qc = task->uldd_task;
-
-       if (qc) {
-               if (qc->tf.command == ATA_CMD_FPDMA_WRITE ||
-                       qc->tf.command == ATA_CMD_FPDMA_READ) {
-                       *tag = qc->tag;
-                       return 1;
-               }
-       }
-       return 0;
-}
-EXPORT_SYMBOL_GPL(hisi_sas_get_ncq_tag);
-
 /*
  * This function assumes linkrate mask fits in 8 bits, which it
  * does for all HW versions supported.
@@ -180,8 +165,8 @@ static void hisi_sas_slot_index_free(struct hisi_hba *hisi_hba, int slot_idx)
 {
        unsigned long flags;
 
-       if (hisi_hba->hw->slot_index_alloc || (slot_idx >=
-           hisi_hba->hw->max_command_entries - HISI_SAS_RESERVED_IPTT_CNT)) {
+       if (hisi_hba->hw->slot_index_alloc ||
+           slot_idx >= HISI_SAS_UNRESERVED_IPTT) {
                spin_lock_irqsave(&hisi_hba->lock, flags);
                hisi_sas_slot_index_clear(hisi_hba, slot_idx);
                spin_unlock_irqrestore(&hisi_hba->lock, flags);
@@ -211,8 +196,7 @@ static int hisi_sas_slot_index_alloc(struct hisi_hba *hisi_hba,
        if (index >= hisi_hba->slot_index_count) {
                index = find_next_zero_bit(bitmap,
                                hisi_hba->slot_index_count,
-                               hisi_hba->hw->max_command_entries -
-                               HISI_SAS_RESERVED_IPTT_CNT);
+                               HISI_SAS_UNRESERVED_IPTT);
                if (index >= hisi_hba->slot_index_count) {
                        spin_unlock_irqrestore(&hisi_hba->lock, flags);
                        return -SAS_QUEUE_FULL;
@@ -301,7 +285,7 @@ static void hisi_sas_task_prep_abort(struct hisi_hba *hisi_hba,
 
 static void hisi_sas_dma_unmap(struct hisi_hba *hisi_hba,
                               struct sas_task *task, int n_elem,
-                              int n_elem_req, int n_elem_resp)
+                              int n_elem_req)
 {
        struct device *dev = hisi_hba->dev;
 
@@ -315,16 +299,13 @@ static void hisi_sas_dma_unmap(struct hisi_hba *hisi_hba,
                        if (n_elem_req)
                                dma_unmap_sg(dev, &task->smp_task.smp_req,
                                             1, DMA_TO_DEVICE);
-                       if (n_elem_resp)
-                               dma_unmap_sg(dev, &task->smp_task.smp_resp,
-                                            1, DMA_FROM_DEVICE);
                }
        }
 }
 
 static int hisi_sas_dma_map(struct hisi_hba *hisi_hba,
                            struct sas_task *task, int *n_elem,
-                           int *n_elem_req, int *n_elem_resp)
+                           int *n_elem_req)
 {
        struct device *dev = hisi_hba->dev;
        int rc;
@@ -332,7 +313,7 @@ static int hisi_sas_dma_map(struct hisi_hba *hisi_hba,
        if (sas_protocol_ata(task->task_proto)) {
                *n_elem = task->num_scatter;
        } else {
-               unsigned int req_len, resp_len;
+               unsigned int req_len;
 
                if (task->num_scatter) {
                        *n_elem = dma_map_sg(dev, task->scatter,
@@ -353,17 +334,6 @@ static int hisi_sas_dma_map(struct hisi_hba *hisi_hba,
                                rc = -EINVAL;
                                goto err_out_dma_unmap;
                        }
-                       *n_elem_resp = dma_map_sg(dev, &task->smp_task.smp_resp,
-                                                 1, DMA_FROM_DEVICE);
-                       if (!*n_elem_resp) {
-                               rc = -ENOMEM;
-                               goto err_out_dma_unmap;
-                       }
-                       resp_len = sg_dma_len(&task->smp_task.smp_resp);
-                       if (resp_len & 0x3) {
-                               rc = -EINVAL;
-                               goto err_out_dma_unmap;
-                       }
                }
        }
 
@@ -378,7 +348,7 @@ static int hisi_sas_dma_map(struct hisi_hba *hisi_hba,
 err_out_dma_unmap:
        /* It would be better to call dma_unmap_sg() here, but it's messy */
        hisi_sas_dma_unmap(hisi_hba, task, *n_elem,
-                          *n_elem_req, *n_elem_resp);
+                          *n_elem_req);
 prep_out:
        return rc;
 }
@@ -450,7 +420,7 @@ static int hisi_sas_task_prep(struct sas_task *task,
        struct asd_sas_port *sas_port = device->port;
        struct device *dev = hisi_hba->dev;
        int dlvry_queue_slot, dlvry_queue, rc, slot_idx;
-       int n_elem = 0, n_elem_dif = 0, n_elem_req = 0, n_elem_resp = 0;
+       int n_elem = 0, n_elem_dif = 0, n_elem_req = 0;
        struct hisi_sas_dq *dq;
        unsigned long flags;
        int wr_q_index;
@@ -486,7 +456,7 @@ static int hisi_sas_task_prep(struct sas_task *task,
        }
 
        rc = hisi_sas_dma_map(hisi_hba, task, &n_elem,
-                             &n_elem_req, &n_elem_resp);
+                             &n_elem_req);
        if (rc < 0)
                goto prep_out;
 
@@ -520,13 +490,8 @@ static int hisi_sas_task_prep(struct sas_task *task,
        slot = &hisi_hba->slot_info[slot_idx];
 
        spin_lock_irqsave(&dq->lock, flags);
-       wr_q_index = hisi_hba->hw->get_free_slot(hisi_hba, dq);
-       if (wr_q_index < 0) {
-               spin_unlock_irqrestore(&dq->lock, flags);
-               rc = -EAGAIN;
-               goto err_out_tag;
-       }
-
+       wr_q_index = dq->wr_point;
+       dq->wr_point = (dq->wr_point + 1) % HISI_SAS_QUEUE_SLOTS;
        list_add_tail(&slot->delivery, &dq->list);
        spin_unlock_irqrestore(&dq->lock, flags);
        spin_lock_irqsave(&sas_dev->lock, flags);
@@ -551,7 +516,8 @@ static int hisi_sas_task_prep(struct sas_task *task,
 
        memset(slot->cmd_hdr, 0, sizeof(struct hisi_sas_cmd_hdr));
        memset(hisi_sas_cmd_hdr_addr_mem(slot), 0, HISI_SAS_COMMAND_TABLE_SZ);
-       memset(hisi_sas_status_buf_addr_mem(slot), 0, HISI_SAS_STATUS_BUF_SZ);
+       memset(hisi_sas_status_buf_addr_mem(slot), 0,
+              sizeof(struct hisi_sas_err_record));
 
        switch (task->task_proto) {
        case SAS_PROTOCOL_SMP:
@@ -580,14 +546,12 @@ static int hisi_sas_task_prep(struct sas_task *task,
 
        return 0;
 
-err_out_tag:
-       hisi_sas_slot_index_free(hisi_hba, slot_idx);
 err_out_dif_dma_unmap:
        if (!sas_protocol_ata(task->task_proto))
                hisi_sas_dif_dma_unmap(hisi_hba, task, n_elem_dif);
 err_out_dma_unmap:
        hisi_sas_dma_unmap(hisi_hba, task, n_elem,
-                          n_elem_req, n_elem_resp);
+                          n_elem_req);
 prep_out:
        dev_err(dev, "task prep: failed[%d]!\n", rc);
        return rc;
@@ -719,13 +683,13 @@ static struct hisi_sas_device *hisi_sas_alloc_dev(struct domain_device *device)
        return sas_dev;
 }
 
-#define HISI_SAS_SRST_ATA_DISK_CNT 3
+#define HISI_SAS_DISK_RECOVER_CNT 3
 static int hisi_sas_init_device(struct domain_device *device)
 {
        int rc = TMF_RESP_FUNC_COMPLETE;
        struct scsi_lun lun;
        struct hisi_sas_tmf_task tmf_task;
-       int retry = HISI_SAS_SRST_ATA_DISK_CNT;
+       int retry = HISI_SAS_DISK_RECOVER_CNT;
        struct hisi_hba *hisi_hba = dev_to_hisi_hba(device);
        struct device *dev = hisi_hba->dev;
        struct sas_phy *local_phy;
@@ -735,10 +699,14 @@ static int hisi_sas_init_device(struct domain_device *device)
                int_to_scsilun(0, &lun);
 
                tmf_task.tmf = TMF_CLEAR_TASK_SET;
-               rc = hisi_sas_debug_issue_ssp_tmf(device, lun.scsi_lun,
-                                                 &tmf_task);
-               if (rc == TMF_RESP_FUNC_COMPLETE)
-                       hisi_sas_release_task(hisi_hba, device);
+               while (retry-- > 0) {
+                       rc = hisi_sas_debug_issue_ssp_tmf(device, lun.scsi_lun,
+                                                         &tmf_task);
+                       if (rc == TMF_RESP_FUNC_COMPLETE) {
+                               hisi_sas_release_task(hisi_hba, device);
+                               break;
+                       }
+               }
                break;
        case SAS_SATA_DEV:
        case SAS_SATA_PM:
@@ -1081,21 +1049,22 @@ static void hisi_sas_dev_gone(struct domain_device *device)
        dev_info(dev, "dev[%d:%x] is gone\n",
                 sas_dev->device_id, sas_dev->dev_type);
 
+       down(&hisi_hba->sem);
        if (!test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags)) {
                hisi_sas_internal_task_abort(hisi_hba, device,
                                             HISI_SAS_INT_ABT_DEV, 0);
 
                hisi_sas_dereg_device(hisi_hba, device);
 
-               down(&hisi_hba->sem);
                hisi_hba->hw->clear_itct(hisi_hba, sas_dev);
-               up(&hisi_hba->sem);
                device->lldd_dev = NULL;
        }
 
        if (hisi_hba->hw->free_device)
                hisi_hba->hw->free_device(sas_dev);
        sas_dev->dev_type = SAS_PHY_UNUSED;
+       sas_dev->sas_device = NULL;
+       up(&hisi_hba->sem);
 }
 
 static int hisi_sas_queue_command(struct sas_task *task, gfp_t gfp_flags)
@@ -1423,8 +1392,7 @@ static void hisi_sas_refresh_port_id(struct hisi_hba *hisi_hba)
        }
 }
 
-static void hisi_sas_rescan_topology(struct hisi_hba *hisi_hba, u32 old_state,
-                             u32 state)
+static void hisi_sas_rescan_topology(struct hisi_hba *hisi_hba, u32 state)
 {
        struct sas_ha_struct *sas_ha = &hisi_hba->sha;
        struct asd_sas_port *_sas_port = NULL;
@@ -1576,16 +1544,16 @@ void hisi_sas_controller_reset_done(struct hisi_hba *hisi_hba)
        msleep(1000);
        hisi_sas_refresh_port_id(hisi_hba);
        clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
-       up(&hisi_hba->sem);
 
        if (hisi_hba->reject_stp_links_msk)
                hisi_sas_terminate_stp_reject(hisi_hba);
        hisi_sas_reset_init_all_devices(hisi_hba);
+       up(&hisi_hba->sem);
        scsi_unblock_requests(shost);
        clear_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags);
 
        state = hisi_hba->hw->get_phys_state(hisi_hba);
-       hisi_sas_rescan_topology(hisi_hba, hisi_hba->phy_state, state);
+       hisi_sas_rescan_topology(hisi_hba, state);
 }
 EXPORT_SYMBOL_GPL(hisi_sas_controller_reset_done);
 
@@ -1770,24 +1738,34 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device)
        struct hisi_sas_device *sas_dev = device->lldd_dev;
        struct hisi_hba *hisi_hba = dev_to_hisi_hba(device);
        struct sas_ha_struct *sas_ha = &hisi_hba->sha;
-       struct asd_sas_phy *sas_phy = sas_ha->sas_phy[local_phy->number];
-       struct hisi_sas_phy *phy = container_of(sas_phy,
-                       struct hisi_sas_phy, sas_phy);
        DECLARE_COMPLETION_ONSTACK(phyreset);
        int rc, reset_type;
 
+       if (!local_phy->enabled) {
+               sas_put_local_phy(local_phy);
+               return -ENODEV;
+       }
+
        if (scsi_is_sas_phy_local(local_phy)) {
+               struct asd_sas_phy *sas_phy =
+                       sas_ha->sas_phy[local_phy->number];
+               struct hisi_sas_phy *phy =
+                       container_of(sas_phy, struct hisi_sas_phy, sas_phy);
                phy->in_reset = 1;
                phy->reset_completion = &phyreset;
        }
 
        reset_type = (sas_dev->dev_status == HISI_SAS_DEV_INIT ||
-                     !dev_is_sata(device)) ? 1 : 0;
+                     !dev_is_sata(device)) ? true : false;
 
        rc = sas_phy_reset(local_phy, reset_type);
        sas_put_local_phy(local_phy);
 
        if (scsi_is_sas_phy_local(local_phy)) {
+               struct asd_sas_phy *sas_phy =
+                       sas_ha->sas_phy[local_phy->number];
+               struct hisi_sas_phy *phy =
+                       container_of(sas_phy, struct hisi_sas_phy, sas_phy);
                int ret = wait_for_completion_timeout(&phyreset, 2 * HZ);
                unsigned long flags;
 
@@ -1802,9 +1780,10 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device)
        } else if (sas_dev->dev_status != HISI_SAS_DEV_INIT) {
                /*
                 * If in init state, we rely on caller to wait for link to be
-                * ready; otherwise, delay.
+                * ready; otherwise, delay unless the phy reset failed.
                 */
-               msleep(2000);
+               if (!rc)
+                       msleep(2000);
        }
 
        return rc;
@@ -1845,21 +1824,21 @@ static int hisi_sas_lu_reset(struct domain_device *device, u8 *lun)
        struct device *dev = hisi_hba->dev;
        int rc = TMF_RESP_FUNC_FAILED;
 
+       /* Clear internal IO and then lu reset */
+       rc = hisi_sas_internal_task_abort(hisi_hba, device,
+                                         HISI_SAS_INT_ABT_DEV, 0);
+       if (rc < 0) {
+               dev_err(dev, "lu_reset: internal abort failed\n");
+               goto out;
+       }
+       hisi_sas_dereg_device(hisi_hba, device);
+
        if (dev_is_sata(device)) {
                struct sas_phy *phy;
 
-               /* Clear internal IO and then hardreset */
-               rc = hisi_sas_internal_task_abort(hisi_hba, device,
-                                                 HISI_SAS_INT_ABT_DEV, 0);
-               if (rc < 0) {
-                       dev_err(dev, "lu_reset: internal abort failed\n");
-                       goto out;
-               }
-               hisi_sas_dereg_device(hisi_hba, device);
-
                phy = sas_get_local_phy(device);
 
-               rc = sas_phy_reset(phy, 1);
+               rc = sas_phy_reset(phy, true);
 
                if (rc == 0)
                        hisi_sas_release_task(hisi_hba, device);
@@ -1867,14 +1846,6 @@ static int hisi_sas_lu_reset(struct domain_device *device, u8 *lun)
        } else {
                struct hisi_sas_tmf_task tmf_task = { .tmf =  TMF_LU_RESET };
 
-               rc = hisi_sas_internal_task_abort(hisi_hba, device,
-                                                 HISI_SAS_INT_ABT_DEV, 0);
-               if (rc < 0) {
-                       dev_err(dev, "lu_reset: internal abort failed\n");
-                       goto out;
-               }
-               hisi_sas_dereg_device(hisi_hba, device);
-
                rc = hisi_sas_debug_issue_ssp_tmf(device, lun, &tmf_task);
                if (rc == TMF_RESP_FUNC_COMPLETE)
                        hisi_sas_release_task(hisi_hba, device);
@@ -1964,7 +1935,7 @@ hisi_sas_internal_abort_task_exec(struct hisi_hba *hisi_hba, int device_id,
        struct asd_sas_port *sas_port = device->port;
        struct hisi_sas_cmd_hdr *cmd_hdr_base;
        int dlvry_queue_slot, dlvry_queue, n_elem = 0, rc, slot_idx;
-       unsigned long flags, flags_dq = 0;
+       unsigned long flags;
        int wr_q_index;
 
        if (unlikely(test_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags)))
@@ -1983,15 +1954,11 @@ hisi_sas_internal_abort_task_exec(struct hisi_hba *hisi_hba, int device_id,
        slot_idx = rc;
        slot = &hisi_hba->slot_info[slot_idx];
 
-       spin_lock_irqsave(&dq->lock, flags_dq);
-       wr_q_index = hisi_hba->hw->get_free_slot(hisi_hba, dq);
-       if (wr_q_index < 0) {
-               spin_unlock_irqrestore(&dq->lock, flags_dq);
-               rc = -EAGAIN;
-               goto err_out_tag;
-       }
+       spin_lock_irqsave(&dq->lock, flags);
+       wr_q_index = dq->wr_point;
+       dq->wr_point = (dq->wr_point + 1) % HISI_SAS_QUEUE_SLOTS;
        list_add_tail(&slot->delivery, &dq->list);
-       spin_unlock_irqrestore(&dq->lock, flags_dq);
+       spin_unlock_irqrestore(&dq->lock, flags);
        spin_lock_irqsave(&sas_dev->lock, flags);
        list_add_tail(&slot->entry, &sas_dev->list);
        spin_unlock_irqrestore(&sas_dev->lock, flags);
@@ -2012,7 +1979,8 @@ hisi_sas_internal_abort_task_exec(struct hisi_hba *hisi_hba, int device_id,
 
        memset(slot->cmd_hdr, 0, sizeof(struct hisi_sas_cmd_hdr));
        memset(hisi_sas_cmd_hdr_addr_mem(slot), 0, HISI_SAS_COMMAND_TABLE_SZ);
-       memset(hisi_sas_status_buf_addr_mem(slot), 0, HISI_SAS_STATUS_BUF_SZ);
+       memset(hisi_sas_status_buf_addr_mem(slot), 0,
+              sizeof(struct hisi_sas_err_record));
 
        hisi_sas_task_prep_abort(hisi_hba, slot, device_id,
                                      abort_flag, task_tag);
@@ -2028,8 +1996,6 @@ hisi_sas_internal_abort_task_exec(struct hisi_hba *hisi_hba, int device_id,
 
        return 0;
 
-err_out_tag:
-       hisi_sas_slot_index_free(hisi_hba, slot_idx);
 err_out:
        dev_err(dev, "internal abort task prep: failed[%d]!\n", rc);
 
@@ -2089,6 +2055,9 @@ _hisi_sas_internal_task_abort(struct hisi_hba *hisi_hba,
 
        /* Internal abort timed out */
        if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
+               if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct)
+                       queue_work(hisi_hba->wq, &hisi_hba->debugfs_work);
+
                if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
                        struct hisi_sas_slot *slot = task->lldd_task;
 
@@ -2123,7 +2092,7 @@ _hisi_sas_internal_task_abort(struct hisi_hba *hisi_hba,
        }
 
 exit:
-       dev_dbg(dev, "internal task abort: task to dev %016llx task=%p resp: 0x%x sts 0x%x\n",
+       dev_dbg(dev, "internal task abort: task to dev %016llx task=%pK resp: 0x%x sts 0x%x\n",
                SAS_ADDR(device->sas_addr), task,
                task->task_status.resp, /* 0 is complete, -1 is undelivered */
                task->task_status.stat);
@@ -2291,7 +2260,7 @@ static struct sas_domain_function_template hisi_sas_transport_ops = {
 
 void hisi_sas_init_mem(struct hisi_hba *hisi_hba)
 {
-       int i, s, j, max_command_entries = hisi_hba->hw->max_command_entries;
+       int i, s, j, max_command_entries = HISI_SAS_MAX_COMMANDS;
        struct hisi_sas_breakpoint *sata_breakpoint = hisi_hba->sata_breakpoint;
 
        for (i = 0; i < hisi_hba->queue_count; i++) {
@@ -2328,7 +2297,7 @@ EXPORT_SYMBOL_GPL(hisi_sas_init_mem);
 int hisi_sas_alloc(struct hisi_hba *hisi_hba)
 {
        struct device *dev = hisi_hba->dev;
-       int i, j, s, max_command_entries = hisi_hba->hw->max_command_entries;
+       int i, j, s, max_command_entries = HISI_SAS_MAX_COMMANDS;
        int max_command_entries_ru, sz_slot_buf_ru;
        int blk_cnt, slots_per_blk;
 
@@ -2379,7 +2348,7 @@ int hisi_sas_alloc(struct hisi_hba *hisi_hba)
 
        s = HISI_SAS_MAX_ITCT_ENTRIES * sizeof(struct hisi_sas_itct);
        hisi_hba->itct = dmam_alloc_coherent(dev, s, &hisi_hba->itct_dma,
-                                            GFP_KERNEL | __GFP_ZERO);
+                                            GFP_KERNEL);
        if (!hisi_hba->itct)
                goto err_out;
 
@@ -2396,7 +2365,7 @@ int hisi_sas_alloc(struct hisi_hba *hisi_hba)
        else
                sz_slot_buf_ru = sizeof(struct hisi_sas_slot_buf_table);
        sz_slot_buf_ru = roundup(sz_slot_buf_ru, 64);
-       s = lcm(max_command_entries_ru, sz_slot_buf_ru);
+       s = max(lcm(max_command_entries_ru, sz_slot_buf_ru), PAGE_SIZE);
        blk_cnt = (max_command_entries_ru * sz_slot_buf_ru) / s;
        slots_per_blk = s / sz_slot_buf_ru;
 
@@ -2406,7 +2375,7 @@ int hisi_sas_alloc(struct hisi_hba *hisi_hba)
                void *buf;
 
                buf = dmam_alloc_coherent(dev, s, &buf_dma,
-                                         GFP_KERNEL | __GFP_ZERO);
+                                         GFP_KERNEL);
                if (!buf)
                        goto err_out;
 
@@ -2455,11 +2424,9 @@ int hisi_sas_alloc(struct hisi_hba *hisi_hba)
                                        GFP_KERNEL);
        if (!hisi_hba->sata_breakpoint)
                goto err_out;
-       hisi_sas_init_mem(hisi_hba);
 
        hisi_sas_slot_index_init(hisi_hba);
-       hisi_hba->last_slot_index = hisi_hba->hw->max_command_entries -
-               HISI_SAS_RESERVED_IPTT_CNT;
+       hisi_hba->last_slot_index = HISI_SAS_UNRESERVED_IPTT;
 
        hisi_hba->wq = create_singlethread_workqueue(dev_name(dev));
        if (!hisi_hba->wq) {
@@ -2610,8 +2577,7 @@ static struct Scsi_Host *hisi_sas_shost_alloc(struct platform_device *pdev,
                goto err_out;
        }
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       hisi_hba->regs = devm_ioremap_resource(dev, res);
+       hisi_hba->regs = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(hisi_hba->regs))
                goto err_out;
 
@@ -2672,13 +2638,11 @@ int hisi_sas_probe(struct platform_device *pdev,
        shost->max_channel = 1;
        shost->max_cmd_len = 16;
        if (hisi_hba->hw->slot_index_alloc) {
-               shost->can_queue = hisi_hba->hw->max_command_entries;
-               shost->cmd_per_lun = hisi_hba->hw->max_command_entries;
+               shost->can_queue = HISI_SAS_MAX_COMMANDS;
+               shost->cmd_per_lun = HISI_SAS_MAX_COMMANDS;
        } else {
-               shost->can_queue = hisi_hba->hw->max_command_entries -
-                       HISI_SAS_RESERVED_IPTT_CNT;
-               shost->cmd_per_lun = hisi_hba->hw->max_command_entries -
-                       HISI_SAS_RESERVED_IPTT_CNT;
+               shost->can_queue = HISI_SAS_UNRESERVED_IPTT;
+               shost->cmd_per_lun = HISI_SAS_UNRESERVED_IPTT;
        }
 
        sha->sas_ha_name = DRV_NAME;
@@ -2769,21 +2733,52 @@ static void hisi_sas_debugfs_snapshot_port_reg(struct hisi_hba *hisi_hba)
 
 static void hisi_sas_debugfs_snapshot_global_reg(struct hisi_hba *hisi_hba)
 {
-       u32 *databuf = (u32 *)hisi_hba->debugfs_global_reg;
+       u32 *databuf = hisi_hba->debugfs_regs[DEBUGFS_GLOBAL];
+       const struct hisi_sas_hw *hw = hisi_hba->hw;
        const struct hisi_sas_debugfs_reg *global =
-               hisi_hba->hw->debugfs_reg_global;
+                       hw->debugfs_reg_array[DEBUGFS_GLOBAL];
        int i;
 
        for (i = 0; i < global->count; i++, databuf++)
                *databuf = global->read_global_reg(hisi_hba, 4 * i);
 }
 
+/*
+ * Fill the DEBUGFS_AXI snapshot buffer: one u32 per register, read through
+ * the hw read_global_reg() hook at 4-byte strides starting at axi->base_off.
+ */
+static void hisi_sas_debugfs_snapshot_axi_reg(struct hisi_hba *hisi_hba)
+{
+       const struct hisi_sas_debugfs_reg *axi =
+                       hisi_hba->hw->debugfs_reg_array[DEBUGFS_AXI];
+       u32 *dst = hisi_hba->debugfs_regs[DEBUGFS_AXI];
+       int idx;
+
+       for (idx = 0; idx < axi->count; idx++)
+               dst[idx] = axi->read_global_reg(hisi_hba,
+                                               4 * idx + axi->base_off);
+}
+
+/*
+ * Fill the DEBUGFS_RAS snapshot buffer: one u32 per register, read through
+ * the hw read_global_reg() hook at 4-byte strides starting at ras->base_off.
+ */
+static void hisi_sas_debugfs_snapshot_ras_reg(struct hisi_hba *hisi_hba)
+{
+       const struct hisi_sas_debugfs_reg *ras =
+                       hisi_hba->hw->debugfs_reg_array[DEBUGFS_RAS];
+       u32 *dst = hisi_hba->debugfs_regs[DEBUGFS_RAS];
+       int idx;
+
+       for (idx = 0; idx < ras->count; idx++)
+               dst[idx] = ras->read_global_reg(hisi_hba,
+                                               4 * idx + ras->base_off);
+}
+
 static void hisi_sas_debugfs_snapshot_itct_reg(struct hisi_hba *hisi_hba)
 {
+       void *cachebuf = hisi_hba->debugfs_itct_cache;
        void *databuf = hisi_hba->debugfs_itct;
        struct hisi_sas_itct *itct;
        int i;
 
+       hisi_hba->hw->read_iost_itct_cache(hisi_hba, HISI_SAS_ITCT_CACHE,
+                                          cachebuf);
+
        itct = hisi_hba->itct;
 
        for (i = 0; i < HISI_SAS_MAX_ITCT_ENTRIES; i++, itct++) {
@@ -2794,11 +2789,15 @@ static void hisi_sas_debugfs_snapshot_itct_reg(struct hisi_hba *hisi_hba)
 
 static void hisi_sas_debugfs_snapshot_iost_reg(struct hisi_hba *hisi_hba)
 {
-       int max_command_entries = hisi_hba->hw->max_command_entries;
+       int max_command_entries = HISI_SAS_MAX_COMMANDS;
+       void *cachebuf = hisi_hba->debugfs_iost_cache;
        void *databuf = hisi_hba->debugfs_iost;
        struct hisi_sas_iost *iost;
        int i;
 
+       hisi_hba->hw->read_iost_itct_cache(hisi_hba, HISI_SAS_IOST_CACHE,
+                                          cachebuf);
+
        iost = hisi_hba->iost;
 
        for (i = 0; i < max_command_entries; i++, iost++) {
@@ -2845,9 +2844,9 @@ static int hisi_sas_debugfs_global_show(struct seq_file *s, void *p)
 {
        struct hisi_hba *hisi_hba = s->private;
        const struct hisi_sas_hw *hw = hisi_hba->hw;
-       const struct hisi_sas_debugfs_reg *reg_global = hw->debugfs_reg_global;
+       const void *reg_global = hw->debugfs_reg_array[DEBUGFS_GLOBAL];
 
-       hisi_sas_debugfs_print_reg(hisi_hba->debugfs_global_reg,
+       hisi_sas_debugfs_print_reg(hisi_hba->debugfs_regs[DEBUGFS_GLOBAL],
                                   reg_global, s);
 
        return 0;
@@ -2867,6 +2866,58 @@ static const struct file_operations hisi_sas_debugfs_global_fops = {
        .owner = THIS_MODULE,
 };
 
+/*
+ * seq_file show routine: print the DEBUGFS_AXI register snapshot using the
+ * hw AXI register descriptor. Always returns 0.
+ */
+static int hisi_sas_debugfs_axi_show(struct seq_file *s, void *p)
+{
+       struct hisi_hba *hba = s->private;
+       const void *axi_desc = hba->hw->debugfs_reg_array[DEBUGFS_AXI];
+       u32 *snapshot = hba->debugfs_regs[DEBUGFS_AXI];
+
+       hisi_sas_debugfs_print_reg(snapshot, axi_desc, s);
+
+       return 0;
+}
+
+/* Open handler: attach the AXI show routine with inode->i_private as data. */
+static int hisi_sas_debugfs_axi_open(struct inode *inode, struct file *filp)
+{
+       void *priv = inode->i_private;
+
+       return single_open(filp, hisi_sas_debugfs_axi_show, priv);
+}
+
+/* seq_file based file operations for the AXI register dump (no write hook). */
+static const struct file_operations hisi_sas_debugfs_axi_fops = {
+       .owner = THIS_MODULE,
+       .open = hisi_sas_debugfs_axi_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+/*
+ * seq_file show routine: print the DEBUGFS_RAS register snapshot using the
+ * hw RAS register descriptor. Always returns 0.
+ */
+static int hisi_sas_debugfs_ras_show(struct seq_file *s, void *p)
+{
+       struct hisi_hba *hba = s->private;
+       const void *ras_desc = hba->hw->debugfs_reg_array[DEBUGFS_RAS];
+       u32 *snapshot = hba->debugfs_regs[DEBUGFS_RAS];
+
+       hisi_sas_debugfs_print_reg(snapshot, ras_desc, s);
+
+       return 0;
+}
+
+/* Open handler: attach the RAS show routine with inode->i_private as data. */
+static int hisi_sas_debugfs_ras_open(struct inode *inode, struct file *filp)
+{
+       void *priv = inode->i_private;
+
+       return single_open(filp, hisi_sas_debugfs_ras_show, priv);
+}
+
+/* seq_file based file operations for the RAS register dump (no write hook). */
+static const struct file_operations hisi_sas_debugfs_ras_fops = {
+       .owner = THIS_MODULE,
+       .open = hisi_sas_debugfs_ras_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
 static int hisi_sas_debugfs_port_show(struct seq_file *s, void *p)
 {
        struct hisi_sas_phy *phy = s->private;
@@ -2893,8 +2944,8 @@ static const struct file_operations hisi_sas_debugfs_port_fops = {
        .owner = THIS_MODULE,
 };
 
-static int hisi_sas_show_row_64(struct seq_file *s, int index,
-                               int sz, __le64 *ptr)
+static void hisi_sas_show_row_64(struct seq_file *s, int index,
+                                int sz, __le64 *ptr)
 {
        int i;
 
@@ -2907,12 +2958,10 @@ static int hisi_sas_show_row_64(struct seq_file *s, int index,
        }
 
        seq_puts(s, "\n");
-
-       return 0;
 }
 
-static int hisi_sas_show_row_32(struct seq_file *s, int index,
-                               int sz, __le32 *ptr)
+static void hisi_sas_show_row_32(struct seq_file *s, int index,
+                                int sz, __le32 *ptr)
 {
        int i;
 
@@ -2924,11 +2973,9 @@ static int hisi_sas_show_row_32(struct seq_file *s, int index,
                        seq_puts(s, "\n\t");
        }
        seq_puts(s, "\n");
-
-       return 0;
 }
 
-static int hisi_sas_cq_show_slot(struct seq_file *s, int slot, void *cq_ptr)
+static void hisi_sas_cq_show_slot(struct seq_file *s, int slot, void *cq_ptr)
 {
        struct hisi_sas_cq *cq = cq_ptr;
        struct hisi_hba *hisi_hba = cq->hisi_hba;
@@ -2936,20 +2983,18 @@ static int hisi_sas_cq_show_slot(struct seq_file *s, int slot, void *cq_ptr)
        __le32 *complete_hdr = complete_queue +
                        (hisi_hba->hw->complete_hdr_size * slot);
 
-       return hisi_sas_show_row_32(s, slot,
-                               hisi_hba->hw->complete_hdr_size,
-                               complete_hdr);
+       hisi_sas_show_row_32(s, slot,
+                            hisi_hba->hw->complete_hdr_size,
+                            complete_hdr);
 }
 
 static int hisi_sas_debugfs_cq_show(struct seq_file *s, void *p)
 {
        struct hisi_sas_cq *cq = s->private;
-       int slot, ret;
+       int slot;
 
        for (slot = 0; slot < HISI_SAS_QUEUE_SLOTS; slot++) {
-               ret = hisi_sas_cq_show_slot(s, slot, cq);
-               if (ret)
-                       return ret;
+               hisi_sas_cq_show_slot(s, slot, cq);
        }
        return 0;
 }
@@ -2967,7 +3012,7 @@ static const struct file_operations hisi_sas_debugfs_cq_fops = {
        .owner = THIS_MODULE,
 };
 
-static int hisi_sas_dq_show_slot(struct seq_file *s, int slot, void *dq_ptr)
+static void hisi_sas_dq_show_slot(struct seq_file *s, int slot, void *dq_ptr)
 {
        struct hisi_sas_dq *dq = dq_ptr;
        struct hisi_hba *hisi_hba = dq->hisi_hba;
@@ -2975,18 +3020,15 @@ static int hisi_sas_dq_show_slot(struct seq_file *s, int slot, void *dq_ptr)
        __le32 *cmd_hdr = cmd_queue +
                sizeof(struct hisi_sas_cmd_hdr) * slot;
 
-       return hisi_sas_show_row_32(s, slot, sizeof(struct hisi_sas_cmd_hdr),
-                                   cmd_hdr);
+       hisi_sas_show_row_32(s, slot, sizeof(struct hisi_sas_cmd_hdr), cmd_hdr);
 }
 
 static int hisi_sas_debugfs_dq_show(struct seq_file *s, void *p)
 {
-       int slot, ret;
+       int slot;
 
        for (slot = 0; slot < HISI_SAS_QUEUE_SLOTS; slot++) {
-               ret = hisi_sas_dq_show_slot(s, slot, s->private);
-               if (ret)
-                       return ret;
+               hisi_sas_dq_show_slot(s, slot, s->private);
        }
        return 0;
 }
@@ -3008,14 +3050,12 @@ static int hisi_sas_debugfs_iost_show(struct seq_file *s, void *p)
 {
        struct hisi_hba *hisi_hba = s->private;
        struct hisi_sas_iost *debugfs_iost = hisi_hba->debugfs_iost;
-       int i, ret, max_command_entries = hisi_hba->hw->max_command_entries;
-       __le64 *iost = &debugfs_iost->qw0;
+       int i, max_command_entries = HISI_SAS_MAX_COMMANDS;
 
        for (i = 0; i < max_command_entries; i++, debugfs_iost++) {
-               ret = hisi_sas_show_row_64(s, i, sizeof(*debugfs_iost),
-                                          iost);
-               if (ret)
-                       return ret;
+               __le64 *iost = &debugfs_iost->qw0;
+
+               hisi_sas_show_row_64(s, i, sizeof(*debugfs_iost), iost);
        }
 
        return 0;
@@ -3034,18 +3074,56 @@ static const struct file_operations hisi_sas_debugfs_iost_fops = {
        .owner = THIS_MODULE,
 };
 
+/*
+ * seq_file show routine: print every entry of the IOST cache snapshot as a
+ * row of 64-bit words, labelled with the table index stored in the entry.
+ * Always returns 0.
+ */
+static int hisi_sas_debugfs_iost_cache_show(struct seq_file *s, void *p)
+{
+       struct hisi_hba *hba = s->private;
+       struct hisi_sas_iost_itct_cache *entries =
+               (struct hisi_sas_iost_itct_cache *)hba->debugfs_iost_cache;
+       u32 row_bytes = HISI_SAS_IOST_ITCT_CACHE_DW_SZ * 4;
+       int n;
+
+       for (n = 0; n < HISI_SAS_IOST_ITCT_CACHE_NUM; n++) {
+               struct hisi_sas_iost_itct_cache *entry = &entries[n];
+               /*
+                * Data struct of IOST cache:
+                * Data[1]: BIT0~15: Table index
+                *          Bit16:   Valid mask
+                * Data[2]~[9]: IOST table
+                */
+               int tab_idx = entry->data[1] & 0xffff;
+
+               hisi_sas_show_row_64(s, tab_idx, row_bytes, (__le64 *)entry);
+       }
+
+       return 0;
+}
+
+/* Open handler: attach the IOST cache show routine via single_open(). */
+static int hisi_sas_debugfs_iost_cache_open(struct inode *inode,
+                                           struct file *filp)
+{
+       void *priv = inode->i_private;
+
+       return single_open(filp, hisi_sas_debugfs_iost_cache_show, priv);
+}
+
+/* seq_file based file operations for the IOST cache dump (no write hook). */
+static const struct file_operations hisi_sas_debugfs_iost_cache_fops = {
+       .owner = THIS_MODULE,
+       .open = hisi_sas_debugfs_iost_cache_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
 static int hisi_sas_debugfs_itct_show(struct seq_file *s, void *p)
 {
-       int i, ret;
+       int i;
        struct hisi_hba *hisi_hba = s->private;
        struct hisi_sas_itct *debugfs_itct = hisi_hba->debugfs_itct;
-       __le64 *itct = &debugfs_itct->qw0;
 
        for (i = 0; i < HISI_SAS_MAX_ITCT_ENTRIES; i++, debugfs_itct++) {
-               ret = hisi_sas_show_row_64(s, i, sizeof(*debugfs_itct),
-                                          itct);
-               if (ret)
-                       return ret;
+               __le64 *itct = &debugfs_itct->qw0;
+
+               hisi_sas_show_row_64(s, i, sizeof(*debugfs_itct), itct);
        }
 
        return 0;
@@ -3064,6 +3142,46 @@ static const struct file_operations hisi_sas_debugfs_itct_fops = {
        .owner = THIS_MODULE,
 };
 
+/*
+ * seq_file show routine: print every entry of the ITCT cache snapshot as a
+ * row of 64-bit words, labelled with the table index stored in the entry.
+ * Always returns 0.
+ */
+static int hisi_sas_debugfs_itct_cache_show(struct seq_file *s, void *p)
+{
+       struct hisi_hba *hba = s->private;
+       struct hisi_sas_iost_itct_cache *entries =
+               (struct hisi_sas_iost_itct_cache *)hba->debugfs_itct_cache;
+       u32 row_bytes = HISI_SAS_IOST_ITCT_CACHE_DW_SZ * 4;
+       int n;
+
+       for (n = 0; n < HISI_SAS_IOST_ITCT_CACHE_NUM; n++) {
+               struct hisi_sas_iost_itct_cache *entry = &entries[n];
+               /*
+                * Data struct of ITCT cache:
+                * Data[1]: BIT0~15: Table index
+                *          Bit16:   Valid mask
+                * Data[2]~[9]: ITCT table
+                */
+               int tab_idx = entry->data[1] & 0xffff;
+
+               hisi_sas_show_row_64(s, tab_idx, row_bytes, (__le64 *)entry);
+       }
+
+       return 0;
+}
+
+/* Open handler: attach the ITCT cache show routine via single_open(). */
+static int hisi_sas_debugfs_itct_cache_open(struct inode *inode,
+                                           struct file *filp)
+{
+       void *priv = inode->i_private;
+
+       return single_open(filp, hisi_sas_debugfs_itct_cache_show, priv);
+}
+
+/* seq_file based file operations for the ITCT cache dump (no write hook). */
+static const struct file_operations hisi_sas_debugfs_itct_cache_fops = {
+       .owner = THIS_MODULE,
+       .open = hisi_sas_debugfs_itct_cache_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
 static void hisi_sas_debugfs_create_files(struct hisi_hba *hisi_hba)
 {
        struct dentry *dump_dentry;
@@ -3110,9 +3228,21 @@ static void hisi_sas_debugfs_create_files(struct hisi_hba *hisi_hba)
        debugfs_create_file("iost", 0400, dump_dentry, hisi_hba,
                            &hisi_sas_debugfs_iost_fops);
 
+       debugfs_create_file("iost_cache", 0400, dump_dentry, hisi_hba,
+                           &hisi_sas_debugfs_iost_cache_fops);
+
        debugfs_create_file("itct", 0400, dump_dentry, hisi_hba,
                            &hisi_sas_debugfs_itct_fops);
 
+       debugfs_create_file("itct_cache", 0400, dump_dentry, hisi_hba,
+                           &hisi_sas_debugfs_itct_cache_fops);
+
+       debugfs_create_file("axi", 0400, dump_dentry, hisi_hba,
+                           &hisi_sas_debugfs_axi_fops);
+
+       debugfs_create_file("ras", 0400, dump_dentry, hisi_hba,
+                           &hisi_sas_debugfs_ras_fops);
+
        return;
 }
 
@@ -3122,6 +3252,8 @@ static void hisi_sas_debugfs_snapshot_regs(struct hisi_hba *hisi_hba)
 
        hisi_sas_debugfs_snapshot_global_reg(hisi_hba);
        hisi_sas_debugfs_snapshot_port_reg(hisi_hba);
+       hisi_sas_debugfs_snapshot_axi_reg(hisi_hba);
+       hisi_sas_debugfs_snapshot_ras_reg(hisi_hba);
        hisi_sas_debugfs_snapshot_cq_reg(hisi_hba);
        hisi_sas_debugfs_snapshot_dq_reg(hisi_hba);
        hisi_sas_debugfs_snapshot_itct_reg(hisi_hba);
@@ -3162,6 +3294,382 @@ static const struct file_operations hisi_sas_debugfs_trigger_dump_fops = {
        .owner = THIS_MODULE,
 };
 
+/* BIST loopback modes; the values are paired with names in the modes table. */
+enum {
+       HISI_SAS_BIST_LOOPBACK_MODE_DIGITAL = 0,
+       HISI_SAS_BIST_LOOPBACK_MODE_SERDES = 1,
+       HISI_SAS_BIST_LOOPBACK_MODE_REMOTE = 2,
+};
+
+/* BIST code modes; the values are paired with names in the code mode table. */
+enum {
+       HISI_SAS_BIST_CODE_MODE_PRBS7 = 0,
+       HISI_SAS_BIST_CODE_MODE_PRBS23 = 1,
+       HISI_SAS_BIST_CODE_MODE_PRBS31 = 2,
+       HISI_SAS_BIST_CODE_MODE_JTPAT = 3,
+       HISI_SAS_BIST_CODE_MODE_CJTPAT = 4,
+       HISI_SAS_BIST_CODE_MODE_SCRAMBED_0 = 5,
+       HISI_SAS_BIST_CODE_MODE_TRAIN = 6,
+       HISI_SAS_BIST_CODE_MODE_TRAIN_DONE = 7,
+       HISI_SAS_BIST_CODE_MODE_HFTP = 8,
+       HISI_SAS_BIST_CODE_MODE_MFTP = 9,
+       HISI_SAS_BIST_CODE_MODE_LFTP = 10,
+       HISI_SAS_BIST_CODE_MODE_FIXED_DATA = 11,
+};
+
+/*
+ * Link rates selectable for BIST, with the names that the linkrate show
+ * routine prints and the linkrate write routine matches against.
+ */
+static const struct {
+       int             value;
+       char            *name;
+} hisi_sas_debugfs_loop_linkrate[] = {
+       { .value = SAS_LINK_RATE_1_5_GBPS, .name = "1.5 Gbit" },
+       { .value = SAS_LINK_RATE_3_0_GBPS, .name = "3.0 Gbit" },
+       { .value = SAS_LINK_RATE_6_0_GBPS, .name = "6.0 Gbit" },
+       { .value = SAS_LINK_RATE_12_0_GBPS, .name = "12.0 Gbit" },
+};
+
+/*
+ * seq_file show routine: list every entry of the link rate table on one
+ * line, wrapping the currently configured rate in square brackets.
+ * Always returns 0.
+ */
+static int hisi_sas_debugfs_bist_linkrate_show(struct seq_file *s, void *p)
+{
+       struct hisi_hba *hba = s->private;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(hisi_sas_debugfs_loop_linkrate); i++) {
+               int selected = (hba->debugfs_bist_linkrate ==
+                               hisi_sas_debugfs_loop_linkrate[i].value);
+               const char *lbr = selected ? "[" : "";
+               const char *rbr = selected ? "]" : "";
+
+               seq_printf(s, "%s%s%s ", lbr,
+                          hisi_sas_debugfs_loop_linkrate[i].name, rbr);
+       }
+       seq_puts(s, "\n");
+
+       return 0;
+}
+
+/*
+ * debugfs write handler: select the BIST link rate by name.
+ *
+ * The user string, with surrounding whitespace stripped, must equal one of
+ * the names in hisi_sas_debugfs_loop_linkrate[].
+ *
+ * Returns @count on success, -EPERM while BIST is enabled, -EOVERFLOW when
+ * the input does not fit the local buffer, -EFAULT when the user buffer
+ * cannot be read, and -EINVAL for an unknown name.
+ */
+static ssize_t hisi_sas_debugfs_bist_linkrate_write(struct file *filp,
+                                                   const char __user *buf,
+                                                   size_t count, loff_t *ppos)
+{
+       struct seq_file *m = filp->private_data;
+       struct hisi_hba *hisi_hba = m->private;
+       char kbuf[16] = {}, *pkbuf;
+       int i;
+
+       if (hisi_hba->debugfs_bist_enable)
+               return -EPERM;
+
+       /* Keep at least one zeroed byte so kbuf stays NUL-terminated */
+       if (count >= sizeof(kbuf))
+               return -EOVERFLOW;
+
+       /* A failed user copy is a bad address, not a bad value */
+       if (copy_from_user(kbuf, buf, count))
+               return -EFAULT;
+
+       pkbuf = strstrip(kbuf);
+
+       for (i = 0; i < ARRAY_SIZE(hisi_sas_debugfs_loop_linkrate); i++) {
+               if (!strncmp(hisi_sas_debugfs_loop_linkrate[i].name,
+                            pkbuf, sizeof(kbuf))) {
+                       hisi_hba->debugfs_bist_linkrate =
+                               hisi_sas_debugfs_loop_linkrate[i].value;
+                       return count;
+               }
+       }
+
+       return -EINVAL;
+}
+
+/* Open handler: attach the BIST link rate show routine via single_open(). */
+static int hisi_sas_debugfs_bist_linkrate_open(struct inode *inode,
+                                              struct file *filp)
+{
+       void *priv = inode->i_private;
+
+       return single_open(filp, hisi_sas_debugfs_bist_linkrate_show, priv);
+}
+
+/* File operations for the BIST link rate file: seq_file read plus write. */
+static const struct file_operations hisi_sas_debugfs_bist_linkrate_ops = {
+       .owner = THIS_MODULE,
+       .open = hisi_sas_debugfs_bist_linkrate_open,
+       .read = seq_read,
+       .write = hisi_sas_debugfs_bist_linkrate_write,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+/*
+ * BIST code modes, with the names that the code mode show routine prints
+ * and the code mode write routine matches against.
+ */
+static const struct {
+       int             value;
+       char            *name;
+} hisi_sas_debugfs_loop_code_mode[] = {
+       { .value = HISI_SAS_BIST_CODE_MODE_PRBS7, .name = "PRBS7" },
+       { .value = HISI_SAS_BIST_CODE_MODE_PRBS23, .name = "PRBS23" },
+       { .value = HISI_SAS_BIST_CODE_MODE_PRBS31, .name = "PRBS31" },
+       { .value = HISI_SAS_BIST_CODE_MODE_JTPAT, .name = "JTPAT" },
+       { .value = HISI_SAS_BIST_CODE_MODE_CJTPAT, .name = "CJTPAT" },
+       { .value = HISI_SAS_BIST_CODE_MODE_SCRAMBED_0, .name = "SCRAMBED_0" },
+       { .value = HISI_SAS_BIST_CODE_MODE_TRAIN, .name = "TRAIN" },
+       { .value = HISI_SAS_BIST_CODE_MODE_TRAIN_DONE, .name = "TRAIN_DONE" },
+       { .value = HISI_SAS_BIST_CODE_MODE_HFTP, .name = "HFTP" },
+       { .value = HISI_SAS_BIST_CODE_MODE_MFTP, .name = "MFTP" },
+       { .value = HISI_SAS_BIST_CODE_MODE_LFTP, .name = "LFTP" },
+       { .value = HISI_SAS_BIST_CODE_MODE_FIXED_DATA, .name = "FIXED_DATA" },
+};
+
+/*
+ * seq_file show routine: list every entry of the code mode table on one
+ * line, wrapping the currently configured mode in square brackets.
+ * Always returns 0.
+ */
+static int hisi_sas_debugfs_bist_code_mode_show(struct seq_file *s, void *p)
+{
+       struct hisi_hba *hba = s->private;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(hisi_sas_debugfs_loop_code_mode); i++) {
+               int selected = (hba->debugfs_bist_code_mode ==
+                               hisi_sas_debugfs_loop_code_mode[i].value);
+               const char *lbr = selected ? "[" : "";
+               const char *rbr = selected ? "]" : "";
+
+               seq_printf(s, "%s%s%s ", lbr,
+                          hisi_sas_debugfs_loop_code_mode[i].name, rbr);
+       }
+       seq_puts(s, "\n");
+
+       return 0;
+}
+
+/*
+ * debugfs write handler: select the BIST code mode by name.
+ *
+ * The user string, with surrounding whitespace stripped, must equal one of
+ * the names in hisi_sas_debugfs_loop_code_mode[].
+ *
+ * Returns @count on success, -EPERM while BIST is enabled, -EOVERFLOW when
+ * the input does not fit the local buffer, -EFAULT when the user buffer
+ * cannot be read, and -EINVAL for an unknown name.
+ */
+static ssize_t hisi_sas_debugfs_bist_code_mode_write(struct file *filp,
+                                                    const char __user *buf,
+                                                    size_t count,
+                                                    loff_t *ppos)
+{
+       struct seq_file *m = filp->private_data;
+       struct hisi_hba *hisi_hba = m->private;
+       char kbuf[16] = {}, *pkbuf;
+       int i;
+
+       if (hisi_hba->debugfs_bist_enable)
+               return -EPERM;
+
+       /* Keep at least one zeroed byte so kbuf stays NUL-terminated */
+       if (count >= sizeof(kbuf))
+               return -EOVERFLOW;
+
+       /* A failed user copy is a bad address, not an overflow */
+       if (copy_from_user(kbuf, buf, count))
+               return -EFAULT;
+
+       pkbuf = strstrip(kbuf);
+
+       for (i = 0; i < ARRAY_SIZE(hisi_sas_debugfs_loop_code_mode); i++) {
+               if (!strncmp(hisi_sas_debugfs_loop_code_mode[i].name,
+                            pkbuf, sizeof(kbuf))) {
+                       hisi_hba->debugfs_bist_code_mode =
+                               hisi_sas_debugfs_loop_code_mode[i].value;
+                       return count;
+               }
+       }
+
+       return -EINVAL;
+}
+
+/* Open handler: attach the BIST code mode show routine via single_open(). */
+static int hisi_sas_debugfs_bist_code_mode_open(struct inode *inode,
+                                               struct file *filp)
+{
+       void *priv = inode->i_private;
+
+       return single_open(filp, hisi_sas_debugfs_bist_code_mode_show, priv);
+}
+
+/* File operations for the BIST code mode file: seq_file read plus write. */
+static const struct file_operations hisi_sas_debugfs_bist_code_mode_ops = {
+       .owner = THIS_MODULE,
+       .open = hisi_sas_debugfs_bist_code_mode_open,
+       .read = seq_read,
+       .write = hisi_sas_debugfs_bist_code_mode_write,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+/*
+ * debugfs write handler: choose the phy used for BIST.
+ *
+ * Returns @count on success, -EPERM while BIST is enabled, the
+ * kstrtouint_from_user() error when the input is not an unsigned integer,
+ * and -EINVAL when the number is not below hisi_hba->n_phy.
+ */
+static ssize_t hisi_sas_debugfs_bist_phy_write(struct file *filp,
+                                              const char __user *buf,
+                                              size_t count, loff_t *ppos)
+{
+       struct seq_file *seq = filp->private_data;
+       struct hisi_hba *hba = seq->private;
+       unsigned int requested;
+       int rc;
+
+       if (hba->debugfs_bist_enable)
+               return -EPERM;
+
+       rc = kstrtouint_from_user(buf, count, 0, &requested);
+       if (rc)
+               return rc;
+
+       if (requested >= hba->n_phy)
+               return -EINVAL;
+
+       hba->debugfs_bist_phy_no = requested;
+
+       return count;
+}
+
+/* seq_file show routine: print the phy number selected for BIST. */
+static int hisi_sas_debugfs_bist_phy_show(struct seq_file *s, void *p)
+{
+       struct hisi_hba *hba = s->private;
+
+       seq_printf(s, "%d\n", hba->debugfs_bist_phy_no);
+
+       return 0;
+}
+
+/* Open handler: attach the BIST phy show routine via single_open(). */
+static int hisi_sas_debugfs_bist_phy_open(struct inode *inode,
+                                         struct file *filp)
+{
+       void *priv = inode->i_private;
+
+       return single_open(filp, hisi_sas_debugfs_bist_phy_show, priv);
+}
+
+/* File operations for the BIST phy file: seq_file read plus write. */
+static const struct file_operations hisi_sas_debugfs_bist_phy_ops = {
+       .owner = THIS_MODULE,
+       .open = hisi_sas_debugfs_bist_phy_open,
+       .read = seq_read,
+       .write = hisi_sas_debugfs_bist_phy_write,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+/*
+ * BIST loopback modes, with the names that the mode show routine prints and
+ * the mode write routine matches against. The names are user-visible, so
+ * they must be spelled exactly as a user is expected to type them.
+ */
+static const struct {
+       int             value;
+       const char      *name;
+} hisi_sas_debugfs_loop_modes[] = {
+       { HISI_SAS_BIST_LOOPBACK_MODE_DIGITAL, "digital" },
+       { HISI_SAS_BIST_LOOPBACK_MODE_SERDES, "serdes" },
+       { HISI_SAS_BIST_LOOPBACK_MODE_REMOTE, "remote" },
+};
+
+/*
+ * seq_file show routine: list every entry of the loopback mode table on one
+ * line, wrapping the currently configured mode in square brackets.
+ * Always returns 0.
+ */
+static int hisi_sas_debugfs_bist_mode_show(struct seq_file *s, void *p)
+{
+       struct hisi_hba *hba = s->private;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(hisi_sas_debugfs_loop_modes); i++) {
+               int selected = (hba->debugfs_bist_mode ==
+                               hisi_sas_debugfs_loop_modes[i].value);
+               const char *lbr = selected ? "[" : "";
+               const char *rbr = selected ? "]" : "";
+
+               seq_printf(s, "%s%s%s ", lbr,
+                          hisi_sas_debugfs_loop_modes[i].name, rbr);
+       }
+       seq_puts(s, "\n");
+
+       return 0;
+}
+
+/*
+ * debugfs write handler: select the BIST loopback mode by name.
+ *
+ * The user string, with surrounding whitespace stripped, must equal one of
+ * the names in hisi_sas_debugfs_loop_modes[].
+ *
+ * Returns @count on success, -EPERM while BIST is enabled, -EOVERFLOW when
+ * the input does not fit the local buffer, -EFAULT when the user buffer
+ * cannot be read, and -EINVAL for an unknown name.
+ */
+static ssize_t hisi_sas_debugfs_bist_mode_write(struct file *filp,
+                                               const char __user *buf,
+                                               size_t count, loff_t *ppos)
+{
+       struct seq_file *m = filp->private_data;
+       struct hisi_hba *hisi_hba = m->private;
+       char kbuf[16] = {}, *pkbuf;
+       int i;
+
+       if (hisi_hba->debugfs_bist_enable)
+               return -EPERM;
+
+       /* Keep at least one zeroed byte so kbuf stays NUL-terminated */
+       if (count >= sizeof(kbuf))
+               return -EOVERFLOW;
+
+       /* A failed user copy is a bad address, not an overflow */
+       if (copy_from_user(kbuf, buf, count))
+               return -EFAULT;
+
+       pkbuf = strstrip(kbuf);
+
+       for (i = 0; i < ARRAY_SIZE(hisi_sas_debugfs_loop_modes); i++) {
+               if (!strncmp(hisi_sas_debugfs_loop_modes[i].name, pkbuf,
+                            sizeof(kbuf))) {
+                       hisi_hba->debugfs_bist_mode =
+                               hisi_sas_debugfs_loop_modes[i].value;
+                       return count;
+               }
+       }
+
+       return -EINVAL;
+}
+
+/* Open handler: attach the BIST loopback mode show routine via single_open(). */
+static int hisi_sas_debugfs_bist_mode_open(struct inode *inode,
+                                          struct file *filp)
+{
+       void *priv = inode->i_private;
+
+       return single_open(filp, hisi_sas_debugfs_bist_mode_show, priv);
+}
+
+/* File operations for the BIST loopback mode file: seq_file read plus write. */
+static const struct file_operations hisi_sas_debugfs_bist_mode_ops = {
+       .owner = THIS_MODULE,
+       .open = hisi_sas_debugfs_bist_mode_open,
+       .read = seq_read,
+       .write = hisi_sas_debugfs_bist_mode_write,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+/*
+ * debugfs write handler: turn BIST on (1) or off (0).
+ *
+ * Writing the state that is already recorded is a no-op. Otherwise the hw
+ * set_bist() hook is called and the new state is recorded only when the hook
+ * does not return a negative value.
+ *
+ * Returns @count on success, the kstrtouint_from_user() error for malformed
+ * input, -EINVAL for values above 1, -EPERM when the hw has no set_bist()
+ * hook, or the negative value returned by set_bist().
+ */
+static ssize_t hisi_sas_debugfs_bist_enable_write(struct file *filp,
+                                                 const char __user *buf,
+                                                 size_t count, loff_t *ppos)
+{
+       struct seq_file *seq = filp->private_data;
+       struct hisi_hba *hba = seq->private;
+       unsigned int requested;
+       int rc;
+
+       rc = kstrtouint_from_user(buf, count, 0, &requested);
+       if (rc)
+               return rc;
+
+       if (requested > 1)
+               return -EINVAL;
+
+       if (requested != hba->debugfs_bist_enable) {
+               if (!hba->hw->set_bist)
+                       return -EPERM;
+
+               rc = hba->hw->set_bist(hba, requested);
+               if (rc < 0)
+                       return rc;
+
+               hba->debugfs_bist_enable = requested;
+       }
+
+       return count;
+}
+
+/* seq_file show routine: print the recorded BIST enable state. */
+static int hisi_sas_debugfs_bist_enable_show(struct seq_file *s, void *p)
+{
+       struct hisi_hba *hba = s->private;
+
+       seq_printf(s, "%d\n", hba->debugfs_bist_enable);
+
+       return 0;
+}
+
+/* Open handler: attach the BIST enable show routine via single_open(). */
+static int hisi_sas_debugfs_bist_enable_open(struct inode *inode,
+                                            struct file *filp)
+{
+       void *priv = inode->i_private;
+
+       return single_open(filp, hisi_sas_debugfs_bist_enable_show, priv);
+}
+
+/* File operations for the BIST enable file: seq_file read plus write. */
+static const struct file_operations hisi_sas_debugfs_bist_enable_ops = {
+       .owner = THIS_MODULE,
+       .open = hisi_sas_debugfs_bist_enable_open,
+       .read = seq_read,
+       .write = hisi_sas_debugfs_bist_enable_write,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
 void hisi_sas_debugfs_work_handler(struct work_struct *work)
 {
        struct hisi_hba *hisi_hba =
@@ -3175,89 +3683,165 @@ void hisi_sas_debugfs_work_handler(struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(hisi_sas_debugfs_work_handler);
 
-void hisi_sas_debugfs_init(struct hisi_hba *hisi_hba)
+void hisi_sas_debugfs_release(struct hisi_hba *hisi_hba)
 {
-       int max_command_entries = hisi_hba->hw->max_command_entries;
        struct device *dev = hisi_hba->dev;
-       int p, i, c, d;
+       int i;
+
+       devm_kfree(dev, hisi_hba->debugfs_iost_cache);
+       devm_kfree(dev, hisi_hba->debugfs_itct_cache);
+       devm_kfree(dev, hisi_hba->debugfs_iost);
+
+       for (i = 0; i < hisi_hba->queue_count; i++)
+               devm_kfree(dev, hisi_hba->debugfs_cmd_hdr[i]);
+
+       for (i = 0; i < hisi_hba->queue_count; i++)
+               devm_kfree(dev, hisi_hba->debugfs_complete_hdr[i]);
+
+       for (i = 0; i < DEBUGFS_REGS_NUM; i++)
+               devm_kfree(dev, hisi_hba->debugfs_regs[i]);
+
+       for (i = 0; i < hisi_hba->n_phy; i++)
+               devm_kfree(dev, hisi_hba->debugfs_port_reg[i]);
+}
+
+int hisi_sas_debugfs_alloc(struct hisi_hba *hisi_hba)
+{
+       const struct hisi_sas_hw *hw = hisi_hba->hw;
+       struct device *dev = hisi_hba->dev;
+       int p, c, d;
        size_t sz;
 
-       hisi_hba->debugfs_dir = debugfs_create_dir(dev_name(dev),
-                                                  hisi_sas_debugfs_dir);
-       debugfs_create_file("trigger_dump", 0600,
-                           hisi_hba->debugfs_dir,
-                           hisi_hba,
-                           &hisi_sas_debugfs_trigger_dump_fops);
+       hisi_hba->debugfs_dump_dentry =
+                       debugfs_create_dir("dump", hisi_hba->debugfs_dir);
 
-       /* Alloc buffer for global */
-       sz = hisi_hba->hw->debugfs_reg_global->count * 4;
-       hisi_hba->debugfs_global_reg =
-               devm_kmalloc(dev, sz, GFP_KERNEL);
+       sz = hw->debugfs_reg_array[DEBUGFS_GLOBAL]->count * 4;
+       hisi_hba->debugfs_regs[DEBUGFS_GLOBAL] =
+                               devm_kmalloc(dev, sz, GFP_KERNEL);
 
-       if (!hisi_hba->debugfs_global_reg)
-               goto fail_global;
+       if (!hisi_hba->debugfs_regs[DEBUGFS_GLOBAL])
+               goto fail;
 
-       /* Alloc buffer for port */
-       sz = hisi_hba->hw->debugfs_reg_port->count * 4;
+       sz = hw->debugfs_reg_port->count * 4;
        for (p = 0; p < hisi_hba->n_phy; p++) {
                hisi_hba->debugfs_port_reg[p] =
                        devm_kmalloc(dev, sz, GFP_KERNEL);
 
                if (!hisi_hba->debugfs_port_reg[p])
-                       goto fail_port;
+                       goto fail;
        }
 
-       /* Alloc buffer for cq */
-       sz = hisi_hba->hw->complete_hdr_size * HISI_SAS_QUEUE_SLOTS;
+       sz = hw->debugfs_reg_array[DEBUGFS_AXI]->count * 4;
+       hisi_hba->debugfs_regs[DEBUGFS_AXI] =
+               devm_kmalloc(dev, sz, GFP_KERNEL);
+
+       if (!hisi_hba->debugfs_regs[DEBUGFS_AXI])
+               goto fail;
+
+       sz = hw->debugfs_reg_array[DEBUGFS_RAS]->count * 4;
+       hisi_hba->debugfs_regs[DEBUGFS_RAS] =
+               devm_kmalloc(dev, sz, GFP_KERNEL);
+
+       if (!hisi_hba->debugfs_regs[DEBUGFS_RAS])
+               goto fail;
+
+       sz = hw->complete_hdr_size * HISI_SAS_QUEUE_SLOTS;
        for (c = 0; c < hisi_hba->queue_count; c++) {
                hisi_hba->debugfs_complete_hdr[c] =
                        devm_kmalloc(dev, sz, GFP_KERNEL);
 
                if (!hisi_hba->debugfs_complete_hdr[c])
-                       goto fail_cq;
+                       goto fail;
        }
 
-       /* Alloc buffer for dq */
        sz = sizeof(struct hisi_sas_cmd_hdr) * HISI_SAS_QUEUE_SLOTS;
        for (d = 0; d < hisi_hba->queue_count; d++) {
                hisi_hba->debugfs_cmd_hdr[d] =
                        devm_kmalloc(dev, sz, GFP_KERNEL);
 
                if (!hisi_hba->debugfs_cmd_hdr[d])
-                       goto fail_iost_dq;
+                       goto fail;
        }
 
-       /* Alloc buffer for iost */
-       sz = max_command_entries * sizeof(struct hisi_sas_iost);
+       sz = HISI_SAS_MAX_COMMANDS * sizeof(struct hisi_sas_iost);
 
        hisi_hba->debugfs_iost = devm_kmalloc(dev, sz, GFP_KERNEL);
        if (!hisi_hba->debugfs_iost)
-               goto fail_iost_dq;
+               goto fail;
+
+       sz = HISI_SAS_IOST_ITCT_CACHE_NUM *
+            sizeof(struct hisi_sas_iost_itct_cache);
+
+       hisi_hba->debugfs_iost_cache = devm_kmalloc(dev, sz, GFP_KERNEL);
+       if (!hisi_hba->debugfs_iost_cache)
+               goto fail;
+
+       sz = HISI_SAS_IOST_ITCT_CACHE_NUM *
+            sizeof(struct hisi_sas_iost_itct_cache);
+
+       hisi_hba->debugfs_itct_cache = devm_kmalloc(dev, sz, GFP_KERNEL);
+       if (!hisi_hba->debugfs_itct_cache)
+               goto fail;
 
-       /* Alloc buffer for itct */
        /* New memory allocation must be locate before itct */
        sz = HISI_SAS_MAX_ITCT_ENTRIES * sizeof(struct hisi_sas_itct);
 
        hisi_hba->debugfs_itct = devm_kmalloc(dev, sz, GFP_KERNEL);
        if (!hisi_hba->debugfs_itct)
-               goto fail_itct;
+               goto fail;
 
-       return;
-fail_itct:
-       devm_kfree(dev, hisi_hba->debugfs_iost);
-fail_iost_dq:
-       for (i = 0; i < d; i++)
-               devm_kfree(dev, hisi_hba->debugfs_cmd_hdr[i]);
-fail_cq:
-       for (i = 0; i < c; i++)
-               devm_kfree(dev, hisi_hba->debugfs_complete_hdr[i]);
-fail_port:
-       for (i = 0; i < p; i++)
-               devm_kfree(dev, hisi_hba->debugfs_port_reg[i]);
-       devm_kfree(dev, hisi_hba->debugfs_global_reg);
-fail_global:
-       debugfs_remove_recursive(hisi_hba->debugfs_dir);
-       dev_dbg(dev, "failed to init debugfs!\n");
+       return 0;
+fail:
+       hisi_sas_debugfs_release(hisi_hba);
+       return -ENOMEM;
+}
+
+void hisi_sas_debugfs_bist_init(struct hisi_hba *hisi_hba)
+{
+       hisi_hba->debugfs_bist_dentry =
+                       debugfs_create_dir("bist", hisi_hba->debugfs_dir);
+       debugfs_create_file("link_rate", 0600,
+                           hisi_hba->debugfs_bist_dentry, hisi_hba,
+                           &hisi_sas_debugfs_bist_linkrate_ops);
+
+       debugfs_create_file("code_mode", 0600,
+                           hisi_hba->debugfs_bist_dentry, hisi_hba,
+                           &hisi_sas_debugfs_bist_code_mode_ops);
+
+       debugfs_create_file("phy_id", 0600, hisi_hba->debugfs_bist_dentry,
+                           hisi_hba, &hisi_sas_debugfs_bist_phy_ops);
+
+       debugfs_create_u32("cnt", 0600, hisi_hba->debugfs_bist_dentry,
+                          &hisi_hba->debugfs_bist_cnt);
+
+       debugfs_create_file("loopback_mode", 0600,
+                           hisi_hba->debugfs_bist_dentry,
+                           hisi_hba, &hisi_sas_debugfs_bist_mode_ops);
+
+       debugfs_create_file("enable", 0600, hisi_hba->debugfs_bist_dentry,
+                           hisi_hba, &hisi_sas_debugfs_bist_enable_ops);
+
+       hisi_hba->debugfs_bist_linkrate = SAS_LINK_RATE_1_5_GBPS;
+}
+
+void hisi_sas_debugfs_init(struct hisi_hba *hisi_hba)
+{
+       struct device *dev = hisi_hba->dev;
+
+       hisi_hba->debugfs_dir = debugfs_create_dir(dev_name(dev),
+                                                  hisi_sas_debugfs_dir);
+       debugfs_create_file("trigger_dump", 0600,
+                           hisi_hba->debugfs_dir,
+                           hisi_hba,
+                           &hisi_sas_debugfs_trigger_dump_fops);
+
+       /* create bist structures */
+       hisi_sas_debugfs_bist_init(hisi_hba);
+
+       if (hisi_sas_debugfs_alloc(hisi_hba)) {
+               debugfs_remove_recursive(hisi_hba->debugfs_dir);
+               dev_dbg(dev, "failed to init debugfs!\n");
+       }
 }
 EXPORT_SYMBOL_GPL(hisi_sas_debugfs_init);
 
index 3912216..b861a0f 100644 (file)
@@ -401,8 +401,6 @@ enum {
        TRANS_RX_SMP_RESP_TIMEOUT_ERR, /* 0x31a */
 };
 
-#define HISI_SAS_COMMAND_ENTRIES_V1_HW 8192
-
 #define HISI_SAS_PHY_MAX_INT_NR (HISI_SAS_PHY_INT_NR * HISI_SAS_MAX_PHYS)
 #define HISI_SAS_CQ_MAX_INT_NR (HISI_SAS_MAX_QUEUES)
 #define HISI_SAS_FATAL_INT_NR (2)
@@ -418,13 +416,6 @@ static u32 hisi_sas_read32(struct hisi_hba *hisi_hba, u32 off)
        return readl(regs);
 }
 
-static u32 hisi_sas_read32_relaxed(struct hisi_hba *hisi_hba, u32 off)
-{
-       void __iomem *regs = hisi_hba->regs + off;
-
-       return readl_relaxed(regs);
-}
-
 static void hisi_sas_write32(struct hisi_hba *hisi_hba,
                                    u32 off, u32 val)
 {
@@ -866,30 +857,6 @@ static int get_wideport_bitmap_v1_hw(struct hisi_hba *hisi_hba, int port_id)
        return bitmap;
 }
 
-/*
- * The callpath to this function and upto writing the write
- * queue pointer should be safe from interruption.
- */
-static int
-get_free_slot_v1_hw(struct hisi_hba *hisi_hba, struct hisi_sas_dq *dq)
-{
-       struct device *dev = hisi_hba->dev;
-       int queue = dq->id;
-       u32 r, w;
-
-       w = dq->wr_point;
-       r = hisi_sas_read32_relaxed(hisi_hba,
-                               DLVRY_Q_0_RD_PTR + (queue * 0x14));
-       if (r == (w+1) % HISI_SAS_QUEUE_SLOTS) {
-               dev_warn(dev, "could not find free slot\n");
-               return -EAGAIN;
-       }
-
-       dq->wr_point = (dq->wr_point + 1) % HISI_SAS_QUEUE_SLOTS;
-
-       return w;
-}
-
 /* DQ lock must be taken here */
 static void start_delivery_v1_hw(struct hisi_sas_dq *dq)
 {
@@ -1308,21 +1275,17 @@ static int slot_complete_v1_hw(struct hisi_hba *hisi_hba,
        }
        case SAS_PROTOCOL_SMP:
        {
-               void *to;
                struct scatterlist *sg_resp = &task->smp_task.smp_resp;
+               void *to = page_address(sg_page(sg_resp));
 
                ts->stat = SAM_STAT_GOOD;
-               to = kmap_atomic(sg_page(sg_resp));
 
-               dma_unmap_sg(dev, &task->smp_task.smp_resp, 1,
-                            DMA_FROM_DEVICE);
                dma_unmap_sg(dev, &task->smp_task.smp_req, 1,
                             DMA_TO_DEVICE);
                memcpy(to + sg_resp->offset,
                       hisi_sas_status_buf_addr_mem(slot) +
                       sizeof(struct hisi_sas_err_record),
-                      sg_dma_len(sg_resp));
-               kunmap_atomic(to);
+                      sg_resp->length);
                break;
        }
        case SAS_PROTOCOL_SATA:
@@ -1534,11 +1497,9 @@ static irqreturn_t cq_interrupt_v1_hw(int irq, void *p)
        struct hisi_sas_complete_v1_hdr *complete_queue =
                        (struct hisi_sas_complete_v1_hdr *)
                        hisi_hba->complete_hdr[queue];
-       u32 irq_value, rd_point = cq->rd_point, wr_point;
+       u32 rd_point = cq->rd_point, wr_point;
 
        spin_lock(&hisi_hba->lock);
-       irq_value = hisi_sas_read32(hisi_hba, OQ_INT_SRC);
-
        hisi_sas_write32(hisi_hba, OQ_INT_SRC, 1 << queue);
        wr_point = hisi_sas_read32(hisi_hba,
                        COMPL_Q_0_WR_PTR + (0x14 * queue));
@@ -1820,9 +1781,7 @@ static const struct hisi_sas_hw hisi_sas_v1_hw = {
        .clear_itct = clear_itct_v1_hw,
        .prep_smp = prep_smp_v1_hw,
        .prep_ssp = prep_ssp_v1_hw,
-       .get_free_slot = get_free_slot_v1_hw,
        .start_delivery = start_delivery_v1_hw,
-       .slot_complete = slot_complete_v1_hw,
        .phys_init = phys_init_v1_hw,
        .phy_start = start_phy_v1_hw,
        .phy_disable = disable_phy_v1_hw,
@@ -1830,7 +1789,6 @@ static const struct hisi_sas_hw hisi_sas_v1_hw = {
        .phy_set_linkrate = phy_set_linkrate_v1_hw,
        .phy_get_max_linkrate = phy_get_max_linkrate_v1_hw,
        .get_wideport_bitmap = get_wideport_bitmap_v1_hw,
-       .max_command_entries = HISI_SAS_COMMAND_ENTRIES_V1_HW,
        .complete_hdr_size = sizeof(struct hisi_sas_complete_v1_hdr),
        .sht = &sht_v1_hw,
 };
index e9b15d4..8e96a25 100644 (file)
@@ -1637,31 +1637,6 @@ static int get_wideport_bitmap_v2_hw(struct hisi_hba *hisi_hba, int port_id)
        return bitmap;
 }
 
-/*
- * The callpath to this function and upto writing the write
- * queue pointer should be safe from interruption.
- */
-static int
-get_free_slot_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_dq *dq)
-{
-       struct device *dev = hisi_hba->dev;
-       int queue = dq->id;
-       u32 r, w;
-
-       w = dq->wr_point;
-       r = hisi_sas_read32_relaxed(hisi_hba,
-                               DLVRY_Q_0_RD_PTR + (queue * 0x14));
-       if (r == (w+1) % HISI_SAS_QUEUE_SLOTS) {
-               dev_warn(dev, "full queue=%d r=%d w=%d\n",
-                               queue, r, w);
-               return -EAGAIN;
-       }
-
-       dq->wr_point = (dq->wr_point + 1) % HISI_SAS_QUEUE_SLOTS;
-
-       return w;
-}
-
 /* DQ lock must be taken here */
 static void start_delivery_v2_hw(struct hisi_sas_dq *dq)
 {
@@ -2418,7 +2393,7 @@ slot_complete_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot)
                        slot_err_v2_hw(hisi_hba, task, slot, 2);
 
                if (ts->stat != SAS_DATA_UNDERRUN)
-                       dev_info(dev, "erroneous completion iptt=%d task=%p dev id=%d CQ hdr: 0x%x 0x%x 0x%x 0x%x Error info: 0x%x 0x%x 0x%x 0x%x\n",
+                       dev_info(dev, "erroneous completion iptt=%d task=%pK dev id=%d CQ hdr: 0x%x 0x%x 0x%x 0x%x Error info: 0x%x 0x%x 0x%x 0x%x\n",
                                 slot->idx, task, sas_dev->device_id,
                                 complete_hdr->dw0, complete_hdr->dw1,
                                 complete_hdr->act, complete_hdr->dw3,
@@ -2444,20 +2419,16 @@ slot_complete_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot)
        case SAS_PROTOCOL_SMP:
        {
                struct scatterlist *sg_resp = &task->smp_task.smp_resp;
-               void *to;
+               void *to = page_address(sg_page(sg_resp));
 
                ts->stat = SAM_STAT_GOOD;
-               to = kmap_atomic(sg_page(sg_resp));
 
-               dma_unmap_sg(dev, &task->smp_task.smp_resp, 1,
-                            DMA_FROM_DEVICE);
                dma_unmap_sg(dev, &task->smp_task.smp_req, 1,
                             DMA_TO_DEVICE);
                memcpy(to + sg_resp->offset,
                       hisi_sas_status_buf_addr_mem(slot) +
                       sizeof(struct hisi_sas_err_record),
-                      sg_dma_len(sg_resp));
-               kunmap_atomic(to);
+                      sg_resp->length);
                break;
        }
        case SAS_PROTOCOL_SATA:
@@ -2484,7 +2455,7 @@ out:
        spin_lock_irqsave(&task->task_state_lock, flags);
        if (task->task_state_flags & SAS_TASK_STATE_ABORTED) {
                spin_unlock_irqrestore(&task->task_state_lock, flags);
-               dev_info(dev, "slot complete: task(%p) aborted\n", task);
+               dev_info(dev, "slot complete: task(%pK) aborted\n", task);
                return SAS_ABORTED_TASK;
        }
        task->task_state_flags |= SAS_TASK_STATE_DONE;
@@ -2495,7 +2466,7 @@ out:
                spin_lock_irqsave(&device->done_lock, flags);
                if (test_bit(SAS_HA_FROZEN, &ha->state)) {
                        spin_unlock_irqrestore(&device->done_lock, flags);
-                       dev_info(dev, "slot complete: task(%p) ignored\n",
+                       dev_info(dev, "slot complete: task(%pK) ignored\n",
                                 task);
                        return sts;
                }
@@ -2563,7 +2534,10 @@ static void prep_ata_v2_hw(struct hisi_hba *hisi_hba,
        hdr->dw1 = cpu_to_le32(dw1);
 
        /* dw2 */
-       if (task->ata_task.use_ncq && hisi_sas_get_ncq_tag(task, &hdr_tag)) {
+       if (task->ata_task.use_ncq) {
+               struct ata_queued_cmd *qc = task->uldd_task;
+
+               hdr_tag = qc->tag;
                task->ata_task.fis.sector_count |= (u8) (hdr_tag << 3);
                dw2 |= hdr_tag << CMD_HDR_NCQ_TAG_OFF;
        }
@@ -3333,8 +3307,8 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba)
 {
        struct platform_device *pdev = hisi_hba->platform_dev;
        struct device *dev = &pdev->dev;
-       int irq, rc, irq_map[128];
-       int i, phy_no, fatal_no, queue_no, k;
+       int irq, rc = 0, irq_map[128];
+       int i, phy_no, fatal_no, queue_no;
 
        for (i = 0; i < 128; i++)
                irq_map[i] = platform_get_irq(pdev, i);
@@ -3347,7 +3321,7 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba)
                        dev_err(dev, "irq init: could not request phy interrupt %d, rc=%d\n",
                                irq, rc);
                        rc = -ENOENT;
-                       goto free_phy_int_irqs;
+                       goto err_out;
                }
        }
 
@@ -3361,7 +3335,7 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba)
                        dev_err(dev, "irq init: could not request sata interrupt %d, rc=%d\n",
                                irq, rc);
                        rc = -ENOENT;
-                       goto free_sata_int_irqs;
+                       goto err_out;
                }
        }
 
@@ -3373,7 +3347,7 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba)
                        dev_err(dev, "irq init: could not request fatal interrupt %d, rc=%d\n",
                                irq, rc);
                        rc = -ENOENT;
-                       goto free_fatal_int_irqs;
+                       goto err_out;
                }
        }
 
@@ -3388,34 +3362,14 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba)
                        dev_err(dev, "irq init: could not request cq interrupt %d, rc=%d\n",
                                irq, rc);
                        rc = -ENOENT;
-                       goto free_cq_int_irqs;
+                       goto err_out;
                }
                tasklet_init(t, cq_tasklet_v2_hw, (unsigned long)cq);
        }
 
        hisi_hba->cq_nvecs = hisi_hba->queue_count;
 
-       return 0;
-
-free_cq_int_irqs:
-       for (k = 0; k < queue_no; k++) {
-               struct hisi_sas_cq *cq = &hisi_hba->cq[k];
-
-               free_irq(irq_map[k + 96], cq);
-               tasklet_kill(&cq->tasklet);
-       }
-free_fatal_int_irqs:
-       for (k = 0; k < fatal_no; k++)
-               free_irq(irq_map[k + 81], hisi_hba);
-free_sata_int_irqs:
-       for (k = 0; k < phy_no; k++) {
-               struct hisi_sas_phy *phy = &hisi_hba->phy[k];
-
-               free_irq(irq_map[k + 72], phy);
-       }
-free_phy_int_irqs:
-       for (k = 0; k < i; k++)
-               free_irq(irq_map[k + 1], hisi_hba);
+err_out:
        return rc;
 }
 
@@ -3544,8 +3498,8 @@ static int write_gpio_v2_hw(struct hisi_hba *hisi_hba, u8 reg_type,
        return 0;
 }
 
-static int wait_cmds_complete_timeout_v2_hw(struct hisi_hba *hisi_hba,
-                                           int delay_ms, int timeout_ms)
+static void wait_cmds_complete_timeout_v2_hw(struct hisi_hba *hisi_hba,
+                                            int delay_ms, int timeout_ms)
 {
        struct device *dev = hisi_hba->dev;
        int entries, entries_old = 0, time;
@@ -3559,12 +3513,13 @@ static int wait_cmds_complete_timeout_v2_hw(struct hisi_hba *hisi_hba,
                msleep(delay_ms);
        }
 
-       if (time >= timeout_ms)
-               return -ETIMEDOUT;
+       if (time >= timeout_ms) {
+               dev_dbg(dev, "Wait commands complete timeout!\n");
+               return;
+       }
 
        dev_dbg(dev, "wait commands complete %dms\n", time);
 
-       return 0;
 }
 
 static struct device_attribute *host_attrs_v2_hw[] = {
@@ -3606,9 +3561,7 @@ static const struct hisi_sas_hw hisi_sas_v2_hw = {
        .prep_ssp = prep_ssp_v2_hw,
        .prep_stp = prep_ata_v2_hw,
        .prep_abort = prep_abort_v2_hw,
-       .get_free_slot = get_free_slot_v2_hw,
        .start_delivery = start_delivery_v2_hw,
-       .slot_complete = slot_complete_v2_hw,
        .phys_init = phys_init_v2_hw,
        .phy_start = start_phy_v2_hw,
        .phy_disable = disable_phy_v2_hw,
@@ -3616,7 +3569,6 @@ static const struct hisi_sas_hw hisi_sas_v2_hw = {
        .get_events = phy_get_events_v2_hw,
        .phy_set_linkrate = phy_set_linkrate_v2_hw,
        .phy_get_max_linkrate = phy_get_max_linkrate_v2_hw,
-       .max_command_entries = HISI_SAS_COMMAND_ENTRIES_V2_HW,
        .complete_hdr_size = sizeof(struct hisi_sas_complete_v2_hdr),
        .soft_reset = soft_reset_v2_hw,
        .get_phys_state = get_phys_state_v2_hw,
index 5f0f6df..cb8d087 100644 (file)
@@ -71,6 +71,7 @@
 #define HGC_DQE_ECC_MB_ADDR_OFF        16
 #define HGC_DQE_ECC_MB_ADDR_MSK (0xfff << HGC_DQE_ECC_MB_ADDR_OFF)
 #define CHNL_INT_STATUS                        0x148
+#define TAB_DFX                                0x14c
 #define HGC_ITCT_ECC_ADDR              0x150
 #define HGC_ITCT_ECC_1B_ADDR_OFF               0
 #define HGC_ITCT_ECC_1B_ADDR_MSK               (0x3ff << \
@@ -83,6 +84,7 @@
 #define AXI_ERR_INFO_MSK               (0xff << AXI_ERR_INFO_OFF)
 #define FIFO_ERR_INFO_OFF              8
 #define FIFO_ERR_INFO_MSK              (0xff << FIFO_ERR_INFO_OFF)
+#define TAB_RD_TYPE                    0x15c
 #define INT_COAL_EN                    0x19c
 #define OQ_INT_COAL_TIME               0x1a0
 #define OQ_INT_COAL_CNT                        0x1a4
 #define PHY_CFG_PHY_RST_OFF            3
 #define PHY_CFG_PHY_RST_MSK            (0x1 << PHY_CFG_PHY_RST_OFF)
 #define PROG_PHY_LINK_RATE             (PORT_BASE + 0x8)
+#define CFG_PROG_PHY_LINK_RATE_OFF     8
+#define CFG_PROG_PHY_LINK_RATE_MSK     (0xf << CFG_PROG_PHY_LINK_RATE_OFF)
 #define PHY_CTRL                       (PORT_BASE + 0x14)
 #define PHY_CTRL_RESET_OFF             0
 #define PHY_CTRL_RESET_MSK             (0x1 << PHY_CTRL_RESET_OFF)
 #define CMD_HDR_PIR_OFF                        8
 #define CMD_HDR_PIR_MSK                        (0x1 << CMD_HDR_PIR_OFF)
 #define SERDES_CFG                     (PORT_BASE + 0x1c)
+#define CFG_ALOS_CHK_DISABLE_OFF       9
+#define CFG_ALOS_CHK_DISABLE_MSK       (0x1 << CFG_ALOS_CHK_DISABLE_OFF)
+#define SAS_PHY_BIST_CTRL              (PORT_BASE + 0x2c)
+#define CFG_BIST_MODE_SEL_OFF          0
+#define CFG_BIST_MODE_SEL_MSK          (0xf << CFG_BIST_MODE_SEL_OFF)
+#define CFG_LOOP_TEST_MODE_OFF         14
+#define CFG_LOOP_TEST_MODE_MSK         (0x3 << CFG_LOOP_TEST_MODE_OFF)
+#define CFG_RX_BIST_EN_OFF             16
+#define CFG_RX_BIST_EN_MSK             (0x1 << CFG_RX_BIST_EN_OFF)
+#define CFG_TX_BIST_EN_OFF             17
+#define CFG_TX_BIST_EN_MSK             (0x1 << CFG_TX_BIST_EN_OFF)
+#define CFG_BIST_TEST_OFF              18
+#define CFG_BIST_TEST_MSK              (0x1 << CFG_BIST_TEST_OFF)
+#define SAS_PHY_BIST_CODE              (PORT_BASE + 0x30)
+#define SAS_PHY_BIST_CODE1             (PORT_BASE + 0x34)
+#define SAS_BIST_ERR_CNT               (PORT_BASE + 0x38)
 #define SL_CFG                         (PORT_BASE + 0x84)
 #define AIP_LIMIT                      (PORT_BASE + 0x90)
 #define SL_CONTROL                     (PORT_BASE + 0x94)
@@ -499,13 +519,6 @@ static u32 hisi_sas_read32(struct hisi_hba *hisi_hba, u32 off)
        return readl(regs);
 }
 
-static u32 hisi_sas_read32_relaxed(struct hisi_hba *hisi_hba, u32 off)
-{
-       void __iomem *regs = hisi_hba->regs + off;
-
-       return readl_relaxed(regs);
-}
-
 static void hisi_sas_write32(struct hisi_hba *hisi_hba, u32 off, u32 val)
 {
        void __iomem *regs = hisi_hba->regs + off;
@@ -1006,31 +1019,6 @@ static int get_wideport_bitmap_v3_hw(struct hisi_hba *hisi_hba, int port_id)
        return bitmap;
 }
 
-/**
- * The callpath to this function and upto writing the write
- * queue pointer should be safe from interruption.
- */
-static int
-get_free_slot_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_dq *dq)
-{
-       struct device *dev = hisi_hba->dev;
-       int queue = dq->id;
-       u32 r, w;
-
-       w = dq->wr_point;
-       r = hisi_sas_read32_relaxed(hisi_hba,
-                               DLVRY_Q_0_RD_PTR + (queue * 0x14));
-       if (r == (w+1) % HISI_SAS_QUEUE_SLOTS) {
-               dev_warn(dev, "full queue=%d r=%d w=%d\n",
-                        queue, r, w);
-               return -EAGAIN;
-       }
-
-       dq->wr_point = (dq->wr_point + 1) % HISI_SAS_QUEUE_SLOTS;
-
-       return w;
-}
-
 static void start_delivery_v3_hw(struct hisi_sas_dq *dq)
 {
        struct hisi_hba *hisi_hba = dq->hisi_hba;
@@ -1386,7 +1374,10 @@ static void prep_ata_v3_hw(struct hisi_hba *hisi_hba,
        hdr->dw1 = cpu_to_le32(dw1);
 
        /* dw2 */
-       if (task->ata_task.use_ncq && hisi_sas_get_ncq_tag(task, &hdr_tag)) {
+       if (task->ata_task.use_ncq) {
+               struct ata_queued_cmd *qc = task->uldd_task;
+
+               hdr_tag = qc->tag;
                task->ata_task.fis.sector_count |= (u8) (hdr_tag << 3);
                dw2 |= hdr_tag << CMD_HDR_NCQ_TAG_OFF;
        }
@@ -1944,7 +1935,7 @@ static void fatal_ecc_int_v3_hw(struct hisi_hba *hisi_hba)
        u32 irq_value, irq_msk;
 
        irq_msk = hisi_sas_read32(hisi_hba, SAS_ECC_INTR_MSK);
-       hisi_sas_write32(hisi_hba, SAS_ECC_INTR_MSK, irq_msk | 0xffffffff);
+       hisi_sas_write32(hisi_hba, SAS_ECC_INTR_MSK, 0xffffffff);
 
        irq_value = hisi_sas_read32(hisi_hba, SAS_ECC_INTR);
        if (irq_value)
@@ -2220,7 +2211,7 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot)
 
                slot_err_v3_hw(hisi_hba, task, slot);
                if (ts->stat != SAS_DATA_UNDERRUN)
-                       dev_info(dev, "erroneous completion iptt=%d task=%p dev id=%d CQ hdr: 0x%x 0x%x 0x%x 0x%x Error info: 0x%x 0x%x 0x%x 0x%x\n",
+                       dev_info(dev, "erroneous completion iptt=%d task=%pK dev id=%d CQ hdr: 0x%x 0x%x 0x%x 0x%x Error info: 0x%x 0x%x 0x%x 0x%x\n",
                                 slot->idx, task, sas_dev->device_id,
                                 dw0, dw1, complete_hdr->act, dw3,
                                 error_info[0], error_info[1],
@@ -2241,20 +2232,16 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot)
        }
        case SAS_PROTOCOL_SMP: {
                struct scatterlist *sg_resp = &task->smp_task.smp_resp;
-               void *to;
+               void *to = page_address(sg_page(sg_resp));
 
                ts->stat = SAM_STAT_GOOD;
-               to = kmap_atomic(sg_page(sg_resp));
 
-               dma_unmap_sg(dev, &task->smp_task.smp_resp, 1,
-                            DMA_FROM_DEVICE);
                dma_unmap_sg(dev, &task->smp_task.smp_req, 1,
                             DMA_TO_DEVICE);
                memcpy(to + sg_resp->offset,
                        hisi_sas_status_buf_addr_mem(slot) +
                       sizeof(struct hisi_sas_err_record),
-                      sg_dma_len(sg_resp));
-               kunmap_atomic(to);
+                      sg_resp->length);
                break;
        }
        case SAS_PROTOCOL_SATA:
@@ -2279,7 +2266,7 @@ out:
        spin_lock_irqsave(&task->task_state_lock, flags);
        if (task->task_state_flags & SAS_TASK_STATE_ABORTED) {
                spin_unlock_irqrestore(&task->task_state_lock, flags);
-               dev_info(dev, "slot complete: task(%p) aborted\n", task);
+               dev_info(dev, "slot complete: task(%pK) aborted\n", task);
                return SAS_ABORTED_TASK;
        }
        task->task_state_flags |= SAS_TASK_STATE_DONE;
@@ -2290,7 +2277,7 @@ out:
                spin_lock_irqsave(&device->done_lock, flags);
                if (test_bit(SAS_HA_FROZEN, &ha->state)) {
                        spin_unlock_irqrestore(&device->done_lock, flags);
-                       dev_info(dev, "slot complete: task(%p) ignored\n ",
+                       dev_info(dev, "slot complete: task(%pK) ignored\n ",
                                 task);
                        return sts;
                }
@@ -2385,8 +2372,7 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
 {
        struct device *dev = hisi_hba->dev;
        struct pci_dev *pdev = hisi_hba->pci_dev;
-       int vectors, rc;
-       int i, k;
+       int vectors, rc, i;
        int max_msi = HISI_SAS_MSI_COUNT_V3_HW, min_msi;
 
        if (auto_affine_msi_experimental) {
@@ -2434,7 +2420,7 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
        if (rc) {
                dev_err(dev, "could not request chnl interrupt, rc=%d\n", rc);
                rc = -ENOENT;
-               goto free_phy_irq;
+               goto free_irq_vectors;
        }
 
        rc = devm_request_irq(dev, pci_irq_vector(pdev, 11),
@@ -2443,7 +2429,7 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
        if (rc) {
                dev_err(dev, "could not request fatal interrupt, rc=%d\n", rc);
                rc = -ENOENT;
-               goto free_chnl_interrupt;
+               goto free_irq_vectors;
        }
 
        /* Init tasklets for cq only */
@@ -2460,7 +2446,7 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
                        dev_err(dev, "could not request cq%d interrupt, rc=%d\n",
                                i, rc);
                        rc = -ENOENT;
-                       goto free_cq_irqs;
+                       goto free_irq_vectors;
                }
 
                tasklet_init(t, cq_tasklet_v3_hw, (unsigned long)cq);
@@ -2468,18 +2454,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
 
        return 0;
 
-free_cq_irqs:
-       for (k = 0; k < i; k++) {
-               struct hisi_sas_cq *cq = &hisi_hba->cq[k];
-               int nr = hisi_sas_intr_conv ? 16 : 16 + k;
-
-               free_irq(pci_irq_vector(pdev, nr), cq);
-       }
-       free_irq(pci_irq_vector(pdev, 11), hisi_hba);
-free_chnl_interrupt:
-       free_irq(pci_irq_vector(pdev, 2), hisi_hba);
-free_phy_irq:
-       free_irq(pci_irq_vector(pdev, 1), hisi_hba);
 free_irq_vectors:
        pci_free_irq_vectors(pdev);
        return rc;
@@ -2620,8 +2594,8 @@ static int write_gpio_v3_hw(struct hisi_hba *hisi_hba, u8 reg_type,
        return 0;
 }
 
-static int wait_cmds_complete_timeout_v3_hw(struct hisi_hba *hisi_hba,
-                                           int delay_ms, int timeout_ms)
+static void wait_cmds_complete_timeout_v3_hw(struct hisi_hba *hisi_hba,
+                                            int delay_ms, int timeout_ms)
 {
        struct device *dev = hisi_hba->dev;
        int entries, entries_old = 0, time;
@@ -2635,12 +2609,12 @@ static int wait_cmds_complete_timeout_v3_hw(struct hisi_hba *hisi_hba,
                msleep(delay_ms);
        }
 
-       if (time >= timeout_ms)
-               return -ETIMEDOUT;
+       if (time >= timeout_ms) {
+               dev_dbg(dev, "Wait commands complete timeout!\n");
+               return;
+       }
 
        dev_dbg(dev, "wait commands complete %dms\n", time);
-
-       return 0;
 }
 
 static ssize_t intr_conv_v3_hw_show(struct device *dev,
@@ -2887,16 +2861,45 @@ static const struct hisi_sas_debugfs_reg debugfs_global_reg = {
        .read_global_reg = hisi_sas_read32,
 };
 
+static const struct hisi_sas_debugfs_reg_lu debugfs_axi_reg_lu[] = {
+       HISI_SAS_DEBUGFS_REG(AM_CFG_MAX_TRANS),
+       HISI_SAS_DEBUGFS_REG(AM_CFG_SINGLE_PORT_MAX_TRANS),
+       HISI_SAS_DEBUGFS_REG(AXI_CFG),
+       HISI_SAS_DEBUGFS_REG(AM_ROB_ECC_ERR_ADDR),
+       {}
+};
+
+static const struct hisi_sas_debugfs_reg debugfs_axi_reg = {
+       .lu = debugfs_axi_reg_lu,
+       .count = 0x61,
+       .base_off = AXI_MASTER_CFG_BASE,
+       .read_global_reg = hisi_sas_read32,
+};
+
+static const struct hisi_sas_debugfs_reg_lu debugfs_ras_reg_lu[] = {
+       HISI_SAS_DEBUGFS_REG(SAS_RAS_INTR1),
+       HISI_SAS_DEBUGFS_REG(SAS_RAS_INTR0_MASK),
+       HISI_SAS_DEBUGFS_REG(SAS_RAS_INTR1_MASK),
+       HISI_SAS_DEBUGFS_REG(CFG_SAS_RAS_INTR_MASK),
+       HISI_SAS_DEBUGFS_REG(SAS_RAS_INTR2),
+       HISI_SAS_DEBUGFS_REG(SAS_RAS_INTR2_MASK),
+       {}
+};
+
+static const struct hisi_sas_debugfs_reg debugfs_ras_reg = {
+       .lu = debugfs_ras_reg_lu,
+       .count = 0x10,
+       .base_off = RAS_BASE,
+       .read_global_reg = hisi_sas_read32,
+};
+
 static void debugfs_snapshot_prepare_v3_hw(struct hisi_hba *hisi_hba)
 {
-       struct device *dev = hisi_hba->dev;
-
        set_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
 
        hisi_sas_write32(hisi_hba, DLVRY_QUEUE_ENABLE, 0);
 
-       if (wait_cmds_complete_timeout_v3_hw(hisi_hba, 100, 5000) == -ETIMEDOUT)
-               dev_dbg(dev, "Wait commands complete timeout!\n");
+       wait_cmds_complete_timeout_v3_hw(hisi_hba, 100, 5000);
 
        hisi_sas_kill_tasklets(hisi_hba);
 }
@@ -2909,6 +2912,142 @@ static void debugfs_snapshot_restore_v3_hw(struct hisi_hba *hisi_hba)
        clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
 }
 
+static void read_iost_itct_cache_v3_hw(struct hisi_hba *hisi_hba,
+                                      enum hisi_sas_debugfs_cache_type type,
+                                      u32 *cache)
+{
+       u32 cache_dw_size = HISI_SAS_IOST_ITCT_CACHE_DW_SZ *
+                           HISI_SAS_IOST_ITCT_CACHE_NUM;
+       u32 *buf = cache;
+       u32 i, val;
+
+       hisi_sas_write32(hisi_hba, TAB_RD_TYPE, type);
+
+       for (i = 0; i < HISI_SAS_IOST_ITCT_CACHE_DW_SZ; i++) {
+               val = hisi_sas_read32(hisi_hba, TAB_DFX);
+               if (val == 0xffffffff)
+                       break;
+       }
+
+       if (val != 0xffffffff) {
+               pr_err("Issue occur when reading IOST/ITCT cache!\n");
+               return;
+       }
+
+       memset(buf, 0, cache_dw_size * 4);
+       buf[0] = val;
+
+       for (i = 1; i < cache_dw_size; i++)
+               buf[i] = hisi_sas_read32(hisi_hba, TAB_DFX);
+}
+
+static void hisi_sas_bist_test_prep_v3_hw(struct hisi_hba *hisi_hba)
+{
+       u32 reg_val;
+       int phy_id = hisi_hba->debugfs_bist_phy_no;
+
+       /* disable PHY */
+       hisi_sas_phy_enable(hisi_hba, phy_id, 0);
+
+       /* disable ALOS */
+       reg_val = hisi_sas_phy_read32(hisi_hba, phy_id, SERDES_CFG);
+       reg_val |= CFG_ALOS_CHK_DISABLE_MSK;
+       hisi_sas_phy_write32(hisi_hba, phy_id, SERDES_CFG, reg_val);
+}
+
+static void hisi_sas_bist_test_restore_v3_hw(struct hisi_hba *hisi_hba)
+{
+       u32 reg_val;
+       int phy_id = hisi_hba->debugfs_bist_phy_no;
+
+       /* disable loopback */
+       reg_val = hisi_sas_phy_read32(hisi_hba, phy_id, SAS_PHY_BIST_CTRL);
+       reg_val &= ~(CFG_RX_BIST_EN_MSK | CFG_TX_BIST_EN_MSK |
+                    CFG_BIST_TEST_MSK);
+       hisi_sas_phy_write32(hisi_hba, phy_id, SAS_PHY_BIST_CTRL, reg_val);
+
+       /* enable ALOS */
+       reg_val = hisi_sas_phy_read32(hisi_hba, phy_id, SERDES_CFG);
+       reg_val &= ~CFG_ALOS_CHK_DISABLE_MSK;
+       hisi_sas_phy_write32(hisi_hba, phy_id, SERDES_CFG, reg_val);
+
+       /* restore the linkrate */
+       reg_val = hisi_sas_phy_read32(hisi_hba, phy_id, PROG_PHY_LINK_RATE);
+       /* init OOB link rate as 1.5 Gbits */
+       reg_val &= ~CFG_PROG_PHY_LINK_RATE_MSK;
+       reg_val |= (0x8 << CFG_PROG_PHY_LINK_RATE_OFF);
+       hisi_sas_phy_write32(hisi_hba, phy_id, PROG_PHY_LINK_RATE, reg_val);
+
+       /* enable PHY */
+       hisi_sas_phy_enable(hisi_hba, phy_id, 1);
+}
+
+#define SAS_PHY_BIST_CODE_INIT 0x1
+#define SAS_PHY_BIST_CODE1_INIT        0X80
+static int debugfs_set_bist_v3_hw(struct hisi_hba *hisi_hba, bool enable)
+{
+       u32 reg_val, mode_tmp;
+       u32 linkrate = hisi_hba->debugfs_bist_linkrate;
+       u32 phy_id = hisi_hba->debugfs_bist_phy_no;
+       u32 code_mode = hisi_hba->debugfs_bist_code_mode;
+       u32 path_mode = hisi_hba->debugfs_bist_mode;
+       struct device *dev = hisi_hba->dev;
+
+       dev_info(dev, "BIST info:linkrate=%d phy_id=%d code_mode=%d path_mode=%d\n",
+                linkrate, phy_id, code_mode, path_mode);
+       mode_tmp = path_mode ? 2 : 1;
+       if (enable) {
+               /* some preparations before bist test */
+               hisi_sas_bist_test_prep_v3_hw(hisi_hba);
+
+               /* set linkrate of bist test */
+               reg_val = hisi_sas_phy_read32(hisi_hba, phy_id,
+                                             PROG_PHY_LINK_RATE);
+               reg_val &= ~CFG_PROG_PHY_LINK_RATE_MSK;
+               reg_val |= (linkrate << CFG_PROG_PHY_LINK_RATE_OFF);
+               hisi_sas_phy_write32(hisi_hba, phy_id,
+                                    PROG_PHY_LINK_RATE, reg_val);
+
+               /* set code mode of bist test */
+               reg_val = hisi_sas_phy_read32(hisi_hba, phy_id,
+                                             SAS_PHY_BIST_CTRL);
+               reg_val &= ~(CFG_BIST_MODE_SEL_MSK |
+                               CFG_LOOP_TEST_MODE_MSK |
+                               CFG_RX_BIST_EN_MSK |
+                               CFG_TX_BIST_EN_MSK |
+                               CFG_BIST_TEST_MSK);
+               reg_val |= ((code_mode << CFG_BIST_MODE_SEL_OFF) |
+                           (mode_tmp << CFG_LOOP_TEST_MODE_OFF) |
+                           CFG_BIST_TEST_MSK);
+               hisi_sas_phy_write32(hisi_hba, phy_id,
+                                    SAS_PHY_BIST_CTRL, reg_val);
+
+               mdelay(100);
+               reg_val |= (CFG_RX_BIST_EN_MSK | CFG_TX_BIST_EN_MSK);
+               hisi_sas_phy_write32(hisi_hba, phy_id,
+                                    SAS_PHY_BIST_CTRL, reg_val);
+
+               /* set the bist init value */
+               hisi_sas_phy_write32(hisi_hba, phy_id,
+                                    SAS_PHY_BIST_CODE,
+                                    SAS_PHY_BIST_CODE_INIT);
+               hisi_sas_phy_write32(hisi_hba, phy_id,
+                                    SAS_PHY_BIST_CODE1,
+                                    SAS_PHY_BIST_CODE1_INIT);
+
+               /* clear error bit */
+               mdelay(100);
+               hisi_sas_phy_read32(hisi_hba, phy_id, SAS_BIST_ERR_CNT);
+       } else {
+               /* disable bist test and recover it */
+               hisi_hba->debugfs_bist_cnt += hisi_sas_phy_read32(hisi_hba,
+                               phy_id, SAS_BIST_ERR_CNT);
+               hisi_sas_bist_test_restore_v3_hw(hisi_hba);
+       }
+
+       return 0;
+}
+
 static struct scsi_host_template sht_v3_hw = {
        .name                   = DRV_NAME,
        .module                 = THIS_MODULE,
@@ -2935,7 +3074,6 @@ static struct scsi_host_template sht_v3_hw = {
 static const struct hisi_sas_hw hisi_sas_v3_hw = {
        .hw_init = hisi_sas_v3_init,
        .setup_itct = setup_itct_v3_hw,
-       .max_command_entries = HISI_SAS_COMMAND_ENTRIES_V3_HW,
        .get_wideport_bitmap = get_wideport_bitmap_v3_hw,
        .complete_hdr_size = sizeof(struct hisi_sas_complete_v3_hdr),
        .clear_itct = clear_itct_v3_hw,
@@ -2944,9 +3082,7 @@ static const struct hisi_sas_hw hisi_sas_v3_hw = {
        .prep_smp = prep_smp_v3_hw,
        .prep_stp = prep_ata_v3_hw,
        .prep_abort = prep_abort_v3_hw,
-       .get_free_slot = get_free_slot_v3_hw,
        .start_delivery = start_delivery_v3_hw,
-       .slot_complete = slot_complete_v3_hw,
        .phys_init = phys_init_v3_hw,
        .phy_start = start_phy_v3_hw,
        .phy_disable = disable_phy_v3_hw,
@@ -2959,10 +3095,14 @@ static const struct hisi_sas_hw hisi_sas_v3_hw = {
        .get_events = phy_get_events_v3_hw,
        .write_gpio = write_gpio_v3_hw,
        .wait_cmds_complete_timeout = wait_cmds_complete_timeout_v3_hw,
-       .debugfs_reg_global = &debugfs_global_reg,
+       .debugfs_reg_array[DEBUGFS_GLOBAL] = &debugfs_global_reg,
+       .debugfs_reg_array[DEBUGFS_AXI] = &debugfs_axi_reg,
+       .debugfs_reg_array[DEBUGFS_RAS] = &debugfs_ras_reg,
        .debugfs_reg_port = &debugfs_port_reg,
        .snapshot_prepare = debugfs_snapshot_prepare_v3_hw,
        .snapshot_restore = debugfs_snapshot_restore_v3_hw,
+       .read_iost_itct_cache = read_iost_itct_cache_v3_hw,
+       .set_bist = debugfs_set_bist_v3_hw,
 };
 
 static struct Scsi_Host *
@@ -2993,8 +3133,6 @@ hisi_sas_shost_alloc_pci(struct pci_dev *pdev)
        else
                hisi_hba->prot_mask = prot_mask;
 
-       timer_setup(&hisi_hba->timer, NULL, 0);
-
        if (hisi_sas_get_fw_info(hisi_hba) < 0)
                goto err_out;
 
@@ -3076,17 +3214,14 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        shost->max_lun = ~0;
        shost->max_channel = 1;
        shost->max_cmd_len = 16;
-       shost->can_queue = hisi_hba->hw->max_command_entries -
-               HISI_SAS_RESERVED_IPTT_CNT;
-       shost->cmd_per_lun = hisi_hba->hw->max_command_entries -
-               HISI_SAS_RESERVED_IPTT_CNT;
+       shost->can_queue = HISI_SAS_UNRESERVED_IPTT;
+       shost->cmd_per_lun = HISI_SAS_UNRESERVED_IPTT;
 
        sha->sas_ha_name = DRV_NAME;
        sha->dev = dev;
        sha->lldd_module = THIS_MODULE;
        sha->sas_addr = &hisi_hba->sas_addr[0];
        sha->num_phys = hisi_hba->n_phy;
-       sha->core.shost = hisi_hba->shost;
 
        for (i = 0; i < hisi_hba->n_phy; i++) {
                sha->sas_phy[i] = &hisi_hba->phy[i].sas_phy;
@@ -3273,15 +3408,21 @@ static int hisi_sas_v3_resume(struct pci_dev *pdev)
        pci_enable_wake(pdev, PCI_D0, 0);
        pci_restore_state(pdev);
        rc = pci_enable_device(pdev);
-       if (rc)
+       if (rc) {
                dev_err(dev, "enable device failed during resume (%d)\n", rc);
+               return rc;
+       }
 
        pci_set_master(pdev);
        scsi_unblock_requests(shost);
        clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
 
        sas_prep_resume_ha(sha);
-       init_reg_v3_hw(hisi_hba);
+       rc = hw_init_v3_hw(hisi_hba);
+       if (rc) {
+               scsi_remove_host(shost);
+               pci_disable_device(pdev);
+       }
        hisi_hba->hw->phys_init(hisi_hba);
        sas_resume_ha(sha);
        clear_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags);
index 1bb6aad..ac39ed7 100644 (file)
@@ -21,7 +21,6 @@
 #include <linux/interrupt.h>
 #include <linux/types.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
index 8cdbac0..df897df 100644 (file)
@@ -1830,6 +1830,7 @@ static int ibmvfc_bsg_request(struct bsg_job *job)
                port_id = (bsg_request->rqst_data.h_els.port_id[0] << 16) |
                        (bsg_request->rqst_data.h_els.port_id[1] << 8) |
                        bsg_request->rqst_data.h_els.port_id[2];
+               /* fall through */
        case FC_BSG_RPT_ELS:
                fc_flags = IBMVFC_FC_ELS;
                break;
@@ -1838,6 +1839,7 @@ static int ibmvfc_bsg_request(struct bsg_job *job)
                port_id = (bsg_request->rqst_data.h_ct.port_id[0] << 16) |
                        (bsg_request->rqst_data.h_ct.port_id[1] << 8) |
                        bsg_request->rqst_data.h_ct.port_id[2];
+               /* fall through */
        case FC_BSG_RPT_CT:
                fc_flags = IBMVFC_FC_CT_IU;
                break;
@@ -4020,6 +4022,7 @@ static void ibmvfc_npiv_login_done(struct ibmvfc_event *evt)
                return;
        case IBMVFC_MAD_CRQ_ERROR:
                ibmvfc_retry_host_init(vhost);
+               /* fall through */
        case IBMVFC_MAD_DRIVER_FAILED:
                ibmvfc_free_event(evt);
                return;
index 7f95353..a929fe7 100644 (file)
@@ -1581,6 +1581,7 @@ static long ibmvscsis_adapter_info(struct scsi_info *vscsi,
        case H_PERMISSION:
                if (connection_broken(vscsi))
                        flag_bits = (RESPONSE_Q_DOWN | CLIENT_FAILED);
+               /* Fall through */
        default:
                dev_err(&vscsi->dev, "adapter_info: h_copy_rdma to client failed, rc %ld\n",
                        rc);
@@ -2492,8 +2493,10 @@ static long ibmvscsis_ping_response(struct scsi_info *vscsi)
                break;
        case H_CLOSED:
                vscsi->flags |= CLIENT_FAILED;
+               /* Fall through */
        case H_DROPPED:
                vscsi->flags |= RESPONSE_Q_DOWN;
+               /* Fall through */
        case H_REMOTE_PARM:
                dev_err(&vscsi->dev, "ping_response: h_send_crq failed, rc %ld\n",
                        rc);
index bade2e0..691acbd 100644 (file)
@@ -51,6 +51,8 @@ struct lpfc_sli2_slim;
                cmnd for menlo needs nearly twice as for firmware
                downloads using bsg */
 
+#define LPFC_DEFAULT_XPSGL_SIZE        256
+#define LPFC_MAX_SG_TABLESIZE  0xffff
 #define LPFC_MIN_SG_SLI4_BUF_SZ        0x800   /* based on LPFC_DEFAULT_SG_SEG_CNT */
 #define LPFC_MAX_BG_SLI4_SEG_CNT_DIF 128 /* sg element count for BlockGuard */
 #define LPFC_MAX_SG_SEG_CNT_DIF 512    /* sg element count per scsi cmnd  */
@@ -732,14 +734,13 @@ struct lpfc_hba {
 #define HBA_AER_ENABLED                0x1000 /* AER enabled with HBA */
 #define HBA_DEVLOSS_TMO         0x2000 /* HBA in devloss timeout */
 #define HBA_RRQ_ACTIVE         0x4000 /* process the rrq active list */
-#define HBA_FCP_IOQ_FLUSH      0x8000 /* FCP I/O queues being flushed */
+#define HBA_IOQ_FLUSH          0x8000 /* FCP/NVME I/O queues being flushed */
 #define HBA_FW_DUMP_OP         0x10000 /* Skips fn reset before FW dump */
 #define HBA_RECOVERABLE_UE     0x20000 /* Firmware supports recoverable UE */
 #define HBA_FORCED_LINK_SPEED  0x40000 /*
                                         * Firmware supports Forced Link Speed
                                         * capability
                                         */
-#define HBA_NVME_IOQ_FLUSH      0x80000 /* NVME IO queues flushed. */
 #define HBA_FLOGI_ISSUED       0x100000 /* FLOGI was issued */
 
        uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/
@@ -795,10 +796,12 @@ struct lpfc_hba {
        uint8_t  mds_diags_support;
        uint8_t  bbcredit_support;
        uint8_t  enab_exp_wqcq_pages;
+       u8       nsler; /* Firmware supports FC-NVMe-2 SLER */
 
        /* HBA Config Parameters */
        uint32_t cfg_ack0;
        uint32_t cfg_xri_rebalancing;
+       uint32_t cfg_xpsgl;
        uint32_t cfg_enable_npiv;
        uint32_t cfg_enable_rrq;
        uint32_t cfg_topology;
@@ -905,6 +908,7 @@ struct lpfc_hba {
        wait_queue_head_t    work_waitq;
        struct task_struct   *worker_thread;
        unsigned long data_flags;
+       uint32_t border_sge_num;
 
        uint32_t hbq_in_use;            /* HBQs in use flag */
        uint32_t hbq_count;             /* Count of configured HBQs */
@@ -987,6 +991,7 @@ struct lpfc_hba {
        struct dma_pool *lpfc_nvmet_drb_pool; /* data receive buffer pool */
        struct dma_pool *lpfc_hbq_pool; /* SLI3 hbq buffer pool */
        struct dma_pool *txrdy_payload_pool;
+       struct dma_pool *lpfc_cmd_rsp_buf_pool;
        struct lpfc_dma_pool lpfc_mbuf_safety_pool;
 
        mempool_t *mbox_mem_pool;
@@ -1034,8 +1039,6 @@ struct lpfc_hba {
        struct dentry *debug_hbqinfo;
        struct dentry *debug_dumpHostSlim;
        struct dentry *debug_dumpHBASlim;
-       struct dentry *debug_dumpData;   /* BlockGuard BPL */
-       struct dentry *debug_dumpDif;    /* BlockGuard BPL */
        struct dentry *debug_InjErrLBA;  /* LBA to inject errors at */
        struct dentry *debug_InjErrNPortID;  /* NPortID to inject errors at */
        struct dentry *debug_InjErrWWPN;  /* WWPN to inject errors at */
index d655586..25aa7a5 100644 (file)
@@ -841,7 +841,8 @@ lpfc_hdw_show(struct device *dev, struct device_attribute *attr, char *buf)
        lpfc_vpd_t *vp = &phba->vpd;
 
        lpfc_jedec_to_ascii(vp->rev.biuRev, hdw);
-       return scnprintf(buf, PAGE_SIZE, "%s\n", hdw);
+       return scnprintf(buf, PAGE_SIZE, "%s %08x %08x\n", hdw,
+                        vp->rev.smRev, vp->rev.smFwRev);
 }
 
 /**
@@ -3682,8 +3683,8 @@ lpfc_update_rport_devloss_tmo(struct lpfc_vport *vport)
                if (rport)
                        remoteport = rport->remoteport;
                spin_unlock(&vport->phba->hbalock);
-               if (remoteport)
-                       nvme_fc_set_remoteport_devloss(rport->remoteport,
+               if (rport && remoteport)
+                       nvme_fc_set_remoteport_devloss(remoteport,
                                                       vport->cfg_devloss_tmo);
 #endif
        }
@@ -5467,15 +5468,12 @@ LPFC_ATTR_RW(nvmet_fb_size, 0, 0, 65536,
  * lpfc_nvme_enable_fb: Enable NVME first burst on I and T functions.
  * For the Initiator (I), enabling this parameter means that an NVMET
  * PRLI response with FBA enabled and an FB_SIZE set to a nonzero value will be
- * processed by the initiator for subsequent NVME FCP IO. For the target
- * function (T), enabling this parameter qualifies the lpfc_nvmet_fb_size
- * driver parameter as the target function's first burst size returned to the
- * initiator in the target's NVME PRLI response. Parameter supported on physical
- * port only - no NPIV support.
+ * processed by the initiator for subsequent NVME FCP IO.
+ * Currently, this feature is not supported on the NVME target.
  * Value range is [0,1]. Default value is 0 (disabled).
  */
 LPFC_ATTR_RW(nvme_enable_fb, 0, 0, 1,
-            "Enable First Burst feature on I and T functions.");
+            "Enable First Burst feature for NVME Initiator.");
 
 /*
 # lpfc_max_scsicmpl_time: Use scsi command completion time to control I/O queue
@@ -5927,7 +5925,7 @@ lpfc_sg_seg_cnt_init(struct lpfc_hba *phba, int val)
  *       1  = MDS Diagnostics enabled
  * Value range is [0,1]. Default value is 0.
  */
-LPFC_ATTR_R(enable_mds_diags, 0, 0, 1, "Enable MDS Diagnostics");
+LPFC_ATTR_RW(enable_mds_diags, 0, 0, 1, "Enable MDS Diagnostics");
 
 /*
  * lpfc_ras_fwlog_buffsize: Firmware logging host buffer size
@@ -6859,10 +6857,31 @@ lpfc_get_starget_port_name(struct scsi_target *starget)
 static void
 lpfc_set_rport_loss_tmo(struct fc_rport *rport, uint32_t timeout)
 {
+       struct lpfc_rport_data *rdata = rport->dd_data;
+       struct lpfc_nodelist *ndlp = rdata->pnode;
+#if (IS_ENABLED(CONFIG_NVME_FC))
+       struct lpfc_nvme_rport *nrport = NULL;
+#endif
+
        if (timeout)
                rport->dev_loss_tmo = timeout;
        else
                rport->dev_loss_tmo = 1;
+
+       if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) {
+               dev_info(&rport->dev, "Cannot find remote node to "
+                                     "set rport dev loss tmo, port_id x%x\n",
+                                     rport->port_id);
+               return;
+       }
+
+#if (IS_ENABLED(CONFIG_NVME_FC))
+       nrport = lpfc_ndlp_get_nrport(ndlp);
+
+       if (nrport && nrport->remoteport)
+               nvme_fc_set_remoteport_devloss(nrport->remoteport,
+                                              rport->dev_loss_tmo);
+#endif
 }
 
 /**
@@ -7059,6 +7078,21 @@ struct fc_function_template lpfc_vport_transport_functions = {
 };
 
 /**
+ * lpfc_get_hba_function_mode - Used to determine whether the HBA is in FCoE
+ * mode
+ * @phba: lpfc_hba pointer.
+ **/
+static void
+lpfc_get_hba_function_mode(struct lpfc_hba *phba)
+{
+       /* If it's a SkyHawk FCoE adapter */
+       if (phba->pcidev->device == PCI_DEVICE_ID_SKYHAWK)
+               phba->hba_flag |= HBA_FCOE_MODE;
+       else
+               phba->hba_flag &= ~HBA_FCOE_MODE;
+}
+
+/**
  * lpfc_get_cfgparam - Used during probe_one to init the adapter structure
  * @phba: lpfc_hba pointer.
  **/
@@ -7114,8 +7148,18 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
        else
                phba->cfg_poll = lpfc_poll;
 
-       if (phba->cfg_enable_bg)
+       /* Get the function mode */
+       lpfc_get_hba_function_mode(phba);
+
+       /* BlockGuard allowed for FC only. */
+       if (phba->cfg_enable_bg && phba->hba_flag & HBA_FCOE_MODE) {
+               lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+                               "0581 BlockGuard feature not supported\n");
+               /* If set, clear the BlockGuard support param */
+               phba->cfg_enable_bg = 0;
+       } else if (phba->cfg_enable_bg) {
                phba->sli3_options |= LPFC_SLI3_BG_ENABLED;
+       }
 
        lpfc_suppress_rsp_init(phba, lpfc_suppress_rsp);
 
@@ -7175,16 +7219,6 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
        lpfc_ras_fwlog_level_init(phba, lpfc_ras_fwlog_level);
        lpfc_ras_fwlog_func_init(phba, lpfc_ras_fwlog_func);
 
-
-       /* If the NVME FC4 type is enabled, scale the sg_seg_cnt to
-        * accommodate 512K and 1M IOs in a single nvme buf and supply
-        * enough NVME LS iocb buffers for larger connectivity counts.
-        */
-       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-               phba->cfg_sg_seg_cnt = LPFC_MAX_NVME_SEG_CNT;
-               phba->cfg_iocb_cnt = 5;
-       }
-
        return;
 }
 
index b7216d6..39a736b 100644 (file)
@@ -1040,7 +1040,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                                if (!dmabuf) {
                                        lpfc_printf_log(phba, KERN_ERR,
                                                LOG_LIBDFC, "2616 No dmabuf "
-                                               "found for iocbq 0x%p\n",
+                                               "found for iocbq x%px\n",
                                                iocbq);
                                        kfree(evt_dat->data);
                                        kfree(evt_dat);
@@ -1276,9 +1276,7 @@ lpfc_bsg_hba_set_event(struct bsg_job *job)
        return 0; /* call job done later */
 
 job_error:
-       if (dd_data != NULL)
-               kfree(dd_data);
-
+       kfree(dd_data);
        job->dd_data = NULL;
        return rc;
 }
@@ -1571,7 +1569,6 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct bsg_job *job, uint32_t tag,
                "2722 Xmit CT response on exchange x%x Data: x%x x%x x%x\n",
                icmd->ulpContext, icmd->ulpIoTag, tag, phba->link_state);
 
-       ctiocb->iocb_cmpl = NULL;
        ctiocb->iocb_flag |= LPFC_IO_LIBDFC;
        ctiocb->vport = phba->pport;
        ctiocb->context1 = dd_data;
@@ -5451,7 +5448,9 @@ ras_job_error:
        bsg_reply->result = rc;
 
        /* complete the job back to userspace */
-       bsg_job_done(job, bsg_reply->result, bsg_reply->reply_payload_rcv_len);
+       if (!rc)
+               bsg_job_done(job, bsg_reply->result,
+                            bsg_reply->reply_payload_rcv_len);
        return rc;
 }
 
@@ -5530,8 +5529,9 @@ ras_job_error:
        bsg_reply->result = rc;
 
        /* complete the job back to userspace */
-       bsg_job_done(job, bsg_reply->result,
-                      bsg_reply->reply_payload_rcv_len);
+       if (!rc)
+               bsg_job_done(job, bsg_reply->result,
+                            bsg_reply->reply_payload_rcv_len);
 
        return rc;
 }
@@ -5591,7 +5591,9 @@ ras_job_error:
        bsg_reply->result = rc;
 
        /* complete the job back to userspace */
-       bsg_job_done(job, bsg_reply->result, bsg_reply->reply_payload_rcv_len);
+       if (!rc)
+               bsg_job_done(job, bsg_reply->result,
+                            bsg_reply->reply_payload_rcv_len);
 
        return rc;
 }
@@ -5673,7 +5675,9 @@ lpfc_bsg_get_ras_fwlog(struct bsg_job *job)
 
 ras_job_error:
        bsg_reply->result = rc;
-       bsg_job_done(job, bsg_reply->result, bsg_reply->reply_payload_rcv_len);
+       if (!rc)
+               bsg_job_done(job, bsg_reply->result,
+                            bsg_reply->reply_payload_rcv_len);
 
        return rc;
 }
@@ -5744,8 +5748,9 @@ lpfc_get_trunk_info(struct bsg_job *job)
                                phba->sli4_hba.link_state.logical_speed / 1000;
 job_error:
        bsg_reply->result = rc;
-       bsg_job_done(job, bsg_reply->result,
-                      bsg_reply->reply_payload_rcv_len);
+       if (!rc)
+               bsg_job_done(job, bsg_reply->result,
+                            bsg_reply->reply_payload_rcv_len);
        return rc;
 
 }
index 68e9f96..b2ad8c7 100644 (file)
@@ -326,7 +326,7 @@ void lpfc_sli_bemem_bcopy(void *, void *, uint32_t);
 void lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *);
 void lpfc_sli_abort_fcp_rings(struct lpfc_hba *phba);
 void lpfc_sli_hba_iocb_abort(struct lpfc_hba *);
-void lpfc_sli_flush_fcp_rings(struct lpfc_hba *);
+void lpfc_sli_flush_io_rings(struct lpfc_hba *phba);
 int lpfc_sli_ringpostbuf_put(struct lpfc_hba *, struct lpfc_sli_ring *,
                             struct lpfc_dmabuf *);
 struct lpfc_dmabuf *lpfc_sli_ringpostbuf_get(struct lpfc_hba *,
@@ -433,16 +433,6 @@ int lpfc_sli4_get_allocated_extnts(struct lpfc_hba *, uint16_t,
 int lpfc_sli4_get_avail_extnt_rsrc(struct lpfc_hba *, uint16_t,
                                          uint16_t *, uint16_t *);
 
-/* externs BlockGuard */
-extern char *_dump_buf_data;
-extern unsigned long _dump_buf_data_order;
-extern char *_dump_buf_dif;
-extern unsigned long _dump_buf_dif_order;
-extern spinlock_t _dump_buf_lock;
-extern int _dump_buf_done;
-extern spinlock_t pgcnt_lock;
-extern unsigned int pgcnt;
-
 /* Interface exported by fabric iocb scheduler */
 void lpfc_fabric_abort_nport(struct lpfc_nodelist *);
 void lpfc_fabric_abort_hba(struct lpfc_hba *);
@@ -595,6 +585,7 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *ncmd,
                         struct lpfc_sli4_hdw_queue *qp);
 void lpfc_nvme_cmd_template(void);
 void lpfc_nvmet_cmd_template(void);
+void lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn);
 extern int lpfc_enable_nvmet_cnt;
 extern unsigned long long lpfc_enable_nvmet[];
 extern int lpfc_no_hba_reset_cnt;
index ec72c39..25e8670 100644 (file)
@@ -462,6 +462,7 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type)
        struct lpfc_nodelist *ndlp;
 
        if ((vport->port_type != LPFC_NPIV_PORT) ||
+           (fc4_type == FC_TYPE_FCP) ||
            !(vport->ct_flags & FC_CT_RFF_ID) || !vport->cfg_restrict_login) {
 
                ndlp = lpfc_setup_disc_node(vport, Did);
@@ -480,10 +481,20 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type)
 
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                         "0238 Process x%06x NameServer Rsp "
-                                        "Data: x%x x%x x%x x%x\n", Did,
+                                        "Data: x%x x%x x%x x%x x%x\n", Did,
                                         ndlp->nlp_flag, ndlp->nlp_fc4_type,
-                                        vport->fc_flag,
+                                        ndlp->nlp_state, vport->fc_flag,
                                         vport->fc_rscn_id_cnt);
+
+                       /* if ndlp needs to be discovered and prior
+                        * state of ndlp hit devloss, change state to
+                        * allow rediscovery.
+                        */
+                       if (ndlp->nlp_flag & NLP_NPR_2B_DISC &&
+                           ndlp->nlp_state == NLP_STE_UNUSED_NODE) {
+                               lpfc_nlp_set_state(vport, ndlp,
+                                                  NLP_STE_NPR_NODE);
+                       }
                } else {
                        lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
                                "Skip1 GID_FTrsp: did:x%x flg:x%x cnt:%d",
@@ -491,9 +502,9 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type)
 
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                         "0239 Skip x%06x NameServer Rsp "
-                                        "Data: x%x x%x\n", Did,
-                                        vport->fc_flag,
-                                        vport->fc_rscn_id_cnt);
+                                        "Data: x%x x%x %p\n",
+                                        Did, vport->fc_flag,
+                                        vport->fc_rscn_id_cnt, ndlp);
                }
        } else {
                if (!(vport->fc_flag & FC_RSCN_MODE) ||
@@ -751,9 +762,11 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                if (CTrsp->CommandResponse.bits.CmdRsp ==
                    cpu_to_be16(SLI_CT_RESPONSE_FS_ACC)) {
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
-                                        "0208 NameServer Rsp Data: x%x x%x\n",
+                                        "0208 NameServer Rsp Data: x%x x%x "
+                                        "sz x%x\n",
                                         vport->fc_flag,
-                                        CTreq->un.gid.Fc4Type);
+                                        CTreq->un.gid.Fc4Type,
+                                        irsp->un.genreq64.bdl.bdeSize);
 
                        lpfc_ns_rsp(vport,
                                    outp,
@@ -814,6 +827,11 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                }
                vport->gidft_inp--;
        }
+
+       lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
+                        "4216 GID_FT cmpl inp %d disc %d\n",
+                        vport->gidft_inp, vport->num_disc_nodes);
+
        /* Link up / RSCN discovery */
        if ((vport->num_disc_nodes == 0) &&
            (vport->gidft_inp == 0)) {
@@ -1209,14 +1227,34 @@ lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                        if (fc4_data_1 &  LPFC_FC4_TYPE_BITMASK)
                                ndlp->nlp_fc4_type |= NLP_FC4_NVME;
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
-                                        "3064 Setting ndlp %p, DID x%06x with "
-                                        "FC4 x%08x, Data: x%08x x%08x\n",
+                                        "3064 Setting ndlp x%px, DID x%06x "
+                                        "with FC4 x%08x, Data: x%08x x%08x "
+                                        "%d\n",
                                         ndlp, did, ndlp->nlp_fc4_type,
-                                        FC_TYPE_FCP, FC_TYPE_NVME);
-                       ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
-
-                       lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE);
-                       lpfc_issue_els_prli(vport, ndlp, 0);
+                                        FC_TYPE_FCP, FC_TYPE_NVME,
+                                        ndlp->nlp_state);
+
+                       if (ndlp->nlp_state == NLP_STE_REG_LOGIN_ISSUE &&
+                           ndlp->nlp_fc4_type) {
+                               ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
+
+                               lpfc_nlp_set_state(vport, ndlp,
+                                                  NLP_STE_PRLI_ISSUE);
+                               lpfc_issue_els_prli(vport, ndlp, 0);
+                       } else if (!ndlp->nlp_fc4_type) {
+                               /* If fc4 type is still unknown, then LOGO */
+                               lpfc_printf_vlog(vport, KERN_INFO,
+                                                LOG_DISCOVERY,
+                                                "6443 Sending LOGO ndlp x%px,"
+                                                "DID x%06x with fc4_type: "
+                                                "x%08x, state: %d\n",
+                                                ndlp, did, ndlp->nlp_fc4_type,
+                                                ndlp->nlp_state);
+                               lpfc_issue_els_logo(vport, ndlp, 0);
+                               ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
+                               lpfc_nlp_set_state(vport, ndlp,
+                                                  NLP_STE_NPR_NODE);
+                       }
                }
        } else
                lpfc_printf_vlog(vport, KERN_ERR, LOG_DISCOVERY,
@@ -2515,7 +2553,7 @@ lpfc_fdmi_port_attr_max_frame(struct lpfc_vport *vport,
        ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue;
 
        hsp = (struct serv_parm *)&vport->fc_sparam;
-       ae->un.AttrInt = (((uint32_t) hsp->cmn.bbRcvSizeMsb) << 8) |
+       ae->un.AttrInt = (((uint32_t) hsp->cmn.bbRcvSizeMsb & 0x0F) << 8) |
                          (uint32_t) hsp->cmn.bbRcvSizeLsb;
        ae->un.AttrInt = cpu_to_be32(ae->un.AttrInt);
        size = FOURBYTES + sizeof(uint32_t);
index 1ee857d..8d34be6 100644 (file)
@@ -361,7 +361,7 @@ lpfc_debugfs_hbqinfo_data(struct lpfc_hba *phba, char *buf, int size)
                        phys = ((uint64_t)hbq_buf->dbuf.phys & 0xffffffff);
                        if (phys == le32_to_cpu(hbqe->bde.addrLow)) {
                                len +=  scnprintf(buf+len, size-len,
-                                       "Buf%d: %p %06x\n", i,
+                                       "Buf%d: x%px %06x\n", i,
                                        hbq_buf->dbuf.virt, hbq_buf->tag);
                                found = 1;
                                break;
@@ -416,8 +416,7 @@ lpfc_debugfs_commonxripools_data(struct lpfc_hba *phba, char *buf, int size)
                qp = &phba->sli4_hba.hdwq[lpfc_debugfs_last_xripool];
 
                len += scnprintf(buf + len, size - len, "HdwQ %d Info ", i);
-               spin_lock_irqsave(&qp->abts_scsi_buf_list_lock, iflag);
-               spin_lock(&qp->abts_nvme_buf_list_lock);
+               spin_lock_irqsave(&qp->abts_io_buf_list_lock, iflag);
                spin_lock(&qp->io_buf_list_get_lock);
                spin_lock(&qp->io_buf_list_put_lock);
                out = qp->total_io_bufs - (qp->get_io_bufs + qp->put_io_bufs +
@@ -430,8 +429,7 @@ lpfc_debugfs_commonxripools_data(struct lpfc_hba *phba, char *buf, int size)
                        qp->abts_nvme_io_bufs, out);
                spin_unlock(&qp->io_buf_list_put_lock);
                spin_unlock(&qp->io_buf_list_get_lock);
-               spin_unlock(&qp->abts_nvme_buf_list_lock);
-               spin_unlock_irqrestore(&qp->abts_scsi_buf_list_lock, iflag);
+               spin_unlock_irqrestore(&qp->abts_io_buf_list_lock, iflag);
 
                lpfc_debugfs_last_xripool++;
                if (lpfc_debugfs_last_xripool >= phba->cfg_hdw_queue)
@@ -533,9 +531,7 @@ lpfc_debugfs_multixripools_data(struct lpfc_hba *phba, char *buf, int size)
                        continue;
                pbl_pool = &multixri_pool->pbl_pool;
                pvt_pool = &multixri_pool->pvt_pool;
-               txcmplq_cnt = qp->fcp_wq->pring->txcmplq_cnt;
-               if (qp->nvme_wq)
-                       txcmplq_cnt += qp->nvme_wq->pring->txcmplq_cnt;
+               txcmplq_cnt = qp->io_wq->pring->txcmplq_cnt;
 
                scnprintf(tmp, sizeof(tmp),
                          "%03d: %4d %4d %4d %4d | %10d %10d ",
@@ -2166,89 +2162,6 @@ out:
        return rc;
 }
 
-static int
-lpfc_debugfs_dumpData_open(struct inode *inode, struct file *file)
-{
-       struct lpfc_debug *debug;
-       int rc = -ENOMEM;
-
-       if (!_dump_buf_data)
-               return -EBUSY;
-
-       debug = kmalloc(sizeof(*debug), GFP_KERNEL);
-       if (!debug)
-               goto out;
-
-       /* Round to page boundary */
-       pr_err("9059 BLKGRD:  %s: _dump_buf_data=0x%p\n",
-                       __func__, _dump_buf_data);
-       debug->buffer = _dump_buf_data;
-       if (!debug->buffer) {
-               kfree(debug);
-               goto out;
-       }
-
-       debug->len = (1 << _dump_buf_data_order) << PAGE_SHIFT;
-       file->private_data = debug;
-
-       rc = 0;
-out:
-       return rc;
-}
-
-static int
-lpfc_debugfs_dumpDif_open(struct inode *inode, struct file *file)
-{
-       struct lpfc_debug *debug;
-       int rc = -ENOMEM;
-
-       if (!_dump_buf_dif)
-               return -EBUSY;
-
-       debug = kmalloc(sizeof(*debug), GFP_KERNEL);
-       if (!debug)
-               goto out;
-
-       /* Round to page boundary */
-       pr_err("9060 BLKGRD: %s: _dump_buf_dif=0x%p file=%pD\n",
-                       __func__, _dump_buf_dif, file);
-       debug->buffer = _dump_buf_dif;
-       if (!debug->buffer) {
-               kfree(debug);
-               goto out;
-       }
-
-       debug->len = (1 << _dump_buf_dif_order) << PAGE_SHIFT;
-       file->private_data = debug;
-
-       rc = 0;
-out:
-       return rc;
-}
-
-static ssize_t
-lpfc_debugfs_dumpDataDif_write(struct file *file, const char __user *buf,
-                 size_t nbytes, loff_t *ppos)
-{
-       /*
-        * The Data/DIF buffers only save one failing IO
-        * The write op is used as a reset mechanism after an IO has
-        * already been saved to the next one can be saved
-        */
-       spin_lock(&_dump_buf_lock);
-
-       memset((void *)_dump_buf_data, 0,
-                       ((1 << PAGE_SHIFT) << _dump_buf_data_order));
-       memset((void *)_dump_buf_dif, 0,
-                       ((1 << PAGE_SHIFT) << _dump_buf_dif_order));
-
-       _dump_buf_done = 0;
-
-       spin_unlock(&_dump_buf_lock);
-
-       return nbytes;
-}
-
 static ssize_t
 lpfc_debugfs_dif_err_read(struct file *file, char __user *buf,
        size_t nbytes, loff_t *ppos)
@@ -2461,17 +2374,6 @@ lpfc_debugfs_release(struct inode *inode, struct file *file)
        return 0;
 }
 
-static int
-lpfc_debugfs_dumpDataDif_release(struct inode *inode, struct file *file)
-{
-       struct lpfc_debug *debug = file->private_data;
-
-       debug->buffer = NULL;
-       kfree(debug);
-
-       return 0;
-}
-
 /**
  * lpfc_debugfs_multixripools_write - Clear multi-XRI pools statistics
  * @file: The file pointer to read from.
@@ -3786,23 +3688,13 @@ lpfc_idiag_wqs_for_cq(struct lpfc_hba *phba, char *wqtype, char *pbuffer,
        int qidx;
 
        for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
-               qp = phba->sli4_hba.hdwq[qidx].fcp_wq;
+               qp = phba->sli4_hba.hdwq[qidx].io_wq;
                if (qp->assoc_qid != cq_id)
                        continue;
                *len = __lpfc_idiag_print_wq(qp, wqtype, pbuffer, *len);
                if (*len >= max_cnt)
                        return 1;
        }
-       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-               for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
-                       qp = phba->sli4_hba.hdwq[qidx].nvme_wq;
-                       if (qp->assoc_qid != cq_id)
-                               continue;
-                       *len = __lpfc_idiag_print_wq(qp, wqtype, pbuffer, *len);
-                       if (*len >= max_cnt)
-                               return 1;
-               }
-       }
        return 0;
 }
 
@@ -3868,9 +3760,9 @@ lpfc_idiag_cqs_for_eq(struct lpfc_hba *phba, char *pbuffer,
        struct lpfc_queue *qp;
        int rc;
 
-       qp = phba->sli4_hba.hdwq[eqidx].fcp_cq;
+       qp = phba->sli4_hba.hdwq[eqidx].io_cq;
 
-       *len = __lpfc_idiag_print_cq(qp, "FCP", pbuffer, *len);
+       *len = __lpfc_idiag_print_cq(qp, "IO", pbuffer, *len);
 
        /* Reset max counter */
        qp->CQ_max_cqe = 0;
@@ -3878,28 +3770,11 @@ lpfc_idiag_cqs_for_eq(struct lpfc_hba *phba, char *pbuffer,
        if (*len >= max_cnt)
                return 1;
 
-       rc = lpfc_idiag_wqs_for_cq(phba, "FCP", pbuffer, len,
+       rc = lpfc_idiag_wqs_for_cq(phba, "IO", pbuffer, len,
                                   max_cnt, qp->queue_id);
        if (rc)
                return 1;
 
-       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-               qp = phba->sli4_hba.hdwq[eqidx].nvme_cq;
-
-               *len = __lpfc_idiag_print_cq(qp, "NVME", pbuffer, *len);
-
-               /* Reset max counter */
-               qp->CQ_max_cqe = 0;
-
-               if (*len >= max_cnt)
-                       return 1;
-
-               rc = lpfc_idiag_wqs_for_cq(phba, "NVME", pbuffer, len,
-                                          max_cnt, qp->queue_id);
-               if (rc)
-                       return 1;
-       }
-
        if ((eqidx < phba->cfg_nvmet_mrq) && phba->nvmet_support) {
                /* NVMET CQset */
                qp = phba->sli4_hba.nvmet_cqset[eqidx];
@@ -4348,7 +4223,7 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
                if (phba->sli4_hba.hdwq) {
                        for (qidx = 0; qidx < phba->cfg_hdw_queue;
                                                                qidx++) {
-                               qp = phba->sli4_hba.hdwq[qidx].fcp_cq;
+                               qp = phba->sli4_hba.hdwq[qidx].io_cq;
                                if (qp && qp->queue_id == queid) {
                                        /* Sanity check */
                                        rc = lpfc_idiag_que_param_check(
@@ -4360,22 +4235,6 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
                                }
                        }
                }
-               /* NVME complete queue */
-               if (phba->sli4_hba.hdwq) {
-                       qidx = 0;
-                       do {
-                               qp = phba->sli4_hba.hdwq[qidx].nvme_cq;
-                               if (qp && qp->queue_id == queid) {
-                                       /* Sanity check */
-                                       rc = lpfc_idiag_que_param_check(
-                                               qp, index, count);
-                                       if (rc)
-                                               goto error_out;
-                                       idiag.ptr_private = qp;
-                                       goto pass_check;
-                               }
-                       } while (++qidx < phba->cfg_hdw_queue);
-               }
                goto error_out;
                break;
        case LPFC_IDIAG_MQ:
@@ -4419,20 +4278,7 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
                if (phba->sli4_hba.hdwq) {
                        /* FCP/SCSI work queue */
                        for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
-                               qp = phba->sli4_hba.hdwq[qidx].fcp_wq;
-                               if (qp && qp->queue_id == queid) {
-                                       /* Sanity check */
-                                       rc = lpfc_idiag_que_param_check(
-                                               qp, index, count);
-                                       if (rc)
-                                               goto error_out;
-                                       idiag.ptr_private = qp;
-                                       goto pass_check;
-                               }
-                       }
-                       /* NVME work queue */
-                       for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
-                               qp = phba->sli4_hba.hdwq[qidx].nvme_wq;
+                               qp = phba->sli4_hba.hdwq[qidx].io_wq;
                                if (qp && qp->queue_id == queid) {
                                        /* Sanity check */
                                        rc = lpfc_idiag_que_param_check(
@@ -5508,26 +5354,6 @@ static const struct file_operations lpfc_debugfs_op_cpucheck = {
        .release =      lpfc_debugfs_release,
 };
 
-#undef lpfc_debugfs_op_dumpData
-static const struct file_operations lpfc_debugfs_op_dumpData = {
-       .owner =        THIS_MODULE,
-       .open =         lpfc_debugfs_dumpData_open,
-       .llseek =       lpfc_debugfs_lseek,
-       .read =         lpfc_debugfs_read,
-       .write =        lpfc_debugfs_dumpDataDif_write,
-       .release =      lpfc_debugfs_dumpDataDif_release,
-};
-
-#undef lpfc_debugfs_op_dumpDif
-static const struct file_operations lpfc_debugfs_op_dumpDif = {
-       .owner =        THIS_MODULE,
-       .open =         lpfc_debugfs_dumpDif_open,
-       .llseek =       lpfc_debugfs_lseek,
-       .read =         lpfc_debugfs_read,
-       .write =        lpfc_debugfs_dumpDataDif_write,
-       .release =      lpfc_debugfs_dumpDataDif_release,
-};
-
 #undef lpfc_debugfs_op_dif_err
 static const struct file_operations lpfc_debugfs_op_dif_err = {
        .owner =        THIS_MODULE,
@@ -5924,20 +5750,6 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
                } else
                        phba->debug_dumpHostSlim = NULL;
 
-               /* Setup dumpData */
-               snprintf(name, sizeof(name), "dumpData");
-               phba->debug_dumpData =
-                       debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
-                                phba->hba_debugfs_root,
-                                phba, &lpfc_debugfs_op_dumpData);
-
-               /* Setup dumpDif */
-               snprintf(name, sizeof(name), "dumpDif");
-               phba->debug_dumpDif =
-                       debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
-                                phba->hba_debugfs_root,
-                                phba, &lpfc_debugfs_op_dumpDif);
-
                /* Setup DIF Error Injections */
                snprintf(name, sizeof(name), "InjErrLBA");
                phba->debug_InjErrLBA =
@@ -6315,12 +6127,6 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport)
                debugfs_remove(phba->debug_dumpHostSlim); /* HostSlim */
                phba->debug_dumpHostSlim = NULL;
 
-               debugfs_remove(phba->debug_dumpData); /* dumpData */
-               phba->debug_dumpData = NULL;
-
-               debugfs_remove(phba->debug_dumpDif); /* dumpDif */
-               phba->debug_dumpDif = NULL;
-
                debugfs_remove(phba->debug_InjErrLBA); /* InjErrLBA */
                phba->debug_InjErrLBA = NULL;
 
@@ -6442,12 +6248,7 @@ lpfc_debug_dump_all_queues(struct lpfc_hba *phba)
        lpfc_debug_dump_wq(phba, DUMP_NVMELS, 0);
 
        for (idx = 0; idx < phba->cfg_hdw_queue; idx++)
-               lpfc_debug_dump_wq(phba, DUMP_FCP, idx);
-
-       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-               for (idx = 0; idx < phba->cfg_hdw_queue; idx++)
-                       lpfc_debug_dump_wq(phba, DUMP_NVME, idx);
-       }
+               lpfc_debug_dump_wq(phba, DUMP_IO, idx);
 
        lpfc_debug_dump_hdr_rq(phba);
        lpfc_debug_dump_dat_rq(phba);
@@ -6459,12 +6260,7 @@ lpfc_debug_dump_all_queues(struct lpfc_hba *phba)
        lpfc_debug_dump_cq(phba, DUMP_NVMELS, 0);
 
        for (idx = 0; idx < phba->cfg_hdw_queue; idx++)
-               lpfc_debug_dump_cq(phba, DUMP_FCP, idx);
-
-       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-               for (idx = 0; idx < phba->cfg_hdw_queue; idx++)
-                       lpfc_debug_dump_cq(phba, DUMP_NVME, idx);
-       }
+               lpfc_debug_dump_cq(phba, DUMP_IO, idx);
 
        /*
         * Dump Event Queues (EQs)
index 3407087..20f2537 100644 (file)
@@ -291,8 +291,7 @@ struct lpfc_idiag {
 #define LPFC_DUMP_MULTIXRIPOOL_SIZE 8192
 
 enum {
-       DUMP_FCP,
-       DUMP_NVME,
+       DUMP_IO,
        DUMP_MBX,
        DUMP_ELS,
        DUMP_NVMELS,
@@ -415,12 +414,9 @@ lpfc_debug_dump_wq(struct lpfc_hba *phba, int qtype, int wqidx)
        struct lpfc_queue *wq;
        char *qtypestr;
 
-       if (qtype == DUMP_FCP) {
-               wq = phba->sli4_hba.hdwq[wqidx].fcp_wq;
-               qtypestr = "FCP";
-       } else if (qtype == DUMP_NVME) {
-               wq = phba->sli4_hba.hdwq[wqidx].nvme_wq;
-               qtypestr = "NVME";
+       if (qtype == DUMP_IO) {
+               wq = phba->sli4_hba.hdwq[wqidx].io_wq;
+               qtypestr = "IO";
        } else if (qtype == DUMP_MBX) {
                wq = phba->sli4_hba.mbx_wq;
                qtypestr = "MBX";
@@ -433,7 +429,7 @@ lpfc_debug_dump_wq(struct lpfc_hba *phba, int qtype, int wqidx)
        } else
                return;
 
-       if (qtype == DUMP_FCP || qtype == DUMP_NVME)
+       if (qtype == DUMP_IO)
                pr_err("%s WQ: WQ[Idx:%d|Qid:%d]\n",
                        qtypestr, wqidx, wq->queue_id);
        else
@@ -459,17 +455,13 @@ lpfc_debug_dump_cq(struct lpfc_hba *phba, int qtype, int wqidx)
        char *qtypestr;
        int eqidx;
 
-       /* fcp/nvme wq and cq are 1:1, thus same indexes */
+       /* io wq and cq are 1:1, thus same indexes */
        eq = NULL;
 
-       if (qtype == DUMP_FCP) {
-               wq = phba->sli4_hba.hdwq[wqidx].fcp_wq;
-               cq = phba->sli4_hba.hdwq[wqidx].fcp_cq;
-               qtypestr = "FCP";
-       } else if (qtype == DUMP_NVME) {
-               wq = phba->sli4_hba.hdwq[wqidx].nvme_wq;
-               cq = phba->sli4_hba.hdwq[wqidx].nvme_cq;
-               qtypestr = "NVME";
+       if (qtype == DUMP_IO) {
+               wq = phba->sli4_hba.hdwq[wqidx].io_wq;
+               cq = phba->sli4_hba.hdwq[wqidx].io_cq;
+               qtypestr = "IO";
        } else if (qtype == DUMP_MBX) {
                wq = phba->sli4_hba.mbx_wq;
                cq = phba->sli4_hba.mbx_cq;
@@ -496,7 +488,7 @@ lpfc_debug_dump_cq(struct lpfc_hba *phba, int qtype, int wqidx)
                eq = phba->sli4_hba.hdwq[0].hba_eq;
        }
 
-       if (qtype == DUMP_FCP || qtype == DUMP_NVME)
+       if (qtype == DUMP_IO)
                pr_err("%s CQ: WQ[Idx:%d|Qid%d]->CQ[Idx%d|Qid%d]"
                        "->EQ[Idx:%d|Qid:%d]:\n",
                        qtypestr, wqidx, wq->queue_id, wqidx, cq->queue_id,
@@ -572,20 +564,11 @@ lpfc_debug_dump_wq_by_id(struct lpfc_hba *phba, int qid)
        int wq_idx;
 
        for (wq_idx = 0; wq_idx < phba->cfg_hdw_queue; wq_idx++)
-               if (phba->sli4_hba.hdwq[wq_idx].fcp_wq->queue_id == qid)
+               if (phba->sli4_hba.hdwq[wq_idx].io_wq->queue_id == qid)
                        break;
        if (wq_idx < phba->cfg_hdw_queue) {
-               pr_err("FCP WQ[Idx:%d|Qid:%d]\n", wq_idx, qid);
-               lpfc_debug_dump_q(phba->sli4_hba.hdwq[wq_idx].fcp_wq);
-               return;
-       }
-
-       for (wq_idx = 0; wq_idx < phba->cfg_hdw_queue; wq_idx++)
-               if (phba->sli4_hba.hdwq[wq_idx].nvme_wq->queue_id == qid)
-                       break;
-       if (wq_idx < phba->cfg_hdw_queue) {
-               pr_err("NVME WQ[Idx:%d|Qid:%d]\n", wq_idx, qid);
-               lpfc_debug_dump_q(phba->sli4_hba.hdwq[wq_idx].nvme_wq);
+               pr_err("IO WQ[Idx:%d|Qid:%d]\n", wq_idx, qid);
+               lpfc_debug_dump_q(phba->sli4_hba.hdwq[wq_idx].io_wq);
                return;
        }
 
@@ -654,22 +637,12 @@ lpfc_debug_dump_cq_by_id(struct lpfc_hba *phba, int qid)
        int cq_idx;
 
        for (cq_idx = 0; cq_idx < phba->cfg_hdw_queue; cq_idx++)
-               if (phba->sli4_hba.hdwq[cq_idx].fcp_cq->queue_id == qid)
-                       break;
-
-       if (cq_idx < phba->cfg_hdw_queue) {
-               pr_err("FCP CQ[Idx:%d|Qid:%d]\n", cq_idx, qid);
-               lpfc_debug_dump_q(phba->sli4_hba.hdwq[cq_idx].fcp_cq);
-               return;
-       }
-
-       for (cq_idx = 0; cq_idx < phba->cfg_hdw_queue; cq_idx++)
-               if (phba->sli4_hba.hdwq[cq_idx].nvme_cq->queue_id == qid)
+               if (phba->sli4_hba.hdwq[cq_idx].io_cq->queue_id == qid)
                        break;
 
        if (cq_idx < phba->cfg_hdw_queue) {
-               pr_err("NVME CQ[Idx:%d|Qid:%d]\n", cq_idx, qid);
-               lpfc_debug_dump_q(phba->sli4_hba.hdwq[cq_idx].nvme_cq);
+               pr_err("IO CQ[Idx:%d|Qid:%d]\n", cq_idx, qid);
+               lpfc_debug_dump_q(phba->sli4_hba.hdwq[cq_idx].io_cq);
                return;
        }
 
index 1c89c9f..482e4a8 100644 (file)
@@ -112,6 +112,8 @@ struct lpfc_nodelist {
        uint8_t         nlp_retry;              /* used for ELS retries */
        uint8_t         nlp_fcp_info;           /* class info, bits 0-3 */
 #define NLP_FCP_2_DEVICE   0x10                        /* FCP-2 device */
+       u8              nlp_nvme_info;          /* NVME NSLER Support */
+#define NLP_NVME_NSLER     0x1                 /* NVME NSLER device */
 
        uint16_t        nlp_usg_map;    /* ndlp management usage bitmap */
 #define NLP_USG_NODE_ACT_BIT   0x1     /* Indicate ndlp is actively used */
@@ -157,6 +159,7 @@ struct lpfc_node_rrq {
 /* Defines for nlp_flag (uint32) */
 #define NLP_IGNR_REG_CMPL  0x00000001 /* Rcvd rscn before we cmpl reg login */
 #define NLP_REG_LOGIN_SEND 0x00000002   /* sent reglogin to adapter */
+#define NLP_RELEASE_RPI    0x00000004   /* Release RPI to free pool */
 #define NLP_SUPPRESS_RSP   0x00000010  /* Remote NPort supports suppress rsp */
 #define NLP_PLOGI_SND      0x00000020  /* sent PLOGI request for this entry */
 #define NLP_PRLI_SND       0x00000040  /* sent PRLI request for this entry */
index f12780f..d530399 100644 (file)
@@ -1052,17 +1052,18 @@ stop_rr_fcf_flogi:
                if (lpfc_els_retry(phba, cmdiocb, rspiocb))
                        goto out;
 
+               lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS,
+                                "0150 FLOGI failure Status:x%x/x%x "
+                                "xri x%x TMO:x%x\n",
+                                irsp->ulpStatus, irsp->un.ulpWord[4],
+                                cmdiocb->sli4_xritag, irsp->ulpTimeout);
+
                /* If this is not a loop open failure, bail out */
                if (!(irsp->ulpStatus == IOSTAT_LOCAL_REJECT &&
                      ((irsp->un.ulpWord[4] & IOERR_PARAM_MASK) ==
                                        IOERR_LOOP_OPEN_FAILURE)))
                        goto flogifail;
 
-               lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS,
-                                "0150 FLOGI failure Status:x%x/x%x xri x%x TMO:x%x\n",
-                                irsp->ulpStatus, irsp->un.ulpWord[4],
-                                cmdiocb->sli4_xritag, irsp->ulpTimeout);
-
                /* FLOGI failed, so there is no fabric */
                spin_lock_irq(shost->host_lock);
                vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
@@ -1207,6 +1208,39 @@ out:
 }
 
 /**
+ * lpfc_cmpl_els_link_down - Completion for ELS command aborted at link down
+ * @phba: pointer to lpfc hba data structure.
+ * @cmdiocb: pointer to lpfc command iocb data structure.
+ * @rspiocb: pointer to lpfc response iocb data structure.
+ *
+ * Logs status, undoes LPFC_IO_FABRIC accounting if set, frees @cmdiocb.
+ */
+static void
+lpfc_cmpl_els_link_down(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+                       struct lpfc_iocbq *rspiocb)
+{
+       IOCB_t *irsp;
+       uint32_t *pcmd;
+       uint32_t cmd;
+
+       pcmd = (uint32_t *)(((struct lpfc_dmabuf *)cmdiocb->context2)->virt);
+       cmd = *pcmd;            /* first word of cmdiocb's context2 buffer */
+       irsp = &rspiocb->iocb;
+
+       lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+                       "6445 ELS completes after LINK_DOWN: "
+                       " Status %x/%x cmd x%x flg x%x\n",
+                       irsp->ulpStatus, irsp->un.ulpWord[4], cmd,
+                       cmdiocb->iocb_flag);
+
+       if (cmdiocb->iocb_flag & LPFC_IO_FABRIC) {  /* counted as fabric */
+               cmdiocb->iocb_flag &= ~LPFC_IO_FABRIC;
+               atomic_dec(&phba->fabric_iocb_count);
+       }
+       lpfc_els_free_iocb(phba, cmdiocb);
+}
+
+/**
  * lpfc_issue_els_flogi - Issue an flogi iocb command for a vport
  * @vport: pointer to a host virtual N_Port data structure.
  * @ndlp: pointer to a node-list data structure.
@@ -2107,7 +2141,7 @@ lpfc_issue_els_plogi(struct lpfc_vport *vport, uint32_t did, uint8_t retry)
                    !(vport->fc_flag & FC_OFFLINE_MODE)) {
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                         "4110 Issue PLOGI x%x deferred "
-                                        "on NPort x%x rpi x%x Data: %p\n",
+                                        "on NPort x%x rpi x%x Data: x%px\n",
                                         ndlp->nlp_defer_did, ndlp->nlp_DID,
                                         ndlp->nlp_rpi, ndlp);
 
@@ -2401,6 +2435,10 @@ lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                npr_nvme = (struct lpfc_nvme_prli *)pcmd;
                bf_set(prli_type_code, npr_nvme, PRLI_NVME_TYPE);
                bf_set(prli_estabImagePair, npr_nvme, 0);  /* Should be 0 */
+               if (phba->nsler) {
+                       bf_set(prli_nsler, npr_nvme, 1);
+                       bf_set(prli_conf, npr_nvme, 1);
+               }
 
                /* Only initiators request first burst. */
                if ((phba->cfg_nvme_enable_fb) &&
@@ -4203,7 +4241,7 @@ lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
        mempool_free(pmb, phba->mbox_mem_pool);
        if (ndlp) {
                lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
-                                "0006 rpi%x DID:%x flg:%x %d map:%x %p\n",
+                                "0006 rpi%x DID:%x flg:%x %d map:%x x%px\n",
                                 ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag,
                                 kref_read(&ndlp->kref),
                                 ndlp->nlp_usg_map, ndlp);
@@ -5634,16 +5672,16 @@ lpfc_rdp_res_attach_port_names(struct fc_rdp_port_name_desc *desc,
        desc->tag = cpu_to_be32(RDP_PORT_NAMES_DESC_TAG);
        if (vport->fc_flag & FC_FABRIC) {
                memcpy(desc->port_names.wwnn, &vport->fabric_nodename,
-                               sizeof(desc->port_names.wwnn));
+                      sizeof(desc->port_names.wwnn));
 
                memcpy(desc->port_names.wwpn, &vport->fabric_portname,
-                               sizeof(desc->port_names.wwpn));
+                      sizeof(desc->port_names.wwpn));
        } else {  /* Point to Point */
                memcpy(desc->port_names.wwnn, &ndlp->nlp_nodename,
-                               sizeof(desc->port_names.wwnn));
+                      sizeof(desc->port_names.wwnn));
 
-               memcpy(desc->port_names.wwnn, &ndlp->nlp_portname,
-                               sizeof(desc->port_names.wwpn));
+               memcpy(desc->port_names.wwpn, &ndlp->nlp_portname,
+                      sizeof(desc->port_names.wwpn));
        }
 
        desc->length = cpu_to_be32(sizeof(desc->port_names));
@@ -6327,7 +6365,11 @@ lpfc_rscn_recovery_check(struct lpfc_vport *vport)
                        continue;
                }
 
-               if (ndlp->nlp_fc4_type & NLP_FC4_NVME)
+               /* Check to see if we need to NVME rescan this target
+                * remoteport.
+                */
+               if (ndlp->nlp_fc4_type & NLP_FC4_NVME &&
+                   ndlp->nlp_type & (NLP_NVME_TARGET | NLP_NVME_DISCOVERY))
                        lpfc_nvme_rescan_port(vport, ndlp);
 
                lpfc_disc_state_machine(vport, ndlp, NULL,
@@ -6441,7 +6483,11 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
                                 *lp, vport->fc_flag, payload_len);
                lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL);
 
-               if (ndlp->nlp_fc4_type & NLP_FC4_NVME)
+               /* Check to see if we need to NVME rescan this target
+                * remoteport.
+                */
+               if (ndlp->nlp_fc4_type & NLP_FC4_NVME &&
+                   ndlp->nlp_type & (NLP_NVME_TARGET | NLP_NVME_DISCOVERY))
                        lpfc_nvme_rescan_port(vport, ndlp);
                return 0;
        }
@@ -7960,18 +8006,40 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport)
        if (phba->sli_rev == LPFC_SLI_REV4)
                spin_lock(&pring->ring_lock);
 
+       /* First we need to issue aborts to outstanding cmds on txcmpl */
        list_for_each_entry_safe(piocb, tmp_iocb, &pring->txcmplq, list) {
                if (piocb->iocb_flag & LPFC_IO_LIBDFC)
                        continue;
 
                if (piocb->vport != vport)
                        continue;
-               list_add_tail(&piocb->dlist, &abort_list);
+
+               /* On the ELS ring we can have ELS_REQUESTs or
+                * GEN_REQUESTs waiting for a response.
+                */
+               cmd = &piocb->iocb;
+               if (cmd->ulpCommand == CMD_ELS_REQUEST64_CR) {
+                       list_add_tail(&piocb->dlist, &abort_list);
+
+                       /* If the link is down when flushing ELS commands
+                        * the firmware will not complete them till after
+                        * the link comes back up. This may confuse
+                        * discovery for the new link up, so we need to
+                        * change the compl routine to just clean up the iocb
+                        * and avoid any retry logic.
+                        */
+                       if (phba->link_state == LPFC_LINK_DOWN)
+                               piocb->iocb_cmpl = lpfc_cmpl_els_link_down;
+               }
+               if (cmd->ulpCommand == CMD_GEN_REQUEST64_CR)
+                       list_add_tail(&piocb->dlist, &abort_list);
        }
+
        if (phba->sli_rev == LPFC_SLI_REV4)
                spin_unlock(&pring->ring_lock);
        spin_unlock_irq(&phba->hbalock);
-       /* Abort each iocb on the aborted list and remove the dlist links. */
+
+       /* Abort each txcmpl iocb on aborted list and remove the dlist links. */
        list_for_each_entry_safe(piocb, tmp_iocb, &abort_list, dlist) {
                spin_lock_irq(&phba->hbalock);
                list_del_init(&piocb->dlist);
@@ -7987,6 +8055,9 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport)
        if (phba->sli_rev == LPFC_SLI_REV4)
                spin_lock(&pring->ring_lock);
 
+       /* No need to abort the txq list,
+        * just queue them up for lpfc_sli_cancel_iocbs
+        */
        list_for_each_entry_safe(piocb, tmp_iocb, &pring->txq, list) {
                cmd = &piocb->iocb;
 
@@ -8007,11 +8078,22 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport)
                list_del_init(&piocb->list);
                list_add_tail(&piocb->list, &abort_list);
        }
+
+       /* The same holds true for any FLOGI/FDISC on the fabric_iocb_list */
+       if (vport == phba->pport) {
+               list_for_each_entry_safe(piocb, tmp_iocb,
+                                        &phba->fabric_iocb_list, list) {
+                       cmd = &piocb->iocb;
+                       list_del_init(&piocb->list);
+                       list_add_tail(&piocb->list, &abort_list);
+               }
+       }
+
        if (phba->sli_rev == LPFC_SLI_REV4)
                spin_unlock(&pring->ring_lock);
        spin_unlock_irq(&phba->hbalock);
 
-       /* Cancell all the IOCBs from the completions list */
+       /* Cancel all the IOCBs from the completions list */
        lpfc_sli_cancel_iocbs(phba, &abort_list,
                              IOSTAT_LOCAL_REJECT, IOERR_SLI_ABORTED);
 
index 28ecaa7..749286a 100644 (file)
@@ -118,6 +118,7 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
        struct lpfc_work_evt *evtp;
        int  put_node;
        int  put_rport;
+       unsigned long iflags;
 
        rdata = rport->dd_data;
        ndlp = rdata->pnode;
@@ -132,7 +133,7 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
                ndlp->nlp_sid, ndlp->nlp_DID, ndlp->nlp_flag);
 
        lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
-                        "3181 dev_loss_callbk x%06x, rport %p flg x%x\n",
+                        "3181 dev_loss_callbk x%06x, rport x%px flg x%x\n",
                         ndlp->nlp_DID, ndlp->rport, ndlp->nlp_flag);
 
        /* Don't defer this if we are in the process of deleting the vport
@@ -170,22 +171,22 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
        }
 
        shost = lpfc_shost_from_vport(vport);
-       spin_lock_irq(shost->host_lock);
+       spin_lock_irqsave(shost->host_lock, iflags);
        ndlp->nlp_flag |= NLP_IN_DEV_LOSS;
-       spin_unlock_irq(shost->host_lock);
+       spin_unlock_irqrestore(shost->host_lock, iflags);
 
        /* We need to hold the node by incrementing the reference
         * count until this queued work is done
         */
        evtp->evt_arg1  = lpfc_nlp_get(ndlp);
 
-       spin_lock_irq(&phba->hbalock);
+       spin_lock_irqsave(&phba->hbalock, iflags);
        if (evtp->evt_arg1) {
                evtp->evt = LPFC_EVT_DEV_LOSS;
                list_add_tail(&evtp->evt_listp, &phba->work_list);
                lpfc_worker_wake_up(phba);
        }
-       spin_unlock_irq(&phba->hbalock);
+       spin_unlock_irqrestore(&phba->hbalock, iflags);
 
        return;
 }
@@ -212,14 +213,15 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
        int  put_node;
        int warn_on = 0;
        int fcf_inuse = 0;
+       unsigned long iflags;
 
        rport = ndlp->rport;
        vport = ndlp->vport;
        shost = lpfc_shost_from_vport(vport);
 
-       spin_lock_irq(shost->host_lock);
+       spin_lock_irqsave(shost->host_lock, iflags);
        ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS;
-       spin_unlock_irq(shost->host_lock);
+       spin_unlock_irqrestore(shost->host_lock, iflags);
 
        if (!rport)
                return fcf_inuse;
@@ -235,7 +237,7 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
                ndlp->nlp_DID, ndlp->nlp_type, rport->scsi_target_id);
 
        lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
-                        "3182 dev_loss_tmo_handler x%06x, rport %p flg x%x\n",
+                        "3182 dev_loss_tmo_handler x%06x, rport x%px flg x%x\n",
                         ndlp->nlp_DID, ndlp->rport, ndlp->nlp_flag);
 
        /*
@@ -903,6 +905,8 @@ lpfc_linkdown(struct lpfc_hba *phba)
                        phba->trunk_link.link1.state = 0;
                        phba->trunk_link.link2.state = 0;
                        phba->trunk_link.link3.state = 0;
+                       phba->sli4_hba.link_state.logical_speed =
+                                               LPFC_LINK_SPEED_UNKNOWN;
                }
                spin_lock_irq(shost->host_lock);
                phba->pport->fc_flag &= ~FC_LBIT;
@@ -3115,8 +3119,9 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
        int rc;
        struct fcf_record *fcf_record;
        uint32_t fc_flags = 0;
+       unsigned long iflags;
 
-       spin_lock_irq(&phba->hbalock);
+       spin_lock_irqsave(&phba->hbalock, iflags);
        phba->fc_linkspeed = bf_get(lpfc_mbx_read_top_link_spd, la);
 
        if (!(phba->hba_flag & HBA_FCOE_MODE)) {
@@ -3213,12 +3218,12 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
                vport->fc_myDID = phba->fc_pref_DID;
                fc_flags |= FC_LBIT;
        }
-       spin_unlock_irq(&phba->hbalock);
+       spin_unlock_irqrestore(&phba->hbalock, iflags);
 
        if (fc_flags) {
-               spin_lock_irq(shost->host_lock);
+               spin_lock_irqsave(shost->host_lock, iflags);
                vport->fc_flag |= fc_flags;
-               spin_unlock_irq(shost->host_lock);
+               spin_unlock_irqrestore(shost->host_lock, iflags);
        }
 
        lpfc_linkup(phba);
@@ -3292,22 +3297,22 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
                 * The driver is expected to do FIP/FCF. Call the port
                 * and get the FCF Table.
                 */
-               spin_lock_irq(&phba->hbalock);
+               spin_lock_irqsave(&phba->hbalock, iflags);
                if (phba->hba_flag & FCF_TS_INPROG) {
-                       spin_unlock_irq(&phba->hbalock);
+                       spin_unlock_irqrestore(&phba->hbalock, iflags);
                        return;
                }
                /* This is the initial FCF discovery scan */
                phba->fcf.fcf_flag |= FCF_INIT_DISC;
-               spin_unlock_irq(&phba->hbalock);
+               spin_unlock_irqrestore(&phba->hbalock, iflags);
                lpfc_printf_log(phba, KERN_INFO, LOG_FIP | LOG_DISCOVERY,
                                "2778 Start FCF table scan at linkup\n");
                rc = lpfc_sli4_fcf_scan_read_fcf_rec(phba,
                                                     LPFC_FCOE_FCF_GET_FIRST);
                if (rc) {
-                       spin_lock_irq(&phba->hbalock);
+                       spin_lock_irqsave(&phba->hbalock, iflags);
                        phba->fcf.fcf_flag &= ~FCF_INIT_DISC;
-                       spin_unlock_irq(&phba->hbalock);
+                       spin_unlock_irqrestore(&phba->hbalock, iflags);
                        goto out;
                }
                /* Reset FCF roundrobin bmask for new discovery */
@@ -3318,7 +3323,7 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
 out:
        lpfc_vport_set_state(vport, FC_VPORT_FAILED);
        lpfc_printf_vlog(vport, KERN_ERR, LOG_MBOX,
-                        "0263 Discovery Mailbox error: state: 0x%x : %p %p\n",
+                        "0263 Discovery Mailbox error: state: 0x%x : x%px x%px\n",
                         vport->port_state, sparam_mbox, cfglink_mbox);
        lpfc_issue_clear_la(phba, vport);
        return;
@@ -3366,6 +3371,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
        MAILBOX_t *mb = &pmb->u.mb;
        struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
        uint8_t attn_type;
+       unsigned long iflags;
 
        /* Unblock ELS traffic */
        pring = lpfc_phba_elsring(phba);
@@ -3387,12 +3393,12 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 
        memcpy(&phba->alpa_map[0], mp->virt, 128);
 
-       spin_lock_irq(shost->host_lock);
+       spin_lock_irqsave(shost->host_lock, iflags);
        if (bf_get(lpfc_mbx_read_top_pb, la))
                vport->fc_flag |= FC_BYPASSED_MODE;
        else
                vport->fc_flag &= ~FC_BYPASSED_MODE;
-       spin_unlock_irq(shost->host_lock);
+       spin_unlock_irqrestore(shost->host_lock, iflags);
 
        if (phba->fc_eventTag <= la->eventTag) {
                phba->fc_stat.LinkMultiEvent++;
@@ -3403,12 +3409,12 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 
        phba->fc_eventTag = la->eventTag;
        if (phba->sli_rev < LPFC_SLI_REV4) {
-               spin_lock_irq(&phba->hbalock);
+               spin_lock_irqsave(&phba->hbalock, iflags);
                if (bf_get(lpfc_mbx_read_top_mm, la))
                        phba->sli.sli_flag |= LPFC_MENLO_MAINT;
                else
                        phba->sli.sli_flag &= ~LPFC_MENLO_MAINT;
-               spin_unlock_irq(&phba->hbalock);
+               spin_unlock_irqrestore(&phba->hbalock, iflags);
        }
 
        phba->link_events++;
@@ -3529,7 +3535,7 @@ lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
        pmb->ctx_ndlp = NULL;
 
        lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
-                        "0002 rpi:%x DID:%x flg:%x %d map:%x %p\n",
+                        "0002 rpi:%x DID:%x flg:%x %d map:%x x%px\n",
                         ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag,
                         kref_read(&ndlp->kref),
                         ndlp->nlp_usg_map, ndlp);
@@ -4041,7 +4047,7 @@ out:
        ndlp->nlp_type |= NLP_FABRIC;
        lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
        lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
-                        "0003 rpi:%x DID:%x flg:%x %d map%x %p\n",
+                        "0003 rpi:%x DID:%x flg:%x %d map%x x%px\n",
                         ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag,
                         kref_read(&ndlp->kref),
                         ndlp->nlp_usg_map, ndlp);
@@ -4160,7 +4166,7 @@ lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                fc_remote_port_rolechg(rport, rport_ids.roles);
 
        lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
-                        "3183 rport register x%06x, rport %p role x%x\n",
+                        "3183 rport register x%06x, rport x%px role x%x\n",
                         ndlp->nlp_DID, rport, rport_ids.roles);
 
        if ((rport->scsi_target_id != -1) &&
@@ -4184,7 +4190,7 @@ lpfc_unregister_remote_port(struct lpfc_nodelist *ndlp)
                ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type);
 
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
-                        "3184 rport unregister x%06x, rport %p\n",
+                        "3184 rport unregister x%06x, rport x%px\n",
                         ndlp->nlp_DID, rport);
 
        fc_remote_port_delete(rport);
@@ -4196,8 +4202,9 @@ static void
 lpfc_nlp_counters(struct lpfc_vport *vport, int state, int count)
 {
        struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+       unsigned long iflags;
 
-       spin_lock_irq(shost->host_lock);
+       spin_lock_irqsave(shost->host_lock, iflags);
        switch (state) {
        case NLP_STE_UNUSED_NODE:
                vport->fc_unused_cnt += count;
@@ -4227,7 +4234,7 @@ lpfc_nlp_counters(struct lpfc_vport *vport, int state, int count)
                        vport->fc_npr_cnt += count;
                break;
        }
-       spin_unlock_irq(shost->host_lock);
+       spin_unlock_irqrestore(shost->host_lock, iflags);
 }
 
 static void
@@ -4480,9 +4487,21 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                return NULL;
 
        if (phba->sli_rev == LPFC_SLI_REV4) {
-               rpi = lpfc_sli4_alloc_rpi(vport->phba);
-               if (rpi == LPFC_RPI_ALLOC_ERROR)
+               if (ndlp->nlp_rpi == LPFC_RPI_ALLOC_ERROR)
+                       rpi = lpfc_sli4_alloc_rpi(vport->phba);
+               else
+                       rpi = ndlp->nlp_rpi;
+
+               if (rpi == LPFC_RPI_ALLOC_ERROR) {
+                       lpfc_printf_vlog(vport, KERN_WARNING, LOG_NODE,
+                                        "0359 %s: ndlp:x%px "
+                                        "usgmap:x%x refcnt:%d FAILED RPI "
+                                        " ALLOC\n",
+                                        __func__,
+                                        (void *)ndlp, ndlp->nlp_usg_map,
+                                        kref_read(&ndlp->kref));
                        return NULL;
+               }
        }
 
        spin_lock_irqsave(&phba->ndlp_lock, flags);
@@ -4490,9 +4509,9 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
        if (NLP_CHK_FREE_REQ(ndlp)) {
                spin_unlock_irqrestore(&phba->ndlp_lock, flags);
                lpfc_printf_vlog(vport, KERN_WARNING, LOG_NODE,
-                               "0277 lpfc_enable_node: ndlp:x%p "
+                               "0277 %s: ndlp:x%px "
                                "usgmap:x%x refcnt:%d\n",
-                               (void *)ndlp, ndlp->nlp_usg_map,
+                               __func__, (void *)ndlp, ndlp->nlp_usg_map,
                                kref_read(&ndlp->kref));
                goto free_rpi;
        }
@@ -4500,9 +4519,9 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
        if (NLP_CHK_NODE_ACT(ndlp)) {
                spin_unlock_irqrestore(&phba->ndlp_lock, flags);
                lpfc_printf_vlog(vport, KERN_WARNING, LOG_NODE,
-                               "0278 lpfc_enable_node: ndlp:x%p "
+                               "0278 %s: ndlp:x%px "
                                "usgmap:x%x refcnt:%d\n",
-                               (void *)ndlp, ndlp->nlp_usg_map,
+                               __func__, (void *)ndlp, ndlp->nlp_usg_map,
                                kref_read(&ndlp->kref));
                goto free_rpi;
        }
@@ -4532,7 +4551,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                ndlp->nlp_rpi = rpi;
                lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
                                 "0008 rpi:%x DID:%x flg:%x refcnt:%d "
-                                "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID,
+                                "map:%x x%px\n", ndlp->nlp_rpi, ndlp->nlp_DID,
                                 ndlp->nlp_flag,
                                 kref_read(&ndlp->kref),
                                 ndlp->nlp_usg_map, ndlp);
@@ -4541,6 +4560,14 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 
        if (state != NLP_STE_UNUSED_NODE)
                lpfc_nlp_set_state(vport, ndlp, state);
+       else
+               lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
+                                "0013 rpi:%x DID:%x flg:%x refcnt:%d "
+                                "map:%x x%px STATE=UNUSED\n",
+                                ndlp->nlp_rpi, ndlp->nlp_DID,
+                                ndlp->nlp_flag,
+                                kref_read(&ndlp->kref),
+                                ndlp->nlp_usg_map, ndlp);
 
        lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_NODE,
                "node enable:       did:x%x",
@@ -4797,7 +4824,7 @@ lpfc_nlp_logo_unreg(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
            (ndlp->nlp_defer_did != NLP_EVT_NOTHING_PENDING)) {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                 "1434 UNREG cmpl deferred logo x%x "
-                                "on NPort x%x Data: x%x %p\n",
+                                "on NPort x%x Data: x%x x%px\n",
                                 ndlp->nlp_rpi, ndlp->nlp_DID,
                                 ndlp->nlp_defer_did, ndlp);
 
@@ -4805,6 +4832,10 @@ lpfc_nlp_logo_unreg(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                ndlp->nlp_defer_did = NLP_EVT_NOTHING_PENDING;
                lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
        } else {
+               if (ndlp->nlp_flag & NLP_RELEASE_RPI) {
+                       lpfc_sli4_free_rpi(vport->phba, ndlp->nlp_rpi);
+                       ndlp->nlp_flag &= ~NLP_RELEASE_RPI;
+               }
                ndlp->nlp_flag &= ~NLP_UNREG_INP;
        }
 }
@@ -4843,7 +4874,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                         "1436 unreg_rpi SKIP UNREG x%x on "
                                         "NPort x%x deferred x%x  flg x%x "
-                                        "Data: %p\n",
+                                        "Data: x%px\n",
                                         ndlp->nlp_rpi, ndlp->nlp_DID,
                                         ndlp->nlp_defer_did,
                                         ndlp->nlp_flag, ndlp);
@@ -4893,7 +4924,8 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                         "1433 unreg_rpi UNREG x%x on "
-                                        "NPort x%x deferred flg x%x Data:%p\n",
+                                        "NPort x%x deferred flg x%x "
+                                        "Data:x%px\n",
                                         ndlp->nlp_rpi, ndlp->nlp_DID,
                                         ndlp->nlp_flag, ndlp);
 
@@ -5034,16 +5066,16 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                         ndlp->nlp_state, ndlp->nlp_rpi);
        if (NLP_CHK_FREE_REQ(ndlp)) {
                lpfc_printf_vlog(vport, KERN_WARNING, LOG_NODE,
-                               "0280 lpfc_cleanup_node: ndlp:x%p "
+                               "0280 %s: ndlp:x%px "
                                "usgmap:x%x refcnt:%d\n",
-                               (void *)ndlp, ndlp->nlp_usg_map,
+                               __func__, (void *)ndlp, ndlp->nlp_usg_map,
                                kref_read(&ndlp->kref));
                lpfc_dequeue_node(vport, ndlp);
        } else {
                lpfc_printf_vlog(vport, KERN_WARNING, LOG_NODE,
-                               "0281 lpfc_cleanup_node: ndlp:x%p "
+                               "0281 %s: ndlp:x%px "
                                "usgmap:x%x refcnt:%d\n",
-                               (void *)ndlp, ndlp->nlp_usg_map,
+                               __func__, (void *)ndlp, ndlp->nlp_usg_map,
                                kref_read(&ndlp->kref));
                lpfc_disable_node(vport, ndlp);
        }
@@ -5104,6 +5136,8 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
        list_del_init(&ndlp->els_retry_evt.evt_listp);
        list_del_init(&ndlp->dev_loss_evt.evt_listp);
        lpfc_cleanup_vports_rrqs(vport, ndlp);
+       if (phba->sli_rev == LPFC_SLI_REV4)
+               ndlp->nlp_flag |= NLP_RELEASE_RPI;
        lpfc_unreg_rpi(vport, ndlp);
 
        return 0;
@@ -5132,7 +5166,7 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                 * allocated by the firmware.
                 */
                lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
-                                "0005 rpi:%x DID:%x flg:%x %d map:%x %p\n",
+                                "0005 rpi:%x DID:%x flg:%x %d map:%x x%px\n",
                                 ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag,
                                 kref_read(&ndlp->kref),
                                 ndlp->nlp_usg_map, ndlp);
@@ -5168,8 +5202,8 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                 * for registered rport so need to cleanup rport
                 */
                lpfc_printf_vlog(vport, KERN_WARNING, LOG_NODE,
-                               "0940 removed node x%p DID x%x "
-                               " rport not null %p\n",
+                               "0940 removed node x%px DID x%x "
+                               " rport not null x%px\n",
                                ndlp, ndlp->nlp_DID, ndlp->rport);
                rport = ndlp->rport;
                rdata = rport->dd_data;
@@ -5243,15 +5277,15 @@ __lpfc_findnode_did(struct lpfc_vport *vport, uint32_t did)
 
        list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
                if (lpfc_matchdid(vport, ndlp, did)) {
-                       data1 = (((uint32_t) ndlp->nlp_state << 24) |
-                                ((uint32_t) ndlp->nlp_xri << 16) |
-                                ((uint32_t) ndlp->nlp_type << 8) |
-                                ((uint32_t) ndlp->nlp_rpi & 0xff));
+                       data1 = (((uint32_t)ndlp->nlp_state << 24) |
+                                ((uint32_t)ndlp->nlp_xri << 16) |
+                                ((uint32_t)ndlp->nlp_type << 8) |
+                                ((uint32_t)ndlp->nlp_usg_map & 0xff));
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
                                         "0929 FIND node DID "
-                                        "Data: x%p x%x x%x x%x %p\n",
+                                        "Data: x%px x%x x%x x%x x%x x%px\n",
                                         ndlp, ndlp->nlp_DID,
-                                        ndlp->nlp_flag, data1,
+                                        ndlp->nlp_flag, data1, ndlp->nlp_rpi,
                                         ndlp->active_rrqs_xri_bitmap);
                        return ndlp;
                }
@@ -5296,7 +5330,7 @@ lpfc_findnode_mapped(struct lpfc_vport *vport)
                        spin_unlock_irqrestore(shost->host_lock, iflags);
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
                                         "2025 FIND node DID "
-                                        "Data: x%p x%x x%x x%x %p\n",
+                                        "Data: x%px x%x x%x x%x x%px\n",
                                         ndlp, ndlp->nlp_DID,
                                         ndlp->nlp_flag, data1,
                                         ndlp->active_rrqs_xri_bitmap);
@@ -5336,8 +5370,11 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did)
                if (vport->phba->nvmet_support)
                        return NULL;
                ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_NPR_NODE);
-               if (!ndlp)
+               if (!ndlp) {
+                       lpfc_printf_vlog(vport, KERN_WARNING, LOG_SLI,
+                                        "0014 Could not enable ndlp\n");
                        return NULL;
+               }
                spin_lock_irq(shost->host_lock);
                ndlp->nlp_flag |= NLP_NPR_2B_DISC;
                spin_unlock_irq(shost->host_lock);
@@ -5960,7 +5997,7 @@ lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
        ndlp->nlp_type |= NLP_FABRIC;
        lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
        lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
-                        "0004 rpi:%x DID:%x flg:%x %d map:%x %p\n",
+                        "0004 rpi:%x DID:%x flg:%x %d map:%x x%px\n",
                         ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag,
                         kref_read(&ndlp->kref),
                         ndlp->nlp_usg_map, ndlp);
@@ -6014,8 +6051,8 @@ __lpfc_find_node(struct lpfc_vport *vport, node_filter filter, void *param)
        list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
                if (filter(ndlp, param)) {
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
-                                        "3185 FIND node filter %p DID "
-                                        "ndlp %p did x%x flg x%x st x%x "
+                                        "3185 FIND node filter %ps DID "
+                                        "ndlp x%px did x%x flg x%x st x%x "
                                         "xri x%x type x%x rpi x%x\n",
                                         filter, ndlp, ndlp->nlp_DID,
                                         ndlp->nlp_flag, ndlp->nlp_state,
@@ -6025,7 +6062,7 @@ __lpfc_find_node(struct lpfc_vport *vport, node_filter filter, void *param)
                }
        }
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
-                        "3186 FIND node filter %p NOT FOUND.\n", filter);
+                        "3186 FIND node filter %ps NOT FOUND.\n", filter);
        return NULL;
 }
 
@@ -6065,10 +6102,11 @@ lpfc_findnode_rpi(struct lpfc_vport *vport, uint16_t rpi)
 {
        struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_nodelist *ndlp;
+       unsigned long flags;
 
-       spin_lock_irq(shost->host_lock);
+       spin_lock_irqsave(shost->host_lock, flags);
        ndlp = __lpfc_findnode_rpi(vport, rpi);
-       spin_unlock_irq(shost->host_lock);
+       spin_unlock_irqrestore(shost->host_lock, flags);
        return ndlp;
 }
 
@@ -6149,7 +6187,7 @@ lpfc_nlp_init(struct lpfc_vport *vport, uint32_t did)
                ndlp->nlp_rpi = rpi;
                lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
                                 "0007 rpi:%x DID:%x flg:%x refcnt:%d "
-                                "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID,
+                                "map:%x x%px\n", ndlp->nlp_rpi, ndlp->nlp_DID,
                                 ndlp->nlp_flag,
                                 kref_read(&ndlp->kref),
                                 ndlp->nlp_usg_map, ndlp);
@@ -6187,8 +6225,9 @@ lpfc_nlp_release(struct kref *kref)
                ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type);
 
        lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
-                       "0279 lpfc_nlp_release: ndlp:x%p did %x "
+                       "0279 %s: ndlp:x%px did %x "
                        "usgmap:x%x refcnt:%d rpi:%x\n",
+                       __func__,
                        (void *)ndlp, ndlp->nlp_DID, ndlp->nlp_usg_map,
                        kref_read(&ndlp->kref), ndlp->nlp_rpi);
 
@@ -6200,8 +6239,6 @@ lpfc_nlp_release(struct kref *kref)
        spin_lock_irqsave(&phba->ndlp_lock, flags);
        NLP_CLR_NODE_ACT(ndlp);
        spin_unlock_irqrestore(&phba->ndlp_lock, flags);
-       if (phba->sli_rev == LPFC_SLI_REV4)
-               lpfc_sli4_free_rpi(phba, ndlp->nlp_rpi);
 
        /* free ndlp memory for final ndlp release */
        if (NLP_CHK_FREE_REQ(ndlp)) {
@@ -6237,9 +6274,9 @@ lpfc_nlp_get(struct lpfc_nodelist *ndlp)
                if (!NLP_CHK_NODE_ACT(ndlp) || NLP_CHK_FREE_ACK(ndlp)) {
                        spin_unlock_irqrestore(&phba->ndlp_lock, flags);
                        lpfc_printf_vlog(ndlp->vport, KERN_WARNING, LOG_NODE,
-                               "0276 lpfc_nlp_get: ndlp:x%p "
+                               "0276 %s: ndlp:x%px "
                                "usgmap:x%x refcnt:%d\n",
-                               (void *)ndlp, ndlp->nlp_usg_map,
+                               __func__, (void *)ndlp, ndlp->nlp_usg_map,
                                kref_read(&ndlp->kref));
                        return NULL;
                } else
@@ -6265,9 +6302,9 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp)
                return 1;
 
        lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_NODE,
-       "node put:        did:x%x flg:x%x refcnt:x%x",
-               ndlp->nlp_DID, ndlp->nlp_flag,
-               kref_read(&ndlp->kref));
+                       "node put:        did:x%x flg:x%x refcnt:x%x",
+                       ndlp->nlp_DID, ndlp->nlp_flag,
+                       kref_read(&ndlp->kref));
        phba = ndlp->phba;
        spin_lock_irqsave(&phba->ndlp_lock, flags);
        /* Check the ndlp memory free acknowledge flag to avoid the
@@ -6277,9 +6314,9 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp)
        if (NLP_CHK_FREE_ACK(ndlp)) {
                spin_unlock_irqrestore(&phba->ndlp_lock, flags);
                lpfc_printf_vlog(ndlp->vport, KERN_WARNING, LOG_NODE,
-                               "0274 lpfc_nlp_put: ndlp:x%p "
+                               "0274 %s: ndlp:x%px "
                                "usgmap:x%x refcnt:%d\n",
-                               (void *)ndlp, ndlp->nlp_usg_map,
+                               __func__, (void *)ndlp, ndlp->nlp_usg_map,
                                kref_read(&ndlp->kref));
                return 1;
        }
@@ -6290,9 +6327,9 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp)
        if (NLP_CHK_IACT_REQ(ndlp)) {
                spin_unlock_irqrestore(&phba->ndlp_lock, flags);
                lpfc_printf_vlog(ndlp->vport, KERN_WARNING, LOG_NODE,
-                               "0275 lpfc_nlp_put: ndlp:x%p "
+                               "0275 %s: ndlp:x%px "
                                "usgmap:x%x refcnt:%d\n",
-                               (void *)ndlp, ndlp->nlp_usg_map,
+                               __func__, (void *)ndlp, ndlp->nlp_usg_map,
                                kref_read(&ndlp->kref));
                return 1;
        }
index 5b439a6..436cdc8 100644 (file)
@@ -843,7 +843,7 @@ typedef struct _ADISC {             /* Structure is in Big Endian format */
        struct lpfc_name portName;
        struct lpfc_name nodeName;
        uint32_t DID;
-} ADISC;
+} __packed ADISC;
 
 typedef struct _FARP {         /* Structure is in Big Endian format */
        uint32_t Mflags:8;
@@ -873,7 +873,7 @@ typedef struct _FAN {               /* Structure is in Big Endian format */
        uint32_t Fdid;
        struct lpfc_name FportName;
        struct lpfc_name FnodeName;
-} FAN;
+} __packed FAN;
 
 typedef struct _SCR {          /* Structure is in Big Endian format */
        uint8_t resvd1;
@@ -917,7 +917,7 @@ typedef struct _RNID {              /* Structure is in Big Endian format */
        union {
                RNID_TOP_DISC topologyDisc;     /* topology disc (0xdf) */
        } un;
-} RNID;
+} __packed RNID;
 
 typedef struct  _RPS {         /* Structure is in Big Endian format */
        union {
index 77f9a55..bd53347 100644 (file)
@@ -2050,6 +2050,23 @@ struct sli4_sge {        /* SLI-4 */
        uint32_t sge_len;
 };
 
+struct sli4_hybrid_sgl {
+       struct list_head list_node;
+       struct sli4_sge *dma_sgl;
+       dma_addr_t dma_phys_sgl;
+};
+
+struct fcp_cmd_rsp_buf {
+       struct list_head list_node;
+
+       /* for storing cmd/rsp dma alloc'ed virt_addr */
+       struct fcp_cmnd *fcp_cmnd;
+       struct fcp_rsp *fcp_rsp;
+
+       /* for storing this cmd/rsp's dma mapped phys addr from per CPU pool */
+       dma_addr_t fcp_cmd_rsp_dma_handle;
+};
+
 struct sli4_sge_diseed {       /* SLI-4 */
        uint32_t ref_tag;
        uint32_t ref_tag_tran;
@@ -3449,6 +3466,9 @@ struct lpfc_sli4_parameters {
 #define cfg_xib_SHIFT                          4
 #define cfg_xib_MASK                           0x00000001
 #define cfg_xib_WORD                           word19
+#define cfg_xpsgl_SHIFT                                6
+#define cfg_xpsgl_MASK                         0x00000001
+#define cfg_xpsgl_WORD                         word19
 #define cfg_eqdr_SHIFT                         8
 #define cfg_eqdr_MASK                          0x00000001
 #define cfg_eqdr_WORD                          word19
@@ -3460,6 +3480,10 @@ struct lpfc_sli4_parameters {
 #define cfg_bv1s_MASK                           0x00000001
 #define cfg_bv1s_WORD                           word19
 
+#define cfg_nsler_SHIFT                         12
+#define cfg_nsler_MASK                          0x00000001
+#define cfg_nsler_WORD                          word19
+
        uint32_t word20;
 #define cfg_max_tow_xri_SHIFT                  0
 #define cfg_max_tow_xri_MASK                   0x0000ffff
@@ -4314,6 +4338,12 @@ struct wqe_common {
 #define wqe_rcvoxid_SHIFT     16
 #define wqe_rcvoxid_MASK      0x0000FFFF
 #define wqe_rcvoxid_WORD      word9
+#define wqe_sof_SHIFT         24
+#define wqe_sof_MASK          0x000000FF
+#define wqe_sof_WORD          word9
+#define wqe_eof_SHIFT         16
+#define wqe_eof_MASK          0x000000FF
+#define wqe_eof_WORD          word9
        uint32_t word10;
 #define wqe_ebde_cnt_SHIFT    0
 #define wqe_ebde_cnt_MASK     0x0000000f
@@ -4595,6 +4625,7 @@ struct lpfc_nvme_prli {
 #define prli_type_code_WORD             word1
        uint32_t word_rsvd2;
        uint32_t word_rsvd3;
+
        uint32_t word4;
 #define prli_fba_SHIFT                  0
 #define prli_fba_MASK                   0x00000001
@@ -4611,6 +4642,9 @@ struct lpfc_nvme_prli {
 #define prli_conf_SHIFT                 7
 #define prli_conf_MASK                  0x00000001
 #define prli_conf_WORD                  word4
+#define prli_nsler_SHIFT               8
+#define prli_nsler_MASK                        0x00000001
+#define prli_nsler_WORD                        word4
        uint32_t word5;
 #define prli_fb_sz_SHIFT                0
 #define prli_fb_sz_MASK                 0x0000ffff
index 1ac98be..e91377a 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/msi.h>
 #include <linux/irq.h>
 #include <linux/bitops.h>
+#include <linux/crash_dump.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_device.h>
 #include "lpfc_version.h"
 #include "lpfc_ids.h"
 
-char *_dump_buf_data;
-unsigned long _dump_buf_data_order;
-char *_dump_buf_dif;
-unsigned long _dump_buf_dif_order;
-spinlock_t _dump_buf_lock;
-
 /* Used when mapping IRQ vectors in a driver centric manner */
 static uint32_t lpfc_present_cpu;
 
@@ -1081,8 +1076,8 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
        for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
                qp = &phba->sli4_hba.hdwq[idx];
 
-               spin_lock(&qp->abts_scsi_buf_list_lock);
-               list_splice_init(&qp->lpfc_abts_scsi_buf_list,
+               spin_lock(&qp->abts_io_buf_list_lock);
+               list_splice_init(&qp->lpfc_abts_io_buf_list,
                                 &aborts);
 
                list_for_each_entry_safe(psb, psb_next, &aborts, list) {
@@ -1093,29 +1088,11 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
                spin_lock(&qp->io_buf_list_put_lock);
                list_splice_init(&aborts, &qp->lpfc_io_buf_list_put);
                qp->put_io_bufs += qp->abts_scsi_io_bufs;
+               qp->put_io_bufs += qp->abts_nvme_io_bufs;
                qp->abts_scsi_io_bufs = 0;
+               qp->abts_nvme_io_bufs = 0;
                spin_unlock(&qp->io_buf_list_put_lock);
-               spin_unlock(&qp->abts_scsi_buf_list_lock);
-
-               if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-                       spin_lock(&qp->abts_nvme_buf_list_lock);
-                       list_splice_init(&qp->lpfc_abts_nvme_buf_list,
-                                        &nvme_aborts);
-                       list_for_each_entry_safe(psb, psb_next, &nvme_aborts,
-                                                list) {
-                               psb->pCmd = NULL;
-                               psb->status = IOSTAT_SUCCESS;
-                               cnt++;
-                       }
-                       spin_lock(&qp->io_buf_list_put_lock);
-                       qp->put_io_bufs += qp->abts_nvme_io_bufs;
-                       qp->abts_nvme_io_bufs = 0;
-                       list_splice_init(&nvme_aborts,
-                                        &qp->lpfc_io_buf_list_put);
-                       spin_unlock(&qp->io_buf_list_put_lock);
-                       spin_unlock(&qp->abts_nvme_buf_list_lock);
-
-               }
+               spin_unlock(&qp->abts_io_buf_list_lock);
        }
        spin_unlock_irq(&phba->hbalock);
 
@@ -1261,6 +1238,7 @@ lpfc_hb_eq_delay_work(struct work_struct *work)
        unsigned char *eqcnt = NULL;
        uint32_t usdelay;
        int i;
+       bool update = false;
 
        if (!phba->cfg_auto_imax || phba->pport->load_flag & FC_UNLOADING)
                return;
@@ -1274,20 +1252,29 @@ lpfc_hb_eq_delay_work(struct work_struct *work)
        if (!eqcnt)
                goto requeue;
 
-       /* Loop thru all IRQ vectors */
-       for (i = 0; i < phba->cfg_irq_chann; i++) {
-               /* Get the EQ corresponding to the IRQ vector */
-               eq = phba->sli4_hba.hba_eq_hdl[i].eq;
-               if (eq && eqcnt[eq->last_cpu] < 2)
-                       eqcnt[eq->last_cpu]++;
-               continue;
-       }
+       if (phba->cfg_irq_chann > 1) {
+               /* Loop thru all IRQ vectors */
+               for (i = 0; i < phba->cfg_irq_chann; i++) {
+                       /* Get the EQ corresponding to the IRQ vector */
+                       eq = phba->sli4_hba.hba_eq_hdl[i].eq;
+                       if (!eq)
+                               continue;
+                       if (eq->q_mode) {
+                               update = true;
+                               break;
+                       }
+                       if (eqcnt[eq->last_cpu] < 2)
+                               eqcnt[eq->last_cpu]++;
+               }
+       } else
+               update = true;
 
        for_each_present_cpu(i) {
-               if (phba->cfg_irq_chann > 1 && eqcnt[i] < 2)
-                       continue;
-
                eqi = per_cpu_ptr(phba->sli4_hba.eq_info, i);
+               if (!update && eqcnt[i] < 2) {
+                       eqi->icnt = 0;
+                       continue;
+               }
 
                usdelay = (eqi->icnt / LPFC_IMAX_THRESHOLD) *
                           LPFC_EQ_DELAY_STEP;
@@ -1535,6 +1522,7 @@ lpfc_sli4_offline_eratt(struct lpfc_hba *phba)
        spin_unlock_irq(&phba->hbalock);
 
        lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT);
+       lpfc_sli_flush_io_rings(phba);
        lpfc_offline(phba);
        lpfc_hba_down_post(phba);
        lpfc_unblock_mgmt_io(phba);
@@ -1796,6 +1784,7 @@ lpfc_sli4_port_sta_fn_reset(struct lpfc_hba *phba, int mbx_action,
                                "2887 Reset Needed: Attempting Port "
                                "Recovery...\n");
        lpfc_offline_prep(phba, mbx_action);
+       lpfc_sli_flush_io_rings(phba);
        lpfc_offline(phba);
        /* release interrupt for possible resource change */
        lpfc_sli4_disable_intr(phba);
@@ -1915,7 +1904,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba)
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                "7624 Firmware not ready: Failing UE recovery,"
                                " waited %dSec", i);
-               lpfc_sli4_offline_eratt(phba);
+               phba->link_state = LPFC_HBA_ERROR;
                break;
 
        case LPFC_SLI_INTF_IF_TYPE_2:
@@ -1989,9 +1978,8 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba)
                }
                /* fall through for not able to recover */
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-                               "3152 Unrecoverable error, bring the port "
-                               "offline\n");
-               lpfc_sli4_offline_eratt(phba);
+                               "3152 Unrecoverable error\n");
+               phba->link_state = LPFC_HBA_ERROR;
                break;
        case LPFC_SLI_INTF_IF_TYPE_1:
        default:
@@ -2863,7 +2851,7 @@ lpfc_cleanup(struct lpfc_vport *vport)
                                                &vport->fc_nodes, nlp_listp) {
                                lpfc_printf_vlog(ndlp->vport, KERN_ERR,
                                                LOG_NODE,
-                                               "0282 did:x%x ndlp:x%p "
+                                               "0282 did:x%x ndlp:x%px "
                                                "usgmap:x%x refcnt:%d\n",
                                                ndlp->nlp_DID, (void *)ndlp,
                                                ndlp->nlp_usg_map,
@@ -3067,7 +3055,7 @@ lpfc_sli4_node_prep(struct lpfc_hba *phba)
                        ndlp->nlp_rpi = rpi;
                        lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
                                         "0009 rpi:%x DID:%x "
-                                        "flg:%x map:%x %p\n", ndlp->nlp_rpi,
+                                        "flg:%x map:%x x%px\n", ndlp->nlp_rpi,
                                         ndlp->nlp_DID, ndlp->nlp_flag,
                                         ndlp->nlp_usg_map, ndlp);
                }
@@ -3252,12 +3240,8 @@ static void lpfc_destroy_multixri_pools(struct lpfc_hba *phba)
        if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
                lpfc_destroy_expedite_pool(phba);
 
-       if (!(phba->pport->load_flag & FC_UNLOADING)) {
-               lpfc_sli_flush_fcp_rings(phba);
-
-               if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
-                       lpfc_sli_flush_nvme_rings(phba);
-       }
+       if (!(phba->pport->load_flag & FC_UNLOADING))
+               lpfc_sli_flush_io_rings(phba);
 
        hwq_count = phba->cfg_hdw_queue;
 
@@ -3491,7 +3475,7 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
                                        lpfc_printf_vlog(ndlp->vport,
                                                         KERN_INFO, LOG_NODE,
                                                         "0011 lpfc_offline: "
-                                                        "ndlp:x%p did %x "
+                                                        "ndlp:x%px did %x "
                                                         "usgmap:x%x rpi:%x\n",
                                                         ndlp, ndlp->nlp_DID,
                                                         ndlp->nlp_usg_map,
@@ -3636,6 +3620,9 @@ lpfc_io_free(struct lpfc_hba *phba)
                        qp->put_io_bufs--;
                        dma_pool_free(phba->lpfc_sg_dma_buf_pool,
                                      lpfc_ncmd->data, lpfc_ncmd->dma_handle);
+                       if (phba->cfg_xpsgl && !phba->nvmet_support)
+                               lpfc_put_sgl_per_hdwq(phba, lpfc_ncmd);
+                       lpfc_put_cmd_rsp_buf_per_hdwq(phba, lpfc_ncmd);
                        kfree(lpfc_ncmd);
                        qp->total_io_bufs--;
                }
@@ -3649,6 +3636,9 @@ lpfc_io_free(struct lpfc_hba *phba)
                        qp->get_io_bufs--;
                        dma_pool_free(phba->lpfc_sg_dma_buf_pool,
                                      lpfc_ncmd->data, lpfc_ncmd->dma_handle);
+                       if (phba->cfg_xpsgl && !phba->nvmet_support)
+                               lpfc_put_sgl_per_hdwq(phba, lpfc_ncmd);
+                       lpfc_put_cmd_rsp_buf_per_hdwq(phba, lpfc_ncmd);
                        kfree(lpfc_ncmd);
                        qp->total_io_bufs--;
                }
@@ -4097,18 +4087,9 @@ lpfc_new_io_buf(struct lpfc_hba *phba, int num_to_alloc)
        LIST_HEAD(post_nblist);
        LIST_HEAD(nvme_nblist);
 
-       /* Sanity check to ensure our sizing is right for both SCSI and NVME */
-       if (sizeof(struct lpfc_io_buf) > LPFC_COMMON_IO_BUF_SZ) {
-               lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
-                               "6426 Common buffer size %zd exceeds %d\n",
-                               sizeof(struct lpfc_io_buf),
-                               LPFC_COMMON_IO_BUF_SZ);
-               return 0;
-       }
-
        phba->sli4_hba.io_xri_cnt = 0;
        for (bcnt = 0; bcnt < num_to_alloc; bcnt++) {
-               lpfc_ncmd = kzalloc(LPFC_COMMON_IO_BUF_SZ, GFP_KERNEL);
+               lpfc_ncmd = kzalloc(sizeof(*lpfc_ncmd), GFP_KERNEL);
                if (!lpfc_ncmd)
                        break;
                /*
@@ -4124,22 +4105,30 @@ lpfc_new_io_buf(struct lpfc_hba *phba, int num_to_alloc)
                        break;
                }
 
-               /*
-                * 4K Page alignment is CRITICAL to BlockGuard, double check
-                * to be sure.
-                */
-               if ((phba->sli3_options & LPFC_SLI3_BG_ENABLED) &&
-                   (((unsigned long)(lpfc_ncmd->data) &
-                   (unsigned long)(SLI4_PAGE_SIZE - 1)) != 0)) {
-                       lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
-                                       "3369 Memory alignment err: addr=%lx\n",
-                                       (unsigned long)lpfc_ncmd->data);
-                       dma_pool_free(phba->lpfc_sg_dma_buf_pool,
-                                     lpfc_ncmd->data, lpfc_ncmd->dma_handle);
-                       kfree(lpfc_ncmd);
-                       break;
+               if (phba->cfg_xpsgl && !phba->nvmet_support) {
+                       INIT_LIST_HEAD(&lpfc_ncmd->dma_sgl_xtra_list);
+               } else {
+                       /*
+                        * 4K Page alignment is CRITICAL to BlockGuard, double
+                        * check to be sure.
+                        */
+                       if ((phba->sli3_options & LPFC_SLI3_BG_ENABLED) &&
+                           (((unsigned long)(lpfc_ncmd->data) &
+                           (unsigned long)(SLI4_PAGE_SIZE - 1)) != 0)) {
+                               lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+                                               "3369 Memory alignment err: "
+                                               "addr=%lx\n",
+                                               (unsigned long)lpfc_ncmd->data);
+                               dma_pool_free(phba->lpfc_sg_dma_buf_pool,
+                                             lpfc_ncmd->data,
+                                             lpfc_ncmd->dma_handle);
+                               kfree(lpfc_ncmd);
+                               break;
+                       }
                }
 
+               INIT_LIST_HEAD(&lpfc_ncmd->dma_cmd_rsp_list);
+
                lxri = lpfc_sli4_next_xritag(phba);
                if (lxri == NO_XRI) {
                        dma_pool_free(phba->lpfc_sg_dma_buf_pool,
@@ -4318,7 +4307,11 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
 
                shost->dma_boundary =
                        phba->sli4_hba.pc_sli4_params.sge_supp_len-1;
-               shost->sg_tablesize = phba->cfg_scsi_seg_cnt;
+
+               if (phba->cfg_xpsgl && !phba->nvmet_support)
+                       shost->sg_tablesize = LPFC_MAX_SG_TABLESIZE;
+               else
+                       shost->sg_tablesize = phba->cfg_scsi_seg_cnt;
        } else
                /* SLI-3 has a limited number of hardware queues (3),
                 * thus there is only one for FCP processing.
@@ -6336,6 +6329,24 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
        if (lpfc_mem_alloc(phba, BPL_ALIGN_SZ))
                return -ENOMEM;
 
+       phba->lpfc_sg_dma_buf_pool =
+               dma_pool_create("lpfc_sg_dma_buf_pool",
+                               &phba->pcidev->dev, phba->cfg_sg_dma_buf_size,
+                               BPL_ALIGN_SZ, 0);
+
+       if (!phba->lpfc_sg_dma_buf_pool)
+               goto fail_free_mem;
+
+       phba->lpfc_cmd_rsp_buf_pool =
+                       dma_pool_create("lpfc_cmd_rsp_buf_pool",
+                                       &phba->pcidev->dev,
+                                       sizeof(struct fcp_cmnd) +
+                                       sizeof(struct fcp_rsp),
+                                       BPL_ALIGN_SZ, 0);
+
+       if (!phba->lpfc_cmd_rsp_buf_pool)
+               goto fail_free_dma_buf_pool;
+
        /*
         * Enable sr-iov virtual functions if supported and configured
         * through the module parameter.
@@ -6354,6 +6365,13 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
        }
 
        return 0;
+
+fail_free_dma_buf_pool:
+       dma_pool_destroy(phba->lpfc_sg_dma_buf_pool);
+       phba->lpfc_sg_dma_buf_pool = NULL;
+fail_free_mem:
+       lpfc_mem_free(phba);
+       return -ENOMEM;
 }
 
 /**
@@ -6414,6 +6432,11 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
        if (rc)
                return -ENODEV;
 
+       /* Allocate all driver workqueues here */
+
+       /* The lpfc_wq workqueue for deferred irq use */
+       phba->wq = alloc_workqueue("lpfc_wq", WQ_MEM_RECLAIM, 0);
+
        /*
         * Initialize timers used by driver
         */
@@ -6448,102 +6471,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
         * The WQ create will allocate the ring.
         */
 
-       /*
-        * 1 for cmd, 1 for rsp, NVME adds an extra one
-        * for boundary conditions in its max_sgl_segment template.
-        */
-       extra = 2;
-       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
-               extra++;
-
-       /*
-        * It doesn't matter what family our adapter is in, we are
-        * limited to 2 Pages, 512 SGEs, for our SGL.
-        * There are going to be 2 reserved SGEs: 1 FCP cmnd + 1 FCP rsp
-        */
-       max_buf_size = (2 * SLI4_PAGE_SIZE);
-
-       /*
-        * Since lpfc_sg_seg_cnt is module param, the sg_dma_buf_size
-        * used to create the sg_dma_buf_pool must be calculated.
-        */
-       if (phba->sli3_options & LPFC_SLI3_BG_ENABLED) {
-               /*
-                * The scsi_buf for a T10-DIF I/O holds the FCP cmnd,
-                * the FCP rsp, and a SGE. Sice we have no control
-                * over how many protection segments the SCSI Layer
-                * will hand us (ie: there could be one for every block
-                * in the IO), just allocate enough SGEs to accomidate
-                * our max amount and we need to limit lpfc_sg_seg_cnt
-                * to minimize the risk of running out.
-                */
-               phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
-                               sizeof(struct fcp_rsp) + max_buf_size;
-
-               /* Total SGEs for scsi_sg_list and scsi_sg_prot_list */
-               phba->cfg_total_seg_cnt = LPFC_MAX_SGL_SEG_CNT;
-
-               /*
-                * If supporting DIF, reduce the seg count for scsi to
-                * allow room for the DIF sges.
-                */
-               if (phba->cfg_enable_bg &&
-                   phba->cfg_sg_seg_cnt > LPFC_MAX_BG_SLI4_SEG_CNT_DIF)
-                       phba->cfg_scsi_seg_cnt = LPFC_MAX_BG_SLI4_SEG_CNT_DIF;
-               else
-                       phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt;
-
-       } else {
-               /*
-                * The scsi_buf for a regular I/O holds the FCP cmnd,
-                * the FCP rsp, a SGE for each, and a SGE for up to
-                * cfg_sg_seg_cnt data segments.
-                */
-               phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
-                               sizeof(struct fcp_rsp) +
-                               ((phba->cfg_sg_seg_cnt + extra) *
-                               sizeof(struct sli4_sge));
-
-               /* Total SGEs for scsi_sg_list */
-               phba->cfg_total_seg_cnt = phba->cfg_sg_seg_cnt + extra;
-               phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt;
-
-               /*
-                * NOTE: if (phba->cfg_sg_seg_cnt + extra) <= 256 we only
-                * need to post 1 page for the SGL.
-                */
-       }
-
-       /* Limit to LPFC_MAX_NVME_SEG_CNT for NVME. */
-       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-               if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) {
-                       lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT,
-                                       "6300 Reducing NVME sg segment "
-                                       "cnt to %d\n",
-                                       LPFC_MAX_NVME_SEG_CNT);
-                       phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT;
-               } else
-                       phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt;
-       }
-
-       /* Initialize the host templates with the updated values. */
-       lpfc_vport_template.sg_tablesize = phba->cfg_scsi_seg_cnt;
-       lpfc_template.sg_tablesize = phba->cfg_scsi_seg_cnt;
-       lpfc_template_no_hr.sg_tablesize = phba->cfg_scsi_seg_cnt;
-
-       if (phba->cfg_sg_dma_buf_size  <= LPFC_MIN_SG_SLI4_BUF_SZ)
-               phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ;
-       else
-               phba->cfg_sg_dma_buf_size =
-                       SLI4_PAGE_ALIGN(phba->cfg_sg_dma_buf_size);
-
-       lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP,
-                       "9087 sg_seg_cnt:%d dmabuf_size:%d "
-                       "total:%d scsi:%d nvme:%d\n",
-                       phba->cfg_sg_seg_cnt, phba->cfg_sg_dma_buf_size,
-                       phba->cfg_total_seg_cnt,  phba->cfg_scsi_seg_cnt,
-                       phba->cfg_nvme_seg_cnt);
-
        /* Initialize buffer queue management fields */
        INIT_LIST_HEAD(&phba->hbqs[LPFC_ELS_HBQ].hbq_buffer_list);
        phba->hbqs[LPFC_ELS_HBQ].hbq_alloc_buffer = lpfc_sli4_rb_alloc;
@@ -6552,11 +6479,9 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
        /*
         * Initialize the SLI Layer to run with lpfc SLI4 HBAs.
         */
-       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
-               /* Initialize the Abort scsi buffer list used by driver */
-               spin_lock_init(&phba->sli4_hba.abts_scsi_buf_list_lock);
-               INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_scsi_buf_list);
-       }
+       /* Initialize the Abort buffer list used by driver */
+       spin_lock_init(&phba->sli4_hba.abts_io_buf_list_lock);
+       INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_io_buf_list);
 
        if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
                /* Initialize the Abort nvme buffer list used by driver */
@@ -6764,6 +6689,131 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
                }
        }
 
+       /*
+        * 1 for cmd, 1 for rsp, NVME adds an extra one
+        * for boundary conditions in its max_sgl_segment template.
+        */
+       extra = 2;
+       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
+               extra++;
+
+       /*
+        * It doesn't matter what family our adapter is in, we are
+        * limited to 2 Pages, 512 SGEs, for our SGL.
+        * There are going to be 2 reserved SGEs: 1 FCP cmnd + 1 FCP rsp
+        */
+       max_buf_size = (2 * SLI4_PAGE_SIZE);
+
+       /*
+        * Since lpfc_sg_seg_cnt is module param, the sg_dma_buf_size
+        * used to create the sg_dma_buf_pool must be calculated.
+        */
+       if (phba->sli3_options & LPFC_SLI3_BG_ENABLED) {
+               /* Both cfg_enable_bg and cfg_external_dif code paths */
+
+               /*
+                * The scsi_buf for a T10-DIF I/O holds the FCP cmnd,
+                * the FCP rsp, and a SGE. Since we have no control
+                * over how many protection segments the SCSI Layer
+                * will hand us (ie: there could be one for every block
+                * in the IO), just allocate enough SGEs to accommodate
+                * our max amount and we need to limit lpfc_sg_seg_cnt
+                * to minimize the risk of running out.
+                */
+               phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
+                               sizeof(struct fcp_rsp) + max_buf_size;
+
+               /* Total SGEs for scsi_sg_list and scsi_sg_prot_list */
+               phba->cfg_total_seg_cnt = LPFC_MAX_SGL_SEG_CNT;
+
+               /*
+                * If supporting DIF, reduce the seg count for scsi to
+                * allow room for the DIF sges.
+                */
+               if (phba->cfg_enable_bg &&
+                   phba->cfg_sg_seg_cnt > LPFC_MAX_BG_SLI4_SEG_CNT_DIF)
+                       phba->cfg_scsi_seg_cnt = LPFC_MAX_BG_SLI4_SEG_CNT_DIF;
+               else
+                       phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt;
+
+       } else {
+               /*
+                * The scsi_buf for a regular I/O holds the FCP cmnd,
+                * the FCP rsp, a SGE for each, and a SGE for up to
+                * cfg_sg_seg_cnt data segments.
+                */
+               phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
+                               sizeof(struct fcp_rsp) +
+                               ((phba->cfg_sg_seg_cnt + extra) *
+                               sizeof(struct sli4_sge));
+
+               /* Total SGEs for scsi_sg_list */
+               phba->cfg_total_seg_cnt = phba->cfg_sg_seg_cnt + extra;
+               phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt;
+
+               /*
+                * NOTE: if (phba->cfg_sg_seg_cnt + extra) <= 256 we only
+                * need to post 1 page for the SGL.
+                */
+       }
+
+       if (phba->cfg_xpsgl && !phba->nvmet_support)
+               phba->cfg_sg_dma_buf_size = LPFC_DEFAULT_XPSGL_SIZE;
+       else if (phba->cfg_sg_dma_buf_size  <= LPFC_MIN_SG_SLI4_BUF_SZ)
+               phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ;
+       else
+               phba->cfg_sg_dma_buf_size =
+                               SLI4_PAGE_ALIGN(phba->cfg_sg_dma_buf_size);
+
+       phba->border_sge_num = phba->cfg_sg_dma_buf_size /
+                              sizeof(struct sli4_sge);
+
+       /* Limit to LPFC_MAX_NVME_SEG_CNT for NVME. */
+       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+               if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) {
+                       lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT,
+                                       "6300 Reducing NVME sg segment "
+                                       "cnt to %d\n",
+                                       LPFC_MAX_NVME_SEG_CNT);
+                       phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT;
+               } else
+                       phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt;
+       }
+
+       /* Initialize the host templates with the updated values. */
+       lpfc_vport_template.sg_tablesize = phba->cfg_scsi_seg_cnt;
+       lpfc_template.sg_tablesize = phba->cfg_scsi_seg_cnt;
+       lpfc_template_no_hr.sg_tablesize = phba->cfg_scsi_seg_cnt;
+
+       lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP,
+                       "9087 sg_seg_cnt:%d dmabuf_size:%d "
+                       "total:%d scsi:%d nvme:%d\n",
+                       phba->cfg_sg_seg_cnt, phba->cfg_sg_dma_buf_size,
+                       phba->cfg_total_seg_cnt,  phba->cfg_scsi_seg_cnt,
+                       phba->cfg_nvme_seg_cnt);
+
+       if (phba->cfg_sg_dma_buf_size < SLI4_PAGE_SIZE)
+               i = phba->cfg_sg_dma_buf_size;
+       else
+               i = SLI4_PAGE_SIZE;
+
+       phba->lpfc_sg_dma_buf_pool =
+                       dma_pool_create("lpfc_sg_dma_buf_pool",
+                                       &phba->pcidev->dev,
+                                       phba->cfg_sg_dma_buf_size,
+                                       i, 0);
+       if (!phba->lpfc_sg_dma_buf_pool)
+               goto out_free_bsmbx;
+
+       phba->lpfc_cmd_rsp_buf_pool =
+                       dma_pool_create("lpfc_cmd_rsp_buf_pool",
+                                       &phba->pcidev->dev,
+                                       sizeof(struct fcp_cmnd) +
+                                       sizeof(struct fcp_rsp),
+                                       i, 0);
+       if (!phba->lpfc_cmd_rsp_buf_pool)
+               goto out_free_sg_dma_buf;
+
        mempool_free(mboxq, phba->mbox_mem_pool);
 
        /* Verify OAS is supported */
@@ -6775,12 +6825,12 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
        /* Verify all the SLI4 queues */
        rc = lpfc_sli4_queue_verify(phba);
        if (rc)
-               goto out_free_bsmbx;
+               goto out_free_cmd_rsp_buf;
 
        /* Create driver internal CQE event pool */
        rc = lpfc_sli4_cq_event_pool_create(phba);
        if (rc)
-               goto out_free_bsmbx;
+               goto out_free_cmd_rsp_buf;
 
        /* Initialize sgl lists per host */
        lpfc_init_sgl_list(phba);
@@ -6871,6 +6921,12 @@ out_free_active_sgl:
        lpfc_free_active_sgl(phba);
 out_destroy_cq_event_pool:
        lpfc_sli4_cq_event_pool_destroy(phba);
+out_free_cmd_rsp_buf:
+       dma_pool_destroy(phba->lpfc_cmd_rsp_buf_pool);
+       phba->lpfc_cmd_rsp_buf_pool = NULL;
+out_free_sg_dma_buf:
+       dma_pool_destroy(phba->lpfc_sg_dma_buf_pool);
+       phba->lpfc_sg_dma_buf_pool = NULL;
 out_free_bsmbx:
        lpfc_destroy_bootstrap_mbox(phba);
 out_free_mem:
@@ -6997,12 +7053,6 @@ lpfc_setup_driver_resource_phase2(struct lpfc_hba *phba)
                return error;
        }
 
-       /* The lpfc_wq workqueue for deferred irq use, is only used for SLI4 */
-       if (phba->sli_rev == LPFC_SLI_REV4)
-               phba->wq = alloc_workqueue("lpfc_wq", WQ_MEM_RECLAIM, 0);
-       else
-               phba->wq = NULL;
-
        return 0;
 }
 
@@ -7563,7 +7613,6 @@ lpfc_setup_bg(struct lpfc_hba *phba, struct Scsi_Host *shost)
        uint32_t old_mask;
        uint32_t old_guard;
 
-       int pagecnt = 10;
        if (phba->cfg_prot_mask && phba->cfg_prot_guard) {
                lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
                                "1478 Registering BlockGuard with the "
@@ -7600,56 +7649,6 @@ lpfc_setup_bg(struct lpfc_hba *phba, struct Scsi_Host *shost)
                                "layer, Bad protection parameters: %d %d\n",
                                old_mask, old_guard);
        }
-
-       if (!_dump_buf_data) {
-               while (pagecnt) {
-                       spin_lock_init(&_dump_buf_lock);
-                       _dump_buf_data =
-                               (char *) __get_free_pages(GFP_KERNEL, pagecnt);
-                       if (_dump_buf_data) {
-                               lpfc_printf_log(phba, KERN_ERR, LOG_BG,
-                                       "9043 BLKGRD: allocated %d pages for "
-                                      "_dump_buf_data at 0x%p\n",
-                                      (1 << pagecnt), _dump_buf_data);
-                               _dump_buf_data_order = pagecnt;
-                               memset(_dump_buf_data, 0,
-                                      ((1 << PAGE_SHIFT) << pagecnt));
-                               break;
-                       } else
-                               --pagecnt;
-               }
-               if (!_dump_buf_data_order)
-                       lpfc_printf_log(phba, KERN_ERR, LOG_BG,
-                               "9044 BLKGRD: ERROR unable to allocate "
-                              "memory for hexdump\n");
-       } else
-               lpfc_printf_log(phba, KERN_ERR, LOG_BG,
-                       "9045 BLKGRD: already allocated _dump_buf_data=0x%p"
-                      "\n", _dump_buf_data);
-       if (!_dump_buf_dif) {
-               while (pagecnt) {
-                       _dump_buf_dif =
-                               (char *) __get_free_pages(GFP_KERNEL, pagecnt);
-                       if (_dump_buf_dif) {
-                               lpfc_printf_log(phba, KERN_ERR, LOG_BG,
-                                       "9046 BLKGRD: allocated %d pages for "
-                                      "_dump_buf_dif at 0x%p\n",
-                                      (1 << pagecnt), _dump_buf_dif);
-                               _dump_buf_dif_order = pagecnt;
-                               memset(_dump_buf_dif, 0,
-                                      ((1 << PAGE_SHIFT) << pagecnt));
-                               break;
-                       } else
-                               --pagecnt;
-               }
-               if (!_dump_buf_dif_order)
-                       lpfc_printf_log(phba, KERN_ERR, LOG_BG,
-                       "9047 BLKGRD: ERROR unable to allocate "
-                              "memory for hexdump\n");
-       } else
-               lpfc_printf_log(phba, KERN_ERR, LOG_BG,
-                       "9048 BLKGRD: already allocated _dump_buf_dif=0x%p\n",
-                      _dump_buf_dif);
 }
 
 /**
@@ -8309,6 +8308,10 @@ lpfc_sli4_read_config(struct lpfc_hba *phba)
                        bf_get(lpfc_mbx_rd_conf_extnts_inuse, rd_config);
                phba->sli4_hba.max_cfg_param.max_xri =
                        bf_get(lpfc_mbx_rd_conf_xri_count, rd_config);
+               /* Reduce resource usage in kdump environment */
+               if (is_kdump_kernel() &&
+                   phba->sli4_hba.max_cfg_param.max_xri > 512)
+                       phba->sli4_hba.max_cfg_param.max_xri = 512;
                phba->sli4_hba.max_cfg_param.xri_base =
                        bf_get(lpfc_mbx_rd_conf_xri_base, rd_config);
                phba->sli4_hba.max_cfg_param.max_vpi =
@@ -8382,11 +8385,6 @@ lpfc_sli4_read_config(struct lpfc_hba *phba)
                 */
                qmin -= 4;
 
-               /* If NVME is configured, double the number of CQ/WQs needed */
-               if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
-                   !phba->nvmet_support)
-                       qmin /= 2;
-
                /* Check to see if there is enough for NVME */
                if ((phba->cfg_irq_chann > qmin) ||
                    (phba->cfg_hdw_queue > qmin)) {
@@ -8643,51 +8641,14 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba)
 }
 
 static int
-lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx)
-{
-       struct lpfc_queue *qdesc;
-       int cpu;
-
-       cpu = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
-       qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
-                                     phba->sli4_hba.cq_esize,
-                                     LPFC_CQE_EXP_COUNT, cpu);
-       if (!qdesc) {
-               lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-                               "0508 Failed allocate fast-path NVME CQ (%d)\n",
-                               wqidx);
-               return 1;
-       }
-       qdesc->qe_valid = 1;
-       qdesc->hdwq = wqidx;
-       qdesc->chann = cpu;
-       phba->sli4_hba.hdwq[wqidx].nvme_cq = qdesc;
-
-       qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
-                                     LPFC_WQE128_SIZE, LPFC_WQE_EXP_COUNT,
-                                     cpu);
-       if (!qdesc) {
-               lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-                               "0509 Failed allocate fast-path NVME WQ (%d)\n",
-                               wqidx);
-               return 1;
-       }
-       qdesc->hdwq = wqidx;
-       qdesc->chann = wqidx;
-       phba->sli4_hba.hdwq[wqidx].nvme_wq = qdesc;
-       list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
-       return 0;
-}
-
-static int
-lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
+lpfc_alloc_io_wq_cq(struct lpfc_hba *phba, int idx)
 {
        struct lpfc_queue *qdesc;
-       uint32_t wqesize;
+       u32 wqesize;
        int cpu;
 
-       cpu = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
-       /* Create Fast Path FCP CQs */
+       cpu = lpfc_find_cpu_handle(phba, idx, LPFC_FIND_BY_HDWQ);
+       /* Create Fast Path IO CQs */
        if (phba->enab_exp_wqcq_pages)
                /* Increase the CQ size when WQEs contain an embedded cdb */
                qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
@@ -8700,15 +8661,15 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
                                              phba->sli4_hba.cq_ecount, cpu);
        if (!qdesc) {
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-                       "0499 Failed allocate fast-path FCP CQ (%d)\n", wqidx);
+                       "0499 Failed allocate fast-path IO CQ (%d)\n", idx);
                return 1;
        }
        qdesc->qe_valid = 1;
-       qdesc->hdwq = wqidx;
+       qdesc->hdwq = idx;
        qdesc->chann = cpu;
-       phba->sli4_hba.hdwq[wqidx].fcp_cq = qdesc;
+       phba->sli4_hba.hdwq[idx].io_cq = qdesc;
 
-       /* Create Fast Path FCP WQs */
+       /* Create Fast Path IO WQs */
        if (phba->enab_exp_wqcq_pages) {
                /* Increase the WQ size when WQEs contain an embedded cdb */
                wqesize = (phba->fcp_embed_io) ?
@@ -8723,13 +8684,13 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
 
        if (!qdesc) {
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-                               "0503 Failed allocate fast-path FCP WQ (%d)\n",
-                               wqidx);
+                               "0503 Failed allocate fast-path IO WQ (%d)\n",
+                               idx);
                return 1;
        }
-       qdesc->hdwq = wqidx;
-       qdesc->chann = wqidx;
-       phba->sli4_hba.hdwq[wqidx].fcp_wq = qdesc;
+       qdesc->hdwq = idx;
+       qdesc->chann = cpu;
+       phba->sli4_hba.hdwq[idx].io_wq = qdesc;
        list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
        return 0;
 }
@@ -8793,12 +8754,13 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                        qp->get_io_bufs = 0;
                        qp->put_io_bufs = 0;
                        qp->total_io_bufs = 0;
-                       spin_lock_init(&qp->abts_scsi_buf_list_lock);
-                       INIT_LIST_HEAD(&qp->lpfc_abts_scsi_buf_list);
+                       spin_lock_init(&qp->abts_io_buf_list_lock);
+                       INIT_LIST_HEAD(&qp->lpfc_abts_io_buf_list);
                        qp->abts_scsi_io_bufs = 0;
-                       spin_lock_init(&qp->abts_nvme_buf_list_lock);
-                       INIT_LIST_HEAD(&qp->lpfc_abts_nvme_buf_list);
                        qp->abts_nvme_io_bufs = 0;
+                       INIT_LIST_HEAD(&qp->sgl_list);
+                       INIT_LIST_HEAD(&qp->cmd_rsp_buf_list);
+                       spin_lock_init(&qp->hdwq_lock);
                }
        }
 
@@ -8864,7 +8826,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                }
                qdesc->qe_valid = 1;
                qdesc->hdwq = cpup->hdwq;
-               qdesc->chann = cpu; /* First CPU this EQ is affinitised to */
+               qdesc->chann = cpu; /* First CPU this EQ is affinitized to */
                qdesc->last_cpu = qdesc->chann;
 
                /* Save the allocated EQ in the Hardware Queue */
@@ -8895,41 +8857,31 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                qp->hba_eq = phba->sli4_hba.hdwq[eqcpup->hdwq].hba_eq;
        }
 
-       /* Allocate SCSI SLI4 CQ/WQs */
+       /* Allocate IO Path SLI4 CQ/WQs */
        for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
-               if (lpfc_alloc_fcp_wq_cq(phba, idx))
+               if (lpfc_alloc_io_wq_cq(phba, idx))
                        goto out_error;
        }
 
-       /* Allocate NVME SLI4 CQ/WQs */
-       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-               for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
-                       if (lpfc_alloc_nvme_wq_cq(phba, idx))
-                               goto out_error;
-               }
-
-               if (phba->nvmet_support) {
-                       for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
-                               cpu = lpfc_find_cpu_handle(phba, idx,
-                                                          LPFC_FIND_BY_HDWQ);
-                               qdesc = lpfc_sli4_queue_alloc(
-                                                     phba,
+       if (phba->nvmet_support) {
+               for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
+                       cpu = lpfc_find_cpu_handle(phba, idx,
+                                                  LPFC_FIND_BY_HDWQ);
+                       qdesc = lpfc_sli4_queue_alloc(phba,
                                                      LPFC_DEFAULT_PAGE_SIZE,
                                                      phba->sli4_hba.cq_esize,
                                                      phba->sli4_hba.cq_ecount,
                                                      cpu);
-                               if (!qdesc) {
-                                       lpfc_printf_log(
-                                               phba, KERN_ERR, LOG_INIT,
+                       if (!qdesc) {
+                               lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                                "3142 Failed allocate NVME "
                                                "CQ Set (%d)\n", idx);
-                                       goto out_error;
-                               }
-                               qdesc->qe_valid = 1;
-                               qdesc->hdwq = idx;
-                               qdesc->chann = cpu;
-                               phba->sli4_hba.nvmet_cqset[idx] = qdesc;
+                               goto out_error;
                        }
+                       qdesc->qe_valid = 1;
+                       qdesc->hdwq = idx;
+                       qdesc->chann = cpu;
+                       phba->sli4_hba.nvmet_cqset[idx] = qdesc;
                }
        }
 
@@ -8960,7 +8912,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                goto out_error;
        }
        qdesc->qe_valid = 1;
-       qdesc->chann = 0;
+       qdesc->chann = cpu;
        phba->sli4_hba.els_cq = qdesc;
 
 
@@ -8978,7 +8930,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                                "0505 Failed allocate slow-path MQ\n");
                goto out_error;
        }
-       qdesc->chann = 0;
+       qdesc->chann = cpu;
        phba->sli4_hba.mbx_wq = qdesc;
 
        /*
@@ -8994,7 +8946,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                                "0504 Failed allocate slow-path ELS WQ\n");
                goto out_error;
        }
-       qdesc->chann = 0;
+       qdesc->chann = cpu;
        phba->sli4_hba.els_wq = qdesc;
        list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
 
@@ -9008,7 +8960,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                                        "6079 Failed allocate NVME LS CQ\n");
                        goto out_error;
                }
-               qdesc->chann = 0;
+               qdesc->chann = cpu;
                qdesc->qe_valid = 1;
                phba->sli4_hba.nvmels_cq = qdesc;
 
@@ -9021,7 +8973,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                                        "6080 Failed allocate NVME LS WQ\n");
                        goto out_error;
                }
-               qdesc->chann = 0;
+               qdesc->chann = cpu;
                phba->sli4_hba.nvmels_wq = qdesc;
                list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
        }
@@ -9164,15 +9116,13 @@ lpfc_sli4_release_hdwq(struct lpfc_hba *phba)
        /* Loop thru all Hardware Queues */
        for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
                /* Free the CQ/WQ corresponding to the Hardware Queue */
-               lpfc_sli4_queue_free(hdwq[idx].fcp_cq);
-               lpfc_sli4_queue_free(hdwq[idx].nvme_cq);
-               lpfc_sli4_queue_free(hdwq[idx].fcp_wq);
-               lpfc_sli4_queue_free(hdwq[idx].nvme_wq);
-               hdwq[idx].hba_eq = NULL;
-               hdwq[idx].fcp_cq = NULL;
-               hdwq[idx].nvme_cq = NULL;
-               hdwq[idx].fcp_wq = NULL;
-               hdwq[idx].nvme_wq = NULL;
+               lpfc_sli4_queue_free(hdwq[idx].io_cq);
+               lpfc_sli4_queue_free(hdwq[idx].io_wq);
+               hdwq[idx].io_cq = NULL;
+               hdwq[idx].io_wq = NULL;
+               if (phba->cfg_xpsgl && !phba->nvmet_support)
+                       lpfc_free_sgl_per_hdwq(phba, &hdwq[idx]);
+               lpfc_free_cmd_rsp_buf_per_hdwq(phba, &hdwq[idx]);
        }
        /* Loop thru all IRQ vectors */
        for (idx = 0; idx < phba->cfg_irq_chann; idx++) {
@@ -9372,8 +9322,7 @@ lpfc_setup_cq_lookup(struct lpfc_hba *phba)
                list_for_each_entry(childq, &eq->child_list, list) {
                        if (childq->queue_id > phba->sli4_hba.cq_max)
                                continue;
-                       if ((childq->subtype == LPFC_FCP) ||
-                           (childq->subtype == LPFC_NVME))
+                       if (childq->subtype == LPFC_IO)
                                phba->sli4_hba.cq_lookup[childq->queue_id] =
                                        childq;
                }
@@ -9499,31 +9448,6 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
        }
 
        /* Loop thru all Hardware Queues */
-       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-               for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
-                       cpu = lpfc_find_cpu_handle(phba, qidx,
-                                                  LPFC_FIND_BY_HDWQ);
-                       cpup = &phba->sli4_hba.cpu_map[cpu];
-
-                       /* Create the CQ/WQ corresponding to the
-                        * Hardware Queue
-                        */
-                       rc = lpfc_create_wq_cq(phba,
-                                       phba->sli4_hba.hdwq[cpup->hdwq].hba_eq,
-                                       qp[qidx].nvme_cq,
-                                       qp[qidx].nvme_wq,
-                                       &phba->sli4_hba.hdwq[qidx].nvme_cq_map,
-                                       qidx, LPFC_NVME);
-                       if (rc) {
-                               lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-                                       "6123 Failed to setup fastpath "
-                                       "NVME WQ/CQ (%d), rc = 0x%x\n",
-                                       qidx, (uint32_t)rc);
-                               goto out_destroy;
-                       }
-               }
-       }
-
        for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
                cpu = lpfc_find_cpu_handle(phba, qidx, LPFC_FIND_BY_HDWQ);
                cpup = &phba->sli4_hba.cpu_map[cpu];
@@ -9531,14 +9455,15 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
                /* Create the CQ/WQ corresponding to the Hardware Queue */
                rc = lpfc_create_wq_cq(phba,
                                       phba->sli4_hba.hdwq[cpup->hdwq].hba_eq,
-                                      qp[qidx].fcp_cq,
-                                      qp[qidx].fcp_wq,
-                                      &phba->sli4_hba.hdwq[qidx].fcp_cq_map,
-                                      qidx, LPFC_FCP);
+                                      qp[qidx].io_cq,
+                                      qp[qidx].io_wq,
+                                      &phba->sli4_hba.hdwq[qidx].io_cq_map,
+                                      qidx,
+                                      LPFC_IO);
                if (rc) {
                        lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                        "0535 Failed to setup fastpath "
-                                       "FCP WQ/CQ (%d), rc = 0x%x\n",
+                                       "IO WQ/CQ (%d), rc = 0x%x\n",
                                        qidx, (uint32_t)rc);
                        goto out_destroy;
                }
@@ -9838,10 +9763,8 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba)
                for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
                        /* Destroy the CQ/WQ corresponding to Hardware Queue */
                        qp = &phba->sli4_hba.hdwq[qidx];
-                       lpfc_wq_destroy(phba, qp->fcp_wq);
-                       lpfc_wq_destroy(phba, qp->nvme_wq);
-                       lpfc_cq_destroy(phba, qp->fcp_cq);
-                       lpfc_cq_destroy(phba, qp->nvme_cq);
+                       lpfc_wq_destroy(phba, qp->io_wq);
+                       lpfc_cq_destroy(phba, qp->io_cq);
                }
                /* Loop thru all IRQ vectors */
                for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
@@ -10711,7 +10634,7 @@ lpfc_find_hyper(struct lpfc_hba *phba, int cpu,
 static void
 lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
 {
-       int i, cpu, idx, new_cpu, start_cpu, first_cpu;
+       int i, cpu, idx, next_idx, new_cpu, start_cpu, first_cpu;
        int max_phys_id, min_phys_id;
        int max_core_id, min_core_id;
        struct lpfc_vector_map_info *cpup;
@@ -10753,8 +10676,8 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
 #endif
 
                lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-                               "3328 CPU physid %d coreid %d\n",
-                               cpup->phys_id, cpup->core_id);
+                               "3328 CPU %d physid %d coreid %d flag x%x\n",
+                               cpu, cpup->phys_id, cpup->core_id, cpup->flag);
 
                if (cpup->phys_id > max_phys_id)
                        max_phys_id = cpup->phys_id;
@@ -10812,17 +10735,17 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
                        cpup->eq = idx;
                        cpup->irq = pci_irq_vector(phba->pcidev, idx);
 
-                       lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-                                       "3336 Set Affinity: CPU %d "
-                                       "irq %d eq %d\n",
-                                       cpu, cpup->irq, cpup->eq);
-
                        /* If this is the first CPU thats assigned to this
                         * vector, set LPFC_CPU_FIRST_IRQ.
                         */
                        if (!i)
                                cpup->flag |= LPFC_CPU_FIRST_IRQ;
                        i++;
+
+                       lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+                                       "3336 Set Affinity: CPU %d "
+                                       "irq %d eq %d flag x%x\n",
+                                       cpu, cpup->irq, cpup->eq, cpup->flag);
                }
        }
 
@@ -10936,69 +10859,103 @@ found_any:
                }
        }
 
+       /* Assign hdwq indices that are unique across all cpus in the map
+        * that are also FIRST_CPUs.
+        */
+       idx = 0;
+       for_each_present_cpu(cpu) {
+               cpup = &phba->sli4_hba.cpu_map[cpu];
+
+               /* Only FIRST IRQs get a hdwq index assignment. */
+               if (!(cpup->flag & LPFC_CPU_FIRST_IRQ))
+                       continue;
+
+               /* 1 to 1, the first LPFC_CPU_FIRST_IRQ cpus to a unique hdwq */
+               cpup->hdwq = idx;
+               idx++;
+               lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                               "3333 Set Affinity: CPU %d (phys %d core %d): "
+                               "hdwq %d eq %d irq %d flg x%x\n",
+                               cpu, cpup->phys_id, cpup->core_id,
+                               cpup->hdwq, cpup->eq, cpup->irq, cpup->flag);
+       }
        /* Finally we need to associate a hdwq with each cpu_map entry
         * This will be 1 to 1 - hdwq to cpu, unless there are less
         * hardware queues then CPUs. For that case we will just round-robin
         * the available hardware queues as they get assigned to CPUs.
+        * The next_idx is the idx from the FIRST_CPU loop above to account
+        * for irq_chann < hdwq.  The idx is used for round-robin assignments
+        * and needs to start at 0.
         */
-       idx = 0;
+       next_idx = idx;
        start_cpu = 0;
+       idx = 0;
        for_each_present_cpu(cpu) {
                cpup = &phba->sli4_hba.cpu_map[cpu];
-               if (idx >=  phba->cfg_hdw_queue) {
-                       /* We need to reuse a Hardware Queue for another CPU,
-                        * so be smart about it and pick one that has its
-                        * IRQ/EQ mapped to the same phys_id (CPU package).
-                        * and core_id.
-                        */
-                       new_cpu = start_cpu;
-                       for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
-                               new_cpup = &phba->sli4_hba.cpu_map[new_cpu];
-                               if ((new_cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) &&
-                                   (new_cpup->phys_id == cpup->phys_id) &&
-                                   (new_cpup->core_id == cpup->core_id))
-                                       goto found_hdwq;
-                               new_cpu = cpumask_next(
-                                       new_cpu, cpu_present_mask);
-                               if (new_cpu == nr_cpumask_bits)
-                                       new_cpu = first_cpu;
-                       }
 
-                       /* If we can't match both phys_id and core_id,
-                        * settle for just a phys_id match.
-                        */
-                       new_cpu = start_cpu;
-                       for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
-                               new_cpup = &phba->sli4_hba.cpu_map[new_cpu];
-                               if ((new_cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) &&
-                                   (new_cpup->phys_id == cpup->phys_id))
-                                       goto found_hdwq;
-                               new_cpu = cpumask_next(
-                                       new_cpu, cpu_present_mask);
-                               if (new_cpu == nr_cpumask_bits)
-                                       new_cpu = first_cpu;
+               /* FIRST cpus are already mapped. */
+               if (cpup->flag & LPFC_CPU_FIRST_IRQ)
+                       continue;
+
+               /* If the cfg_irq_chann < cfg_hdw_queue, set the hdwq
+                * of the unassigned cpus to the next idx so that all
+                * hdw queues are fully utilized.
+                */
+               if (next_idx < phba->cfg_hdw_queue) {
+                       cpup->hdwq = next_idx;
+                       next_idx++;
+                       continue;
+               }
+
+               /* Not a First CPU and all hdw_queues are used.  Reuse a
+                * Hardware Queue for another CPU, so be smart about it
+                * and pick one that has its IRQ/EQ mapped to the same phys_id
+                * (CPU package) and core_id.
+                */
+               new_cpu = start_cpu;
+               for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+                       new_cpup = &phba->sli4_hba.cpu_map[new_cpu];
+                       if (new_cpup->hdwq != LPFC_VECTOR_MAP_EMPTY &&
+                           new_cpup->phys_id == cpup->phys_id &&
+                           new_cpup->core_id == cpup->core_id) {
+                               goto found_hdwq;
                        }
+                       new_cpu = cpumask_next(new_cpu, cpu_present_mask);
+                       if (new_cpu == nr_cpumask_bits)
+                               new_cpu = first_cpu;
+               }
 
-                       /* Otherwise just round robin on cfg_hdw_queue */
-                       cpup->hdwq = idx % phba->cfg_hdw_queue;
-                       goto logit;
-found_hdwq:
-                       /* We found an available entry, copy the IRQ info */
-                       start_cpu = cpumask_next(new_cpu, cpu_present_mask);
-                       if (start_cpu == nr_cpumask_bits)
-                               start_cpu = first_cpu;
-                       cpup->hdwq = new_cpup->hdwq;
-               } else {
-                       /* 1 to 1, CPU to hdwq */
-                       cpup->hdwq = idx;
+               /* If we can't match both phys_id and core_id,
+                * settle for just a phys_id match.
+                */
+               new_cpu = start_cpu;
+               for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+                       new_cpup = &phba->sli4_hba.cpu_map[new_cpu];
+                       if (new_cpup->hdwq != LPFC_VECTOR_MAP_EMPTY &&
+                           new_cpup->phys_id == cpup->phys_id)
+                               goto found_hdwq;
+
+                       new_cpu = cpumask_next(new_cpu, cpu_present_mask);
+                       if (new_cpu == nr_cpumask_bits)
+                               new_cpu = first_cpu;
                }
-logit:
+
+               /* Otherwise just round robin on cfg_hdw_queue */
+               cpup->hdwq = idx % phba->cfg_hdw_queue;
+               idx++;
+               goto logit;
+ found_hdwq:
+               /* We found an available entry, copy the IRQ info */
+               start_cpu = cpumask_next(new_cpu, cpu_present_mask);
+               if (start_cpu == nr_cpumask_bits)
+                       start_cpu = first_cpu;
+               cpup->hdwq = new_cpup->hdwq;
+ logit:
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                "3335 Set Affinity: CPU %d (phys %d core %d): "
                                "hdwq %d eq %d irq %d flg x%x\n",
                                cpu, cpup->phys_id, cpup->core_id,
                                cpup->hdwq, cpup->eq, cpup->irq, cpup->flag);
-               idx++;
        }
 
        /* The cpu_map array will be used later during initialization
@@ -11089,10 +11046,10 @@ vec_fail_out:
  * @phba: pointer to lpfc hba data structure.
  *
  * This routine is invoked to enable the MSI interrupt mode to device with
- * SLI-4 interface spec. The kernel function pci_enable_msi() is called
- * to enable the MSI vector. The device driver is responsible for calling
- * the request_irq() to register MSI vector with a interrupt the handler,
- * which is done in this function.
+ * SLI-4 interface spec. The kernel function pci_alloc_irq_vectors() is
+ * called to enable the MSI vector. The device driver is responsible for
+ * calling request_irq() to register the MSI vector with an interrupt
+ * handler, which is done in this function.
  *
  * Return codes
  *     0 - successful
@@ -11103,20 +11060,21 @@ lpfc_sli4_enable_msi(struct lpfc_hba *phba)
 {
        int rc, index;
 
-       rc = pci_enable_msi(phba->pcidev);
-       if (!rc)
+       rc = pci_alloc_irq_vectors(phba->pcidev, 1, 1,
+                                  PCI_IRQ_MSI | PCI_IRQ_AFFINITY);
+       if (rc > 0)
                lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
                                "0487 PCI enable MSI mode success.\n");
        else {
                lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
                                "0488 PCI enable MSI mode failed (%d)\n", rc);
-               return rc;
+               return rc ? rc : -1;
        }
 
        rc = request_irq(phba->pcidev->irq, lpfc_sli4_intr_handler,
                         0, LPFC_DRIVER_NAME, phba);
        if (rc) {
-               pci_disable_msi(phba->pcidev);
+               pci_free_irq_vectors(phba->pcidev);
                lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
                                "0490 MSI request_irq failed (%d)\n", rc);
                return rc;
@@ -11282,11 +11240,10 @@ static void
 lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
 {
        struct lpfc_sli4_hdw_queue *qp;
-       int idx, ccnt, fcnt;
+       int idx, ccnt;
        int wait_time = 0;
        int io_xri_cmpl = 1;
        int nvmet_xri_cmpl = 1;
-       int fcp_xri_cmpl = 1;
        int els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list);
 
        /* Driver just aborted IOs during the hba_unset process.  Pause
@@ -11300,32 +11257,21 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
                lpfc_nvme_wait_for_io_drain(phba);
 
        ccnt = 0;
-       fcnt = 0;
        for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
                qp = &phba->sli4_hba.hdwq[idx];
-               fcp_xri_cmpl = list_empty(
-                       &qp->lpfc_abts_scsi_buf_list);
-               if (!fcp_xri_cmpl) /* if list is NOT empty */
-                       fcnt++;
-               if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-                       io_xri_cmpl = list_empty(
-                               &qp->lpfc_abts_nvme_buf_list);
-                       if (!io_xri_cmpl) /* if list is NOT empty */
-                               ccnt++;
-               }
+               io_xri_cmpl = list_empty(&qp->lpfc_abts_io_buf_list);
+               if (!io_xri_cmpl) /* if list is NOT empty */
+                       ccnt++;
        }
        if (ccnt)
                io_xri_cmpl = 0;
-       if (fcnt)
-               fcp_xri_cmpl = 0;
 
        if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
                nvmet_xri_cmpl =
                        list_empty(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
        }
 
-       while (!fcp_xri_cmpl || !els_xri_cmpl || !io_xri_cmpl ||
-              !nvmet_xri_cmpl) {
+       while (!els_xri_cmpl || !io_xri_cmpl || !nvmet_xri_cmpl) {
                if (wait_time > LPFC_XRI_EXCH_BUSY_WAIT_TMO) {
                        if (!nvmet_xri_cmpl)
                                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -11334,12 +11280,7 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
                                                wait_time/1000);
                        if (!io_xri_cmpl)
                                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-                                               "6100 NVME XRI exchange busy "
-                                               "wait time: %d seconds.\n",
-                                               wait_time/1000);
-                       if (!fcp_xri_cmpl)
-                               lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-                                               "2877 FCP XRI exchange busy "
+                                               "6100 IO XRI exchange busy "
                                                "wait time: %d seconds.\n",
                                                wait_time/1000);
                        if (!els_xri_cmpl)
@@ -11355,24 +11296,15 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
                }
 
                ccnt = 0;
-               fcnt = 0;
                for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
                        qp = &phba->sli4_hba.hdwq[idx];
-                       fcp_xri_cmpl = list_empty(
-                               &qp->lpfc_abts_scsi_buf_list);
-                       if (!fcp_xri_cmpl) /* if list is NOT empty */
-                               fcnt++;
-                       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-                               io_xri_cmpl = list_empty(
-                                   &qp->lpfc_abts_nvme_buf_list);
-                               if (!io_xri_cmpl) /* if list is NOT empty */
-                                       ccnt++;
-                       }
+                       io_xri_cmpl = list_empty(
+                           &qp->lpfc_abts_io_buf_list);
+                       if (!io_xri_cmpl) /* if list is NOT empty */
+                               ccnt++;
                }
                if (ccnt)
                        io_xri_cmpl = 0;
-               if (fcnt)
-                       fcp_xri_cmpl = 0;
 
                if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
                        nvmet_xri_cmpl = list_empty(
@@ -11616,6 +11548,9 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
        phba->sli4_hba.extents_in_use = bf_get(cfg_ext, mbx_sli4_parameters);
        phba->sli4_hba.rpi_hdrs_in_use = bf_get(cfg_hdrr, mbx_sli4_parameters);
 
+       /* Check for Extended Pre-Registered SGL support */
+       phba->cfg_xpsgl = bf_get(cfg_xpsgl, mbx_sli4_parameters);
+
        /* Check for firmware nvme support */
        rc = (bf_get(cfg_nvme, mbx_sli4_parameters) &&
                     bf_get(cfg_xib, mbx_sli4_parameters));
@@ -11646,6 +11581,7 @@ fcponly:
                        phba->nvme_support = 0;
                        phba->nvmet_support = 0;
                        phba->cfg_nvmet_mrq = 0;
+                       phba->cfg_nvme_seg_cnt = 0;
 
                        /* If no FC4 type support, move to just SCSI support */
                        if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP))
@@ -11654,6 +11590,15 @@ fcponly:
                }
        }
 
+       /* If the NVME FC4 type is enabled, scale the sg_seg_cnt to
+        * accommodate 512K and 1M IOs in a single nvme buf and supply
+        * enough NVME LS iocb buffers for larger connectivity counts.
+        */
+       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+               phba->cfg_sg_seg_cnt = LPFC_MAX_NVME_SEG_CNT;
+               phba->cfg_iocb_cnt = 5;
+       }
+
        /* Only embed PBDE for if_type 6, PBDE support requires xib be set */
        if ((bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) !=
            LPFC_SLI_INTF_IF_TYPE_6) || (!bf_get(cfg_xib, mbx_sli4_parameters)))
@@ -11718,6 +11663,14 @@ fcponly:
        else
                phba->mds_diags_support = 0;
 
+       /*
+        * Check if the SLI port supports NSLER
+        */
+       if (bf_get(cfg_nsler, mbx_sli4_parameters))
+               phba->nsler = 1;
+       else
+               phba->nsler = 0;
+
        return 0;
 }
 
@@ -12146,7 +12099,7 @@ lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
        lpfc_scsi_dev_block(phba);
 
        /* Flush all driver's outstanding SCSI I/Os as we are to reset */
-       lpfc_sli_flush_fcp_rings(phba);
+       lpfc_sli_flush_io_rings(phba);
 
        /* stop all timers */
        lpfc_stop_hba_timers(phba);
@@ -12176,7 +12129,7 @@ lpfc_sli_prep_dev_for_perm_failure(struct lpfc_hba *phba)
        lpfc_stop_hba_timers(phba);
 
        /* Clean up all driver's outstanding SCSI I/Os */
-       lpfc_sli_flush_fcp_rings(phba);
+       lpfc_sli_flush_io_rings(phba);
 }
 
 /**
@@ -12948,12 +12901,8 @@ lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
        /* Block all SCSI devices' I/Os on the host */
        lpfc_scsi_dev_block(phba);
 
-       /* Flush all driver's outstanding SCSI I/Os as we are to reset */
-       lpfc_sli_flush_fcp_rings(phba);
-
-       /* Flush the outstanding NVME IOs if fc4 type enabled. */
-       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
-               lpfc_sli_flush_nvme_rings(phba);
+       /* Flush all driver's outstanding I/Os as we are to reset */
+       lpfc_sli_flush_io_rings(phba);
 
        /* stop all timers */
        lpfc_stop_hba_timers(phba);
@@ -12984,12 +12933,8 @@ lpfc_sli4_prep_dev_for_perm_failure(struct lpfc_hba *phba)
        /* stop all timers */
        lpfc_stop_hba_timers(phba);
 
-       /* Clean up all driver's outstanding SCSI I/Os */
-       lpfc_sli_flush_fcp_rings(phba);
-
-       /* Flush the outstanding NVME IOs if fc4 type enabled. */
-       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
-               lpfc_sli_flush_nvme_rings(phba);
+       /* Clean up all driver's outstanding I/Os */
+       lpfc_sli_flush_io_rings(phba);
 }
 
 /**
@@ -13530,19 +13475,6 @@ lpfc_exit(void)
        pci_unregister_driver(&lpfc_driver);
        fc_release_transport(lpfc_transport_template);
        fc_release_transport(lpfc_vport_transport_template);
-       if (_dump_buf_data) {
-               printk(KERN_ERR "9062 BLKGRD: freeing %lu pages for "
-                               "_dump_buf_data at 0x%p\n",
-                               (1L << _dump_buf_data_order), _dump_buf_data);
-               free_pages((unsigned long)_dump_buf_data, _dump_buf_data_order);
-       }
-
-       if (_dump_buf_dif) {
-               printk(KERN_ERR "9049 BLKGRD: freeing %lu pages for "
-                               "_dump_buf_dif at 0x%p\n",
-                               (1L << _dump_buf_dif_order), _dump_buf_dif);
-               free_pages((unsigned long)_dump_buf_dif, _dump_buf_dif_order);
-       }
        idr_destroy(&lpfc_hba_index);
 }
 
index 66191fa..ae09bb8 100644 (file)
@@ -72,8 +72,8 @@ lpfc_mem_alloc_active_rrq_pool_s4(struct lpfc_hba *phba) {
  * lpfc_mem_alloc - create and allocate all PCI and memory pools
  * @phba: HBA to allocate pools for
  *
- * Description: Creates and allocates PCI pools lpfc_sg_dma_buf_pool,
- * lpfc_mbuf_pool, lpfc_hrb_pool.  Creates and allocates kmalloc-backed mempools
+ * Description: Creates and allocates PCI pools lpfc_mbuf_pool,
+ * lpfc_hrb_pool.  Creates and allocates kmalloc-backed mempools
  * for LPFC_MBOXQ_t and lpfc_nodelist.  Also allocates the VPI bitmask.
  *
  * Notes: Not interrupt-safe.  Must be called with no locks held.  If any
@@ -89,36 +89,12 @@ lpfc_mem_alloc(struct lpfc_hba *phba, int align)
        struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
        int i;
 
-       if (phba->sli_rev == LPFC_SLI_REV4) {
-               /* Calculate alignment */
-               if (phba->cfg_sg_dma_buf_size < SLI4_PAGE_SIZE)
-                       i = phba->cfg_sg_dma_buf_size;
-               else
-                       i = SLI4_PAGE_SIZE;
-
-               phba->lpfc_sg_dma_buf_pool =
-                       dma_pool_create("lpfc_sg_dma_buf_pool",
-                                       &phba->pcidev->dev,
-                                       phba->cfg_sg_dma_buf_size,
-                                       i, 0);
-               if (!phba->lpfc_sg_dma_buf_pool)
-                       goto fail;
-
-       } else {
-               phba->lpfc_sg_dma_buf_pool =
-                       dma_pool_create("lpfc_sg_dma_buf_pool",
-                                       &phba->pcidev->dev, phba->cfg_sg_dma_buf_size,
-                                       align, 0);
-
-               if (!phba->lpfc_sg_dma_buf_pool)
-                       goto fail;
-       }
 
        phba->lpfc_mbuf_pool = dma_pool_create("lpfc_mbuf_pool", &phba->pcidev->dev,
                                                        LPFC_BPL_SIZE,
                                                        align, 0);
        if (!phba->lpfc_mbuf_pool)
-               goto fail_free_dma_buf_pool;
+               goto fail;
 
        pool->elements = kmalloc_array(LPFC_MBUF_POOL_SIZE,
                                       sizeof(struct lpfc_dmabuf),
@@ -208,9 +184,6 @@ fail_free_drb_pool:
  fail_free_lpfc_mbuf_pool:
        dma_pool_destroy(phba->lpfc_mbuf_pool);
        phba->lpfc_mbuf_pool = NULL;
- fail_free_dma_buf_pool:
-       dma_pool_destroy(phba->lpfc_sg_dma_buf_pool);
-       phba->lpfc_sg_dma_buf_pool = NULL;
  fail:
        return -ENOMEM;
 }
@@ -248,25 +221,22 @@ lpfc_mem_free(struct lpfc_hba *phba)
 
        /* Free HBQ pools */
        lpfc_sli_hbqbuf_free_all(phba);
-       if (phba->lpfc_nvmet_drb_pool)
-               dma_pool_destroy(phba->lpfc_nvmet_drb_pool);
+       dma_pool_destroy(phba->lpfc_nvmet_drb_pool);
        phba->lpfc_nvmet_drb_pool = NULL;
-       if (phba->lpfc_drb_pool)
-               dma_pool_destroy(phba->lpfc_drb_pool);
+
+       dma_pool_destroy(phba->lpfc_drb_pool);
        phba->lpfc_drb_pool = NULL;
-       if (phba->lpfc_hrb_pool)
-               dma_pool_destroy(phba->lpfc_hrb_pool);
+
+       dma_pool_destroy(phba->lpfc_hrb_pool);
        phba->lpfc_hrb_pool = NULL;
-       if (phba->txrdy_payload_pool)
-               dma_pool_destroy(phba->txrdy_payload_pool);
+
+       dma_pool_destroy(phba->txrdy_payload_pool);
        phba->txrdy_payload_pool = NULL;
 
-       if (phba->lpfc_hbq_pool)
-               dma_pool_destroy(phba->lpfc_hbq_pool);
+       dma_pool_destroy(phba->lpfc_hbq_pool);
        phba->lpfc_hbq_pool = NULL;
 
-       if (phba->rrq_pool)
-               mempool_destroy(phba->rrq_pool);
+       mempool_destroy(phba->rrq_pool);
        phba->rrq_pool = NULL;
 
        /* Free NLP memory pool */
@@ -290,10 +260,6 @@ lpfc_mem_free(struct lpfc_hba *phba)
        dma_pool_destroy(phba->lpfc_mbuf_pool);
        phba->lpfc_mbuf_pool = NULL;
 
-       /* Free DMA buffer memory pool */
-       dma_pool_destroy(phba->lpfc_sg_dma_buf_pool);
-       phba->lpfc_sg_dma_buf_pool = NULL;
-
        /* Free Device Data memory pool */
        if (phba->device_data_mem_pool) {
                /* Ensure all objects have been returned to the pool */
@@ -366,6 +332,13 @@ lpfc_mem_free_all(struct lpfc_hba *phba)
        /* Free and destroy all the allocated memory pools */
        lpfc_mem_free(phba);
 
+       /* Free DMA buffer memory pool */
+       dma_pool_destroy(phba->lpfc_sg_dma_buf_pool);
+       phba->lpfc_sg_dma_buf_pool = NULL;
+
+       dma_pool_destroy(phba->lpfc_cmd_rsp_buf_pool);
+       phba->lpfc_cmd_rsp_buf_pool = NULL;
+
        /* Free the iocb lookup array */
        kfree(psli->iocbq_lookup);
        psli->iocbq_lookup = NULL;
index 59252bf..f4b879d 100644 (file)
@@ -614,7 +614,7 @@ lpfc_rcv_padisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                }
 out:
                /* If we are authenticated, move to the proper state */
-               if (ndlp->nlp_type & NLP_FCP_TARGET)
+               if (ndlp->nlp_type & (NLP_FCP_TARGET | NLP_NVME_TARGET))
                        lpfc_nlp_set_state(vport, ndlp, NLP_STE_MAPPED_NODE);
                else
                        lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
@@ -799,9 +799,15 @@ lpfc_rcv_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                        if (npr->writeXferRdyDis)
                                ndlp->nlp_flag |= NLP_FIRSTBURST;
                }
-               if (npr->Retry)
+               if (npr->Retry && ndlp->nlp_type &
+                                       (NLP_FCP_INITIATOR | NLP_FCP_TARGET))
                        ndlp->nlp_fcp_info |= NLP_FCP_2_DEVICE;
 
+               if (npr->Retry && phba->nsler &&
+                   ndlp->nlp_type & (NLP_NVME_INITIATOR | NLP_NVME_TARGET))
+                       ndlp->nlp_nvme_info |= NLP_NVME_NSLER;
+
+
                /* If this driver is in nvme target mode, set the ndlp's fc4
                 * type to NVME provided the PRLI response claims NVME FC4
                 * type.  Target mode does not issue gft_id so doesn't get
@@ -885,7 +891,7 @@ lpfc_release_rpi(struct lpfc_hba *phba, struct lpfc_vport *vport,
                lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                 "1435 release_rpi SKIP UNREG x%x on "
                                 "NPort x%x deferred x%x  flg x%x "
-                                "Data: %p\n",
+                                "Data: x%px\n",
                                 ndlp->nlp_rpi, ndlp->nlp_DID,
                                 ndlp->nlp_defer_did,
                                 ndlp->nlp_flag, ndlp);
@@ -1661,6 +1667,7 @@ lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport,
        LPFC_MBOXQ_t      *mb;
        LPFC_MBOXQ_t      *nextmb;
        struct lpfc_dmabuf *mp;
+       struct lpfc_nodelist *ns_ndlp;
 
        cmdiocb = (struct lpfc_iocbq *) arg;
 
@@ -1693,6 +1700,13 @@ lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport,
        }
        spin_unlock_irq(&phba->hbalock);
 
+       /* software abort if any GID_FT is outstanding */
+       if (vport->cfg_enable_fc4_type != LPFC_ENABLE_FCP) {
+               ns_ndlp = lpfc_findnode_did(vport, NameServer_DID);
+               if (ns_ndlp && NLP_CHK_NODE_ACT(ns_ndlp))
+                       lpfc_els_abort(phba, ns_ndlp);
+       }
+
        lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO);
        return ndlp->nlp_state;
 }
@@ -1814,7 +1828,11 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport,
 
                ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
                lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE);
-               lpfc_issue_els_prli(vport, ndlp, 0);
+               if (lpfc_issue_els_prli(vport, ndlp, 0)) {
+                       lpfc_issue_els_logo(vport, ndlp, 0);
+                       ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
+                       lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+               }
        } else {
                if ((vport->fc_flag & FC_PT2PT) && phba->nvmet_support)
                        phba->targetport->port_id = vport->fc_myDID;
@@ -2012,6 +2030,11 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                if (bf_get_be32(prli_init, nvpr))
                        ndlp->nlp_type |= NLP_NVME_INITIATOR;
 
+               if (phba->nsler && bf_get_be32(prli_nsler, nvpr))
+                       ndlp->nlp_nvme_info |= NLP_NVME_NSLER;
+               else
+                       ndlp->nlp_nvme_info &= ~NLP_NVME_NSLER;
+
                /* Target driver cannot solicit NVME FB. */
                if (bf_get_be32(prli_tgt, nvpr)) {
                        /* Complete the nvme target roles.  The transport
@@ -2891,18 +2914,21 @@ lpfc_disc_state_machine(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
        uint32_t(*func) (struct lpfc_vport *, struct lpfc_nodelist *, void *,
                         uint32_t);
        uint32_t got_ndlp = 0;
+       uint32_t data1;
 
        if (lpfc_nlp_get(ndlp))
                got_ndlp = 1;
 
        cur_state = ndlp->nlp_state;
 
+       data1 = (((uint32_t)ndlp->nlp_fc4_type << 16) |
+               ((uint32_t)ndlp->nlp_type));
        /* DSM in event <evt> on NPort <nlp_DID> in state <cur_state> */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                         "0211 DSM in event x%x on NPort x%x in "
                         "state %d rpi x%x Data: x%x x%x\n",
                         evt, ndlp->nlp_DID, cur_state, ndlp->nlp_rpi,
-                        ndlp->nlp_flag, ndlp->nlp_fc4_type);
+                        ndlp->nlp_flag, data1);
 
        lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_DSM,
                 "DSM in:          evt:%d ste:%d did:x%x",
@@ -2913,10 +2939,13 @@ lpfc_disc_state_machine(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 
        /* DSM out state <rc> on NPort <nlp_DID> */
        if (got_ndlp) {
+               data1 = (((uint32_t)ndlp->nlp_fc4_type << 16) |
+                       ((uint32_t)ndlp->nlp_type));
                lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                         "0212 DSM out state %d on NPort x%x "
-                        "rpi x%x Data: x%x\n",
-                        rc, ndlp->nlp_DID, ndlp->nlp_rpi, ndlp->nlp_flag);
+                        "rpi x%x Data: x%x x%x\n",
+                        rc, ndlp->nlp_DID, ndlp->nlp_rpi, ndlp->nlp_flag,
+                        data1);
 
                lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_DSM,
                        "DSM out:         ste:%d did:x%x flg:x%x",
index 946642c..a227e36 100644 (file)
@@ -247,7 +247,7 @@ lpfc_nvme_create_queue(struct nvme_fc_local_port *pnvme_lport,
 
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
                         "6073 Binding %s HdwQueue %d  (cpu %d) to "
-                        "hdw_queue %d qhandle %p\n", str,
+                        "hdw_queue %d qhandle x%px\n", str,
                         qidx, qhandle->cpu_id, qhandle->index, qhandle);
        *handle = (void *)qhandle;
        return 0;
@@ -282,7 +282,7 @@ lpfc_nvme_delete_queue(struct nvme_fc_local_port *pnvme_lport,
        vport = lport->vport;
 
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
-                       "6001 ENTER.  lpfc_pnvme %p, qidx x%x qhandle %p\n",
+                       "6001 ENTER.  lpfc_pnvme x%px, qidx x%x qhandle x%px\n",
                        lport, qidx, handle);
        kfree(handle);
 }
@@ -293,7 +293,7 @@ lpfc_nvme_localport_delete(struct nvme_fc_local_port *localport)
        struct lpfc_nvme_lport *lport = localport->private;
 
        lpfc_printf_vlog(lport->vport, KERN_INFO, LOG_NVME,
-                        "6173 localport %p delete complete\n",
+                        "6173 localport x%px delete complete\n",
                         lport);
 
        /* release any threads waiting for the unreg to complete */
@@ -332,7 +332,7 @@ lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport)
         * calling state machine to remove the node.
         */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
-                       "6146 remoteport delete of remoteport %p\n",
+                       "6146 remoteport delete of remoteport x%px\n",
                        remoteport);
        spin_lock_irq(&vport->phba->hbalock);
 
@@ -383,8 +383,8 @@ lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
        ndlp = (struct lpfc_nodelist *)cmdwqe->context1;
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
                         "6047 nvme cmpl Enter "
-                        "Data %p DID %x Xri: %x status %x reason x%x cmd:%p "
-                        "lsreg:%p bmp:%p ndlp:%p\n",
+                        "Data %px DID %x Xri: %x status %x reason x%x "
+                        "cmd:x%px lsreg:x%px bmp:x%px ndlp:x%px\n",
                         pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0,
                         cmdwqe->sli4_xritag, status,
                         (wcqe->parameter & 0xffff),
@@ -404,7 +404,7 @@ lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
        else
                lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
                                 "6046 nvme cmpl without done call back? "
-                                "Data %p DID %x Xri: %x status %x\n",
+                                "Data %px DID %x Xri: %x status %x\n",
                                pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0,
                                cmdwqe->sli4_xritag, status);
        if (ndlp) {
@@ -436,6 +436,7 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
                return 1;
 
        wqe = &genwqe->wqe;
+       /* Initialize only 64 bytes */
        memset(wqe, 0, sizeof(union lpfc_wqe));
 
        genwqe->context3 = (uint8_t *)bmp;
@@ -516,7 +517,8 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
        /* Issue GEN REQ WQE for NPORT <did> */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
                         "6050 Issue GEN REQ WQE to NPORT x%x "
-                        "Data: x%x x%x wq:%p lsreq:%p bmp:%p xmit:%d 1st:%d\n",
+                        "Data: x%x x%x wq:x%px lsreq:x%px bmp:x%px "
+                        "xmit:%d 1st:%d\n",
                         ndlp->nlp_DID, genwqe->iotag,
                         vport->port_state,
                        genwqe, pnvme_lsreq, bmp, xmit_len, first_len);
@@ -594,7 +596,7 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
        ndlp = rport->ndlp;
        if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) {
                lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
-                                "6051 Remoteport %p, rport has invalid ndlp. "
+                                "6051 Remoteport x%px, rport has invalid ndlp. "
                                 "Failing LS Req\n", pnvme_rport);
                return -ENODEV;
        }
@@ -646,10 +648,10 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
 
        /* Expand print to include key fields. */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
-                        "6149 Issue LS Req to DID 0x%06x lport %p, rport %p "
-                        "lsreq%p rqstlen:%d rsplen:%d %pad %pad\n",
-                        ndlp->nlp_DID,
-                        pnvme_lport, pnvme_rport,
+                        "6149 Issue LS Req to DID 0x%06x lport x%px, "
+                        "rport x%px lsreq x%px rqstlen:%d rsplen:%d "
+                        "%pad %pad\n",
+                        ndlp->nlp_DID, pnvme_lport, pnvme_rport,
                         pnvme_lsreq, pnvme_lsreq->rqstlen,
                         pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma,
                         &pnvme_lsreq->rspdma);
@@ -665,8 +667,8 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
        if (ret != WQE_SUCCESS) {
                atomic_inc(&lport->xmt_ls_err);
                lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
-                                "6052 EXIT. issue ls wqe failed lport %p, "
-                                "rport %p lsreq%p Status %x DID %x\n",
+                                "6052 EXIT. issue ls wqe failed lport x%px, "
+                                "rport x%px lsreq x%px Status %x DID %x\n",
                                 pnvme_lport, pnvme_rport, pnvme_lsreq,
                                 ret, ndlp->nlp_DID);
                lpfc_mbuf_free(vport->phba, bmp->virt, bmp->phys);
@@ -723,7 +725,7 @@ lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport,
 
        /* Expand print to include key fields. */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,
-                        "6040 ENTER.  lport %p, rport %p lsreq %p rqstlen:%d "
+                        "6040 ENTER.  lport x%px, rport x%px lsreq x%px rqstlen:%d "
                         "rsplen:%d %pad %pad\n",
                         pnvme_lport, pnvme_rport,
                         pnvme_lsreq, pnvme_lsreq->rqstlen,
@@ -984,8 +986,8 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
        if (!lpfc_ncmd->nvmeCmd) {
                spin_unlock(&lpfc_ncmd->buf_lock);
                lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
-                                "6066 Missing cmpl ptrs: lpfc_ncmd %p, "
-                                "nvmeCmd %p\n",
+                                "6066 Missing cmpl ptrs: lpfc_ncmd x%px, "
+                                "nvmeCmd x%px\n",
                                 lpfc_ncmd, lpfc_ncmd->nvmeCmd);
 
                /* Release the lpfc_ncmd regardless of the missing elements. */
@@ -998,9 +1000,9 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
        idx = lpfc_ncmd->cur_iocbq.hba_wqidx;
        phba->sli4_hba.hdwq[idx].nvme_cstat.io_cmpls++;
 
-       if (vport->localport) {
+       if (unlikely(status && vport->localport)) {
                lport = (struct lpfc_nvme_lport *)vport->localport->private;
-               if (lport && status) {
+               if (lport) {
                        if (bf_get(lpfc_wcqe_c_xb, wcqe))
                                atomic_inc(&lport->cmpl_fcp_xb);
                        atomic_inc(&lport->cmpl_fcp_err);
@@ -1100,8 +1102,8 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
                        if (lpfc_ncmd->result == IOERR_ABORT_REQUESTED)
                                lpfc_printf_vlog(vport, KERN_INFO,
                                         LOG_NVME_IOERR,
-                                        "6032 Delay Aborted cmd %p "
-                                        "nvme cmd %p, xri x%x, "
+                                        "6032 Delay Aborted cmd x%px "
+                                        "nvme cmd x%px, xri x%x, "
                                         "xb %d\n",
                                         lpfc_ncmd, nCmd,
                                         lpfc_ncmd->cur_iocbq.sli4_xritag,
@@ -1140,7 +1142,7 @@ out_err:
                phba->ktime_last_cmd = lpfc_ncmd->ts_data_nvme;
                lpfc_nvme_ktime(phba, lpfc_ncmd);
        }
-       if (phba->cpucheck_on & LPFC_CHECK_NVME_IO) {
+       if (unlikely(phba->cpucheck_on & LPFC_CHECK_NVME_IO)) {
                uint32_t cpu;
                idx = lpfc_ncmd->cur_iocbq.hba_wqidx;
                cpu = raw_smp_processor_id();
@@ -1253,6 +1255,9 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
                       sizeof(uint32_t) * 8);
                cstat->control_requests++;
        }
+
+       if (pnode->nlp_nvme_info & NLP_NVME_NSLER)
+               bf_set(wqe_erp, &wqe->generic.wqe_com, 1);
        /*
         * Finish initializing those WQE fields that are independent
         * of the nvme_cmnd request_buffer
@@ -1304,14 +1309,16 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
        struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd;
        union lpfc_wqe128 *wqe = &lpfc_ncmd->cur_iocbq.wqe;
        struct sli4_sge *sgl = lpfc_ncmd->dma_sgl;
+       struct sli4_hybrid_sgl *sgl_xtra = NULL;
        struct scatterlist *data_sg;
        struct sli4_sge *first_data_sgl;
        struct ulp_bde64 *bde;
-       dma_addr_t physaddr;
+       dma_addr_t physaddr = 0;
        uint32_t num_bde = 0;
-       uint32_t dma_len;
+       uint32_t dma_len = 0;
        uint32_t dma_offset = 0;
-       int nseg, i;
+       int nseg, i, j;
+       bool lsp_just_set = false;
 
        /* Fix up the command and response DMA stuff. */
        lpfc_nvme_adj_fcp_sgls(vport, lpfc_ncmd, nCmd);
@@ -1348,6 +1355,9 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
                 */
                nseg = nCmd->sg_cnt;
                data_sg = nCmd->first_sgl;
+
+               /* for tracking the segment boundaries */
+               j = 2;
                for (i = 0; i < nseg; i++) {
                        if (data_sg == NULL) {
                                lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
@@ -1356,23 +1366,76 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
                                lpfc_ncmd->seg_cnt = 0;
                                return 1;
                        }
-                       physaddr = data_sg->dma_address;
-                       dma_len = data_sg->length;
-                       sgl->addr_lo = cpu_to_le32(putPaddrLow(physaddr));
-                       sgl->addr_hi = cpu_to_le32(putPaddrHigh(physaddr));
-                       sgl->word2 = le32_to_cpu(sgl->word2);
-                       if ((num_bde + 1) == nseg)
+
+                       sgl->word2 = 0;
+                       if ((num_bde + 1) == nseg) {
                                bf_set(lpfc_sli4_sge_last, sgl, 1);
-                       else
+                               bf_set(lpfc_sli4_sge_type, sgl,
+                                      LPFC_SGE_TYPE_DATA);
+                       } else {
                                bf_set(lpfc_sli4_sge_last, sgl, 0);
-                       bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
-                       bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA);
-                       sgl->word2 = cpu_to_le32(sgl->word2);
-                       sgl->sge_len = cpu_to_le32(dma_len);
-
-                       dma_offset += dma_len;
-                       data_sg = sg_next(data_sg);
-                       sgl++;
+
+                               /* expand the segment */
+                               if (!lsp_just_set &&
+                                   !((j + 1) % phba->border_sge_num) &&
+                                   ((nseg - 1) != i)) {
+                                       /* set LSP type */
+                                       bf_set(lpfc_sli4_sge_type, sgl,
+                                              LPFC_SGE_TYPE_LSP);
+
+                                       sgl_xtra = lpfc_get_sgl_per_hdwq(
+                                                       phba, lpfc_ncmd);
+
+                                       if (unlikely(!sgl_xtra)) {
+                                               lpfc_ncmd->seg_cnt = 0;
+                                               return 1;
+                                       }
+                                       sgl->addr_lo = cpu_to_le32(putPaddrLow(
+                                                      sgl_xtra->dma_phys_sgl));
+                                       sgl->addr_hi = cpu_to_le32(putPaddrHigh(
+                                                      sgl_xtra->dma_phys_sgl));
+
+                               } else {
+                                       bf_set(lpfc_sli4_sge_type, sgl,
+                                              LPFC_SGE_TYPE_DATA);
+                               }
+                       }
+
+                       if (!(bf_get(lpfc_sli4_sge_type, sgl) &
+                                    LPFC_SGE_TYPE_LSP)) {
+                               if ((nseg - 1) == i)
+                                       bf_set(lpfc_sli4_sge_last, sgl, 1);
+
+                               physaddr = data_sg->dma_address;
+                               dma_len = data_sg->length;
+                               sgl->addr_lo = cpu_to_le32(
+                                                        putPaddrLow(physaddr));
+                               sgl->addr_hi = cpu_to_le32(
+                                                       putPaddrHigh(physaddr));
+
+                               bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
+                               sgl->word2 = cpu_to_le32(sgl->word2);
+                               sgl->sge_len = cpu_to_le32(dma_len);
+
+                               dma_offset += dma_len;
+                               data_sg = sg_next(data_sg);
+
+                               sgl++;
+
+                               lsp_just_set = false;
+                       } else {
+                               sgl->word2 = cpu_to_le32(sgl->word2);
+
+                               sgl->sge_len = cpu_to_le32(
+                                                    phba->cfg_sg_dma_buf_size);
+
+                               sgl = (struct sli4_sge *)sgl_xtra->dma_sgl;
+                               i = i - 1;
+
+                               lsp_just_set = true;
+                       }
+
+                       j++;
                }
                if (phba->cfg_enable_pbde) {
                        /* Use PBDE support for first SGL only, offset == 0 */
@@ -1474,7 +1537,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
                goto out_fail;
        }
 
-       if (vport->load_flag & FC_UNLOADING) {
+       if (unlikely(vport->load_flag & FC_UNLOADING)) {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
                                 "6124 Fail IO, Driver unload\n");
                atomic_inc(&lport->xmt_fcp_err);
@@ -1505,8 +1568,8 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
        ndlp = rport->ndlp;
        if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE | LOG_NVME_IOERR,
-                                "6053 Fail IO, ndlp not ready: rport %p "
-                                 "ndlp %p, DID x%06x\n",
+                                "6053 Busy IO, ndlp not ready: rport x%px "
+                                 "ndlp x%px, DID x%06x\n",
                                 rport, ndlp, pnvme_rport->port_id);
                atomic_inc(&lport->xmt_fcp_err);
                ret = -EBUSY;
@@ -1758,7 +1821,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
        /* Announce entry to new IO submit field. */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,
                         "6002 Abort Request to rport DID x%06x "
-                        "for nvme_fc_req %p\n",
+                        "for nvme_fc_req x%px\n",
                         pnvme_rport->port_id,
                         pnvme_fcreq);
 
@@ -1767,7 +1830,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
         */
        spin_lock_irqsave(&phba->hbalock, flags);
        /* driver queued commands are in process of being flushed */
-       if (phba->hba_flag & HBA_NVME_IOQ_FLUSH) {
+       if (phba->hba_flag & HBA_IOQ_FLUSH) {
                spin_unlock_irqrestore(&phba->hbalock, flags);
                lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
                                 "6139 Driver in reset cleanup - flushing "
@@ -1805,8 +1868,8 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
        if (lpfc_nbuf->nvmeCmd != pnvme_fcreq) {
                lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
                                 "6143 NVME req mismatch: "
-                                "lpfc_nbuf %p nvmeCmd %p, "
-                                "pnvme_fcreq %p.  Skipping Abort xri x%x\n",
+                                "lpfc_nbuf x%px nvmeCmd x%px, "
+                                "pnvme_fcreq x%px.  Skipping Abort xri x%x\n",
                                 lpfc_nbuf, lpfc_nbuf->nvmeCmd,
                                 pnvme_fcreq, nvmereq_wqe->sli4_xritag);
                goto out_unlock;
@@ -1815,7 +1878,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
        /* Don't abort IOs no longer on the pending queue. */
        if (!(nvmereq_wqe->iocb_flag & LPFC_IO_ON_TXCMPLQ)) {
                lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
-                                "6142 NVME IO req %p not queued - skipping "
+                                "6142 NVME IO req x%px not queued - skipping "
                                 "abort req xri x%x\n",
                                 pnvme_fcreq, nvmereq_wqe->sli4_xritag);
                goto out_unlock;
@@ -1830,8 +1893,8 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
        if (nvmereq_wqe->iocb_flag & LPFC_DRIVER_ABORTED) {
                lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
                                 "6144 Outstanding NVME I/O Abort Request "
-                                "still pending on nvme_fcreq %p, "
-                                "lpfc_ncmd %p xri x%x\n",
+                                "still pending on nvme_fcreq x%px, "
+                                "lpfc_ncmd x%px xri x%x\n",
                                 pnvme_fcreq, lpfc_nbuf,
                                 nvmereq_wqe->sli4_xritag);
                goto out_unlock;
@@ -1841,7 +1904,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
        if (!abts_buf) {
                lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
                                 "6136 No available abort wqes. Skipping "
-                                "Abts req for nvme_fcreq %p xri x%x\n",
+                                "Abts req for nvme_fcreq x%px xri x%x\n",
                                 pnvme_fcreq, nvmereq_wqe->sli4_xritag);
                goto out_unlock;
        }
@@ -1855,7 +1918,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
        /* WQEs are reused.  Clear stale data and set key fields to
         * zero like ia, iaab, iaar, xri_tag, and ctxt_tag.
         */
-       memset(abts_wqe, 0, sizeof(union lpfc_wqe));
+       memset(abts_wqe, 0, sizeof(*abts_wqe));
        bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG);
 
        /* word 7 */
@@ -1892,7 +1955,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
        if (ret_val) {
                lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
                                 "6137 Failed abts issue_wqe with status x%x "
-                                "for nvme_fcreq %p.\n",
+                                "for nvme_fcreq x%px.\n",
                                 ret_val, pnvme_fcreq);
                lpfc_sli_release_iocbq(phba, abts_buf);
                return;
@@ -1982,7 +2045,7 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
                sgl->word2 = cpu_to_le32(sgl->word2);
                /* Fill in word 3 / sgl_len during cmd submission */
 
-               /* Initialize WQE */
+               /* Initialize 64 bytes only */
                memset(wqe, 0, sizeof(union lpfc_wqe));
 
                if (lpfc_ndlp_check_qdepth(phba, ndlp)) {
@@ -2028,11 +2091,11 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd)
                                lpfc_ncmd->cur_iocbq.sli4_xritag,
                                lpfc_ncmd->cur_iocbq.iotag);
 
-               spin_lock_irqsave(&qp->abts_nvme_buf_list_lock, iflag);
+               spin_lock_irqsave(&qp->abts_io_buf_list_lock, iflag);
                list_add_tail(&lpfc_ncmd->list,
-                       &qp->lpfc_abts_nvme_buf_list);
+                       &qp->lpfc_abts_io_buf_list);
                qp->abts_nvme_io_bufs++;
-               spin_unlock_irqrestore(&qp->abts_nvme_buf_list_lock, iflag);
+               spin_unlock_irqrestore(&qp->abts_io_buf_list_lock, iflag);
        } else
                lpfc_release_io_buf(phba, (struct lpfc_io_buf *)lpfc_ncmd, qp);
 }
@@ -2095,8 +2158,8 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
        if (!ret) {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_NVME_DISC,
                                 "6005 Successfully registered local "
-                                "NVME port num %d, localP %p, private %p, "
-                                "sg_seg %d\n",
+                                "NVME port num %d, localP x%px, private "
+                                "x%px, sg_seg %d\n",
                                 localport->port_num, localport,
                                 localport->private,
                                 lpfc_nvme_template.max_sgl_segments);
@@ -2157,14 +2220,14 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
                if (unlikely(!ret)) {
                        pending = 0;
                        for (i = 0; i < phba->cfg_hdw_queue; i++) {
-                               pring = phba->sli4_hba.hdwq[i].nvme_wq->pring;
+                               pring = phba->sli4_hba.hdwq[i].io_wq->pring;
                                if (!pring)
                                        continue;
                                if (pring->txcmplq_cnt)
                                        pending += pring->txcmplq_cnt;
                        }
                        lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR,
-                                        "6176 Lport %p Localport %p wait "
+                                        "6176 Lport x%px Localport x%px wait "
                                         "timed out. Pending %d. Renewing.\n",
                                         lport, vport->localport, pending);
                        continue;
@@ -2172,7 +2235,7 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
                break;
        }
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
-                        "6177 Lport %p Localport %p Complete Success\n",
+                        "6177 Lport x%px Localport x%px Complete Success\n",
                         lport, vport->localport);
 }
 #endif
@@ -2203,7 +2266,7 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
        lport = (struct lpfc_nvme_lport *)localport->private;
 
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
-                        "6011 Destroying NVME localport %p\n",
+                        "6011 Destroying NVME localport x%px\n",
                         localport);
 
        /* lport's rport list is clear.  Unregister
@@ -2253,12 +2316,12 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport)
        lport = (struct lpfc_nvme_lport *)localport->private;
        if (!lport) {
                lpfc_printf_vlog(vport, KERN_WARNING, LOG_NVME,
-                                "6171 Update NVME fail. localP %p, No lport\n",
+                                "6171 Update NVME fail. localP x%px, No lport\n",
                                 localport);
                return;
        }
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
-                        "6012 Update NVME lport %p did x%x\n",
+                        "6012 Update NVME lport x%px did x%x\n",
                         localport, vport->fc_myDID);
 
        localport->port_id = vport->fc_myDID;
@@ -2268,7 +2331,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport)
                localport->port_role = FC_PORT_ROLE_NVME_INITIATOR;
 
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
-                        "6030 bound lport %p to DID x%06x\n",
+                        "6030 bound lport x%px to DID x%06x\n",
                         lport, localport->port_id);
 #endif
 }
@@ -2317,9 +2380,13 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 
        spin_lock_irq(&vport->phba->hbalock);
        oldrport = lpfc_ndlp_get_nrport(ndlp);
-       spin_unlock_irq(&vport->phba->hbalock);
-       if (!oldrport)
+       if (oldrport) {
+               prev_ndlp = oldrport->ndlp;
+               spin_unlock_irq(&vport->phba->hbalock);
+       } else {
+               spin_unlock_irq(&vport->phba->hbalock);
                lpfc_nlp_get(ndlp);
+       }
 
        ret = nvme_fc_register_remoteport(localport, &rpinfo, &remote_port);
        if (!ret) {
@@ -2338,25 +2405,34 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                        /* New remoteport record does not guarantee valid
                         * host private memory area.
                         */
-                       prev_ndlp = oldrport->ndlp;
                        if (oldrport == remote_port->private) {
                                /* Same remoteport - ndlp should match.
                                 * Just reuse.
                                 */
                                lpfc_printf_vlog(ndlp->vport, KERN_INFO,
                                                 LOG_NVME_DISC,
-                                                "6014 Rebinding lport to "
-                                                "remoteport %p wwpn 0x%llx, "
-                                                "Data: x%x x%x %p %p x%x x%06x\n",
+                                                "6014 Rebind lport to current "
+                                                "remoteport x%px wwpn 0x%llx, "
+                                                "Data: x%x x%x x%px x%px x%x "
+                                                " x%06x\n",
                                                 remote_port,
                                                 remote_port->port_name,
                                                 remote_port->port_id,
                                                 remote_port->port_role,
-                                                prev_ndlp,
+                                                oldrport->ndlp,
                                                 ndlp,
                                                 ndlp->nlp_type,
                                                 ndlp->nlp_DID);
-                               return 0;
+
+                               /* It's a complete rebind only if the driver
+                                * is registering with the same ndlp. Otherwise
+                                * the driver likely executed a node swap
+                                * prior to this registration and the ndlp to
+                                * remoteport binding needs to be redone.
+                                */
+                               if (prev_ndlp == ndlp)
+                                       return 0;
+
                        }
 
                        /* Sever the ndlp<->rport association
@@ -2390,10 +2466,10 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                spin_unlock_irq(&vport->phba->hbalock);
                lpfc_printf_vlog(vport, KERN_INFO,
                                 LOG_NVME_DISC | LOG_NODE,
-                                "6022 Binding new rport to "
-                                "lport %p Remoteport %p rport %p WWNN 0x%llx, "
+                                "6022 Bind lport x%px to remoteport x%px "
+                                "rport x%px WWNN 0x%llx, "
                                 "Rport WWPN 0x%llx DID "
-                                "x%06x Role x%x, ndlp %p prev_ndlp %p\n",
+                                "x%06x Role x%x, ndlp x%px prev_ndlp x%px\n",
                                 lport, remote_port, rport,
                                 rpinfo.node_name, rpinfo.port_name,
                                 rpinfo.port_id, rpinfo.port_role,
@@ -2423,20 +2499,23 @@ void
 lpfc_nvme_rescan_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 {
 #if (IS_ENABLED(CONFIG_NVME_FC))
-       struct lpfc_nvme_rport *rport;
-       struct nvme_fc_remote_port *remoteport;
+       struct lpfc_nvme_rport *nrport;
+       struct nvme_fc_remote_port *remoteport = NULL;
 
-       rport = ndlp->nrport;
+       spin_lock_irq(&vport->phba->hbalock);
+       nrport = lpfc_ndlp_get_nrport(ndlp);
+       if (nrport)
+               remoteport = nrport->remoteport;
+       spin_unlock_irq(&vport->phba->hbalock);
 
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
                         "6170 Rescan NPort DID x%06x type x%x "
-                        "state x%x rport %p\n",
-                        ndlp->nlp_DID, ndlp->nlp_type, ndlp->nlp_state, rport);
-       if (!rport)
-               goto input_err;
-       remoteport = rport->remoteport;
-       if (!remoteport)
-               goto input_err;
+                        "state x%x nrport x%px remoteport x%px\n",
+                        ndlp->nlp_DID, ndlp->nlp_type, ndlp->nlp_state,
+                        nrport, remoteport);
+
+       if (!nrport || !remoteport)
+               goto rescan_exit;
 
        /* Only rescan if we are an NVME target in the MAPPED state */
        if (remoteport->port_role & FC_PORT_ROLE_NVME_DISCOVERY &&
@@ -2449,10 +2528,10 @@ lpfc_nvme_rescan_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                                 ndlp->nlp_DID, remoteport->port_state);
        }
        return;
-input_err:
-       lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
-                        "6169 State error: lport %p, rport%p FCID x%06x\n",
-                        vport->localport, ndlp->rport, ndlp->nlp_DID);
+ rescan_exit:
+       lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
+                        "6169 Skip NVME Rport Rescan, NVME remoteport "
+                        "unregistered\n");
 #endif
 }
 
@@ -2499,7 +2578,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                goto input_err;
 
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
-                        "6033 Unreg nvme remoteport %p, portname x%llx, "
+                        "6033 Unreg nvme remoteport x%px, portname x%llx, "
                         "port_id x%06x, portstate x%x port type x%x\n",
                         remoteport, remoteport->port_name,
                         remoteport->port_id, remoteport->port_state,
@@ -2537,7 +2616,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
  input_err:
 #endif
        lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
-                        "6168 State error: lport %p, rport%p FCID x%06x\n",
+                        "6168 State error: lport x%px, rport x%px FCID x%06x\n",
                         vport->localport, ndlp->rport, ndlp->nlp_DID);
 }
 
@@ -2545,6 +2624,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
  * lpfc_sli4_nvme_xri_aborted - Fast-path process of NVME xri abort
  * @phba: pointer to lpfc hba data structure.
  * @axri: pointer to the fcp xri abort wcqe structure.
+ * @lpfc_ncmd: The nvme job structure for the request being aborted.
  *
  * This routine is invoked by the worker thread to process a SLI4 fast-path
  * NVME aborted xri.  Aborted NVME IO commands are completed to the transport
@@ -2552,59 +2632,33 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
  **/
 void
 lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
-                          struct sli4_wcqe_xri_aborted *axri, int idx)
+                          struct sli4_wcqe_xri_aborted *axri,
+                          struct lpfc_io_buf *lpfc_ncmd)
 {
        uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
-       struct lpfc_io_buf *lpfc_ncmd, *next_lpfc_ncmd;
        struct nvmefc_fcp_req *nvme_cmd = NULL;
-       struct lpfc_nodelist *ndlp;
-       struct lpfc_sli4_hdw_queue *qp;
-       unsigned long iflag = 0;
+       struct lpfc_nodelist *ndlp = lpfc_ncmd->ndlp;
 
-       if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
-               return;
-       qp = &phba->sli4_hba.hdwq[idx];
-       spin_lock_irqsave(&phba->hbalock, iflag);
-       spin_lock(&qp->abts_nvme_buf_list_lock);
-       list_for_each_entry_safe(lpfc_ncmd, next_lpfc_ncmd,
-                                &qp->lpfc_abts_nvme_buf_list, list) {
-               if (lpfc_ncmd->cur_iocbq.sli4_xritag == xri) {
-                       list_del_init(&lpfc_ncmd->list);
-                       qp->abts_nvme_io_bufs--;
-                       lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
-                       lpfc_ncmd->status = IOSTAT_SUCCESS;
-                       spin_unlock(&qp->abts_nvme_buf_list_lock);
-
-                       spin_unlock_irqrestore(&phba->hbalock, iflag);
-                       ndlp = lpfc_ncmd->ndlp;
-                       if (ndlp)
-                               lpfc_sli4_abts_err_handler(phba, ndlp, axri);
-
-                       lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
-                                       "6311 nvme_cmd %p xri x%x tag x%x "
-                                       "abort complete and xri released\n",
-                                       lpfc_ncmd->nvmeCmd, xri,
-                                       lpfc_ncmd->cur_iocbq.iotag);
-
-                       /* Aborted NVME commands are required to not complete
-                        * before the abort exchange command fully completes.
-                        * Once completed, it is available via the put list.
-                        */
-                       if (lpfc_ncmd->nvmeCmd) {
-                               nvme_cmd = lpfc_ncmd->nvmeCmd;
-                               nvme_cmd->done(nvme_cmd);
-                               lpfc_ncmd->nvmeCmd = NULL;
-                       }
-                       lpfc_release_nvme_buf(phba, lpfc_ncmd);
-                       return;
-               }
-       }
-       spin_unlock(&qp->abts_nvme_buf_list_lock);
-       spin_unlock_irqrestore(&phba->hbalock, iflag);
 
-       lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
-                       "6312 XRI Aborted xri x%x not found\n", xri);
+       if (ndlp)
+               lpfc_sli4_abts_err_handler(phba, ndlp, axri);
 
+       lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+                       "6311 nvme_cmd %p xri x%x tag x%x abort complete and "
+                       "xri released\n",
+                       lpfc_ncmd->nvmeCmd, xri,
+                       lpfc_ncmd->cur_iocbq.iotag);
+
+       /* Aborted NVME commands are required to not complete
+        * before the abort exchange command fully completes.
+        * Once completed, it is available via the put list.
+        */
+       if (lpfc_ncmd->nvmeCmd) {
+               nvme_cmd = lpfc_ncmd->nvmeCmd;
+               nvme_cmd->done(nvme_cmd);
+               lpfc_ncmd->nvmeCmd = NULL;
+       }
+       lpfc_release_nvme_buf(phba, lpfc_ncmd);
 }
 
 /**
@@ -2626,13 +2680,13 @@ lpfc_nvme_wait_for_io_drain(struct lpfc_hba *phba)
        if (phba->sli_rev < LPFC_SLI_REV4 || !phba->sli4_hba.hdwq)
                return;
 
-       /* Cycle through all NVME rings and make sure all outstanding
+       /* Cycle through all IO rings and make sure all outstanding
         * WQEs have been removed from the txcmplqs.
         */
        for (i = 0; i < phba->cfg_hdw_queue; i++) {
-               if (!phba->sli4_hba.hdwq[i].nvme_wq)
+               if (!phba->sli4_hba.hdwq[i].io_wq)
                        continue;
-               pring = phba->sli4_hba.hdwq[i].nvme_wq->pring;
+               pring = phba->sli4_hba.hdwq[i].io_wq->pring;
 
                if (!pring)
                        continue;
@@ -2653,3 +2707,50 @@ lpfc_nvme_wait_for_io_drain(struct lpfc_hba *phba)
                }
        }
 }
+
+/**
+ * lpfc_nvme_cancel_iocb - Cancel an NVME iocb and complete its request
+ * @phba: pointer to lpfc hba data structure.
+ * @pwqeIn: iocbq to cancel; context1, when set, is its lpfc_io_buf.
+ *
+ * An iocbq without context1, or one holding an abort WQE, is just released.
+ * Otherwise any attached request is completed with NVME_SC_INTERNAL.
+ **/
+void
+lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn)
+{
+#if (IS_ENABLED(CONFIG_NVME_FC))
+       struct lpfc_io_buf *io_buf = (struct lpfc_io_buf *)pwqeIn->context1;
+       struct lpfc_nvme_fcpreq_priv *priv;
+       struct nvmefc_fcp_req *req;
+
+       /* For abort iocb just return, IO iocb will do a done call */
+       if (!io_buf ||
+           bf_get(wqe_cmnd, &pwqeIn->wqe.gen_req.wqe_com) ==
+           CMD_ABORT_XRI_CX) {
+               lpfc_sli_release_iocbq(phba, pwqeIn);
+               return;
+       }
+
+       spin_lock(&io_buf->buf_lock);
+       req = io_buf->nvmeCmd;
+       if (req) {
+               lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
+                               "6194 NVME Cancel xri %x\n",
+                               io_buf->cur_iocbq.sli4_xritag);
+               req->transferred_length = 0;
+               req->rcv_rsplen = 0;
+               req->status = NVME_SC_INTERNAL;
+               priv = req->private;
+               priv->nvme_buf = NULL;
+               io_buf->nvmeCmd = NULL;
+       }
+       spin_unlock(&io_buf->buf_lock);
+
+       /* Complete the request only after buf_lock has been dropped. */
+       if (req)
+               req->done(req);
+       /* Release; with XB=1 the IO is queued into the abort list. */
+       lpfc_release_nvme_buf(phba, io_buf);
+#endif
+}
index faa596f..9884228 100644 (file)
@@ -1026,7 +1026,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
                 * WQE release CQE
                 */
                ctxp->flag |= LPFC_NVMET_DEFER_WQFULL;
-               wq = ctxp->hdwq->nvme_wq;
+               wq = ctxp->hdwq->io_wq;
                pring = wq->pring;
                spin_lock_irqsave(&pring->ring_lock, iflags);
                list_add_tail(&nvmewqeq->list, &wq->wqfull_list);
@@ -1104,7 +1104,7 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
                spin_unlock_irqrestore(&ctxp->ctxlock, flags);
                lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid,
                                                 ctxp->oxid);
-               wq = ctxp->hdwq->nvme_wq;
+               wq = ctxp->hdwq->io_wq;
                lpfc_nvmet_wqfull_flush(phba, wq, ctxp);
                return;
        }
@@ -1437,7 +1437,7 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
                        infop = lpfc_get_ctx_list(phba, i, j);
                        lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT,
                                        "6408 TOTAL NVMET ctx for CPU %d "
-                                       "MRQ %d: cnt %d nextcpu %p\n",
+                                       "MRQ %d: cnt %d nextcpu x%px\n",
                                        i, j, infop->nvmet_ctx_list_cnt,
                                        infop->nvmet_ctx_next_cpu);
                }
@@ -1500,7 +1500,7 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 
                lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
                                "6026 Registered NVME "
-                               "targetport: %p, private %p "
+                               "targetport: x%px, private x%px "
                                "portnm %llx nodenm %llx segs %d qs %d\n",
                                phba->targetport, tgtp,
                                pinfo.port_name, pinfo.node_name,
@@ -1555,7 +1555,7 @@ lpfc_nvmet_update_targetport(struct lpfc_hba *phba)
                return 0;
 
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
-                        "6007 Update NVMET port %p did x%x\n",
+                        "6007 Update NVMET port x%px did x%x\n",
                         phba->targetport, vport->fc_myDID);
 
        phba->targetport->port_id = vport->fc_myDID;
@@ -1790,12 +1790,8 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
                        lpfc_nvmet_defer_release(phba, ctxp);
                        spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
                }
-               if (ctxp->state == LPFC_NVMET_STE_RCV)
-                       lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid,
-                                                        ctxp->oxid);
-               else
-                       lpfc_nvmet_sol_fcp_issue_abort(phba, ctxp, ctxp->sid,
-                                                      ctxp->oxid);
+               lpfc_nvmet_sol_fcp_issue_abort(phba, ctxp, ctxp->sid,
+                                              ctxp->oxid);
 
                lpfc_sli4_seq_abort_rsp(vport, fc_hdr, 1);
                return 0;
@@ -1922,7 +1918,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
        if (phba->targetport) {
                tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
                for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
-                       wq = phba->sli4_hba.hdwq[qidx].nvme_wq;
+                       wq = phba->sli4_hba.hdwq[qidx].io_wq;
                        lpfc_nvmet_wqfull_flush(phba, wq, NULL);
                }
                tgtp->tport_unreg_cmp = &tport_unreg_cmp;
@@ -1930,7 +1926,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
                if (!wait_for_completion_timeout(tgtp->tport_unreg_cmp,
                                        msecs_to_jiffies(LPFC_NVMET_WAIT_TMO)))
                        lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
-                                       "6179 Unreg targetport %p timeout "
+                                       "6179 Unreg targetport x%px timeout "
                                        "reached.\n", phba->targetport);
                lpfc_nvmet_cleanup_io_context(phba);
        }
@@ -3113,7 +3109,7 @@ lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
        atomic_inc(&tgtp->xmt_ls_abort_cmpl);
 
        lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
-                       "6083 Abort cmpl: ctx %p WCQE:%08x %08x %08x %08x\n",
+                       "6083 Abort cmpl: ctx x%px WCQE:%08x %08x %08x %08x\n",
                        ctxp, wcqe->word0, wcqe->total_data_placed,
                        result, wcqe->word3);
 
@@ -3299,7 +3295,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
         */
        spin_lock_irqsave(&phba->hbalock, flags);
        /* driver queued commands are in process of being flushed */
-       if (phba->hba_flag & HBA_NVME_IOQ_FLUSH) {
+       if (phba->hba_flag & HBA_IOQ_FLUSH) {
                spin_unlock_irqrestore(&phba->hbalock, flags);
                atomic_inc(&tgtp->xmt_abort_rsp_error);
                lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
@@ -3334,7 +3330,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
        /* WQEs are reused.  Clear stale data and set key fields to
         * zero like ia, iaab, iaar, xri_tag, and ctxt_tag.
         */
-       memset(abts_wqe, 0, sizeof(union lpfc_wqe));
+       memset(abts_wqe, 0, sizeof(*abts_wqe));
 
        /* word 3 */
        bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG);
index f9df800..fe10976 100644 (file)
@@ -53,8 +53,6 @@
 #define LPFC_RESET_WAIT  2
 #define LPFC_ABORT_WAIT  2
 
-int _dump_buf_done = 1;
-
 static char *dif_op_str[] = {
        "PROT_NORMAL",
        "PROT_READ_INSERT",
@@ -89,63 +87,6 @@ lpfc_release_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_io_buf *psb);
 static int
 lpfc_prot_group_type(struct lpfc_hba *phba, struct scsi_cmnd *sc);
 
-static void
-lpfc_debug_save_data(struct lpfc_hba *phba, struct scsi_cmnd *cmnd)
-{
-       void *src, *dst;
-       struct scatterlist *sgde = scsi_sglist(cmnd);
-
-       if (!_dump_buf_data) {
-               lpfc_printf_log(phba, KERN_ERR, LOG_BG,
-                       "9050 BLKGRD: ERROR %s _dump_buf_data is NULL\n",
-                               __func__);
-               return;
-       }
-
-
-       if (!sgde) {
-               lpfc_printf_log(phba, KERN_ERR, LOG_BG,
-                       "9051 BLKGRD: ERROR: data scatterlist is null\n");
-               return;
-       }
-
-       dst = (void *) _dump_buf_data;
-       while (sgde) {
-               src = sg_virt(sgde);
-               memcpy(dst, src, sgde->length);
-               dst += sgde->length;
-               sgde = sg_next(sgde);
-       }
-}
-
-static void
-lpfc_debug_save_dif(struct lpfc_hba *phba, struct scsi_cmnd *cmnd)
-{
-       void *src, *dst;
-       struct scatterlist *sgde = scsi_prot_sglist(cmnd);
-
-       if (!_dump_buf_dif) {
-               lpfc_printf_log(phba, KERN_ERR, LOG_BG,
-                       "9052 BLKGRD: ERROR %s _dump_buf_data is NULL\n",
-                               __func__);
-               return;
-       }
-
-       if (!sgde) {
-               lpfc_printf_log(phba, KERN_ERR, LOG_BG,
-                       "9053 BLKGRD: ERROR: prot scatterlist is null\n");
-               return;
-       }
-
-       dst = _dump_buf_dif;
-       while (sgde) {
-               src = sg_virt(sgde);
-               memcpy(dst, src, sgde->length);
-               dst += sgde->length;
-               sgde = sg_next(sgde);
-       }
-}
-
 static inline unsigned
 lpfc_cmd_blksize(struct scsi_cmnd *sc)
 {
@@ -537,29 +478,32 @@ lpfc_sli4_vport_delete_fcp_xri_aborted(struct lpfc_vport *vport)
        for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
                qp = &phba->sli4_hba.hdwq[idx];
 
-               spin_lock(&qp->abts_scsi_buf_list_lock);
+               spin_lock(&qp->abts_io_buf_list_lock);
                list_for_each_entry_safe(psb, next_psb,
-                                        &qp->lpfc_abts_scsi_buf_list, list) {
+                                        &qp->lpfc_abts_io_buf_list, list) {
+                       /* iocb_flag is a bit mask; skip NVME buffers */
+                       if (psb->cur_iocbq.iocb_flag & LPFC_IO_NVME)
+                               continue;
                        if (psb->rdata && psb->rdata->pnode &&
                            psb->rdata->pnode->vport == vport)
                                psb->rdata = NULL;
                }
-               spin_unlock(&qp->abts_scsi_buf_list_lock);
+               spin_unlock(&qp->abts_io_buf_list_lock);
        }
        spin_unlock_irqrestore(&phba->hbalock, iflag);
 }
 
 /**
- * lpfc_sli4_fcp_xri_aborted - Fast-path process of fcp xri abort
+ * lpfc_sli4_io_xri_aborted - Fast-path process of fcp or nvme xri abort
  * @phba: pointer to lpfc hba data structure.
  * @axri: pointer to the fcp xri abort wcqe structure.
  *
  * This routine is invoked by the worker thread to process a SLI4 fast-path
- * FCP aborted xri.
+ * FCP or NVME aborted xri.
  **/
 void
-lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *phba,
-                         struct sli4_wcqe_xri_aborted *axri, int idx)
+lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
+                        struct sli4_wcqe_xri_aborted *axri, int idx)
 {
        uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
        uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri);
@@ -577,16 +521,25 @@ lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *phba,
 
        qp = &phba->sli4_hba.hdwq[idx];
        spin_lock_irqsave(&phba->hbalock, iflag);
-       spin_lock(&qp->abts_scsi_buf_list_lock);
+       spin_lock(&qp->abts_io_buf_list_lock);
        list_for_each_entry_safe(psb, next_psb,
-               &qp->lpfc_abts_scsi_buf_list, list) {
+               &qp->lpfc_abts_io_buf_list, list) {
                if (psb->cur_iocbq.sli4_xritag == xri) {
-                       list_del(&psb->list);
-                       qp->abts_scsi_io_bufs--;
+                       list_del_init(&psb->list);
                        psb->exch_busy = 0;
                        psb->status = IOSTAT_SUCCESS;
-                       spin_unlock(
-                               &qp->abts_scsi_buf_list_lock);
+#ifdef BUILD_NVME
+                       if (psb->cur_iocbq.iocb_flag == LPFC_IO_NVME) {
+                               qp->abts_nvme_io_bufs--;
+                               spin_unlock(&qp->abts_io_buf_list_lock);
+                               spin_unlock_irqrestore(&phba->hbalock, iflag);
+                               lpfc_sli4_nvme_xri_aborted(phba, axri, psb);
+                               return;
+                       }
+#endif
+                       qp->abts_scsi_io_bufs--;
+                       spin_unlock(&qp->abts_io_buf_list_lock);
+
                        if (psb->rdata && psb->rdata->pnode)
                                ndlp = psb->rdata->pnode;
                        else
@@ -605,12 +558,12 @@ lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *phba,
                        return;
                }
        }
-       spin_unlock(&qp->abts_scsi_buf_list_lock);
+       spin_unlock(&qp->abts_io_buf_list_lock);
        for (i = 1; i <= phba->sli.last_iotag; i++) {
                iocbq = phba->sli.iocbq_lookup[i];
 
-               if (!(iocbq->iocb_flag &  LPFC_IO_FCP) ||
-                       (iocbq->iocb_flag & LPFC_IO_LIBDFC))
+               if (!(iocbq->iocb_flag & LPFC_IO_FCP) ||
+                   (iocbq->iocb_flag & LPFC_IO_LIBDFC))
                        continue;
                if (iocbq->sli4_xritag != xri)
                        continue;
@@ -685,8 +638,9 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
        IOCB_t *iocb;
        dma_addr_t pdma_phys_fcp_rsp;
        dma_addr_t pdma_phys_fcp_cmd;
-       uint32_t sgl_size, cpu, idx;
+       uint32_t cpu, idx;
        int tag;
+       struct fcp_cmd_rsp_buf *tmp = NULL;
 
        cpu = raw_smp_processor_id();
        if (cmnd && phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) {
@@ -704,9 +658,6 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
                return NULL;
        }
 
-       sgl_size = phba->cfg_sg_dma_buf_size -
-               (sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp));
-
        /* Setup key fields in buffer that may have been changed
         * if other protocols used this buffer.
         */
@@ -721,9 +672,12 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
        lpfc_cmd->prot_data_type = 0;
 #endif
-       lpfc_cmd->fcp_cmnd = (lpfc_cmd->data + sgl_size);
-       lpfc_cmd->fcp_rsp = (struct fcp_rsp *)((uint8_t *)lpfc_cmd->fcp_cmnd +
-                               sizeof(struct fcp_cmnd));
+       tmp = lpfc_get_cmd_rsp_buf_per_hdwq(phba, lpfc_cmd);
+       if (!tmp)
+               return NULL;
+
+       lpfc_cmd->fcp_cmnd = tmp->fcp_cmnd;
+       lpfc_cmd->fcp_rsp = tmp->fcp_rsp;
 
        /*
         * The first two SGEs are the FCP_CMD and FCP_RSP.
@@ -731,7 +685,7 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
         * first two and leave the rest for queuecommand.
         */
        sgl = (struct sli4_sge *)lpfc_cmd->dma_sgl;
-       pdma_phys_fcp_cmd = (lpfc_cmd->dma_handle + sgl_size);
+       pdma_phys_fcp_cmd = tmp->fcp_cmd_rsp_dma_handle;
        sgl->addr_hi = cpu_to_le32(putPaddrHigh(pdma_phys_fcp_cmd));
        sgl->addr_lo = cpu_to_le32(putPaddrLow(pdma_phys_fcp_cmd));
        sgl->word2 = le32_to_cpu(sgl->word2);
@@ -835,11 +789,11 @@ lpfc_release_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *psb)
 
        qp = psb->hdwq;
        if (psb->exch_busy) {
-               spin_lock_irqsave(&qp->abts_scsi_buf_list_lock, iflag);
+               spin_lock_irqsave(&qp->abts_io_buf_list_lock, iflag);
                psb->pCmd = NULL;
-               list_add_tail(&psb->list, &qp->lpfc_abts_scsi_buf_list);
+               list_add_tail(&psb->list, &qp->lpfc_abts_io_buf_list);
                qp->abts_scsi_io_bufs++;
-               spin_unlock_irqrestore(&qp->abts_scsi_buf_list_lock, iflag);
+               spin_unlock_irqrestore(&qp->abts_io_buf_list_lock, iflag);
        } else {
                lpfc_release_io_buf(phba, (struct lpfc_io_buf *)psb, qp);
        }
@@ -918,9 +872,10 @@ lpfc_scsi_prep_dma_buf_s3(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
                               "dma_map_sg.  Config %d, seg_cnt %d\n",
                               __func__, phba->cfg_sg_seg_cnt,
                               lpfc_cmd->seg_cnt);
+                       WARN_ON_ONCE(lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt);
                        lpfc_cmd->seg_cnt = 0;
                        scsi_dma_unmap(scsi_cmnd);
-                       return 1;
+                       return 2;
                }
 
                /*
@@ -1774,7 +1729,7 @@ lpfc_bg_setup_bpl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 
        if (!sgpe || !sgde) {
                lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
-                               "9020 Invalid s/g entry: data=0x%p prot=0x%p\n",
+                               "9020 Invalid s/g entry: data=x%px prot=x%px\n",
                                sgpe, sgde);
                return 0;
        }
@@ -1989,7 +1944,8 @@ out:
  **/
 static int
 lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
-               struct sli4_sge *sgl, int datasegcnt)
+               struct sli4_sge *sgl, int datasegcnt,
+               struct lpfc_io_buf *lpfc_cmd)
 {
        struct scatterlist *sgde = NULL; /* s/g data entry */
        struct sli4_sge_diseed *diseed = NULL;
@@ -2003,6 +1959,9 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
        uint32_t checking = 1;
        uint32_t dma_len;
        uint32_t dma_offset = 0;
+       struct sli4_hybrid_sgl *sgl_xtra = NULL;
+       int j;
+       bool lsp_just_set = false;
 
        status  = lpfc_sc_to_bg_opcodes(phba, sc, &txop, &rxop);
        if (status)
@@ -2062,23 +2021,64 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
        sgl++;
 
        /* assumption: caller has already run dma_map_sg on command data */
-       scsi_for_each_sg(sc, sgde, datasegcnt, i) {
-               physaddr = sg_dma_address(sgde);
-               dma_len = sg_dma_len(sgde);
-               sgl->addr_lo = cpu_to_le32(putPaddrLow(physaddr));
-               sgl->addr_hi = cpu_to_le32(putPaddrHigh(physaddr));
-               if ((i + 1) == datasegcnt)
-                       bf_set(lpfc_sli4_sge_last, sgl, 1);
-               else
-                       bf_set(lpfc_sli4_sge_last, sgl, 0);
-               bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
-               bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA);
+       sgde = scsi_sglist(sc);
+       j = 3;
+       for (i = 0; i < datasegcnt; i++) {
+               /* clear it */
+               sgl->word2 = 0;
 
-               sgl->sge_len = cpu_to_le32(dma_len);
-               dma_offset += dma_len;
+               /* do we need to expand the segment */
+               if (!lsp_just_set && !((j + 1) % phba->border_sge_num) &&
+                   ((datasegcnt - 1) != i)) {
+                       /* set LSP type */
+                       bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_LSP);
+
+                       sgl_xtra = lpfc_get_sgl_per_hdwq(phba, lpfc_cmd);
+
+                       if (unlikely(!sgl_xtra)) {
+                               lpfc_cmd->seg_cnt = 0;
+                               return 0;
+                       }
+                       sgl->addr_lo = cpu_to_le32(putPaddrLow(
+                                               sgl_xtra->dma_phys_sgl));
+                       sgl->addr_hi = cpu_to_le32(putPaddrHigh(
+                                               sgl_xtra->dma_phys_sgl));
+
+               } else {
+                       bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA);
+               }
+
+               if (!(bf_get(lpfc_sli4_sge_type, sgl) & LPFC_SGE_TYPE_LSP)) {
+                       if ((datasegcnt - 1) == i)
+                               bf_set(lpfc_sli4_sge_last, sgl, 1);
+                       physaddr = sg_dma_address(sgde);
+                       dma_len = sg_dma_len(sgde);
+                       sgl->addr_lo = cpu_to_le32(putPaddrLow(physaddr));
+                       sgl->addr_hi = cpu_to_le32(putPaddrHigh(physaddr));
+
+                       bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
+                       sgl->word2 = cpu_to_le32(sgl->word2);
+                       sgl->sge_len = cpu_to_le32(dma_len);
+
+                       dma_offset += dma_len;
+                       sgde = sg_next(sgde);
+
+                       sgl++;
+                       num_sge++;
+                       lsp_just_set = false;
+
+               } else {
+                       sgl->word2 = cpu_to_le32(sgl->word2);
+                       sgl->sge_len = cpu_to_le32(phba->cfg_sg_dma_buf_size);
+
+                       sgl = (struct sli4_sge *)sgl_xtra->dma_sgl;
+                       i = i - 1;
+
+                       lsp_just_set = true;
+               }
+
+               j++;
 
-               sgl++;
-               num_sge++;
        }
 
 out:
@@ -2124,7 +2124,8 @@ out:
  **/
 static int
 lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
-               struct sli4_sge *sgl, int datacnt, int protcnt)
+               struct sli4_sge *sgl, int datacnt, int protcnt,
+               struct lpfc_io_buf *lpfc_cmd)
 {
        struct scatterlist *sgde = NULL; /* s/g data entry */
        struct scatterlist *sgpe = NULL; /* s/g prot entry */
@@ -2146,14 +2147,15 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 #endif
        uint32_t checking = 1;
        uint32_t dma_offset = 0;
-       int num_sge = 0;
+       int num_sge = 0, j = 2;
+       struct sli4_hybrid_sgl *sgl_xtra = NULL;
 
        sgpe = scsi_prot_sglist(sc);
        sgde = scsi_sglist(sc);
 
        if (!sgpe || !sgde) {
                lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
-                               "9082 Invalid s/g entry: data=0x%p prot=0x%p\n",
+                               "9082 Invalid s/g entry: data=x%px prot=x%px\n",
                                sgpe, sgde);
                return 0;
        }
@@ -2179,9 +2181,37 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
        split_offset = 0;
        do {
                /* Check to see if we ran out of space */
-               if (num_sge >= (phba->cfg_total_seg_cnt - 2))
+               if ((num_sge >= (phba->cfg_total_seg_cnt - 2)) &&
+                   !(phba->cfg_xpsgl))
                        return num_sge + 3;
 
+               /* DISEED and DIF have to be together */
+               if (!((j + 1) % phba->border_sge_num) ||
+                   !((j + 2) % phba->border_sge_num) ||
+                   !((j + 3) % phba->border_sge_num)) {
+                       sgl->word2 = 0;
+
+                       /* set LSP type */
+                       bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_LSP);
+
+                       sgl_xtra = lpfc_get_sgl_per_hdwq(phba, lpfc_cmd);
+
+                       if (unlikely(!sgl_xtra)) {
+                               goto out;
+                       } else {
+                               sgl->addr_lo = cpu_to_le32(putPaddrLow(
+                                               sgl_xtra->dma_phys_sgl));
+                               sgl->addr_hi = cpu_to_le32(putPaddrHigh(
+                                                      sgl_xtra->dma_phys_sgl));
+                       }
+
+                       sgl->word2 = cpu_to_le32(sgl->word2);
+                       sgl->sge_len = cpu_to_le32(phba->cfg_sg_dma_buf_size);
+
+                       sgl = (struct sli4_sge *)sgl_xtra->dma_sgl;
+                       j = 0;
+               }
+
                /* setup DISEED with what we have */
                diseed = (struct sli4_sge_diseed *) sgl;
                memset(diseed, 0, sizeof(struct sli4_sge_diseed));
@@ -2228,7 +2258,9 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 
                /* advance sgl and increment bde count */
                num_sge++;
+
                sgl++;
+               j++;
 
                /* setup the first BDE that points to protection buffer */
                protphysaddr = sg_dma_address(sgpe) + protgroup_offset;
@@ -2243,6 +2275,7 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
                sgl->addr_hi = le32_to_cpu(putPaddrHigh(protphysaddr));
                sgl->addr_lo = le32_to_cpu(putPaddrLow(protphysaddr));
                sgl->word2 = cpu_to_le32(sgl->word2);
+               sgl->sge_len = 0;
 
                protgrp_blks = protgroup_len / 8;
                protgrp_bytes = protgrp_blks * blksize;
@@ -2263,9 +2296,14 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
                /* setup SGE's for data blocks associated with DIF data */
                pgdone = 0;
                subtotal = 0; /* total bytes processed for current prot grp */
+
+               sgl++;
+               j++;
+
                while (!pgdone) {
                        /* Check to see if we ran out of space */
-                       if (num_sge >= phba->cfg_total_seg_cnt)
+                       if ((num_sge >= phba->cfg_total_seg_cnt) &&
+                           !phba->cfg_xpsgl)
                                return num_sge + 1;
 
                        if (!sgde) {
@@ -2274,60 +2312,101 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
                                                __func__);
                                return 0;
                        }
-                       sgl++;
-                       dataphysaddr = sg_dma_address(sgde) + split_offset;
 
-                       remainder = sg_dma_len(sgde) - split_offset;
+                       if (!((j + 1) % phba->border_sge_num)) {
+                               sgl->word2 = 0;
 
-                       if ((subtotal + remainder) <= protgrp_bytes) {
-                               /* we can use this whole buffer */
-                               dma_len = remainder;
-                               split_offset = 0;
+                               /* set LSP type */
+                               bf_set(lpfc_sli4_sge_type, sgl,
+                                      LPFC_SGE_TYPE_LSP);
 
-                               if ((subtotal + remainder) == protgrp_bytes)
-                                       pgdone = 1;
+                               sgl_xtra = lpfc_get_sgl_per_hdwq(phba,
+                                                                lpfc_cmd);
+
+                               if (unlikely(!sgl_xtra)) {
+                                       goto out;
+                               } else {
+                                       sgl->addr_lo = cpu_to_le32(
+                                         putPaddrLow(sgl_xtra->dma_phys_sgl));
+                                       sgl->addr_hi = cpu_to_le32(
+                                         putPaddrHigh(sgl_xtra->dma_phys_sgl));
+                               }
+
+                               sgl->word2 = cpu_to_le32(sgl->word2);
+                               sgl->sge_len = cpu_to_le32(
+                                                    phba->cfg_sg_dma_buf_size);
+
+                               sgl = (struct sli4_sge *)sgl_xtra->dma_sgl;
                        } else {
-                               /* must split this buffer with next prot grp */
-                               dma_len = protgrp_bytes - subtotal;
-                               split_offset += dma_len;
-                       }
+                               dataphysaddr = sg_dma_address(sgde) +
+                                                                  split_offset;
 
-                       subtotal += dma_len;
+                               remainder = sg_dma_len(sgde) - split_offset;
 
-                       sgl->addr_lo = cpu_to_le32(putPaddrLow(dataphysaddr));
-                       sgl->addr_hi = cpu_to_le32(putPaddrHigh(dataphysaddr));
-                       bf_set(lpfc_sli4_sge_last, sgl, 0);
-                       bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
-                       bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA);
+                               if ((subtotal + remainder) <= protgrp_bytes) {
+                                       /* we can use this whole buffer */
+                                       dma_len = remainder;
+                                       split_offset = 0;
 
-                       sgl->sge_len = cpu_to_le32(dma_len);
-                       dma_offset += dma_len;
+                                       if ((subtotal + remainder) ==
+                                                                 protgrp_bytes)
+                                               pgdone = 1;
+                               } else {
+                                       /* must split this buffer with next
+                                        * prot grp
+                                        */
+                                       dma_len = protgrp_bytes - subtotal;
+                                       split_offset += dma_len;
+                               }
 
-                       num_sge++;
-                       curr_data++;
+                               subtotal += dma_len;
 
-                       if (split_offset)
-                               break;
+                               sgl->word2 = 0;
+                               sgl->addr_lo = cpu_to_le32(putPaddrLow(
+                                                                dataphysaddr));
+                               sgl->addr_hi = cpu_to_le32(putPaddrHigh(
+                                                                dataphysaddr));
+                               bf_set(lpfc_sli4_sge_last, sgl, 0);
+                               bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
+                               bf_set(lpfc_sli4_sge_type, sgl,
+                                      LPFC_SGE_TYPE_DATA);
 
-                       /* Move to the next s/g segment if possible */
-                       sgde = sg_next(sgde);
+                               sgl->sge_len = cpu_to_le32(dma_len);
+                               dma_offset += dma_len;
+
+                               num_sge++;
+                               curr_data++;
+
+                               if (split_offset) {
+                                       sgl++;
+                                       j++;
+                                       break;
+                               }
+
+                               /* Move to the next s/g segment if possible */
+                               sgde = sg_next(sgde);
+
+                               sgl++;
+                       }
+
+                       j++;
                }
 
                if (protgroup_offset) {
                        /* update the reference tag */
                        reftag += protgrp_blks;
-                       sgl++;
                        continue;
                }
 
                /* are we done ? */
                if (curr_prot == protcnt) {
+                       /* mark the last SGL */
+                       sgl--;
                        bf_set(lpfc_sli4_sge_last, sgl, 1);
                        alldone = 1;
                } else if (curr_prot < protcnt) {
                        /* advance to next prot buffer */
                        sgpe = sg_next(sgpe);
-                       sgl++;
 
                        /* update the reference tag */
                        reftag += protgrp_blks;
@@ -2430,7 +2509,10 @@ lpfc_bg_scsi_adjust_dl(struct lpfc_hba *phba,
  *
  * This is the protection/DIF aware version of
  * lpfc_scsi_prep_dma_buf(). It may be a good idea to combine the
- * two functions eventually, but for now, it's here
+ * two functions eventually, but for now, it's here.
+ * RETURNS 0 - SUCCESS,
+ *         1 - Failed DMA map, retry.
+ *         2 - Invalid scsi cmd or prot-type. Do not retry.
  **/
 static int
 lpfc_bg_scsi_prep_dma_buf_s3(struct lpfc_hba *phba,
@@ -2444,6 +2526,7 @@ lpfc_bg_scsi_prep_dma_buf_s3(struct lpfc_hba *phba,
        int datasegcnt, protsegcnt, datadir = scsi_cmnd->sc_data_direction;
        int prot_group_type = 0;
        int fcpdl;
+       int ret = 1;
        struct lpfc_vport *vport = phba->pport;
 
        /*
@@ -2467,8 +2550,11 @@ lpfc_bg_scsi_prep_dma_buf_s3(struct lpfc_hba *phba,
                lpfc_cmd->seg_cnt = datasegcnt;
 
                /* First check if data segment count from SCSI Layer is good */
-               if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt)
+               if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt) {
+                       WARN_ON_ONCE(lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt);
+                       ret = 2;
                        goto err;
+               }
 
                prot_group_type = lpfc_prot_group_type(phba, scsi_cmnd);
 
@@ -2476,14 +2562,18 @@ lpfc_bg_scsi_prep_dma_buf_s3(struct lpfc_hba *phba,
                case LPFC_PG_TYPE_NO_DIF:
 
                        /* Here we need to add a PDE5 and PDE6 to the count */
-                       if ((lpfc_cmd->seg_cnt + 2) > phba->cfg_total_seg_cnt)
+                       if ((lpfc_cmd->seg_cnt + 2) > phba->cfg_total_seg_cnt) {
+                               ret = 2;
                                goto err;
+                       }
 
                        num_bde = lpfc_bg_setup_bpl(phba, scsi_cmnd, bpl,
                                        datasegcnt);
                        /* we should have 2 or more entries in buffer list */
-                       if (num_bde < 2)
+                       if (num_bde < 2) {
+                               ret = 2;
                                goto err;
+                       }
                        break;
 
                case LPFC_PG_TYPE_DIF_BUF:
@@ -2507,15 +2597,19 @@ lpfc_bg_scsi_prep_dma_buf_s3(struct lpfc_hba *phba,
                         * protection data segment.
                         */
                        if ((lpfc_cmd->prot_seg_cnt * 4) >
-                           (phba->cfg_total_seg_cnt - 2))
+                           (phba->cfg_total_seg_cnt - 2)) {
+                               ret = 2;
                                goto err;
+                       }
 
                        num_bde = lpfc_bg_setup_bpl_prot(phba, scsi_cmnd, bpl,
                                        datasegcnt, protsegcnt);
                        /* we should have 3 or more entries in buffer list */
                        if ((num_bde < 3) ||
-                           (num_bde > phba->cfg_total_seg_cnt))
+                           (num_bde > phba->cfg_total_seg_cnt)) {
+                               ret = 2;
                                goto err;
+                       }
                        break;
 
                case LPFC_PG_TYPE_INVALID:
@@ -2526,7 +2620,7 @@ lpfc_bg_scsi_prep_dma_buf_s3(struct lpfc_hba *phba,
                        lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
                                        "9022 Unexpected protection group %i\n",
                                        prot_group_type);
-                       return 1;
+                       return 2;
                }
        }
 
@@ -2576,7 +2670,7 @@ err:
 
        lpfc_cmd->seg_cnt = 0;
        lpfc_cmd->prot_seg_cnt = 0;
-       return 1;
+       return ret;
 }
 
 /*
@@ -2809,26 +2903,6 @@ lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd,
        uint32_t bgstat = bgf->bgstat;
        uint64_t failing_sector = 0;
 
-       spin_lock(&_dump_buf_lock);
-       if (!_dump_buf_done) {
-               lpfc_printf_log(phba, KERN_ERR, LOG_BG,  "9070 BLKGRD: Saving"
-                       " Data for %u blocks to debugfs\n",
-                               (cmd->cmnd[7] << 8 | cmd->cmnd[8]));
-               lpfc_debug_save_data(phba, cmd);
-
-               /* If we have a prot sgl, save the DIF buffer */
-               if (lpfc_prot_group_type(phba, cmd) ==
-                               LPFC_PG_TYPE_DIF_BUF) {
-                       lpfc_printf_log(phba, KERN_ERR, LOG_BG, "9071 BLKGRD: "
-                               "Saving DIF for %u blocks to debugfs\n",
-                               (cmd->cmnd[7] << 8 | cmd->cmnd[8]));
-                       lpfc_debug_save_dif(phba, cmd);
-               }
-
-               _dump_buf_done = 1;
-       }
-       spin_unlock(&_dump_buf_lock);
-
        if (lpfc_bgs_get_invalid_prof(bgstat)) {
                cmd->result = DID_ERROR << 16;
                lpfc_printf_log(phba, KERN_WARNING, LOG_FCP | LOG_BG,
@@ -2962,7 +3036,8 @@ out:
  * field of @lpfc_cmd for device with SLI-4 interface spec.
  *
  * Return codes:
- *     1 - Error
+ *     2 - Error - Do not retry
+ *     1 - Error - Retry
  *     0 - Success
  **/
 static int
@@ -2978,8 +3053,10 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
        uint32_t num_bde = 0;
        uint32_t dma_len;
        uint32_t dma_offset = 0;
-       int nseg;
+       int nseg, i, j;
        struct ulp_bde64 *bde;
+       bool lsp_just_set = false;
+       struct sli4_hybrid_sgl *sgl_xtra = NULL;
 
        /*
         * There are three possibilities here - use scatter-gather segment, use
@@ -3006,15 +3083,17 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
                sgl += 1;
                first_data_sgl = sgl;
                lpfc_cmd->seg_cnt = nseg;
-               if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt) {
+               if (!phba->cfg_xpsgl &&
+                   lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt) {
                        lpfc_printf_log(phba, KERN_ERR, LOG_BG, "9074 BLKGRD:"
                                " %s: Too many sg segments from "
                                "dma_map_sg.  Config %d, seg_cnt %d\n",
                                __func__, phba->cfg_sg_seg_cnt,
                               lpfc_cmd->seg_cnt);
+                       WARN_ON_ONCE(lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt);
                        lpfc_cmd->seg_cnt = 0;
                        scsi_dma_unmap(scsi_cmnd);
-                       return 1;
+                       return 2;
                }
 
                /*
@@ -3026,22 +3105,80 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
                 * the IOCB. If it can't then the BDEs get added to a BPL as it
                 * does for SLI-2 mode.
                 */
-               scsi_for_each_sg(scsi_cmnd, sgel, nseg, num_bde) {
-                       physaddr = sg_dma_address(sgel);
-                       dma_len = sg_dma_len(sgel);
-                       sgl->addr_lo = cpu_to_le32(putPaddrLow(physaddr));
-                       sgl->addr_hi = cpu_to_le32(putPaddrHigh(physaddr));
-                       sgl->word2 = le32_to_cpu(sgl->word2);
-                       if ((num_bde + 1) == nseg)
+
+               /* for tracking segment boundaries */
+               sgel = scsi_sglist(scsi_cmnd);
+               j = 2;
+               for (i = 0; i < nseg; i++) {
+                       sgl->word2 = 0;
+                       if ((num_bde + 1) == nseg) {
                                bf_set(lpfc_sli4_sge_last, sgl, 1);
-                       else
+                               bf_set(lpfc_sli4_sge_type, sgl,
+                                      LPFC_SGE_TYPE_DATA);
+                       } else {
                                bf_set(lpfc_sli4_sge_last, sgl, 0);
-                       bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
-                       bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA);
-                       sgl->word2 = cpu_to_le32(sgl->word2);
-                       sgl->sge_len = cpu_to_le32(dma_len);
-                       dma_offset += dma_len;
-                       sgl++;
+
+                               /* do we need to expand the segment */
+                               if (!lsp_just_set &&
+                                   !((j + 1) % phba->border_sge_num) &&
+                                   ((nseg - 1) != i)) {
+                                       /* set LSP type */
+                                       bf_set(lpfc_sli4_sge_type, sgl,
+                                              LPFC_SGE_TYPE_LSP);
+
+                                       sgl_xtra = lpfc_get_sgl_per_hdwq(
+                                                       phba, lpfc_cmd);
+
+                                       if (unlikely(!sgl_xtra)) {
+                                               lpfc_cmd->seg_cnt = 0;
+                                               scsi_dma_unmap(scsi_cmnd);
+                                               return 1;
+                                       }
+                                       sgl->addr_lo = cpu_to_le32(putPaddrLow(
+                                                      sgl_xtra->dma_phys_sgl));
+                                       sgl->addr_hi = cpu_to_le32(putPaddrHigh(
+                                                      sgl_xtra->dma_phys_sgl));
+
+                               } else {
+                                       bf_set(lpfc_sli4_sge_type, sgl,
+                                              LPFC_SGE_TYPE_DATA);
+                               }
+                       }
+
+                       if (!(bf_get(lpfc_sli4_sge_type, sgl) &
+                                    LPFC_SGE_TYPE_LSP)) {
+                               if ((nseg - 1) == i)
+                                       bf_set(lpfc_sli4_sge_last, sgl, 1);
+
+                               physaddr = sg_dma_address(sgel);
+                               dma_len = sg_dma_len(sgel);
+                               sgl->addr_lo = cpu_to_le32(putPaddrLow(
+                                                          physaddr));
+                               sgl->addr_hi = cpu_to_le32(putPaddrHigh(
+                                                          physaddr));
+
+                               bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
+                               sgl->word2 = cpu_to_le32(sgl->word2);
+                               sgl->sge_len = cpu_to_le32(dma_len);
+
+                               dma_offset += dma_len;
+                               sgel = sg_next(sgel);
+
+                               sgl++;
+                               lsp_just_set = false;
+
+                       } else {
+                               sgl->word2 = cpu_to_le32(sgl->word2);
+                               sgl->sge_len = cpu_to_le32(
+                                                    phba->cfg_sg_dma_buf_size);
+
+                               sgl = (struct sli4_sge *)sgl_xtra->dma_sgl;
+                               i = i - 1;
+
+                               lsp_just_set = true;
+                       }
+
+                       j++;
                }
                /*
                 * Setup the first Payload BDE. For FCoE we just key off
@@ -3110,6 +3247,10 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
  * This is the protection/DIF aware version of
  * lpfc_scsi_prep_dma_buf(). It may be a good idea to combine the
  * two functions eventually, but for now, it's here
+ * Return codes:
+ *     2 - Error - Do not retry
+ *     1 - Error - Retry
+ *     0 - Success
  **/
 static int
 lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba,
@@ -3123,6 +3264,7 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba,
        int datasegcnt, protsegcnt, datadir = scsi_cmnd->sc_data_direction;
        int prot_group_type = 0;
        int fcpdl;
+       int ret = 1;
        struct lpfc_vport *vport = phba->pport;
 
        /*
@@ -3152,23 +3294,33 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba,
                lpfc_cmd->seg_cnt = datasegcnt;
 
                /* First check if data segment count from SCSI Layer is good */
-               if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt)
+               if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt &&
+                   !phba->cfg_xpsgl) {
+                       WARN_ON_ONCE(lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt);
+                       ret = 2;
                        goto err;
+               }
 
                prot_group_type = lpfc_prot_group_type(phba, scsi_cmnd);
 
                switch (prot_group_type) {
                case LPFC_PG_TYPE_NO_DIF:
                        /* Here we need to add a DISEED to the count */
-                       if ((lpfc_cmd->seg_cnt + 1) > phba->cfg_total_seg_cnt)
+                       if (((lpfc_cmd->seg_cnt + 1) >
+                                       phba->cfg_total_seg_cnt) &&
+                           !phba->cfg_xpsgl) {
+                               ret = 2;
                                goto err;
+                       }
 
                        num_sge = lpfc_bg_setup_sgl(phba, scsi_cmnd, sgl,
-                                       datasegcnt);
+                                       datasegcnt, lpfc_cmd);
 
                        /* we should have 2 or more entries in buffer list */
-                       if (num_sge < 2)
+                       if (num_sge < 2) {
+                               ret = 2;
                                goto err;
+                       }
                        break;
 
                case LPFC_PG_TYPE_DIF_BUF:
@@ -3190,17 +3342,23 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba,
                         * There is a minimun of 3 SGEs used for every
                         * protection data segment.
                         */
-                       if ((lpfc_cmd->prot_seg_cnt * 3) >
-                           (phba->cfg_total_seg_cnt - 2))
+                       if (((lpfc_cmd->prot_seg_cnt * 3) >
+                                       (phba->cfg_total_seg_cnt - 2)) &&
+                           !phba->cfg_xpsgl) {
+                               ret = 2;
                                goto err;
+                       }
 
                        num_sge = lpfc_bg_setup_sgl_prot(phba, scsi_cmnd, sgl,
-                                       datasegcnt, protsegcnt);
+                                       datasegcnt, protsegcnt, lpfc_cmd);
 
                        /* we should have 3 or more entries in buffer list */
-                       if ((num_sge < 3) ||
-                           (num_sge > phba->cfg_total_seg_cnt))
+                       if (num_sge < 3 ||
+                           (num_sge > phba->cfg_total_seg_cnt &&
+                            !phba->cfg_xpsgl)) {
+                               ret = 2;
                                goto err;
+                       }
                        break;
 
                case LPFC_PG_TYPE_INVALID:
@@ -3211,7 +3369,7 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba,
                        lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
                                        "9083 Unexpected protection group %i\n",
                                        prot_group_type);
-                       return 1;
+                       return 2;
                }
        }
 
@@ -3273,7 +3431,7 @@ err:
 
        lpfc_cmd->seg_cnt = 0;
        lpfc_cmd->prot_seg_cnt = 0;
-       return 1;
+       return ret;
 }
 
 /**
@@ -3839,7 +3997,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
                uint32_t *lp = (uint32_t *)cmd->sense_buffer;
 
                lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP,
-                                "0710 Iodone <%d/%llu> cmd %p, error "
+                                "0710 Iodone <%d/%llu> cmd x%px, error "
                                 "x%x SNS x%x x%x Data: x%x x%x\n",
                                 cmd->device->id, cmd->device->lun, cmd,
                                 cmd->result, *lp, *(lp + 3), cmd->retries,
@@ -4454,8 +4612,12 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
                err = lpfc_scsi_prep_dma_buf(phba, lpfc_cmd);
        }
 
-       if (err)
+       if (err == 2) {
+               cmnd->result = DID_ERROR << 16;
+               goto out_fail_command_release_buf;
+       } else if (err) {
                goto out_host_busy_free_buf;
+       }
 
        lpfc_scsi_prep_cmnd(vport, lpfc_cmd, ndlp);
 
@@ -4526,6 +4688,9 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
  out_tgt_busy:
        return SCSI_MLQUEUE_TARGET_BUSY;
 
+ out_fail_command_release_buf:
+       lpfc_release_scsi_buf(phba, lpfc_cmd);
+
  out_fail_command:
        cmnd->scsi_done(cmnd);
        return 0;
@@ -4568,7 +4733,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 
        spin_lock_irqsave(&phba->hbalock, flags);
        /* driver queued commands are in process of being flushed */
-       if (phba->hba_flag & HBA_FCP_IOQ_FLUSH) {
+       if (phba->hba_flag & HBA_IOQ_FLUSH) {
                lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
                        "3168 SCSI Layer abort requested I/O has been "
                        "flushed by LLD.\n");
@@ -4589,7 +4754,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 
        iocb = &lpfc_cmd->cur_iocbq;
        if (phba->sli_rev == LPFC_SLI_REV4) {
-               pring_s4 = phba->sli4_hba.hdwq[iocb->hba_wqidx].fcp_wq->pring;
+               pring_s4 = phba->sli4_hba.hdwq[iocb->hba_wqidx].io_wq->pring;
                if (!pring_s4) {
                        ret = FAILED;
                        goto out_unlock_buf;
@@ -4956,7 +5121,7 @@ lpfc_chk_tgt_mapped(struct lpfc_vport *vport, struct scsi_cmnd *cmnd)
        rdata = lpfc_rport_data_from_scsi_device(cmnd->device);
        if (!rdata) {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP,
-                       "0797 Tgt Map rport failure: rdata x%p\n", rdata);
+                       "0797 Tgt Map rport failure: rdata x%px\n", rdata);
                return FAILED;
        }
        pnode = rdata->pnode;
@@ -5054,7 +5219,7 @@ lpfc_device_reset_handler(struct scsi_cmnd *cmnd)
        rdata = lpfc_rport_data_from_scsi_device(cmnd->device);
        if (!rdata || !rdata->pnode) {
                lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP,
-                                "0798 Device Reset rdata failure: rdata x%p\n",
+                                "0798 Device Reset rdata failure: rdata x%px\n",
                                 rdata);
                return FAILED;
        }
@@ -5066,7 +5231,7 @@ lpfc_device_reset_handler(struct scsi_cmnd *cmnd)
        status = lpfc_chk_tgt_mapped(vport, cmnd);
        if (status == FAILED) {
                lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP,
-                       "0721 Device Reset rport failure: rdata x%p\n", rdata);
+                       "0721 Device Reset rport failure: rdata x%px\n", rdata);
                return FAILED;
        }
 
@@ -5125,7 +5290,7 @@ lpfc_target_reset_handler(struct scsi_cmnd *cmnd)
        rdata = lpfc_rport_data_from_scsi_device(cmnd->device);
        if (!rdata || !rdata->pnode) {
                lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP,
-                                "0799 Target Reset rdata failure: rdata x%p\n",
+                                "0799 Target Reset rdata failure: rdata x%px\n",
                                 rdata);
                return FAILED;
        }
@@ -5137,7 +5302,7 @@ lpfc_target_reset_handler(struct scsi_cmnd *cmnd)
        status = lpfc_chk_tgt_mapped(vport, cmnd);
        if (status == FAILED) {
                lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP,
-                       "0722 Target Reset rport failure: rdata x%p\n", rdata);
+                       "0722 Target Reset rport failure: rdata x%px\n", rdata);
                if (pnode) {
                        spin_lock_irq(shost->host_lock);
                        pnode->nlp_flag &= ~NLP_NPR_ADISC;
@@ -5295,18 +5460,20 @@ lpfc_host_reset_handler(struct scsi_cmnd *cmnd)
        lpfc_offline(phba);
        rc = lpfc_sli_brdrestart(phba);
        if (rc)
-               ret = FAILED;
+               goto error;
+
        rc = lpfc_online(phba);
        if (rc)
-               ret = FAILED;
+               goto error;
+
        lpfc_unblock_mgmt_io(phba);
 
-       if (ret == FAILED) {
-               lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP,
-                                "3323 Failed host reset, bring it offline\n");
-               lpfc_sli4_offline_eratt(phba);
-       }
        return ret;
+error:
+       lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP,
+                        "3323 Failed host reset\n");
+       lpfc_unblock_mgmt_io(phba);
+       return FAILED;
 }
 
 /**
@@ -5870,7 +6037,7 @@ struct scsi_host_template lpfc_template_no_hr = {
        .sg_tablesize           = LPFC_DEFAULT_SG_SEG_CNT,
        .cmd_per_lun            = LPFC_CMD_PER_LUN,
        .shost_attrs            = lpfc_hba_attrs,
-       .max_sectors            = 0xFFFF,
+       .max_sectors            = 0xFFFFFFFF,
        .vendor_id              = LPFC_NL_VENDOR_ID,
        .change_queue_depth     = scsi_change_queue_depth,
        .track_queue_depth      = 1,
index f9e6a13..a0c6945 100644 (file)
@@ -1391,9 +1391,12 @@ lpfc_sli_cancel_iocbs(struct lpfc_hba *phba, struct list_head *iocblist,
 
        while (!list_empty(iocblist)) {
                list_remove_head(iocblist, piocb, struct lpfc_iocbq, list);
-               if (!piocb->iocb_cmpl)
-                       lpfc_sli_release_iocbq(phba, piocb);
-               else {
+               if (!piocb->iocb_cmpl) {
+                       if (piocb->iocb_flag & LPFC_IO_NVME)
+                               lpfc_nvme_cancel_iocb(phba, piocb);
+                       else
+                               lpfc_sli_release_iocbq(phba, piocb);
+               } else {
                        piocb->iocb.ulpStatus = ulpstatus;
                        piocb->iocb.un.ulpWord[4] = ulpWord4;
                        (piocb->iocb_cmpl) (phba, piocb, piocb);
@@ -2426,6 +2429,20 @@ lpfc_sli_wake_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
        return;
 }
 
+static void
+__lpfc_sli_rpi_release(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+{      /* Free @ndlp's RPI if NLP_RELEASE_RPI is set, then clear NLP_UNREG_INP. */
+       unsigned long iflags;   /* saved IRQ state for ndlp_lock */
+
+       if (ndlp->nlp_flag & NLP_RELEASE_RPI) {
+               lpfc_sli4_free_rpi(vport->phba, ndlp->nlp_rpi); /* called before ndlp_lock is taken */
+               spin_lock_irqsave(&vport->phba->ndlp_lock, iflags);
+               ndlp->nlp_flag &= ~NLP_RELEASE_RPI;     /* release request has been serviced */
+               ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR;   /* presumably the "no RPI assigned" sentinel - TODO confirm */
+               spin_unlock_irqrestore(&vport->phba->ndlp_lock, iflags);
+       }
+       ndlp->nlp_flag &= ~NLP_UNREG_INP;       /* cleared on every call; NOTE(review): done outside ndlp_lock */
+}
 
 /**
  * lpfc_sli_def_mbox_cmpl - Default mailbox completion handler
@@ -2497,7 +2514,7 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                                vport,
                                KERN_INFO, LOG_MBOX | LOG_DISCOVERY,
                                "1438 UNREG cmpl deferred mbox x%x "
-                               "on NPort x%x Data: x%x x%x %p\n",
+                               "on NPort x%x Data: x%x x%x %px\n",
                                ndlp->nlp_rpi, ndlp->nlp_DID,
                                ndlp->nlp_flag, ndlp->nlp_defer_did, ndlp);
 
@@ -2507,7 +2524,7 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                                ndlp->nlp_defer_did = NLP_EVT_NOTHING_PENDING;
                                lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
                        } else {
-                               ndlp->nlp_flag &= ~NLP_UNREG_INP;
+                               __lpfc_sli_rpi_release(vport, ndlp);
                        }
                        pmb->ctx_ndlp = NULL;
                }
@@ -2555,7 +2572,7 @@ lpfc_sli4_unreg_rpi_cmpl_clr(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                                        vport, KERN_INFO, LOG_MBOX | LOG_SLI,
                                         "0010 UNREG_LOGIN vpi:%x "
                                         "rpi:%x DID:%x defer x%x flg x%x "
-                                        "map:%x %p\n",
+                                        "map:%x %px\n",
                                         vport->vpi, ndlp->nlp_rpi,
                                         ndlp->nlp_DID, ndlp->nlp_defer_did,
                                         ndlp->nlp_flag,
@@ -2573,7 +2590,7 @@ lpfc_sli4_unreg_rpi_cmpl_clr(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                                                vport, KERN_INFO, LOG_DISCOVERY,
                                                "4111 UNREG cmpl deferred "
                                                "clr x%x on "
-                                               "NPort x%x Data: x%x %p\n",
+                                               "NPort x%x Data: x%x x%px\n",
                                                ndlp->nlp_rpi, ndlp->nlp_DID,
                                                ndlp->nlp_defer_did, ndlp);
                                        ndlp->nlp_flag &= ~NLP_UNREG_INP;
@@ -2582,7 +2599,7 @@ lpfc_sli4_unreg_rpi_cmpl_clr(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                                        lpfc_issue_els_plogi(
                                                vport, ndlp->nlp_DID, 0);
                                } else {
-                                       ndlp->nlp_flag &= ~NLP_UNREG_INP;
+                                       __lpfc_sli_rpi_release(vport, ndlp);
                                }
                        }
                }
@@ -2695,7 +2712,7 @@ lpfc_sli_handle_mb_event(struct lpfc_hba *phba)
 
                /* Mailbox cmd <cmd> Cmpl <cmpl> */
                lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
-                               "(%d):0307 Mailbox cmd x%x (x%x/x%x) Cmpl x%p "
+                               "(%d):0307 Mailbox cmd x%x (x%x/x%x) Cmpl %ps "
                                "Data: x%x x%x x%x x%x x%x x%x x%x x%x x%x "
                                "x%x x%x x%x\n",
                                pmb->vport ? pmb->vport->vpi : 0,
@@ -3961,7 +3978,7 @@ lpfc_sli_abort_fcp_rings(struct lpfc_hba *phba)
        /* Look on all the FCP Rings for the iotag */
        if (phba->sli_rev >= LPFC_SLI_REV4) {
                for (i = 0; i < phba->cfg_hdw_queue; i++) {
-                       pring = phba->sli4_hba.hdwq[i].fcp_wq->pring;
+                       pring = phba->sli4_hba.hdwq[i].io_wq->pring;
                        lpfc_sli_abort_iocb_ring(phba, pring);
                }
        } else {
@@ -3971,17 +3988,17 @@ lpfc_sli_abort_fcp_rings(struct lpfc_hba *phba)
 }
 
 /**
- * lpfc_sli_flush_fcp_rings - flush all iocbs in the fcp ring
+ * lpfc_sli_flush_io_rings - flush all iocbs in the IO ring
  * @phba: Pointer to HBA context object.
  *
- * This function flushes all iocbs in the fcp ring and frees all the iocb
+ * This function flushes all iocbs in the IO ring and frees all the iocb
  * objects in txq and txcmplq. This function will not issue abort iocbs
  * for all the iocb commands in txcmplq, they will just be returned with
  * IOERR_SLI_DOWN. This function is invoked with EEH when device's PCI
  * slot has been permanently disabled.
  **/
 void
-lpfc_sli_flush_fcp_rings(struct lpfc_hba *phba)
+lpfc_sli_flush_io_rings(struct lpfc_hba *phba)
 {
        LIST_HEAD(txq);
        LIST_HEAD(txcmplq);
@@ -3992,13 +4009,13 @@ lpfc_sli_flush_fcp_rings(struct lpfc_hba *phba)
 
        spin_lock_irq(&phba->hbalock);
        /* Indicate the I/O queues are flushed */
-       phba->hba_flag |= HBA_FCP_IOQ_FLUSH;
+       phba->hba_flag |= HBA_IOQ_FLUSH;
        spin_unlock_irq(&phba->hbalock);
 
        /* Look on all the FCP Rings for the iotag */
        if (phba->sli_rev >= LPFC_SLI_REV4) {
                for (i = 0; i < phba->cfg_hdw_queue; i++) {
-                       pring = phba->sli4_hba.hdwq[i].fcp_wq->pring;
+                       pring = phba->sli4_hba.hdwq[i].io_wq->pring;
 
                        spin_lock_irq(&pring->ring_lock);
                        /* Retrieve everything on txq */
@@ -4046,56 +4063,6 @@ lpfc_sli_flush_fcp_rings(struct lpfc_hba *phba)
 }
 
 /**
- * lpfc_sli_flush_nvme_rings - flush all wqes in the nvme rings
- * @phba: Pointer to HBA context object.
- *
- * This function flushes all wqes in the nvme rings and frees all resources
- * in the txcmplq. This function does not issue abort wqes for the IO
- * commands in txcmplq, they will just be returned with
- * IOERR_SLI_DOWN. This function is invoked with EEH when device's PCI
- * slot has been permanently disabled.
- **/
-void
-lpfc_sli_flush_nvme_rings(struct lpfc_hba *phba)
-{
-       LIST_HEAD(txcmplq);
-       struct lpfc_sli_ring  *pring;
-       uint32_t i;
-       struct lpfc_iocbq *piocb, *next_iocb;
-
-       if ((phba->sli_rev < LPFC_SLI_REV4) ||
-           !(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
-               return;
-
-       /* Hint to other driver operations that a flush is in progress. */
-       spin_lock_irq(&phba->hbalock);
-       phba->hba_flag |= HBA_NVME_IOQ_FLUSH;
-       spin_unlock_irq(&phba->hbalock);
-
-       /* Cycle through all NVME rings and complete each IO with
-        * a local driver reason code.  This is a flush so no
-        * abort exchange to FW.
-        */
-       for (i = 0; i < phba->cfg_hdw_queue; i++) {
-               pring = phba->sli4_hba.hdwq[i].nvme_wq->pring;
-
-               spin_lock_irq(&pring->ring_lock);
-               list_for_each_entry_safe(piocb, next_iocb,
-                                        &pring->txcmplq, list)
-                       piocb->iocb_flag &= ~LPFC_IO_ON_TXCMPLQ;
-               /* Retrieve everything on the txcmplq */
-               list_splice_init(&pring->txcmplq, &txcmplq);
-               pring->txcmplq_cnt = 0;
-               spin_unlock_irq(&pring->ring_lock);
-
-               /* Flush the txcmpq &&&PAE */
-               lpfc_sli_cancel_iocbs(phba, &txcmplq,
-                                     IOSTAT_LOCAL_REJECT,
-                                     IOERR_SLI_DOWN);
-       }
-}
-
-/**
  * lpfc_sli_brdready_s3 - Check for sli3 host ready status
  * @phba: Pointer to HBA context object.
  * @mask: Bit mask to be checked.
@@ -4495,7 +4462,7 @@ lpfc_sli_brdreset(struct lpfc_hba *phba)
  * checking during resets the device. The caller is not required to hold
  * any locks.
  *
- * This function returns 0 always.
+ * This function returns 0 on success else returns negative error code.
  **/
 int
 lpfc_sli4_brdreset(struct lpfc_hba *phba)
@@ -4652,8 +4619,10 @@ lpfc_sli_brdrestart_s4(struct lpfc_hba *phba)
        hba_aer_enabled = phba->hba_flag & HBA_AER_ENABLED;
 
        rc = lpfc_sli4_brdreset(phba);
-       if (rc)
-               return rc;
+       if (rc) {
+               phba->link_state = LPFC_HBA_ERROR;
+               goto hba_down_queue;
+       }
 
        spin_lock_irq(&phba->hbalock);
        phba->pport->stopped = 0;
@@ -4668,6 +4637,7 @@ lpfc_sli_brdrestart_s4(struct lpfc_hba *phba)
        if (hba_aer_enabled)
                pci_disable_pcie_error_reporting(phba->pcidev);
 
+hba_down_queue:
        lpfc_hba_down_post(phba);
        lpfc_sli4_queue_destroy(phba);
 
@@ -5584,10 +5554,8 @@ lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba)
                for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
                        qp = &sli4_hba->hdwq[qidx];
                        /* ARM the corresponding CQ */
-                       sli4_hba->sli4_write_cq_db(phba, qp->fcp_cq, 0,
-                                                  LPFC_QUEUE_REARM);
-                       sli4_hba->sli4_write_cq_db(phba, qp->nvme_cq, 0,
-                                                  LPFC_QUEUE_REARM);
+                       sli4_hba->sli4_write_cq_db(phba, qp->io_cq, 0,
+                                               LPFC_QUEUE_REARM);
                }
 
                /* Loop thru all IRQ vectors */
@@ -7243,7 +7211,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
        else
                phba->hba_flag &= ~HBA_FIP_SUPPORT;
 
-       phba->hba_flag &= ~HBA_FCP_IOQ_FLUSH;
+       phba->hba_flag &= ~HBA_IOQ_FLUSH;
 
        if (phba->sli_rev != LPFC_SLI_REV4) {
                lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
@@ -7972,7 +7940,7 @@ lpfc_mbox_timeout_handler(struct lpfc_hba *phba)
 
        /* Mbox cmd <mbxCommand> timeout */
        lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
-                       "0310 Mailbox command x%x timeout Data: x%x x%x x%p\n",
+                       "0310 Mailbox command x%x timeout Data: x%x x%x x%px\n",
                        mb->mbxCommand,
                        phba->pport->port_state,
                        phba->sli.sli_flag,
@@ -9333,11 +9301,9 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
                memset(wqe, 0, sizeof(union lpfc_wqe128));
        /* Some of the fields are in the right position already */
        memcpy(wqe, &iocbq->iocb, sizeof(union lpfc_wqe));
-       if (iocbq->iocb.ulpCommand != CMD_SEND_FRAME) {
-               /* The ct field has moved so reset */
-               wqe->generic.wqe_com.word7 = 0;
-               wqe->generic.wqe_com.word10 = 0;
-       }
+       /* The ct field has moved so reset */
+       wqe->generic.wqe_com.word7 = 0;
+       wqe->generic.wqe_com.word10 = 0;
 
        abort_tag = (uint32_t) iocbq->iotag;
        xritag = iocbq->sli4_xritag;
@@ -9796,7 +9762,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
                 * we re-construct this WQE here based on information in
                 * iocbq from scratch.
                 */
-               memset(wqe, 0, sizeof(union lpfc_wqe));
+               memset(wqe, 0, sizeof(*wqe));
                /* OX_ID is invariable to who sent ABTS to CT exchange */
                bf_set(xmit_bls_rsp64_oxid, &wqe->xmit_bls_rsp,
                       bf_get(lpfc_abts_oxid, &iocbq->iocb.un.bls_rsp));
@@ -9843,6 +9809,15 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 
                break;
        case CMD_SEND_FRAME:
+               bf_set(wqe_cmnd, &wqe->generic.wqe_com, CMD_SEND_FRAME);
+               bf_set(wqe_sof, &wqe->generic.wqe_com, 0x2E); /* SOF byte */
+               bf_set(wqe_eof, &wqe->generic.wqe_com, 0x41); /* EOF byte */
+               bf_set(wqe_lenloc, &wqe->generic.wqe_com, 1);
+               bf_set(wqe_xbl, &wqe->generic.wqe_com, 1);
+               bf_set(wqe_dbde, &wqe->generic.wqe_com, 1);
+               bf_set(wqe_xc, &wqe->generic.wqe_com, 1);
+               bf_set(wqe_cmd_type, &wqe->generic.wqe_com, 0xA);
+               bf_set(wqe_cqid, &wqe->generic.wqe_com, LPFC_WQE_CQ_ID_DEFAULT);
                bf_set(wqe_xri_tag, &wqe->generic.wqe_com, xritag);
                bf_set(wqe_reqtag, &wqe->generic.wqe_com, iocbq->iotag);
                return 0;
@@ -9904,7 +9879,7 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number,
        /* Get the WQ */
        if ((piocb->iocb_flag & LPFC_IO_FCP) ||
            (piocb->iocb_flag & LPFC_USE_FCPWQIDX)) {
-               wq = phba->sli4_hba.hdwq[piocb->hba_wqidx].fcp_wq;
+               wq = phba->sli4_hba.hdwq[piocb->hba_wqidx].io_wq;
        } else {
                wq = phba->sli4_hba.els_wq;
        }
@@ -10051,7 +10026,7 @@ lpfc_sli4_calc_ring(struct lpfc_hba *phba, struct lpfc_iocbq *piocb)
                        lpfc_cmd = (struct lpfc_io_buf *)piocb->context1;
                        piocb->hba_wqidx = lpfc_cmd->hdwq_no;
                }
-               return phba->sli4_hba.hdwq[piocb->hba_wqidx].fcp_wq->pring;
+               return phba->sli4_hba.hdwq[piocb->hba_wqidx].io_wq->pring;
        } else {
                if (unlikely(!phba->sli4_hba.els_wq))
                        return NULL;
@@ -10504,7 +10479,7 @@ lpfc_sli4_queue_init(struct lpfc_hba *phba)
        INIT_LIST_HEAD(&psli->mboxq_cmpl);
        /* Initialize list headers for txq and txcmplq as double linked lists */
        for (i = 0; i < phba->cfg_hdw_queue; i++) {
-               pring = phba->sli4_hba.hdwq[i].fcp_wq->pring;
+               pring = phba->sli4_hba.hdwq[i].io_wq->pring;
                pring->flag = 0;
                pring->ringno = LPFC_FCP_RING;
                pring->txcmplq_cnt = 0;
@@ -10523,16 +10498,6 @@ lpfc_sli4_queue_init(struct lpfc_hba *phba)
        spin_lock_init(&pring->ring_lock);
 
        if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-               for (i = 0; i < phba->cfg_hdw_queue; i++) {
-                       pring = phba->sli4_hba.hdwq[i].nvme_wq->pring;
-                       pring->flag = 0;
-                       pring->ringno = LPFC_FCP_RING;
-                       pring->txcmplq_cnt = 0;
-                       INIT_LIST_HEAD(&pring->txq);
-                       INIT_LIST_HEAD(&pring->txcmplq);
-                       INIT_LIST_HEAD(&pring->iocb_continueq);
-                       spin_lock_init(&pring->ring_lock);
-               }
                pring = phba->sli4_hba.nvmels_wq->pring;
                pring->flag = 0;
                pring->ringno = LPFC_ELS_RING;
@@ -10796,9 +10761,9 @@ lpfc_sli_hba_down(struct lpfc_hba *phba)
                        pring = qp->pring;
                        if (!pring)
                                continue;
-                       spin_lock_irq(&pring->ring_lock);
+                       spin_lock(&pring->ring_lock);
                        list_splice_init(&pring->txq, &completions);
-                       spin_unlock_irq(&pring->ring_lock);
+                       spin_unlock(&pring->ring_lock);
                        if (pring == phba->sli4_hba.els_wq->pring) {
                                pring->flag |= LPFC_DEFERRED_RING_EVENT;
                                /* Set the lpfc data pending flag */
@@ -10979,7 +10944,7 @@ lpfc_sli_ring_taggedbuf_get(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
        spin_unlock_irq(&phba->hbalock);
        lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                        "0402 Cannot find virtual addr for buffer tag on "
-                       "ring %d Data x%lx x%p x%p x%x\n",
+                       "ring %d Data x%lx x%px x%px x%x\n",
                        pring->ringno, (unsigned long) tag,
                        slp->next, slp->prev, pring->postbufq_cnt);
 
@@ -11023,7 +10988,7 @@ lpfc_sli_ringpostbuf_get(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
        spin_unlock_irq(&phba->hbalock);
        lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                        "0410 Cannot find virtual addr for mapped buf on "
-                       "ring %d Data x%llx x%p x%p x%x\n",
+                       "ring %d Data x%llx x%px x%px x%x\n",
                        pring->ringno, (unsigned long long)phys,
                        slp->next, slp->prev, pring->postbufq_cnt);
        return NULL;
@@ -11078,13 +11043,16 @@ lpfc_sli_abort_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                        abort_iocb = phba->sli.iocbq_lookup[abort_context];
 
                lpfc_printf_log(phba, KERN_WARNING, LOG_ELS | LOG_SLI,
-                               "0327 Cannot abort els iocb %p "
+                               "0327 Cannot abort els iocb x%px "
                                "with tag %x context %x, abort status %x, "
                                "abort code %x\n",
                                abort_iocb, abort_iotag, abort_context,
                                irsp->ulpStatus, irsp->un.ulpWord[4]);
 
                spin_unlock_irq(&phba->hbalock);
+               if (irsp->ulpStatus == IOSTAT_LOCAL_REJECT &&
+                   irsp->un.ulpWord[4] == IOERR_SLI_ABORTED)
+                       lpfc_sli_release_iocbq(phba, abort_iocb);
        }
 release_iocb:
        lpfc_sli_release_iocbq(phba, cmdiocb);
@@ -11493,7 +11461,7 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
        int i;
 
        /* all I/Os are in process of being flushed */
-       if (phba->hba_flag & HBA_FCP_IOQ_FLUSH)
+       if (phba->hba_flag & HBA_IOQ_FLUSH)
                return errcnt;
 
        for (i = 1; i <= phba->sli.last_iotag; i++) {
@@ -11603,7 +11571,7 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
        spin_lock_irqsave(&phba->hbalock, iflags);
 
        /* all I/Os are in process of being flushed */
-       if (phba->hba_flag & HBA_FCP_IOQ_FLUSH) {
+       if (phba->hba_flag & HBA_IOQ_FLUSH) {
                spin_unlock_irqrestore(&phba->hbalock, iflags);
                return 0;
        }
@@ -11627,7 +11595,7 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
 
                if (phba->sli_rev == LPFC_SLI_REV4) {
                        pring_s4 =
-                           phba->sli4_hba.hdwq[iocbq->hba_wqidx].fcp_wq->pring;
+                           phba->sli4_hba.hdwq[iocbq->hba_wqidx].io_wq->pring;
                        if (!pring_s4) {
                                spin_unlock(&lpfc_cmd->buf_lock);
                                continue;
@@ -13336,8 +13304,13 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
        unsigned long iflags;
 
        switch (cq->subtype) {
-       case LPFC_FCP:
-               lpfc_sli4_fcp_xri_aborted(phba, wcqe, cq->hdwq);
+       case LPFC_IO:
+               lpfc_sli4_io_xri_aborted(phba, wcqe, cq->hdwq);
+               if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+                       /* Notify aborted XRI for NVME work queue */
+                       if (phba->nvmet_support)
+                               lpfc_sli4_nvmet_xri_aborted(phba, wcqe);
+               }
                workposted = false;
                break;
        case LPFC_NVME_LS: /* NVME LS uses ELS resources */
@@ -13355,15 +13328,6 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
                spin_unlock_irqrestore(&phba->hbalock, iflags);
                workposted = true;
                break;
-       case LPFC_NVME:
-               /* Notify aborted XRI for NVME work queue */
-               if (phba->nvmet_support)
-                       lpfc_sli4_nvmet_xri_aborted(phba, wcqe);
-               else
-                       lpfc_sli4_nvme_xri_aborted(phba, wcqe, cq->hdwq);
-
-               workposted = false;
-               break;
        default:
                lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
                                "0603 Invalid CQ subtype %d: "
@@ -13691,7 +13655,7 @@ __lpfc_sli4_sp_process_cq(struct lpfc_queue *cq)
                                                &delay);
                break;
        case LPFC_WCQ:
-               if (cq->subtype == LPFC_FCP || cq->subtype == LPFC_NVME)
+               if (cq->subtype == LPFC_IO)
                        workposted |= __lpfc_sli4_process_cq(phba, cq,
                                                lpfc_sli4_fp_handle_cqe,
                                                &delay);
@@ -14008,10 +13972,7 @@ lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
                cq->CQ_wq++;
                /* Process the WQ complete event */
                phba->last_completion_time = jiffies;
-               if ((cq->subtype == LPFC_FCP) || (cq->subtype == LPFC_NVME))
-                       lpfc_sli4_fp_handle_fcp_wcqe(phba, cq,
-                               (struct lpfc_wcqe_complete *)&wcqe);
-               if (cq->subtype == LPFC_NVME_LS)
+               if (cq->subtype == LPFC_IO || cq->subtype == LPFC_NVME_LS)
                        lpfc_sli4_fp_handle_fcp_wcqe(phba, cq,
                                (struct lpfc_wcqe_complete *)&wcqe);
                break;
@@ -16918,6 +16879,8 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
        struct fc_vft_header *fc_vft_hdr;
        uint32_t *header = (uint32_t *) fc_hdr;
 
+#define FC_RCTL_MDS_DIAGS      0xF4
+
        switch (fc_hdr->fh_r_ctl) {
        case FC_RCTL_DD_UNCAT:          /* uncategorized information */
        case FC_RCTL_DD_SOL_DATA:       /* solicited data */
@@ -17445,7 +17408,6 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport,
        icmd->ulpContext = phba->sli4_hba.rpi_ids[ndlp->nlp_rpi];
        ctiocb->context1 = lpfc_nlp_get(ndlp);
 
-       ctiocb->iocb_cmpl = NULL;
        ctiocb->vport = phba->pport;
        ctiocb->iocb_cmpl = lpfc_sli4_seq_abort_rsp_cmpl;
        ctiocb->sli4_lxritag = NO_XRI;
@@ -17928,6 +17890,17 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba,
                fcfi = bf_get(lpfc_rcqe_fcf_id,
                              &dmabuf->cq_event.cqe.rcqe_cmpl);
 
+       if (fc_hdr->fh_r_ctl == 0xF4 && fc_hdr->fh_type == 0xFF) {
+               vport = phba->pport;
+               lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+                               "2023 MDS Loopback %d bytes\n",
+                               bf_get(lpfc_rcqe_length,
+                                      &dmabuf->cq_event.cqe.rcqe_cmpl));
+               /* Handle MDS Loopback frames */
+               lpfc_sli4_handle_mds_loopback(vport, dmabuf);
+               return;
+       }
+
        /* d_id this frame is directed to */
        did = sli4_did_from_fc_hdr(fc_hdr);
 
@@ -18211,6 +18184,10 @@ __lpfc_sli4_free_rpi(struct lpfc_hba *phba, int rpi)
        if (test_and_clear_bit(rpi, phba->sli4_hba.rpi_bmask)) {
                phba->sli4_hba.rpi_count--;
                phba->sli4_hba.max_cfg_param.rpi_used--;
+       } else {
+               lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+                               "2016 rpi %x not inuse\n",
+                               rpi);
        }
 }
 
@@ -19461,7 +19438,7 @@ lpfc_drain_txq(struct lpfc_hba *phba)
 
        if (phba->link_flag & LS_MDS_LOOPBACK) {
                /* MDS WQE are posted only to first WQ*/
-               wq = phba->sli4_hba.hdwq[0].fcp_wq;
+               wq = phba->sli4_hba.hdwq[0].io_wq;
                if (unlikely(!wq))
                        return 0;
                pring = wq->pring;
@@ -19712,10 +19689,10 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
        /* NVME_FCREQ and NVME_ABTS requests */
        if (pwqe->iocb_flag & LPFC_IO_NVME) {
                /* Get the IO distribution (hba_wqidx) for WQ assignment. */
-               wq = qp->nvme_wq;
+               wq = qp->io_wq;
                pring = wq->pring;
 
-               bf_set(wqe_cqid, &wqe->generic.wqe_com, qp->nvme_cq_map);
+               bf_set(wqe_cqid, &wqe->generic.wqe_com, qp->io_cq_map);
 
                lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
                                          qp, wq_access);
@@ -19732,7 +19709,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
        /* NVMET requests */
        if (pwqe->iocb_flag & LPFC_IO_NVMET) {
                /* Get the IO distribution (hba_wqidx) for WQ assignment. */
-               wq = qp->nvme_wq;
+               wq = qp->io_wq;
                pring = wq->pring;
 
                ctxp = pwqe->context2;
@@ -19743,7 +19720,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
                }
                bf_set(wqe_xri_tag, &pwqe->wqe.xmit_bls_rsp.wqe_com,
                       pwqe->sli4_xritag);
-               bf_set(wqe_cqid, &wqe->generic.wqe_com, qp->nvme_cq_map);
+               bf_set(wqe_cqid, &wqe->generic.wqe_com, qp->io_cq_map);
 
                lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
                                          qp, wq_access);
@@ -19790,9 +19767,7 @@ void lpfc_snapshot_mxp(struct lpfc_hba *phba, u32 hwqid)
        if (multixri_pool->stat_snapshot_taken == LPFC_MXP_SNAPSHOT_TAKEN) {
                pvt_pool = &qp->p_multixri_pool->pvt_pool;
                pbl_pool = &qp->p_multixri_pool->pbl_pool;
-               txcmplq_cnt = qp->fcp_wq->pring->txcmplq_cnt;
-               if (qp->nvme_wq)
-                       txcmplq_cnt += qp->nvme_wq->pring->txcmplq_cnt;
+               txcmplq_cnt = qp->io_wq->pring->txcmplq_cnt;
 
                multixri_pool->stat_pbl_count = pbl_pool->count;
                multixri_pool->stat_pvt_count = pvt_pool->count;
@@ -19862,12 +19837,9 @@ void lpfc_adjust_high_watermark(struct lpfc_hba *phba, u32 hwqid)
        watermark_max = xri_limit;
        watermark_min = xri_limit / 2;
 
-       txcmplq_cnt = qp->fcp_wq->pring->txcmplq_cnt;
+       txcmplq_cnt = qp->io_wq->pring->txcmplq_cnt;
        abts_io_bufs = qp->abts_scsi_io_bufs;
-       if (qp->nvme_wq) {
-               txcmplq_cnt += qp->nvme_wq->pring->txcmplq_cnt;
-               abts_io_bufs += qp->abts_nvme_io_bufs;
-       }
+       abts_io_bufs += qp->abts_nvme_io_bufs;
 
        new_watermark = txcmplq_cnt + abts_io_bufs;
        new_watermark = min(watermark_max, new_watermark);
@@ -20142,12 +20114,9 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd,
                pbl_pool = &qp->p_multixri_pool->pbl_pool;
                pvt_pool = &qp->p_multixri_pool->pvt_pool;
 
-               txcmplq_cnt = qp->fcp_wq->pring->txcmplq_cnt;
+               txcmplq_cnt = qp->io_wq->pring->txcmplq_cnt;
                abts_io_bufs = qp->abts_scsi_io_bufs;
-               if (qp->nvme_wq) {
-                       txcmplq_cnt += qp->nvme_wq->pring->txcmplq_cnt;
-                       abts_io_bufs += qp->abts_nvme_io_bufs;
-               }
+               abts_io_bufs += qp->abts_nvme_io_bufs;
 
                xri_owned = pvt_pool->count + txcmplq_cnt + abts_io_bufs;
                xri_limit = qp->p_multixri_pool->xri_limit;
@@ -20188,6 +20157,13 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd,
                spin_unlock_irqrestore(&qp->io_buf_list_put_lock,
                                       iflag);
        }
+
+       if (phba->cfg_xpsgl && !phba->nvmet_support &&
+           !list_empty(&lpfc_ncmd->dma_sgl_xtra_list))
+               lpfc_put_sgl_per_hdwq(phba, lpfc_ncmd);
+
+       if (!list_empty(&lpfc_ncmd->dma_cmd_rsp_list))
+               lpfc_put_cmd_rsp_buf_per_hdwq(phba, lpfc_ncmd);
 }
 
 /**
@@ -20402,3 +20378,288 @@ struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba,
 
        return lpfc_cmd;
 }
+
+/**
+ * lpfc_get_sgl_per_hdwq - Get one SGL chunk from hdwq's pool
+ * @phba: The HBA for which this call is being executed.
+ * @lpfc_buf: IO buf structure to append the SGL chunk
+ *
+ * This routine moves one SGL chunk from the hdwq's SGL chunk pool onto the
+ * tail of @lpfc_buf's dma_sgl_xtra_list. If the pool is empty, a new chunk
+ * is allocated with GFP_ATOMIC (with hdwq_lock dropped) and appended to the
+ * same list instead.
+ *
+ * hdwq_lock is taken with the irqsave/irqrestore variants so the caller's
+ * interrupt state is preserved; the plain _irq variants would re-enable
+ * interrupts unconditionally on unlock.
+ *
+ * Return codes:
+ *   NULL - Error
+ *   Pointer to sli4_hybrid_sgl - Success
+ **/
+struct sli4_hybrid_sgl *
+lpfc_get_sgl_per_hdwq(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_buf)
+{
+       struct sli4_hybrid_sgl *sgl;
+       struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq;
+       struct list_head *buf_list = &hdwq->sgl_list;
+       unsigned long iflags;
+
+       spin_lock_irqsave(&hdwq->hdwq_lock, iflags);
+
+       if (likely(!list_empty(buf_list))) {
+               /* break off 1 chunk from the sgl_list */
+               sgl = list_first_entry(buf_list, struct sli4_hybrid_sgl,
+                                      list_node);
+               list_move_tail(&sgl->list_node,
+                              &lpfc_buf->dma_sgl_xtra_list);
+               spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);
+               return sgl;
+       }
+
+       /* pool is empty: allocate more, without holding the lock */
+       spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);
+
+       sgl = kmalloc_node(sizeof(*sgl), GFP_ATOMIC,
+                          cpu_to_node(smp_processor_id()));
+       if (!sgl) {
+               lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+                               "8353 error kmalloc memory for HDWQ "
+                               "%d %s\n",
+                               lpfc_buf->hdwq_no, __func__);
+               return NULL;
+       }
+
+       sgl->dma_sgl = dma_pool_alloc(phba->lpfc_sg_dma_buf_pool,
+                                     GFP_ATOMIC, &sgl->dma_phys_sgl);
+       if (!sgl->dma_sgl) {
+               lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+                               "8354 error pool_alloc memory for HDWQ "
+                               "%d %s\n",
+                               lpfc_buf->hdwq_no, __func__);
+               kfree(sgl);
+               return NULL;
+       }
+
+       /* hand the freshly allocated chunk straight to the IO buf */
+       spin_lock_irqsave(&hdwq->hdwq_lock, iflags);
+       list_add_tail(&sgl->list_node, &lpfc_buf->dma_sgl_xtra_list);
+       spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);
+
+       return sgl;
+}
+
+/**
+ * lpfc_put_sgl_per_hdwq - Return an IO buf's SGL chunks to the hdwq pool
+ * @phba: The HBA for which this call is being executed.
+ * @lpfc_buf: IO buf structure with the SGL chunk(s)
+ *
+ * This routine moves every SGL chunk on @lpfc_buf's dma_sgl_xtra_list to
+ * the tail of the hdwq's SGL chunk pool, leaving dma_sgl_xtra_list empty.
+ *
+ * hdwq_lock is taken with the irqsave/irqrestore variants so the caller's
+ * interrupt state is preserved across the call.
+ *
+ * Return codes:
+ *   0 - Success
+ *   -EINVAL - Error (the IO buf had no extra SGL chunks)
+ **/
+int
+lpfc_put_sgl_per_hdwq(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_buf)
+{
+       int rc = 0;
+       struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq;
+       struct list_head *buf_list = &hdwq->sgl_list;
+       unsigned long iflags;
+
+       spin_lock_irqsave(&hdwq->hdwq_lock, iflags);
+
+       if (likely(!list_empty(&lpfc_buf->dma_sgl_xtra_list)))
+               list_splice_tail_init(&lpfc_buf->dma_sgl_xtra_list, buf_list);
+       else
+               rc = -EINVAL;
+
+       spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);
+       return rc;
+}
+
+/**
+ * lpfc_free_sgl_per_hdwq - Free all SGL chunks of hdwq pool
+ * @phba: phba object
+ * @hdwq: hdwq to cleanup sgl buff resources on
+ *
+ * This routine unlinks every SGL chunk on the hdwq's SGL chunk pool,
+ * returns its DMA buffer to lpfc_sg_dma_buf_pool and frees the tracking
+ * structure. hdwq_lock is held for the whole walk, using the
+ * irqsave/irqrestore variants like the other users of this lock.
+ *
+ * Return codes:
+ *   None
+ **/
+void
+lpfc_free_sgl_per_hdwq(struct lpfc_hba *phba,
+                      struct lpfc_sli4_hdw_queue *hdwq)
+{
+       struct list_head *buf_list = &hdwq->sgl_list;
+       struct sli4_hybrid_sgl *list_entry = NULL;
+       struct sli4_hybrid_sgl *tmp = NULL;
+       unsigned long iflags;
+
+       spin_lock_irqsave(&hdwq->hdwq_lock, iflags);
+
+       /* Free sgl pool */
+       list_for_each_entry_safe(list_entry, tmp,
+                                buf_list, list_node) {
+               /* unlink first so the pool never references freed memory */
+               list_del(&list_entry->list_node);
+               dma_pool_free(phba->lpfc_sg_dma_buf_pool,
+                             list_entry->dma_sgl,
+                             list_entry->dma_phys_sgl);
+               kfree(list_entry);
+       }
+
+       spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);
+}
+
+/**
+ * lpfc_get_cmd_rsp_buf_per_hdwq - Get one CMD/RSP buffer from hdwq
+ * @phba: The HBA for which this call is being executed.
+ * @lpfc_buf: IO buf structure to attach the CMD/RSP buffer
+ *
+ * This routine moves one CMD/RSP buffer from the hdwq's CMD/RSP pool onto
+ * the tail of @lpfc_buf's dma_cmd_rsp_list. If the pool is empty, a new
+ * buffer is allocated with GFP_ATOMIC (with hdwq_lock dropped) and appended
+ * to the same list instead. In a newly allocated buffer, fcp_rsp points
+ * sizeof(struct fcp_cmnd) bytes past fcp_cmnd within the same DMA
+ * allocation.
+ *
+ * hdwq_lock is taken with the irqsave/irqrestore variants so the caller's
+ * interrupt state is preserved; the plain _irq variants would re-enable
+ * interrupts unconditionally on unlock.
+ *
+ * Return codes:
+ *   NULL - Error
+ *   Pointer to fcp_cmd_rsp_buf - Success
+ **/
+struct fcp_cmd_rsp_buf *
+lpfc_get_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
+                             struct lpfc_io_buf *lpfc_buf)
+{
+       struct fcp_cmd_rsp_buf *cr_buf;
+       struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq;
+       struct list_head *buf_list = &hdwq->cmd_rsp_buf_list;
+       unsigned long iflags;
+
+       spin_lock_irqsave(&hdwq->hdwq_lock, iflags);
+
+       if (likely(!list_empty(buf_list))) {
+               /* break off 1 chunk from the list */
+               cr_buf = list_first_entry(buf_list, struct fcp_cmd_rsp_buf,
+                                         list_node);
+               list_move_tail(&cr_buf->list_node,
+                              &lpfc_buf->dma_cmd_rsp_list);
+               spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);
+               return cr_buf;
+       }
+
+       /* pool is empty: allocate more, without holding the lock */
+       spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);
+
+       cr_buf = kmalloc_node(sizeof(*cr_buf), GFP_ATOMIC,
+                             cpu_to_node(smp_processor_id()));
+       if (!cr_buf) {
+               lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+                               "8355 error kmalloc memory for HDWQ "
+                               "%d %s\n",
+                               lpfc_buf->hdwq_no, __func__);
+               return NULL;
+       }
+
+       cr_buf->fcp_cmnd = dma_pool_alloc(phba->lpfc_cmd_rsp_buf_pool,
+                                         GFP_ATOMIC,
+                                         &cr_buf->fcp_cmd_rsp_dma_handle);
+       if (!cr_buf->fcp_cmnd) {
+               lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+                               "8356 error pool_alloc memory for HDWQ "
+                               "%d %s\n",
+                               lpfc_buf->hdwq_no, __func__);
+               kfree(cr_buf);
+               return NULL;
+       }
+
+       /* the response area directly follows the command area */
+       cr_buf->fcp_rsp = (struct fcp_rsp *)((uint8_t *)cr_buf->fcp_cmnd +
+                         sizeof(struct fcp_cmnd));
+
+       /* hand the freshly allocated buffer straight to the IO buf */
+       spin_lock_irqsave(&hdwq->hdwq_lock, iflags);
+       list_add_tail(&cr_buf->list_node, &lpfc_buf->dma_cmd_rsp_list);
+       spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);
+
+       return cr_buf;
+}
+
+/**
+ * lpfc_put_cmd_rsp_buf_per_hdwq - Return CMD/RSP buffers to the hdwq pool
+ * @phba: The HBA for which this call is being executed.
+ * @lpfc_buf: IO buf structure with the CMD/RSP buf
+ *
+ * This routine moves every CMD/RSP buffer on @lpfc_buf's dma_cmd_rsp_list
+ * to the tail of the CMD/RSP pool of the hdwq the IO buf belongs to
+ * (lpfc_buf->hdwq), leaving dma_cmd_rsp_list empty.
+ *
+ * hdwq_lock is taken with the irqsave/irqrestore variants so the caller's
+ * interrupt state is preserved across the call.
+ *
+ * Return codes:
+ *   0 - Success
+ *   -EINVAL - Error (the IO buf had no CMD/RSP buffer attached)
+ **/
+int
+lpfc_put_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
+                             struct lpfc_io_buf *lpfc_buf)
+{
+       int rc = 0;
+       struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq;
+       struct list_head *buf_list = &hdwq->cmd_rsp_buf_list;
+       unsigned long iflags;
+
+       spin_lock_irqsave(&hdwq->hdwq_lock, iflags);
+
+       if (likely(!list_empty(&lpfc_buf->dma_cmd_rsp_list)))
+               list_splice_tail_init(&lpfc_buf->dma_cmd_rsp_list, buf_list);
+       else
+               rc = -EINVAL;
+
+       spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);
+       return rc;
+}
+
+/**
+ * lpfc_free_cmd_rsp_buf_per_hdwq - Free all CMD/RSP chunks of hdwq pool
+ * @phba: phba object
+ * @hdwq: hdwq to cleanup cmd rsp buff resources on
+ *
+ * This routine unlinks every CMD/RSP buffer on the hdwq's CMD/RSP buf pool,
+ * returns its DMA memory to lpfc_cmd_rsp_buf_pool and frees the tracking
+ * structure. hdwq_lock is held for the whole walk, using the
+ * irqsave/irqrestore variants like the other users of this lock.
+ *
+ * Return codes:
+ *   None
+ **/
+void
+lpfc_free_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
+                              struct lpfc_sli4_hdw_queue *hdwq)
+{
+       struct list_head *buf_list = &hdwq->cmd_rsp_buf_list;
+       struct fcp_cmd_rsp_buf *list_entry = NULL;
+       struct fcp_cmd_rsp_buf *tmp = NULL;
+       unsigned long iflags;
+
+       spin_lock_irqsave(&hdwq->hdwq_lock, iflags);
+
+       /* Free cmd_rsp buf pool */
+       list_for_each_entry_safe(list_entry, tmp,
+                                buf_list,
+                                list_node) {
+               /* unlink first so the pool never references freed memory */
+               list_del(&list_entry->list_node);
+               dma_pool_free(phba->lpfc_cmd_rsp_buf_pool,
+                             list_entry->fcp_cmnd,
+                             list_entry->fcp_cmd_rsp_dma_handle);
+               kfree(list_entry);
+       }
+
+       spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);
+}
index 467b827..37fbcb4 100644 (file)
@@ -365,9 +365,18 @@ struct lpfc_io_buf {
        /* Common fields */
        struct list_head list;
        void *data;
+
        dma_addr_t dma_handle;
        dma_addr_t dma_phys_sgl;
-       struct sli4_sge *dma_sgl;
+
+       struct sli4_sge *dma_sgl; /* initial segment chunk */
+
+       /* linked list of extra sli4_hybrid_sge */
+       struct list_head dma_sgl_xtra_list;
+
+       /* list head for fcp_cmd_rsp buf */
+       struct list_head dma_cmd_rsp_list;
+
        struct lpfc_iocbq cur_iocbq;
        struct lpfc_sli4_hdw_queue *hdwq;
        uint16_t hdwq_no;
index a81ef02..0d4882a 100644 (file)
@@ -49,9 +49,6 @@
 #define LPFC_FCP_MQ_THRESHOLD_MAX      256
 #define LPFC_FCP_MQ_THRESHOLD_DEF      8
 
-/* Common buffer size to accomidate SCSI and NVME IO buffers */
-#define LPFC_COMMON_IO_BUF_SZ  768
-
 /*
  * Provide the default FCF Record attributes used by the driver
  * when nonFIP mode is configured and there is no other default
@@ -114,9 +111,8 @@ enum lpfc_sli4_queue_type {
 enum lpfc_sli4_queue_subtype {
        LPFC_NONE,
        LPFC_MBOX,
-       LPFC_FCP,
+       LPFC_IO,
        LPFC_ELS,
-       LPFC_NVME,
        LPFC_NVMET,
        LPFC_NVME_LS,
        LPFC_USOL
@@ -646,22 +642,17 @@ struct lpfc_eq_intr_info {
 struct lpfc_sli4_hdw_queue {
        /* Pointers to the constructed SLI4 queues */
        struct lpfc_queue *hba_eq;  /* Event queues for HBA */
-       struct lpfc_queue *fcp_cq;  /* Fast-path FCP compl queue */
-       struct lpfc_queue *nvme_cq; /* Fast-path NVME compl queue */
-       struct lpfc_queue *fcp_wq;  /* Fast-path FCP work queue */
-       struct lpfc_queue *nvme_wq; /* Fast-path NVME work queue */
-       uint16_t fcp_cq_map;
-       uint16_t nvme_cq_map;
+       struct lpfc_queue *io_cq;   /* Fast-path FCP & NVME compl queue */
+       struct lpfc_queue *io_wq;   /* Fast-path FCP & NVME work queue */
+       uint16_t io_cq_map;
 
        /* Keep track of IO buffers for this hardware queue */
        spinlock_t io_buf_list_get_lock;  /* Common buf alloc list lock */
        struct list_head lpfc_io_buf_list_get;
        spinlock_t io_buf_list_put_lock;  /* Common buf free list lock */
        struct list_head lpfc_io_buf_list_put;
-       spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */
-       struct list_head lpfc_abts_scsi_buf_list;
-       spinlock_t abts_nvme_buf_list_lock; /* list of aborted NVME IOs */
-       struct list_head lpfc_abts_nvme_buf_list;
+       spinlock_t abts_io_buf_list_lock; /* list of aborted IOs */
+       struct list_head lpfc_abts_io_buf_list;
        uint32_t total_io_bufs;
        uint32_t get_io_bufs;
        uint32_t put_io_bufs;
@@ -685,6 +676,13 @@ struct lpfc_sli4_hdw_queue {
        uint32_t cpucheck_xmt_io[LPFC_CHECK_CPU_CNT];
        uint32_t cpucheck_cmpl_io[LPFC_CHECK_CPU_CNT];
 #endif
+
+       /* Per HDWQ pool resources */
+       struct list_head sgl_list;
+       struct list_head cmd_rsp_buf_list;
+
+       /* Lock for syncing Per HDWQ pool resources */
+       spinlock_t hdwq_lock;
 };
 
 #ifdef LPFC_HDWQ_LOCK_STAT
@@ -850,8 +848,8 @@ struct lpfc_sli4_hba {
        struct lpfc_queue **cq_lookup;
        struct list_head lpfc_els_sgl_list;
        struct list_head lpfc_abts_els_sgl_list;
-       spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */
-       struct list_head lpfc_abts_scsi_buf_list;
+       spinlock_t abts_io_buf_list_lock; /* list of aborted SCSI IOs */
+       struct list_head lpfc_abts_io_buf_list;
        struct list_head lpfc_nvmet_sgl_list;
        spinlock_t abts_nvmet_buf_list_lock; /* list of aborted NVMET IOs */
        struct list_head lpfc_abts_nvmet_ctx_list;
@@ -1056,10 +1054,11 @@ int lpfc_sli4_resume_rpi(struct lpfc_nodelist *,
                        void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *);
 void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *);
 void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *);
-void lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *,
-                              struct sli4_wcqe_xri_aborted *, int);
 void lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
-                               struct sli4_wcqe_xri_aborted *axri, int idx);
+                               struct sli4_wcqe_xri_aborted *axri,
+                               struct lpfc_io_buf *lpfc_ncmd);
+void lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
+                             struct sli4_wcqe_xri_aborted *axri, int idx);
 void lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
                                 struct sli4_wcqe_xri_aborted *axri);
 void lpfc_sli4_els_xri_aborted(struct lpfc_hba *,
@@ -1094,6 +1093,17 @@ int lpfc_sli4_post_status_check(struct lpfc_hba *);
 uint8_t lpfc_sli_config_mbox_subsys_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
 uint8_t lpfc_sli_config_mbox_opcode_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_sli4_ras_dma_free(struct lpfc_hba *phba);
+struct sli4_hybrid_sgl *lpfc_get_sgl_per_hdwq(struct lpfc_hba *phba,
+                                             struct lpfc_io_buf *buf);
+struct fcp_cmd_rsp_buf *lpfc_get_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
+                                                     struct lpfc_io_buf *buf);
+int lpfc_put_sgl_per_hdwq(struct lpfc_hba *phba, struct lpfc_io_buf *buf);
+int lpfc_put_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
+                                 struct lpfc_io_buf *buf);
+void lpfc_free_sgl_per_hdwq(struct lpfc_hba *phba,
+                           struct lpfc_sli4_hdw_queue *hdwq);
+void lpfc_free_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
+                                   struct lpfc_sli4_hdw_queue *hdwq);
 static inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t idx)
 {
        return q->q_pgs[idx / q->entry_cnt_per_pg] +
index f7e93aa..b8aae31 100644 (file)
@@ -20,7 +20,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "12.2.0.3"
+#define LPFC_DRIVER_VERSION "12.4.0.0"
 #define LPFC_DRIVER_NAME               "lpfc"
 
 /* Used for SLI 2/3 */
index 343bc71..b766463 100644 (file)
@@ -527,9 +527,11 @@ disable_vport(struct fc_vport *fc_vport)
         * scsi_host_put() to release the vport.
         */
        lpfc_mbx_unreg_vpi(vport);
-       spin_lock_irq(shost->host_lock);
-       vport->fc_flag |= FC_VPORT_NEEDS_INIT_VPI;
-       spin_unlock_irq(shost->host_lock);
+       if (phba->sli_rev == LPFC_SLI_REV4) {
+               spin_lock_irq(shost->host_lock);
+               vport->fc_flag |= FC_VPORT_NEEDS_INIT_VPI;
+               spin_unlock_irq(shost->host_lock);
+       }
 
        lpfc_vport_set_state(vport, FC_VPORT_DISABLED);
        lpfc_printf_vlog(vport, KERN_ERR, LOG_VPORT,
index a14e834..a6e788c 100644 (file)
@@ -2429,6 +2429,7 @@ struct megasas_instance {
        u8 adapter_type;
        bool consistent_mask_64bit;
        bool support_nvme_passthru;
+       bool enable_sdev_max_qd;
        u8 task_abort_tmo;
        u8 max_reset_tmo;
        u8 snapdump_wait_time;
index f9f0793..42cf38c 100644 (file)
@@ -109,6 +109,10 @@ int event_log_level = MFI_EVT_CLASS_CRITICAL;
 module_param(event_log_level, int, 0644);
 MODULE_PARM_DESC(event_log_level, "Asynchronous event logging level- range is: -2(CLASS_DEBUG) to 4(CLASS_DEAD), Default: 2(CLASS_CRITICAL)");
 
+/* Load-time default for the per-adapter enable_sdev_max_qd setting. */
+unsigned int enable_sdev_max_qd;
+module_param(enable_sdev_max_qd, uint, 0444);
+MODULE_PARM_DESC(enable_sdev_max_qd, "Enable sdev max qd as can_queue. Default: 0");
+
 MODULE_LICENSE("GPL");
 MODULE_VERSION(MEGASAS_VERSION);
 MODULE_AUTHOR("megaraidlinux.pdl@broadcom.com");
@@ -1941,25 +1945,19 @@ megasas_set_nvme_device_properties(struct scsi_device *sdev, u32 max_io_size)
        blk_queue_virt_boundary(sdev->request_queue, mr_nvme_pg_size - 1);
 }
 
-
 /*
- * megasas_set_static_target_properties -
- * Device property set by driver are static and it is not required to be
- * updated after OCR.
- *
- * set io timeout
- * set device queue depth
- * set nvme device properties. see - megasas_set_nvme_device_properties
+ * megasas_set_fw_assisted_qd -
+ * set device queue depth to can_queue
+ * set device queue depth to fw assisted qd
  *
  * @sdev:                              scsi device
  * @is_target_prop                     true, if fw provided target properties.
  */
-static void megasas_set_static_target_properties(struct scsi_device *sdev,
+static void megasas_set_fw_assisted_qd(struct scsi_device *sdev,
                                                 bool is_target_prop)
 {
        u8 interface_type;
        u32 device_qd = MEGASAS_DEFAULT_CMD_PER_LUN;
-       u32 max_io_size_kb = MR_DEFAULT_NVME_MDTS_KB;
        u32 tgt_device_qd;
        struct megasas_instance *instance;
        struct MR_PRIV_DEVICE *mr_device_priv_data;
@@ -1968,11 +1966,6 @@ static void megasas_set_static_target_properties(struct scsi_device *sdev,
        mr_device_priv_data = sdev->hostdata;
        interface_type  = mr_device_priv_data->interface_type;
 
-       /*
-        * The RAID firmware may require extended timeouts.
-        */
-       blk_queue_rq_timeout(sdev->request_queue, scmd_timeout * HZ);
-
        switch (interface_type) {
        case SAS_PD:
                device_qd = MEGASAS_SAS_QD;
@@ -1990,18 +1983,49 @@ static void megasas_set_static_target_properties(struct scsi_device *sdev,
                if (tgt_device_qd &&
                    (tgt_device_qd <= instance->host->can_queue))
                        device_qd = tgt_device_qd;
+       }
 
-               /* max_io_size_kb will be set to non zero for
-                * nvme based vd and syspd.
-                */
+       if (instance->enable_sdev_max_qd && interface_type != UNKNOWN_DRIVE)
+               device_qd = instance->host->can_queue;
+
+       scsi_change_queue_depth(sdev, device_qd);
+}
+
+/*
+ * megasas_set_static_target_properties -
+ * Device property set by driver are static and it is not required to be
+ * updated after OCR.
+ *
+ * set io timeout
+ * set device queue depth
+ * set nvme device properties. see - megasas_set_nvme_device_properties
+ *
+ * @sdev:                              scsi device
+ * @is_target_prop                     true, if fw provided target properties.
+ */
+static void megasas_set_static_target_properties(struct scsi_device *sdev,
+                                                bool is_target_prop)
+{
+       u32 max_io_size_kb = MR_DEFAULT_NVME_MDTS_KB;
+       struct megasas_instance *instance;
+
+       instance = megasas_lookup_instance(sdev->host->host_no);
+
+       /*
+        * The RAID firmware may require extended timeouts.
+        */
+       blk_queue_rq_timeout(sdev->request_queue, scmd_timeout * HZ);
+
+       /* max_io_size_kb will be set to non zero for
+        * nvme based vd and syspd.
+        */
+       if (is_target_prop)
                max_io_size_kb = le32_to_cpu(instance->tgt_prop->max_io_size_kb);
-       }
 
        if (instance->nvme_page_size && max_io_size_kb)
                megasas_set_nvme_device_properties(sdev, (max_io_size_kb << 10));
 
-       scsi_change_queue_depth(sdev, device_qd);
-
+       megasas_set_fw_assisted_qd(sdev, is_target_prop);
 }
 
 
@@ -3285,6 +3309,48 @@ fw_cmds_outstanding_show(struct device *cdev,
 }
 
 static ssize_t
+enable_sdev_max_qd_show(struct device *cdev,
+       struct device_attribute *attr, char *buf)
+{
+       struct Scsi_Host *shost = class_to_shost(cdev);
+       struct megasas_instance *instance = (struct megasas_instance *)shost->hostdata;
+
+       return snprintf(buf, PAGE_SIZE, "%d\n", instance->enable_sdev_max_qd);
+}
+
+/*
+ * enable_sdev_max_qd_store - sysfs store handler for enable_sdev_max_qd
+ *
+ * Parses @buf as an unsigned integer; any non-zero value enables the
+ * setting and zero disables it. With reset_mutex held, the new setting is
+ * recorded in the adapter instance and the queue depth of every scsi device
+ * on the host is re-evaluated through megasas_set_fw_assisted_qd().
+ *
+ * Returns @count on success (the whole buffer is consumed), or -EINVAL if
+ * @buf cannot be parsed.
+ */
+static ssize_t
+enable_sdev_max_qd_store(struct device *cdev,
+       struct device_attribute *attr, const char *buf, size_t count)
+{
+       struct Scsi_Host *shost = class_to_shost(cdev);
+       struct megasas_instance *instance = (struct megasas_instance *)shost->hostdata;
+       u32 val = 0;
+       bool is_target_prop;
+       struct scsi_device *sdev;
+
+       if (kstrtou32(buf, 0, &val) != 0) {
+               pr_err("megasas: could not set enable_sdev_max_qd\n");
+               return -EINVAL;
+       }
+
+       mutex_lock(&instance->reset_mutex);
+       instance->enable_sdev_max_qd = !!val;
+
+       shost_for_each_device(sdev, shost) {
+               /* fw-provided target properties are optional per device */
+               is_target_prop =
+                       (megasas_get_target_prop(instance, sdev) == DCMD_SUCCESS);
+               megasas_set_fw_assisted_qd(sdev, is_target_prop);
+       }
+       mutex_unlock(&instance->reset_mutex);
+
+       return count;
+}
+
+static ssize_t
 dump_system_regs_show(struct device *cdev,
                               struct device_attribute *attr, char *buf)
 {
@@ -3313,6 +3379,7 @@ static DEVICE_ATTR_RW(fw_crash_state);
 static DEVICE_ATTR_RO(page_size);
 static DEVICE_ATTR_RO(ldio_outstanding);
 static DEVICE_ATTR_RO(fw_cmds_outstanding);
+static DEVICE_ATTR_RW(enable_sdev_max_qd);
 static DEVICE_ATTR_RO(dump_system_regs);
 static DEVICE_ATTR_RO(raid_map_id);
 
@@ -3323,6 +3390,7 @@ static struct device_attribute *megaraid_host_attrs[] = {
        &dev_attr_page_size,
        &dev_attr_ldio_outstanding,
        &dev_attr_fw_cmds_outstanding,
+       &dev_attr_enable_sdev_max_qd,
        &dev_attr_dump_system_regs,
        &dev_attr_raid_map_id,
        NULL,
@@ -5894,6 +5962,8 @@ static int megasas_init_fw(struct megasas_instance *instance)
                        MR_MAX_RAID_MAP_SIZE_MASK);
        }
 
+       instance->enable_sdev_max_qd = enable_sdev_max_qd;
+
        switch (instance->adapter_type) {
        case VENTURA_SERIES:
                fusion->pcie_bw_limitation = true;
index 120e3c4..e301458 100644 (file)
@@ -323,9 +323,6 @@ megasas_fusion_update_can_queue(struct megasas_instance *instance, int fw_boot_c
 {
        u16 cur_max_fw_cmds = 0;
        u16 ldio_threshold = 0;
-       struct megasas_register_set __iomem *reg_set;
-
-       reg_set = instance->reg_set;
 
        /* ventura FW does not fill outbound_scratch_pad_2 with queue depth */
        if (instance->adapter_type < VENTURA_SERIES)
@@ -3511,7 +3508,7 @@ megasas_complete_r1_command(struct megasas_instance *instance,
  * @instance:                  Adapter soft state
  * Completes all commands that is in reply descriptor queue
  */
-int
+static int
 complete_cmd_fusion(struct megasas_instance *instance, u32 MSIxIndex,
                    struct megasas_irq_context *irq_context)
 {
@@ -3702,7 +3699,7 @@ static void megasas_enable_irq_poll(struct megasas_instance *instance)
  * megasas_sync_irqs - Synchronizes all IRQs owned by adapter
  * @instance:                  Adapter soft state
  */
-void megasas_sync_irqs(unsigned long instance_addr)
+static void megasas_sync_irqs(unsigned long instance_addr)
 {
        u32 count, i;
        struct megasas_instance *instance =
@@ -3760,7 +3757,7 @@ int megasas_irqpoll(struct irq_poll *irqpoll, int budget)
  *
  * Tasklet to complete cmds
  */
-void
+static void
 megasas_complete_cmd_dpc_fusion(unsigned long instance_addr)
 {
        struct megasas_instance *instance =
@@ -3780,7 +3777,7 @@ megasas_complete_cmd_dpc_fusion(unsigned long instance_addr)
 /**
  * megasas_isr_fusion - isr entry point
  */
-irqreturn_t megasas_isr_fusion(int irq, void *devp)
+static irqreturn_t megasas_isr_fusion(int irq, void *devp)
 {
        struct megasas_irq_context *irq_context = devp;
        struct megasas_instance *instance = irq_context->instance;
@@ -3816,7 +3813,7 @@ irqreturn_t megasas_isr_fusion(int irq, void *devp)
  * mfi_cmd:                    megasas_cmd pointer
  *
  */
-void
+static void
 build_mpt_mfi_pass_thru(struct megasas_instance *instance,
                        struct megasas_cmd *mfi_cmd)
 {
@@ -3874,7 +3871,7 @@ build_mpt_mfi_pass_thru(struct megasas_instance *instance,
  * @cmd:                       mfi cmd to build
  *
  */
-union MEGASAS_REQUEST_DESCRIPTOR_UNION *
+static union MEGASAS_REQUEST_DESCRIPTOR_UNION *
 build_mpt_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd)
 {
        union MEGASAS_REQUEST_DESCRIPTOR_UNION *req_desc = NULL;
@@ -3900,7 +3897,7 @@ build_mpt_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd)
  * @cmd:                       mfi cmd pointer
  *
  */
-void
+static void
 megasas_issue_dcmd_fusion(struct megasas_instance *instance,
                          struct megasas_cmd *cmd)
 {
@@ -4096,8 +4093,9 @@ static inline void megasas_trigger_snap_dump(struct megasas_instance *instance)
 }
 
 /* This function waits for outstanding commands on fusion to complete */
-int megasas_wait_for_outstanding_fusion(struct megasas_instance *instance,
-                                       int reason, int *convert)
+static int
+megasas_wait_for_outstanding_fusion(struct megasas_instance *instance,
+                                   int reason, int *convert)
 {
        int i, outstanding, retval = 0, hb_seconds_missed = 0;
        u32 fw_state, abs_state;
@@ -4221,7 +4219,7 @@ void  megasas_reset_reply_desc(struct megasas_instance *instance)
  * megasas_refire_mgmt_cmd :   Re-fire management commands
  * @instance:                          Controller's soft instance
 */
-void megasas_refire_mgmt_cmd(struct megasas_instance *instance)
+static void megasas_refire_mgmt_cmd(struct megasas_instance *instance)
 {
        int j;
        struct megasas_cmd_fusion *cmd_fusion;
@@ -4747,7 +4745,8 @@ out:
 }
 
 /*SRIOV get other instance in cluster if any*/
-struct megasas_instance *megasas_get_peer_instance(struct megasas_instance *instance)
+static struct
+megasas_instance *megasas_get_peer_instance(struct megasas_instance *instance)
 {
        int i;
 
@@ -5053,7 +5052,7 @@ out:
 }
 
 /* Fusion Crash dump collection */
-void  megasas_fusion_crash_dump(struct megasas_instance *instance)
+static void  megasas_fusion_crash_dump(struct megasas_instance *instance)
 {
        u32 status_reg;
        u8 partial_copy = 0;
index 7efd17a..18b1e31 100644 (file)
@@ -9,7 +9,7 @@
  *                 scatter/gather formats.
  * Creation Date:  June 21, 2006
  *
- *  mpi2.h Version:  02.00.53
+ *  mpi2.h Version:  02.00.54
  *
  * NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25
  *       prefix are for use only on MPI v2.5 products, and must not be used
  * 08-15-18  02.00.52  Bumped MPI2_HEADER_VERSION_UNIT.
  * 08-28-18  02.00.53  Bumped MPI2_HEADER_VERSION_UNIT.
  *                     Added MPI2_IOCSTATUS_FAILURE
+ * 12-17-18  02.00.54  Bumped MPI2_HEADER_VERSION_UNIT
  *  --------------------------------------------------------------------------
  */
 
 
 
 /* Unit and Dev versioning for this MPI header set */
-#define MPI2_HEADER_VERSION_UNIT            (0x35)
+#define MPI2_HEADER_VERSION_UNIT            (0x36)
 #define MPI2_HEADER_VERSION_DEV             (0x00)
 #define MPI2_HEADER_VERSION_UNIT_MASK       (0xFF00)
 #define MPI2_HEADER_VERSION_UNIT_SHIFT      (8)
index 167d79d..3a6871a 100644 (file)
@@ -7,7 +7,7 @@
  *         Title:  MPI Configuration messages and pages
  * Creation Date:  November 10, 2006
  *
- *    mpi2_cnfg.h Version:  02.00.46
+ *    mpi2_cnfg.h Version:  02.00.47
  *
  * NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25
  *       prefix are for use only on MPI v2.5 products, and must not be used
  *                     Added DMDReport Delay Time defines to
  *                     PCIeIOUnitPage1
  * --------------------------------------------------------------------------
+ * 08-02-18  02.00.44  Added Slotx2, Slotx4 to ManPage 7.
+ * 08-15-18  02.00.45  Added ProductSpecific field at end of IOC Page 1
+ * 08-28-18  02.00.46  Added NVMs Write Cache flag to IOUnitPage1
+ *                     Added DMDReport Delay Time defines to PCIeIOUnitPage1
+ * 12-17-18  02.00.47  Swap locations of Slotx2 and Slotx4 in ManPage 7.
  */
 
 #ifndef MPI2_CNFG_H
@@ -810,7 +815,8 @@ typedef struct _MPI2_MANPAGE7_CONNECTOR_INFO {
        U8                          Location;               /*0x14 */
        U8                          ReceptacleID;           /*0x15 */
        U16                         Slot;                   /*0x16 */
-       U32                         Reserved2;              /*0x18 */
+       U16                         Slotx2;                 /*0x18 */
+       U16                         Slotx4;                 /*0x1A */
 } MPI2_MANPAGE7_CONNECTOR_INFO,
        *PTR_MPI2_MANPAGE7_CONNECTOR_INFO,
        Mpi2ManPage7ConnectorInfo_t,
index 4959585..a3f6778 100644 (file)
@@ -5,7 +5,7 @@
  *          Name: mpi2_image.h
  * Description: Contains definitions for firmware and other component images
  * Creation Date: 04/02/2018
- *       Version: 02.06.03
+ *       Version: 02.06.04
  *
  *
  * Version History
@@ -17,6 +17,8 @@
  * 08-14-18  02.06.01  Corrected define for MPI26_IMAGE_HEADER_SIGNATURE0_MPI26
  * 08-28-18  02.06.02  Added MPI2_EXT_IMAGE_TYPE_RDE
  * 09-07-18  02.06.03  Added MPI26_EVENT_PCIE_TOPO_PI_16_LANES
+ * 12-17-18  02.06.04  Added MPI2_EXT_IMAGE_TYPE_PBLP
+ *                     Shorten some defines to be compatible with DOS
  */
 #ifndef MPI2_IMAGE_H
 #define MPI2_IMAGE_H
@@ -200,17 +202,17 @@ typedef struct _MPI26_COMPONENT_IMAGE_HEADER {
 #define MPI26_IMAGE_HEADER_SIGNATURE0_MPI26                     (0xEB000042)
 
 /**** Definitions for Signature1 field ****/
-#define MPI26_IMAGE_HEADER_SIGNATURE1_APPLICATION              (0x20505041)
-#define MPI26_IMAGE_HEADER_SIGNATURE1_CBB                      (0x20424243)
-#define MPI26_IMAGE_HEADER_SIGNATURE1_MFG                      (0x2047464D)
-#define MPI26_IMAGE_HEADER_SIGNATURE1_BIOS                     (0x534F4942)
-#define MPI26_IMAGE_HEADER_SIGNATURE1_HIIM                     (0x4D494948)
-#define MPI26_IMAGE_HEADER_SIGNATURE1_HIIA                     (0x41494948)
-#define MPI26_IMAGE_HEADER_SIGNATURE1_CPLD                     (0x444C5043)
-#define MPI26_IMAGE_HEADER_SIGNATURE1_SPD                      (0x20445053)
-#define MPI26_IMAGE_HEADER_SIGNATURE1_NVDATA                   (0x5444564E)
-#define MPI26_IMAGE_HEADER_SIGNATURE1_GAS_GAUGE                (0x20534147)
-#define MPI26_IMAGE_HEADER_SIGNATURE1_PBLP                     (0x50424C50)
+#define MPI26_IMAGE_HEADER_SIG1_APPLICATION              (0x20505041)
+#define MPI26_IMAGE_HEADER_SIG1_CBB                      (0x20424243)
+#define MPI26_IMAGE_HEADER_SIG1_MFG                      (0x2047464D)
+#define MPI26_IMAGE_HEADER_SIG1_BIOS                     (0x534F4942)
+#define MPI26_IMAGE_HEADER_SIG1_HIIM                     (0x4D494948)
+#define MPI26_IMAGE_HEADER_SIG1_HIIA                     (0x41494948)
+#define MPI26_IMAGE_HEADER_SIG1_CPLD                     (0x444C5043)
+#define MPI26_IMAGE_HEADER_SIG1_SPD                      (0x20445053)
+#define MPI26_IMAGE_HEADER_SIG1_NVDATA                   (0x5444564E)
+#define MPI26_IMAGE_HEADER_SIG1_GAS_GAUGE                (0x20534147)
+#define MPI26_IMAGE_HEADER_SIG1_PBLP                     (0x504C4250)
 
 /**** Definitions for Signature2 field ****/
 #define MPI26_IMAGE_HEADER_SIGNATURE2_VALUE                    (0x50584546)
@@ -278,6 +280,7 @@ typedef struct _MPI2_EXT_IMAGE_HEADER {
 #define MPI2_EXT_IMAGE_TYPE_MEGARAID                (0x08)
 #define MPI2_EXT_IMAGE_TYPE_ENCRYPTED_HASH          (0x09)
 #define MPI2_EXT_IMAGE_TYPE_RDE                     (0x0A)
+#define MPI2_EXT_IMAGE_TYPE_PBLP                    (0x0B)
 #define MPI2_EXT_IMAGE_TYPE_MIN_PRODUCT_SPECIFIC    (0x80)
 #define MPI2_EXT_IMAGE_TYPE_MAX_PRODUCT_SPECIFIC    (0xFF)
 
@@ -472,12 +475,12 @@ Mpi25EncryptedHashEntry_t, *pMpi25EncryptedHashEntry_t;
 #define MPI25_HASH_ALGORITHM_UNUSED             (0x00)
 #define MPI25_HASH_ALGORITHM_SHA256             (0x01)
 
-#define MPI26_HASH_ALGORITHM_VERSION_MASK       (0xE0)
-#define MPI26_HASH_ALGORITHM_VERSION_NONE       (0x00)
-#define MPI26_HASH_ALGORITHM_VERSION_SHA1       (0x20)
-#define MPI26_HASH_ALGORITHM_VERSION_SHA2       (0x40)
-#define MPI26_HASH_ALGORITHM_VERSION_SHA3       (0x60)
-#define MPI26_HASH_ALGORITHM_SIZE_MASK          (0x1F)
+#define MPI26_HASH_ALGORITHM_VER_MASK          (0xE0)
+#define MPI26_HASH_ALGORITHM_VER_NONE          (0x00)
+#define MPI26_HASH_ALGORITHM_VER_SHA1          (0x20)
+#define MPI26_HASH_ALGORITHM_VER_SHA2          (0x40)
+#define MPI26_HASH_ALGORITHM_VER_SHA3          (0x60)
+#define MPI26_HASH_ALGORITHM_SIZE_MASK         (0x1F)
 #define MPI26_HASH_ALGORITHM_SIZE_256           (0x01)
 #define MPI26_HASH_ALGORITHM_SIZE_512           (0x02)
 
index 63a0950..bb7b79c 100644 (file)
@@ -6,7 +6,7 @@
  *         Title:  MPI PCIe Attached Devices structures and definitions.
  * Creation Date:  October 9, 2012
  *
- * mpi2_pci.h Version:  02.00.03
+ * mpi2_pci.h Version:  02.00.04
  *
  * NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25
  *       prefix are for use only on MPI v2.5 products, and must not be used
@@ -24,6 +24,8 @@
  * 07-01-16  02.00.02  Added MPI26_NVME_FLAGS_FORCE_ADMIN_ERR_RESP to
  *                     NVME Encapsulated Request.
  * 07-22-18  02.00.03  Updted flags field for NVME Encapsulated req
+ * 12-17-18  02.00.04  Added MPI26_PCIE_DEVINFO_SCSI
+ *                     Shorten some defines to be compatible with DOS
  * --------------------------------------------------------------------------
  */
 
@@ -41,7 +43,7 @@
 #define MPI26_PCIE_DEVINFO_NO_DEVICE            (0x00000000)
 #define MPI26_PCIE_DEVINFO_PCI_SWITCH           (0x00000001)
 #define MPI26_PCIE_DEVINFO_NVME                 (0x00000003)
-
+#define MPI26_PCIE_DEVINFO_SCSI                 (0x00000004)
 
 /****************************************************************************
 *  NVMe Encapsulated message
@@ -75,10 +77,9 @@ typedef struct _MPI26_NVME_ENCAPSULATED_REQUEST {
 #define MPI26_NVME_FLAGS_SUBMISSIONQ_IO             (0x0000)
 #define MPI26_NVME_FLAGS_SUBMISSIONQ_ADMIN          (0x0010)
 /*Error Response Address Space */
-#define MPI26_NVME_FLAGS_MASK_ERROR_RSP_ADDR        (0x000C)
-#define MPI26_NVME_FLAGS_MASK_ERROR_RSP_ADDR_MASK   (0x000C)
-#define MPI26_NVME_FLAGS_SYSTEM_RSP_ADDR            (0x0000)
-#define MPI26_NVME_FLAGS_IOCCTL_RSP_ADDR            (0x0008)
+#define MPI26_NVME_FLAGS_ERR_RSP_ADDR_MASK          (0x000C)
+#define MPI26_NVME_FLAGS_ERR_RSP_ADDR_SYSTEM        (0x0000)
+#define MPI26_NVME_FLAGS_ERR_RSP_ADDR_IOCTL         (0x0008)
 /* Data Direction*/
 #define MPI26_NVME_FLAGS_DATADIRECTION_MASK         (0x0003)
 #define MPI26_NVME_FLAGS_NODATATRANSFER             (0x0000)
index 3f966b6..17ef7f6 100644 (file)
@@ -7,7 +7,7 @@
  *         Title:  MPI diagnostic tool structures and definitions
  * Creation Date:  March 26, 2007
  *
- *   mpi2_tool.h Version:  02.00.15
+ *   mpi2_tool.h Version:  02.00.16
  *
  * Version History
  * ---------------
@@ -40,6 +40,7 @@
  *                     Tool Request Message.
  * 07-22-18  02.00.15  Added defines for new TOOLBOX_PCIE_LANE_MARGINING tool.
  *                     Added option for DeviceInfo field in ISTWI tool.
+ * 12-17-18  02.00.16  Shorten some defines to be compatible with DOS.
  * --------------------------------------------------------------------------
  */
 
@@ -230,11 +231,11 @@ typedef struct _MPI2_TOOLBOX_ISTWI_READ_WRITE_REQUEST {
 #define MPI2_TOOL_ISTWI_FLAG_PAGE_ADDR_MASK         (0x07)
 
 /*MPI26 TOOLBOX Request MsgFlags defines */
-#define MPI26_TOOLBOX_REQ_MSGFLAGS_ADDRESSING_MASK     (0x01)
+#define MPI26_TOOL_ISTWI_MSGFLG_ADDR_MASK           (0x01)
 /*Request uses Man Page 43 device index addressing */
-#define MPI26_TOOLBOX_REQ_MSGFLAGS_ADDRESSING_DEVINDEX (0x00)
+#define MPI26_TOOL_ISTWI_MSGFLG_ADDR_INDEX          (0x00)
 /*Request uses Man Page 43 device info struct addressing */
-#define MPI26_TOOLBOX_REQ_MSGFLAGS_ADDRESSING_DEVINFO  (0x01)
+#define MPI26_TOOL_ISTWI_MSGFLG_ADDR_INFO           (0x01)
 
 /*Toolbox ISTWI Read Write Tool reply message */
 typedef struct _MPI2_TOOLBOX_ISTWI_REPLY {
@@ -403,7 +404,7 @@ Mpi2ToolboxTextDisplayRequest_t,
  */
 
 /*Toolbox Backend Lane Margining Tool request message */
-typedef struct _MPI26_TOOLBOX_LANE_MARGINING_REQUEST {
+typedef struct _MPI26_TOOLBOX_LANE_MARGIN_REQUEST {
        U8 Tool;                        /*0x00 */
        U8 Reserved1;                   /*0x01 */
        U8 ChainOffset;                 /*0x02 */
@@ -434,7 +435,7 @@ typedef struct _MPI26_TOOLBOX_LANE_MARGINING_REQUEST {
 
 
 /*Toolbox Backend Lane Margining Tool reply message */
-typedef struct _MPI26_TOOLBOX_LANE_MARGINING_REPLY {
+typedef struct _MPI26_TOOLBOX_LANE_MARGIN_REPLY {
        U8 Tool;                        /*0x00 */
        U8 Reserved1;                   /*0x01 */
        U8 MsgLength;                   /*0x02 */
index 050c0f0..fea3cb6 100644 (file)
@@ -2260,6 +2260,11 @@ base_is_prp_possible(struct MPT3SAS_ADAPTER *ioc,
        bool build_prp = true;
 
        data_length = scsi_bufflen(scmd);
+       if (pcie_device &&
+           (mpt3sas_scsih_is_pcie_scsi_device(pcie_device->device_info))) {
+               build_prp = false;
+               return build_prp;
+       }
 
        /* If Datalenth is <= 16K and number of SGE’s entries are <= 2
         * we built IEEE SGL
@@ -3178,6 +3183,37 @@ mpt3sas_base_unmap_resources(struct MPT3SAS_ADAPTER *ioc)
        }
 }
 
+static int
+_base_diag_reset(struct MPT3SAS_ADAPTER *ioc);
+
+/**
+ * _base_check_for_fault_and_issue_reset - issue diag reset if IOC is in fault
+ * @ioc: per adapter object
+ *
+ * Returns: 0 if ioc->pci_error_recovery is set (IOC state is not checked),
+ * -EFAULT if the IOC is not in fault state, else _base_diag_reset()'s result.
+ */
+static int
+_base_check_for_fault_and_issue_reset(struct MPT3SAS_ADAPTER *ioc)
+{
+       u32 ioc_state;
+       int rc = -EFAULT;
+
+       dinitprintk(ioc, pr_info("%s\n", __func__));
+       if (ioc->pci_error_recovery)
+               return 0;
+       ioc_state = mpt3sas_base_get_iocstate(ioc, 0);
+       dhsprintk(ioc, pr_info("%s: ioc_state(0x%08x)\n", __func__, ioc_state));
+
+       if ((ioc_state & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) {
+               mpt3sas_base_fault_info(ioc, ioc_state &
+                   MPI2_DOORBELL_DATA_MASK);
+               rc = _base_diag_reset(ioc);
+       }
+
+       return rc;
+}
+
 /**
  * mpt3sas_base_map_resources - map in controller resources (io/irq/memap)
  * @ioc: per adapter object
@@ -3190,7 +3226,7 @@ mpt3sas_base_map_resources(struct MPT3SAS_ADAPTER *ioc)
        struct pci_dev *pdev = ioc->pdev;
        u32 memap_sz;
        u32 pio_sz;
-       int i, r = 0;
+       int i, r = 0, rc;
        u64 pio_chip = 0;
        phys_addr_t chip_phys = 0;
        struct adapter_reply_queue *reply_q;
@@ -3251,8 +3287,11 @@ mpt3sas_base_map_resources(struct MPT3SAS_ADAPTER *ioc)
        _base_mask_interrupts(ioc);
 
        r = _base_get_ioc_facts(ioc);
-       if (r)
-               goto out_fail;
+       if (r) {
+               rc = _base_check_for_fault_and_issue_reset(ioc);
+               if (rc || (_base_get_ioc_facts(ioc)))
+                       goto out_fail;
+       }
 
        if (!ioc->rdpq_array_enable_assigned) {
                ioc->rdpq_array_enable = ioc->rdpq_array_capable;
@@ -5037,6 +5076,7 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc)
                _base_release_memory_pools(ioc);
                goto retry_allocation;
        }
+       memset(ioc->request, 0, sz);
 
        if (retry_sz)
                ioc_err(ioc, "request pool: dma_alloc_coherent succeed: hba_depth(%d), chains_per_io(%d), frame_sz(%d), total(%d kb)\n",
@@ -5410,8 +5450,6 @@ _base_wait_on_iocstate(struct MPT3SAS_ADAPTER *ioc, u32 ioc_state, int timeout)
  *
  * Notes: MPI2_HIS_IOC2SYS_DB_STATUS - set to one when IOC writes to doorbell.
  */
-static int
-_base_diag_reset(struct MPT3SAS_ADAPTER *ioc);
 
 static int
 _base_wait_for_doorbell_int(struct MPT3SAS_ADAPTER *ioc, int timeout)
@@ -5868,6 +5906,7 @@ mpt3sas_base_scsi_enclosure_processor(struct MPT3SAS_ADAPTER *ioc,
        ioc->base_cmds.status = MPT3_CMD_PENDING;
        request = mpt3sas_base_get_msg_frame(ioc, smid);
        ioc->base_cmds.smid = smid;
+       memset(request, 0, ioc->request_sz);
        memcpy(request, mpi_request, sizeof(Mpi2SepReply_t));
        init_completion(&ioc->base_cmds.done);
        ioc->put_smid_default(ioc, smid);
@@ -6686,7 +6725,7 @@ _base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type)
 static int
 _base_make_ioc_operational(struct MPT3SAS_ADAPTER *ioc)
 {
-       int r, i, index;
+       int r, i, index, rc;
        unsigned long   flags;
        u32 reply_address;
        u16 smid;
@@ -6789,8 +6828,19 @@ _base_make_ioc_operational(struct MPT3SAS_ADAPTER *ioc)
  skip_init_reply_post_free_queue:
 
        r = _base_send_ioc_init(ioc);
-       if (r)
-               return r;
+       if (r) {
+               /*
+                * There is no need to check the IOC for fault state and
+                * issue a diag reset during host reset. This check is
+                * needed only at driver load time.
+                */
+               if (!ioc->is_driver_loading)
+                       return r;
+
+               rc = _base_check_for_fault_and_issue_reset(ioc);
+               if (rc || (_base_send_ioc_init(ioc)))
+                       return r;
+       }
 
        /* initialize reply free host index */
        ioc->reply_free_host_index = ioc->reply_free_queue_depth - 1;
@@ -6882,7 +6932,7 @@ mpt3sas_base_free_resources(struct MPT3SAS_ADAPTER *ioc)
 int
 mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
 {
-       int r, i;
+       int r, i, rc;
        int cpu_id, last_cpu_id = 0;
 
        dinitprintk(ioc, ioc_info(ioc, "%s\n", __func__));
@@ -6926,8 +6976,11 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
 
        pci_set_drvdata(ioc->pdev, ioc->shost);
        r = _base_get_ioc_facts(ioc);
-       if (r)
-               goto out_free_resources;
+       if (r) {
+               rc = _base_check_for_fault_and_issue_reset(ioc);
+               if (rc || (_base_get_ioc_facts(ioc)))
+                       goto out_free_resources;
+       }
 
        switch (ioc->hba_mpi_version_belonged) {
        case MPI2_VERSION:
@@ -6995,8 +7048,11 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
 
        for (i = 0 ; i < ioc->facts.NumberOfPorts; i++) {
                r = _base_get_port_facts(ioc, i);
-               if (r)
-                       goto out_free_resources;
+               if (r) {
+                       rc = _base_check_for_fault_and_issue_reset(ioc);
+                       if (rc || (_base_get_port_facts(ioc, i)))
+                               goto out_free_resources;
+               }
        }
 
        r = _base_allocate_memory_pools(ioc);
@@ -7118,6 +7174,13 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
        if (r)
                goto out_free_resources;
 
+       /*
+        * Copy current copy of IOCFacts in prev_fw_facts
+        * and it will be used during online firmware upgrade.
+        */
+       memcpy(&ioc->prev_fw_facts, &ioc->facts,
+           sizeof(struct mpt3sas_facts));
+
        ioc->non_operational_loop = 0;
        ioc->got_task_abort_from_ioctl = 0;
        return 0;
@@ -7280,6 +7343,85 @@ mpt3sas_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc)
 }
 
 /**
+ * _base_check_ioc_facts_changes - Look for increase/decrease of IOCFacts
+ *     attributes during online firmware upgrade and update the corresponding
+ *     IOC variables accordingly.
+ *
+ * @ioc: Pointer to MPT_ADAPTER structure
+ */
+static int
+_base_check_ioc_facts_changes(struct MPT3SAS_ADAPTER *ioc)
+{
+       u16 pd_handles_sz;
+       void *pd_handles = NULL, *blocking_handles = NULL;
+       void *pend_os_device_add = NULL, *device_remove_in_progress = NULL;
+       struct mpt3sas_facts *old_facts = &ioc->prev_fw_facts;
+
+       if (ioc->facts.MaxDevHandle > old_facts->MaxDevHandle) {
+               pd_handles_sz = (ioc->facts.MaxDevHandle / 8);
+               if (ioc->facts.MaxDevHandle % 8)
+                       pd_handles_sz++;
+
+               pd_handles = krealloc(ioc->pd_handles, pd_handles_sz,
+                   GFP_KERNEL);
+               if (!pd_handles) {
+                       ioc_info(ioc,
+                           "Unable to allocate the memory for pd_handles of sz: %d\n",
+                           pd_handles_sz);
+                       return -ENOMEM;
+               }
+               memset(pd_handles + ioc->pd_handles_sz, 0,
+                   (pd_handles_sz - ioc->pd_handles_sz));
+               ioc->pd_handles = pd_handles;
+
+               blocking_handles = krealloc(ioc->blocking_handles,
+                   pd_handles_sz, GFP_KERNEL);
+               if (!blocking_handles) {
+                       ioc_info(ioc,
+                           "Unable to allocate the memory for "
+                           "blocking_handles of sz: %d\n",
+                           pd_handles_sz);
+                       return -ENOMEM;
+               }
+               memset(blocking_handles + ioc->pd_handles_sz, 0,
+                   (pd_handles_sz - ioc->pd_handles_sz));
+               ioc->blocking_handles = blocking_handles;
+               ioc->pd_handles_sz = pd_handles_sz;
+
+               pend_os_device_add = krealloc(ioc->pend_os_device_add,
+                   pd_handles_sz, GFP_KERNEL);
+               if (!pend_os_device_add) {
+                       ioc_info(ioc,
+                           "Unable to allocate the memory for pend_os_device_add of sz: %d\n",
+                           pd_handles_sz);
+                       return -ENOMEM;
+               }
+               memset(pend_os_device_add + ioc->pend_os_device_add_sz, 0,
+                   (pd_handles_sz - ioc->pend_os_device_add_sz));
+               ioc->pend_os_device_add = pend_os_device_add;
+               ioc->pend_os_device_add_sz = pd_handles_sz;
+
+               device_remove_in_progress = krealloc(
+                   ioc->device_remove_in_progress, pd_handles_sz, GFP_KERNEL);
+               if (!device_remove_in_progress) {
+                       ioc_info(ioc,
+                           "Unable to allocate the memory for "
+                           "device_remove_in_progress of sz: %d\n "
+                           , pd_handles_sz);
+                       return -ENOMEM;
+               }
+               memset(device_remove_in_progress +
+                   ioc->device_remove_in_progress_sz, 0,
+                   (pd_handles_sz - ioc->device_remove_in_progress_sz));
+               ioc->device_remove_in_progress = device_remove_in_progress;
+               ioc->device_remove_in_progress_sz = pd_handles_sz;
+       }
+
+       memcpy(&ioc->prev_fw_facts, &ioc->facts, sizeof(struct mpt3sas_facts));
+       return 0;
+}
+
+/**
  * mpt3sas_base_hard_reset_handler - reset controller
  * @ioc: Pointer to MPT_ADAPTER structure
  * @type: FORCE_BIG_HAMMER or SOFT_RESET
@@ -7342,6 +7484,13 @@ mpt3sas_base_hard_reset_handler(struct MPT3SAS_ADAPTER *ioc,
        if (r)
                goto out;
 
+       r = _base_check_ioc_facts_changes(ioc);
+       if (r) {
+               ioc_info(ioc,
+                   "Some of the parameters got changed in this new firmware"
+                   " image and it requires system reboot\n");
+               goto out;
+       }
        if (ioc->rdpq_array_enable && !ioc->rdpq_array_capable)
                panic("%s: Issue occurred with flashing controller firmware."
                      "Please reboot the system and ensure that the correct"
index 6afbdb0..faca0a5 100644 (file)
@@ -76,8 +76,8 @@
 #define MPT3SAS_DRIVER_NAME            "mpt3sas"
 #define MPT3SAS_AUTHOR "Avago Technologies <MPT-FusionLinux.pdl@avagotech.com>"
 #define MPT3SAS_DESCRIPTION    "LSI MPT Fusion SAS 3.0 Device Driver"
-#define MPT3SAS_DRIVER_VERSION         "29.100.00.00"
-#define MPT3SAS_MAJOR_VERSION          29
+#define MPT3SAS_DRIVER_VERSION         "31.100.00.00"
+#define MPT3SAS_MAJOR_VERSION          31
 #define MPT3SAS_MINOR_VERSION          100
 #define MPT3SAS_BUILD_VERSION          0
 #define MPT3SAS_RELEASE_VERSION        00
@@ -583,6 +583,7 @@ static inline void sas_device_put(struct _sas_device *s)
  * @enclosure_level: The level of device's enclosure from the controller
  * @connector_name: ASCII value of the Connector's name
  * @serial_number: pointer of serial number string allocated runtime
+ * @access_status: Device's Access Status
  * @refcount: reference count for deletion
  */
 struct _pcie_device {
@@ -604,6 +605,7 @@ struct _pcie_device {
        u8      connector_name[4];
        u8      *serial_number;
        u8      reset_timeout;
+       u8      access_status;
        struct kref refcount;
 };
 /**
@@ -1045,6 +1047,7 @@ typedef void (*MPT3SAS_FLUSH_RUNNING_CMDS)(struct MPT3SAS_ADAPTER *ioc);
  * @schedule_dead_ioc_flush_running_cmds: callback to flush pending commands
  * @thresh_hold: Max number of reply descriptors processed
  *                             before updating Host Index
+ * @drv_support_bitmap: driver's supported feature bit map
  * @scsi_io_cb_idx: shost generated commands
  * @tm_cb_idx: task management commands
  * @scsih_cb_idx: scsih internal commands
@@ -1066,6 +1069,7 @@ typedef void (*MPT3SAS_FLUSH_RUNNING_CMDS)(struct MPT3SAS_ADAPTER *ioc);
  * @event_log: event log pointer
  * @event_masks: events that are masked
  * @facts: static facts data
+ * @prev_fw_facts: previous fw facts data
  * @pfacts: static port facts data
  * @manu_pg0: static manufacturing page 0
  * @manu_pg10: static manufacturing page 10
@@ -1227,6 +1231,8 @@ struct MPT3SAS_ADAPTER {
        bool            msix_load_balance;
        u16             thresh_hold;
        u8              high_iops_queues;
+       u32             drv_support_bitmap;
+       bool            enable_sdev_max_qd;
 
        /* internal commands, callback index */
        u8              scsi_io_cb_idx;
@@ -1276,6 +1282,7 @@ struct MPT3SAS_ADAPTER {
 
        /* static config pages */
        struct mpt3sas_facts facts;
+       struct mpt3sas_facts prev_fw_facts;
        struct mpt3sas_port_facts *pfacts;
        Mpi2ManufacturingPage0_t manu_pg0;
        struct Mpi2ManufacturingPage10_t manu_pg10;
@@ -1450,6 +1457,8 @@ struct MPT3SAS_ADAPTER {
        GET_MSIX_INDEX get_msix_index_for_smlio;
 };
 
+#define MPT_DRV_SUPPORT_BITMAP_MEMMOVE 0x00000001
+
 typedef u8 (*MPT_CALLBACK)(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
        u32 reply);
 
@@ -1579,6 +1588,7 @@ struct _pcie_device *mpt3sas_get_pdev_by_handle(struct MPT3SAS_ADAPTER *ioc,
 void mpt3sas_port_enable_complete(struct MPT3SAS_ADAPTER *ioc);
 struct _raid_device *
 mpt3sas_raid_device_find_by_handle(struct MPT3SAS_ADAPTER *ioc, u16 handle);
+void mpt3sas_scsih_change_queue_depth(struct scsi_device *sdev, int qdepth);
 
 /* config shared API */
 u8 mpt3sas_config_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
@@ -1733,4 +1743,20 @@ mpt3sas_setup_direct_io(struct MPT3SAS_ADAPTER *ioc, struct scsi_cmnd *scmd,
 /* NCQ Prio Handling Check */
 bool scsih_ncq_prio_supp(struct scsi_device *sdev);
 
+/**
+ * mpt3sas_scsih_is_pcie_scsi_device - check for a PCIe SCSI device
+ * @device_info: bitfield providing information about the device.
+ * Context: none
+ *
+ * Returns 1 if the masked device type is MPI26_PCIE_DEVINFO_SCSI, else 0.
+ */
+static inline int
+mpt3sas_scsih_is_pcie_scsi_device(u32 device_info)
+{
+       if ((device_info &
+           MPI26_PCIE_DEVINFO_MASK_DEVICE_TYPE) == MPI26_PCIE_DEVINFO_SCSI)
+               return 1;
+       else
+               return 0;
+}
 #endif /* MPT3SAS_BASE_H_INCLUDED */
index d4ecfbb..7d69695 100644 (file)
@@ -596,8 +596,16 @@ _ctl_set_task_mid(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command *karg,
                if (priv_data->sas_target->handle != handle)
                        continue;
                st = scsi_cmd_priv(scmd);
-               tm_request->TaskMID = cpu_to_le16(st->smid);
-               found = 1;
+
+               /*
+                * If the given TaskMID from the user space is zero, then the
+                * first outstanding smid will be picked up.  Otherwise,
+                * targeted smid will be the one.
+                */
+               if (!tm_request->TaskMID || tm_request->TaskMID == st->smid) {
+                       tm_request->TaskMID = cpu_to_le16(st->smid);
+                       found = 1;
+               }
        }
 
        if (!found) {
@@ -654,7 +662,6 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg,
        size_t data_in_sz = 0;
        long ret;
        u16 device_handle = MPT3SAS_INVALID_DEVICE_HANDLE;
-       u8 tr_method = MPI26_SCSITASKMGMT_MSGFLAGS_PROTOCOL_LVL_RST_PCIE;
 
        issue_reset = 0;
 
@@ -707,6 +714,7 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg,
        ioc->ctl_cmds.status = MPT3_CMD_PENDING;
        memset(ioc->ctl_cmds.reply, 0, ioc->reply_sz);
        request = mpt3sas_base_get_msg_frame(ioc, smid);
+       memset(request, 0, ioc->request_sz);
        memcpy(request, mpi_request, karg.data_sge_offset*4);
        ioc->ctl_cmds.smid = smid;
        data_out_sz = karg.data_out_size;
@@ -921,13 +929,37 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg,
                Mpi2ToolboxCleanRequest_t *toolbox_request =
                        (Mpi2ToolboxCleanRequest_t *)mpi_request;
 
-               if (toolbox_request->Tool == MPI2_TOOLBOX_DIAGNOSTIC_CLI_TOOL) {
+               if ((toolbox_request->Tool == MPI2_TOOLBOX_DIAGNOSTIC_CLI_TOOL)
+                   || (toolbox_request->Tool ==
+                   MPI26_TOOLBOX_BACKEND_PCIE_LANE_MARGIN))
                        ioc->build_sg(ioc, psge, data_out_dma, data_out_sz,
                                data_in_dma, data_in_sz);
-               } else {
+               else if (toolbox_request->Tool ==
+                               MPI2_TOOLBOX_MEMORY_MOVE_TOOL) {
+                       Mpi2ToolboxMemMoveRequest_t *mem_move_request =
+                                       (Mpi2ToolboxMemMoveRequest_t *)request;
+                       Mpi2SGESimple64_t tmp, *src = NULL, *dst = NULL;
+
+                       ioc->build_sg_mpi(ioc, psge, data_out_dma,
+                                       data_out_sz, data_in_dma, data_in_sz);
+                       if (data_out_sz && !data_in_sz) {
+                               dst =
+                                   (Mpi2SGESimple64_t *)&mem_move_request->SGL;
+                               src = (void *)dst + ioc->sge_size;
+
+                               memcpy(&tmp, src, ioc->sge_size);
+                               memcpy(src, dst, ioc->sge_size);
+                               memcpy(dst, &tmp, ioc->sge_size);
+                       }
+                       if (ioc->logging_level & MPT_DEBUG_TM) {
+                               ioc_info(ioc,
+                                 "Mpi2ToolboxMemMoveRequest_t request msg\n");
+                               _debug_dump_mf(mem_move_request,
+                                                       ioc->request_sz/4);
+                       }
+               } else
                        ioc->build_sg_mpi(ioc, psge, data_out_dma, data_out_sz,
-                               data_in_dma, data_in_sz);
-               }
+                           data_in_dma, data_in_sz);
                ioc->put_smid_default(ioc, smid);
                break;
        }
@@ -1047,12 +1079,14 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg,
                        mpt3sas_halt_firmware(ioc);
                        pcie_device = mpt3sas_get_pdev_by_handle(ioc,
                                le16_to_cpu(mpi_request->FunctionDependent1));
-                       if (pcie_device && (!ioc->tm_custom_handling))
+                       if (pcie_device && (!ioc->tm_custom_handling) &&
+                           (!(mpt3sas_scsih_is_pcie_scsi_device(
+                           pcie_device->device_info))))
                                mpt3sas_scsih_issue_locked_tm(ioc,
                                  le16_to_cpu(mpi_request->FunctionDependent1),
                                  0, MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET, 0,
                                  0, pcie_device->reset_timeout,
-                                 tr_method);
+                       MPI26_SCSITASKMGMT_MSGFLAGS_PROTOCOL_LVL_RST_PCIE);
                        else
                                mpt3sas_scsih_issue_locked_tm(ioc,
                                  le16_to_cpu(mpi_request->FunctionDependent1),
@@ -3278,9 +3312,8 @@ diag_trigger_scsi_store(struct device *cdev,
        ssize_t sz;
 
        spin_lock_irqsave(&ioc->diag_trigger_lock, flags);
-       sz = min(sizeof(struct SL_WH_SCSI_TRIGGERS_T), count);
-       memset(&ioc->diag_trigger_scsi, 0,
-           sizeof(struct SL_WH_EVENT_TRIGGERS_T));
+       sz = min(sizeof(ioc->diag_trigger_scsi), count);
+       memset(&ioc->diag_trigger_scsi, 0, sizeof(ioc->diag_trigger_scsi));
        memcpy(&ioc->diag_trigger_scsi, buf, sz);
        if (ioc->diag_trigger_scsi.ValidEntries > NUM_VALID_ENTRIES)
                ioc->diag_trigger_scsi.ValidEntries = NUM_VALID_ENTRIES;
@@ -3349,6 +3382,125 @@ static DEVICE_ATTR_RW(diag_trigger_mpi);
 
 /*****************************************/
 
+/**
+ * drv_support_bitmap_show - report the driver supported feature bitmap
+ * @cdev: pointer to embedded class device
+ * @attr: device attribute being read (not used)
+ * @buf: the buffer returned
+ *
+ * A sysfs 'read-only' shost attribute.
+ *
+ * Return: the snprintf() result for ioc->drv_support_bitmap formatted as
+ * an eight digit hexadecimal value followed by a newline.
+ */
+static ssize_t
+drv_support_bitmap_show(struct device *cdev,
+       struct device_attribute *attr, char *buf)
+{
+       struct MPT3SAS_ADAPTER *adapter = shost_priv(class_to_shost(cdev));
+
+       return snprintf(buf, PAGE_SIZE, "0x%08x\n",
+           adapter->drv_support_bitmap);
+}
+static DEVICE_ATTR_RO(drv_support_bitmap);
+
+/**
+ * enable_sdev_max_qd_show - display whether sdev max qd is enabled/disabled
+ * @cdev: pointer to embedded class device
+ * @attr: device attribute being read (not used)
+ * @buf: the buffer returned
+ *
+ * Read side of a sysfs read/write shost attribute. When the attribute is
+ * enabled, the targets queue depth is set to the HBA IO queue depth.
+ *
+ * Return: the snprintf() result for ioc->enable_sdev_max_qd formatted as
+ * a decimal value followed by a newline.
+ */
+static ssize_t
+enable_sdev_max_qd_show(struct device *cdev,
+       struct device_attribute *attr, char *buf)
+{
+       struct MPT3SAS_ADAPTER *adapter = shost_priv(class_to_shost(cdev));
+
+       return snprintf(buf, PAGE_SIZE, "%d\n", adapter->enable_sdev_max_qd);
+}
+
+/**
+ * enable_sdev_max_qd_store - Enable/disable sdev max qd
+ * @cdev: pointer to embedded class device
+ * @attr: device attribute being written (not used)
+ * @buf: the buffer holding the value written by the user
+ * @count: number of bytes written (not used)
+ *
+ * A sysfs read/write shost attribute. This attribute is used to set the
+ * targets queue depth to HBA IO queue depth if this attribute is enabled.
+ * If this attribute is disabled then targets will have corresponding default
+ * queue depth.
+ *
+ * Return: strlen() of @buf on success, -EINVAL when @buf does not parse as
+ * an integer or the value is neither 0 nor 1.
+ */
+static ssize_t
+enable_sdev_max_qd_store(struct device *cdev,
+       struct device_attribute *attr, const char *buf, size_t count)
+{
+       struct Scsi_Host *shost = class_to_shost(cdev);
+       struct MPT3SAS_ADAPTER *ioc = shost_priv(shost);
+       struct MPT3SAS_DEVICE *sas_device_priv_data;
+       struct MPT3SAS_TARGET *sas_target_priv_data;
+       int val = 0;
+       struct scsi_device *sdev;
+       struct _raid_device *raid_device;
+       int qdepth;
+
+       if (kstrtoint(buf, 0, &val) != 0)
+               return -EINVAL;
+
+       switch (val) {
+       case 0:
+               ioc->enable_sdev_max_qd = 0;
+               /* Put every device back on its per-type default depth. */
+               shost_for_each_device(sdev, ioc->shost) {
+                       sas_device_priv_data = sdev->hostdata;
+                       if (!sas_device_priv_data)
+                               continue;
+                       sas_target_priv_data = sas_device_priv_data->sas_target;
+                       if (!sas_target_priv_data)
+                               continue;
+
+                       if (sas_target_priv_data->flags &
+                           MPT_TARGET_FLAGS_VOLUME) {
+                               /*
+                                * NOTE(review): no lock is taken around
+                                * this lookup here -- confirm whether
+                                * one is required.
+                                */
+                               raid_device =
+                                   mpt3sas_raid_device_find_by_handle(ioc,
+                                   sas_target_priv_data->handle);
+
+                               /*
+                                * Do not dereference a failed lookup
+                                * (presumably NULL when the handle is
+                                * not found -- TODO confirm).  Such a
+                                * volume gets the same depth as an
+                                * unknown volume type.
+                                */
+                               if (raid_device &&
+                                   raid_device->volume_type ==
+                                   MPI2_RAID_VOL_TYPE_RAID0) {
+                                       if (raid_device->device_info &
+                                           MPI2_SAS_DEVICE_INFO_SSP_TARGET)
+                                               qdepth =
+                                                   MPT3SAS_SAS_QUEUE_DEPTH;
+                                       else
+                                               qdepth =
+                                                   MPT3SAS_SATA_QUEUE_DEPTH;
+                               } else {
+                                       /* RAID1E/1/10, unknown, not found */
+                                       qdepth = MPT3SAS_RAID_QUEUE_DEPTH;
+                               }
+                       } else if (sas_target_priv_data->flags &
+                           MPT_TARGET_FLAGS_PCIE_DEVICE)
+                               qdepth = MPT3SAS_NVME_QUEUE_DEPTH;
+                       else
+                               qdepth = MPT3SAS_SAS_QUEUE_DEPTH;
+
+                       mpt3sas_scsih_change_queue_depth(sdev, qdepth);
+               }
+               break;
+       case 1:
+               ioc->enable_sdev_max_qd = 1;
+               /* Raise every device to the host-wide can_queue limit. */
+               shost_for_each_device(sdev, ioc->shost)
+                       mpt3sas_scsih_change_queue_depth(sdev,
+                           shost->can_queue);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return strlen(buf);
+}
+static DEVICE_ATTR_RW(enable_sdev_max_qd);
+
 struct device_attribute *mpt3sas_host_attrs[] = {
        &dev_attr_version_fw,
        &dev_attr_version_bios,
@@ -3374,7 +3526,9 @@ struct device_attribute *mpt3sas_host_attrs[] = {
        &dev_attr_diag_trigger_event,
        &dev_attr_diag_trigger_scsi,
        &dev_attr_diag_trigger_mpi,
+       &dev_attr_drv_support_bitmap,
        &dev_attr_BRM_status,
+       &dev_attr_enable_sdev_max_qd,
        NULL,
 };
 
index 717ba08..c8e512b 100644 (file)
@@ -51,7 +51,6 @@
 #include <linux/workqueue.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/interrupt.h>
 #include <linux/aer.h>
 #include <linux/raid_class.h>
@@ -155,6 +154,10 @@ static int prot_mask = -1;
 module_param(prot_mask, int, 0444);
 MODULE_PARM_DESC(prot_mask, " host protection capabilities mask, def=7 ");
 
+/*
+ * Module-load default for the per-adapter enable_sdev_max_qd flag, which is
+ * copied into each adapter at probe time.  When the flag is set, device queue
+ * depths are set to the host's can_queue.  Mode 0444: readable, not writable,
+ * through the module parameter interface.
+ */
+static bool enable_sdev_max_qd;
+module_param(enable_sdev_max_qd, bool, 0444);
+MODULE_PARM_DESC(enable_sdev_max_qd,
+       "Enable sdev max qd as can_queue, def=disabled(0)");
 
 /* raid transport support */
 static struct raid_template *mpt3sas_raid_template;
@@ -1152,6 +1155,11 @@ _scsih_pcie_device_add(struct MPT3SAS_ADAPTER *ioc,
        list_add_tail(&pcie_device->list, &ioc->pcie_device_list);
        spin_unlock_irqrestore(&ioc->pcie_device_lock, flags);
 
+       if (pcie_device->access_status ==
+           MPI26_PCIEDEV0_ASTATUS_DEVICE_BLOCKED) {
+               clear_bit(pcie_device->handle, ioc->pend_os_device_add);
+               return;
+       }
        if (scsi_add_device(ioc->shost, PCIE_CHANNEL, pcie_device->id, 0)) {
                _scsih_pcie_device_remove(ioc, pcie_device);
        } else if (!pcie_device->starget) {
@@ -1196,7 +1204,9 @@ _scsih_pcie_device_init_add(struct MPT3SAS_ADAPTER *ioc,
        spin_lock_irqsave(&ioc->pcie_device_lock, flags);
        pcie_device_get(pcie_device);
        list_add_tail(&pcie_device->list, &ioc->pcie_device_init_list);
-       _scsih_determine_boot_device(ioc, pcie_device, PCIE_CHANNEL);
+       if (pcie_device->access_status !=
+           MPI26_PCIEDEV0_ASTATUS_DEVICE_BLOCKED)
+               _scsih_determine_boot_device(ioc, pcie_device, PCIE_CHANNEL);
        spin_unlock_irqrestore(&ioc->pcie_device_lock, flags);
 }
 /**
@@ -1433,17 +1443,20 @@ _scsih_is_end_device(u32 device_info)
 }
 
 /**
- * _scsih_is_nvme_device - determines if device is an nvme device
+ * _scsih_is_nvme_pciescsi_device - determines if
+ *                     device is a pcie nvme/scsi device
  * @device_info: bitfield providing information about the device.
  * Context: none
  *
- * Return: 1 if nvme device.
+ * Returns 1 if device is pcie device type nvme/scsi.
  */
 static int
-_scsih_is_nvme_device(u32 device_info)
+_scsih_is_nvme_pciescsi_device(u32 device_info)
 {
-       if ((device_info & MPI26_PCIE_DEVINFO_MASK_DEVICE_TYPE)
-                                       == MPI26_PCIE_DEVINFO_NVME)
+       if (((device_info & MPI26_PCIE_DEVINFO_MASK_DEVICE_TYPE)
+           == MPI26_PCIE_DEVINFO_NVME) ||
+           ((device_info & MPI26_PCIE_DEVINFO_MASK_DEVICE_TYPE)
+           == MPI26_PCIE_DEVINFO_SCSI))
                return 1;
        else
                return 0;
@@ -1509,7 +1522,13 @@ scsih_change_queue_depth(struct scsi_device *sdev, int qdepth)
 
        max_depth = shost->can_queue;
 
-       /* limit max device queue for SATA to 32 */
+       /*
+        * limit max device queue for SATA to 32 if enable_sdev_max_qd
+        * is disabled.
+        */
+       if (ioc->enable_sdev_max_qd)
+               goto not_sata;
+
        sas_device_priv_data = sdev->hostdata;
        if (!sas_device_priv_data)
                goto not_sata;
@@ -1539,6 +1558,25 @@ scsih_change_queue_depth(struct scsi_device *sdev, int qdepth)
 }
 
 /**
+ * mpt3sas_scsih_change_queue_depth - setting device queue depth
+ * @sdev: scsi device struct
+ * @qdepth: requested queue depth
+ *
+ * Returns nothing.
+ */
+void
+mpt3sas_scsih_change_queue_depth(struct scsi_device *sdev, int qdepth)
+{
+       struct Scsi_Host *host = sdev->host;
+       struct MPT3SAS_ADAPTER *adapter = shost_priv(host);
+
+       /*
+        * With enable_sdev_max_qd set, the requested depth is ignored and
+        * the host-wide can_queue value is used instead.
+        */
+       scsih_change_queue_depth(sdev,
+           adapter->enable_sdev_max_qd ? host->can_queue : qdepth);
+}
+
+/**
  * scsih_target_alloc - target add routine
  * @starget: scsi target struct
  *
@@ -2296,7 +2334,7 @@ scsih_slave_configure(struct scsi_device *sdev)
                                                MPT3SAS_RAID_MAX_SECTORS);
                }
 
-               scsih_change_queue_depth(sdev, qdepth);
+               mpt3sas_scsih_change_queue_depth(sdev, qdepth);
 
                /* raid transport support */
                if (!ioc->is_warpdrive)
@@ -2360,7 +2398,7 @@ scsih_slave_configure(struct scsi_device *sdev)
 
                pcie_device_put(pcie_device);
                spin_unlock_irqrestore(&ioc->pcie_device_lock, flags);
-               scsih_change_queue_depth(sdev, qdepth);
+               mpt3sas_scsih_change_queue_depth(sdev, qdepth);
                /* Enable QUEUE_FLAG_NOMERGES flag, so that IOs won't be
                 ** merged and can eliminate holes created during merging
                 ** operation.
@@ -2420,7 +2458,7 @@ scsih_slave_configure(struct scsi_device *sdev)
                _scsih_display_sata_capabilities(ioc, handle, sdev);
 
 
-       scsih_change_queue_depth(sdev, qdepth);
+       mpt3sas_scsih_change_queue_depth(sdev, qdepth);
 
        if (ssp_target) {
                sas_read_port_mode_page(sdev);
@@ -2872,7 +2910,8 @@ scsih_abort(struct scsi_cmnd *scmd)
 
        handle = sas_device_priv_data->sas_target->handle;
        pcie_device = mpt3sas_get_pdev_by_handle(ioc, handle);
-       if (pcie_device && (!ioc->tm_custom_handling))
+       if (pcie_device && (!ioc->tm_custom_handling) &&
+           (!(mpt3sas_scsih_is_pcie_scsi_device(pcie_device->device_info))))
                timeout = ioc->nvme_abort_timeout;
        r = mpt3sas_scsih_issue_locked_tm(ioc, handle, scmd->device->lun,
                MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK,
@@ -2943,11 +2982,13 @@ scsih_dev_reset(struct scsi_cmnd *scmd)
 
        pcie_device = mpt3sas_get_pdev_by_handle(ioc, handle);
 
-       if (pcie_device && (!ioc->tm_custom_handling)) {
+       if (pcie_device && (!ioc->tm_custom_handling) &&
+           (!(mpt3sas_scsih_is_pcie_scsi_device(pcie_device->device_info)))) {
                tr_timeout = pcie_device->reset_timeout;
                tr_method = MPI26_SCSITASKMGMT_MSGFLAGS_PROTOCOL_LVL_RST_PCIE;
        } else
                tr_method = MPI2_SCSITASKMGMT_MSGFLAGS_LINK_RESET;
+
        r = mpt3sas_scsih_issue_locked_tm(ioc, handle, scmd->device->lun,
                MPI2_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET, 0, 0,
                tr_timeout, tr_method);
@@ -3020,7 +3061,8 @@ scsih_target_reset(struct scsi_cmnd *scmd)
 
        pcie_device = mpt3sas_get_pdev_by_handle(ioc, handle);
 
-       if (pcie_device && (!ioc->tm_custom_handling)) {
+       if (pcie_device && (!ioc->tm_custom_handling) &&
+           (!(mpt3sas_scsih_is_pcie_scsi_device(pcie_device->device_info)))) {
                tr_timeout = pcie_device->reset_timeout;
                tr_method = MPI26_SCSITASKMGMT_MSGFLAGS_PROTOCOL_LVL_RST_PCIE;
        } else
@@ -3598,7 +3640,9 @@ _scsih_tm_tr_send(struct MPT3SAS_ADAPTER *ioc, u16 handle)
                        sas_address = pcie_device->wwid;
                }
                spin_unlock_irqrestore(&ioc->pcie_device_lock, flags);
-               if (pcie_device && (!ioc->tm_custom_handling))
+               if (pcie_device && (!ioc->tm_custom_handling) &&
+                   (!(mpt3sas_scsih_is_pcie_scsi_device(
+                   pcie_device->device_info))))
                        tr_method =
                            MPI26_SCSITASKMGMT_MSGFLAGS_PROTOCOL_LVL_RST_PCIE;
                else
@@ -4654,11 +4698,8 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd)
         * since we're lockless at this point
         */
        do {
-               if (test_bit(0, &sas_device_priv_data->ata_command_pending)) {
-                       scmd->result = SAM_STAT_BUSY;
-                       scmd->scsi_done(scmd);
-                       return 0;
-               }
+               if (test_bit(0, &sas_device_priv_data->ata_command_pending))
+                       return SCSI_MLQUEUE_DEVICE_BUSY;
        } while (_scsih_set_satl_pending(scmd, true));
 
        if (scmd->sc_data_direction == DMA_FROM_DEVICE)
@@ -6456,24 +6497,17 @@ _scsih_sas_device_status_change_event_debug(struct MPT3SAS_ADAPTER *ioc,
 /**
  * _scsih_sas_device_status_change_event - handle device status change
  * @ioc: per adapter object
- * @fw_event: The fw_event_work object
+ * @event_data: The fw event
  * Context: user.
  */
 static void
 _scsih_sas_device_status_change_event(struct MPT3SAS_ADAPTER *ioc,
-       struct fw_event_work *fw_event)
+       Mpi2EventDataSasDeviceStatusChange_t *event_data)
 {
        struct MPT3SAS_TARGET *target_priv_data;
        struct _sas_device *sas_device;
        u64 sas_address;
        unsigned long flags;
-       Mpi2EventDataSasDeviceStatusChange_t *event_data =
-               (Mpi2EventDataSasDeviceStatusChange_t *)
-               fw_event->event_data;
-
-       if (ioc->logging_level & MPT_DEBUG_EVENT_WORK_TASK)
-               _scsih_sas_device_status_change_event_debug(ioc,
-                    event_data);
 
        /* In MPI Revision K (0xC), the internal device reset complete was
         * implemented, so avoid setting tm_busy flag for older firmware.
@@ -6505,6 +6539,12 @@ _scsih_sas_device_status_change_event(struct MPT3SAS_ADAPTER *ioc,
        else
                target_priv_data->tm_busy = 0;
 
+       if (ioc->logging_level & MPT_DEBUG_EVENT_WORK_TASK)
+               ioc_info(ioc,
+                   "%s tm_busy flag for handle(0x%04x)\n",
+                   (target_priv_data->tm_busy == 1) ? "Enable" : "Disable",
+                   target_priv_data->handle);
+
 out:
        if (sas_device)
                sas_device_put(sas_device);
@@ -6539,6 +6579,11 @@ _scsih_check_pcie_access_status(struct MPT3SAS_ADAPTER *ioc, u64 wwid,
                break;
        case MPI26_PCIEDEV0_ASTATUS_DEVICE_BLOCKED:
                desc = "PCIe device blocked";
+               ioc_info(ioc,
+                   "Device with Access Status (%s): wwid(0x%016llx), "
+                   "handle(0x%04x)\n ll only be added to the internal list",
+                   desc, (u64)wwid, handle);
+               rc = 0;
                break;
        case MPI26_PCIEDEV0_ASTATUS_MEMORY_SPACE_ACCESS_FAILED:
                desc = "PCIe device mem space access failed";
@@ -6643,7 +6688,8 @@ _scsih_pcie_device_remove_from_sml(struct MPT3SAS_ADAPTER *ioc,
                         pcie_device->enclosure_level,
                         pcie_device->connector_name);
 
-       if (pcie_device->starget)
+       if (pcie_device->starget && (pcie_device->access_status !=
+                               MPI26_PCIEDEV0_ASTATUS_DEVICE_BLOCKED))
                scsi_remove_target(&pcie_device->starget->dev);
        dewtprintk(ioc,
                   ioc_info(ioc, "%s: exit: handle(0x%04x), wwid(0x%016llx)\n",
@@ -6694,7 +6740,7 @@ _scsih_pcie_check_device(struct MPT3SAS_ADAPTER *ioc, u16 handle)
 
        /* check if this is end device */
        device_info = le32_to_cpu(pcie_device_pg0.DeviceInfo);
-       if (!(_scsih_is_nvme_device(device_info)))
+       if (!(_scsih_is_nvme_pciescsi_device(device_info)))
                return;
 
        wwid = le64_to_cpu(pcie_device_pg0.WWID);
@@ -6709,6 +6755,7 @@ _scsih_pcie_check_device(struct MPT3SAS_ADAPTER *ioc, u16 handle)
        if (unlikely(pcie_device->handle != handle)) {
                starget = pcie_device->starget;
                sas_target_priv_data = starget->hostdata;
+               pcie_device->access_status = pcie_device_pg0.AccessStatus;
                starget_printk(KERN_INFO, starget,
                    "handle changed from(0x%04x) to (0x%04x)!!!\n",
                    pcie_device->handle, handle);
@@ -6803,7 +6850,8 @@ _scsih_pcie_add_device(struct MPT3SAS_ADAPTER *ioc, u16 handle)
            pcie_device_pg0.AccessStatus))
                return 0;
 
-       if (!(_scsih_is_nvme_device(le32_to_cpu(pcie_device_pg0.DeviceInfo))))
+       if (!(_scsih_is_nvme_pciescsi_device(le32_to_cpu
+           (pcie_device_pg0.DeviceInfo))))
                return 0;
 
        pcie_device = mpt3sas_get_pdev_by_wwid(ioc, wwid);
@@ -6813,6 +6861,31 @@ _scsih_pcie_add_device(struct MPT3SAS_ADAPTER *ioc, u16 handle)
                return 0;
        }
 
+       /* PCIe Device Page 2 contains read-only information about a
+        * specific NVMe device; therefore, this page is only
+        * valid for NVMe devices and skip for pcie devices of type scsi.
+        */
+       if (!(mpt3sas_scsih_is_pcie_scsi_device(
+               le32_to_cpu(pcie_device_pg0.DeviceInfo)))) {
+               if (mpt3sas_config_get_pcie_device_pg2(ioc, &mpi_reply,
+                   &pcie_device_pg2, MPI2_SAS_DEVICE_PGAD_FORM_HANDLE,
+                   handle)) {
+                       ioc_err(ioc,
+                           "failure at %s:%d/%s()!\n", __FILE__,
+                           __LINE__, __func__);
+                       return 0;
+               }
+
+               ioc_status = le16_to_cpu(mpi_reply.IOCStatus) &
+                                       MPI2_IOCSTATUS_MASK;
+               if (ioc_status != MPI2_IOCSTATUS_SUCCESS) {
+                       ioc_err(ioc,
+                           "failure at %s:%d/%s()!\n", __FILE__,
+                           __LINE__, __func__);
+                       return 0;
+               }
+       }
+
        pcie_device = kzalloc(sizeof(struct _pcie_device), GFP_KERNEL);
        if (!pcie_device) {
                ioc_err(ioc, "failure at %s:%d/%s()!\n",
@@ -6824,6 +6897,7 @@ _scsih_pcie_add_device(struct MPT3SAS_ADAPTER *ioc, u16 handle)
        pcie_device->id = ioc->pcie_target_id++;
        pcie_device->channel = PCIE_CHANNEL;
        pcie_device->handle = handle;
+       pcie_device->access_status = pcie_device_pg0.AccessStatus;
        pcie_device->device_info = le32_to_cpu(pcie_device_pg0.DeviceInfo);
        pcie_device->wwid = wwid;
        pcie_device->port_num = pcie_device_pg0.PortNum;
@@ -6855,27 +6929,16 @@ _scsih_pcie_add_device(struct MPT3SAS_ADAPTER *ioc, u16 handle)
                            le64_to_cpu(enclosure_dev->pg0.EnclosureLogicalID);
        }
        /* TODO -- Add device name once FW supports it */
-       if (mpt3sas_config_get_pcie_device_pg2(ioc, &mpi_reply,
-               &pcie_device_pg2, MPI2_SAS_DEVICE_PGAD_FORM_HANDLE, handle)) {
-               ioc_err(ioc, "failure at %s:%d/%s()!\n",
-                       __FILE__, __LINE__, __func__);
-               kfree(pcie_device);
-               return 0;
-       }
-
-       ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & MPI2_IOCSTATUS_MASK;
-       if (ioc_status != MPI2_IOCSTATUS_SUCCESS) {
-               ioc_err(ioc, "failure at %s:%d/%s()!\n",
-                       __FILE__, __LINE__, __func__);
-               kfree(pcie_device);
-               return 0;
-       }
-       pcie_device->nvme_mdts =
-               le32_to_cpu(pcie_device_pg2.MaximumDataTransferSize);
-       if (pcie_device_pg2.ControllerResetTO)
-               pcie_device->reset_timeout =
-                       pcie_device_pg2.ControllerResetTO;
-       else
+       if (!(mpt3sas_scsih_is_pcie_scsi_device(
+           le32_to_cpu(pcie_device_pg0.DeviceInfo)))) {
+               pcie_device->nvme_mdts =
+                   le32_to_cpu(pcie_device_pg2.MaximumDataTransferSize);
+               if (pcie_device_pg2.ControllerResetTO)
+                       pcie_device->reset_timeout =
+                           pcie_device_pg2.ControllerResetTO;
+               else
+                       pcie_device->reset_timeout = 30;
+       } else
                pcie_device->reset_timeout = 30;
 
        if (ioc->wait_for_discovery_to_complete)
@@ -8507,6 +8570,8 @@ _scsih_mark_responding_pcie_device(struct MPT3SAS_ADAPTER *ioc,
                if ((pcie_device->wwid == le64_to_cpu(pcie_device_pg0->WWID))
                    && (pcie_device->slot == le16_to_cpu(
                    pcie_device_pg0->Slot))) {
+                       pcie_device->access_status =
+                                       pcie_device_pg0->AccessStatus;
                        pcie_device->responding = 1;
                        starget = pcie_device->starget;
                        if (starget && starget->hostdata) {
@@ -8594,7 +8659,7 @@ _scsih_search_responding_pcie_devices(struct MPT3SAS_ADAPTER *ioc)
                }
                handle = le16_to_cpu(pcie_device_pg0.DevHandle);
                device_info = le32_to_cpu(pcie_device_pg0.DeviceInfo);
-               if (!(_scsih_is_nvme_device(device_info)))
+               if (!(_scsih_is_nvme_pciescsi_device(device_info)))
                        continue;
                _scsih_mark_responding_pcie_device(ioc, &pcie_device_pg0);
        }
@@ -9175,7 +9240,7 @@ _scsih_scan_for_devices_after_reset(struct MPT3SAS_ADAPTER *ioc)
                        break;
                }
                handle = le16_to_cpu(pcie_device_pg0.DevHandle);
-               if (!(_scsih_is_nvme_device(
+               if (!(_scsih_is_nvme_pciescsi_device(
                        le32_to_cpu(pcie_device_pg0.DeviceInfo))))
                        continue;
                pcie_device = mpt3sas_get_pdev_by_wwid(ioc,
@@ -9308,7 +9373,10 @@ _mpt3sas_fw_work(struct MPT3SAS_ADAPTER *ioc, struct fw_event_work *fw_event)
                _scsih_sas_topology_change_event(ioc, fw_event);
                break;
        case MPI2_EVENT_SAS_DEVICE_STATUS_CHANGE:
-               _scsih_sas_device_status_change_event(ioc, fw_event);
+               if (ioc->logging_level & MPT_DEBUG_EVENT_WORK_TASK)
+                       _scsih_sas_device_status_change_event_debug(ioc,
+                           (Mpi2EventDataSasDeviceStatusChange_t *)
+                           fw_event->event_data);
                break;
        case MPI2_EVENT_SAS_DISCOVERY:
                _scsih_sas_discovery_event(ioc, fw_event);
@@ -9481,6 +9549,10 @@ mpt3sas_scsih_event_callback(struct MPT3SAS_ADAPTER *ioc, u8 msix_index,
                break;
        }
        case MPI2_EVENT_SAS_DEVICE_STATUS_CHANGE:
+               _scsih_sas_device_status_change_event(ioc,
+                   (Mpi2EventDataSasDeviceStatusChange_t *)
+                   mpi_reply->EventData);
+               break;
        case MPI2_EVENT_IR_OPERATION_STATUS:
        case MPI2_EVENT_SAS_DISCOVERY:
        case MPI2_EVENT_SAS_DEVICE_DISCOVERY_ERROR:
@@ -10039,6 +10111,12 @@ _scsih_probe_pcie(struct MPT3SAS_ADAPTER *ioc)
                        pcie_device_put(pcie_device);
                        continue;
                }
+               if (pcie_device->access_status ==
+                   MPI26_PCIEDEV0_ASTATUS_DEVICE_BLOCKED) {
+                       pcie_device_make_active(ioc, pcie_device);
+                       pcie_device_put(pcie_device);
+                       continue;
+               }
                rc = scsi_add_device(ioc->shost, PCIE_CHANNEL,
                        pcie_device->id, 0);
                if (rc) {
@@ -10453,6 +10531,13 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        ioc->tm_sas_control_cb_idx = tm_sas_control_cb_idx;
        ioc->logging_level = logging_level;
        ioc->schedule_dead_ioc_flush_running_cmds = &_scsih_flush_running_cmds;
+       /*
+        * Enable MEMORY MOVE support flag.
+        */
+       ioc->drv_support_bitmap |= MPT_DRV_SUPPORT_BITMAP_MEMMOVE;
+
+       ioc->enable_sdev_max_qd = enable_sdev_max_qd;
+
        /* misc semaphores and spin locks */
        mutex_init(&ioc->reset_in_progress_mutex);
        /* initializing pci_access_mutex lock */
index e6a9549..e0b427f 100644 (file)
@@ -3910,11 +3910,14 @@ static void __init ncr_prepare_setting(struct ncb *np)
                                        np->scsi_mode = SMODE_HVD;
                                break;
                        }
+                       /* fall through */
                case 3: /* SYMBIOS controllers report HVD through GPIO3 */
                        if (INB(nc_gpreg) & 0x08)
                                break;
+                       /* fall through */
                case 2: /* Set HVD unconditionally */
                        np->scsi_mode = SMODE_HVD;
+                       /* fall through */
                case 1: /* Trust previous settings for HVD */
                        if (np->sv_stest2 & 0x20)
                                np->scsi_mode = SMODE_HVD;
@@ -6714,6 +6717,7 @@ void ncr_int_sir (struct ncb *np)
                        OUTL_DSP (scr_to_cpu(tp->lp[0]->jump_ccb[0]));
                        return;
                }
+               /* fall through */
        case SIR_RESEL_BAD_TARGET:      /* Will send a TARGET RESET message */
        case SIR_RESEL_BAD_LUN:         /* Will send a TARGET RESET message */
        case SIR_RESEL_BAD_I_T_L_Q:     /* Will send an ABORT TAG message   */
index 9453705..7e48154 100644 (file)
@@ -1308,28 +1308,22 @@ out:
 
 int pm8001_abort_task_set(struct domain_device *dev, u8 *lun)
 {
-       int rc = TMF_RESP_FUNC_FAILED;
        struct pm8001_tmf_task tmf_task;
 
        tmf_task.tmf = TMF_ABORT_TASK_SET;
-       rc = pm8001_issue_ssp_tmf(dev, lun, &tmf_task);
-       return rc;
+       return pm8001_issue_ssp_tmf(dev, lun, &tmf_task);
 }
 
 int pm8001_clear_aca(struct domain_device *dev, u8 *lun)
 {
-       int rc = TMF_RESP_FUNC_FAILED;
        struct pm8001_tmf_task tmf_task;
 
        tmf_task.tmf = TMF_CLEAR_ACA;
-       rc = pm8001_issue_ssp_tmf(dev, lun, &tmf_task);
-
-       return rc;
+       return pm8001_issue_ssp_tmf(dev, lun, &tmf_task);
 }
 
 int pm8001_clear_task_set(struct domain_device *dev, u8 *lun)
 {
-       int rc = TMF_RESP_FUNC_FAILED;
        struct pm8001_tmf_task tmf_task;
        struct pm8001_device *pm8001_dev = dev->lldd_dev;
        struct pm8001_hba_info *pm8001_ha = pm8001_find_ha_by_dev(dev);
@@ -1338,7 +1332,6 @@ int pm8001_clear_task_set(struct domain_device *dev, u8 *lun)
                pm8001_printk("I_T_L_Q clear task set[%x]\n",
                pm8001_dev->device_id));
        tmf_task.tmf = TMF_CLEAR_TASK_SET;
-       rc = pm8001_issue_ssp_tmf(dev, lun, &tmf_task);
-       return rc;
+       return pm8001_issue_ssp_tmf(dev, lun, &tmf_task);
 }
 
index 71ff393..398d2af 100644 (file)
@@ -5841,7 +5841,7 @@ out_disable_device:
 }
 
 /*
- * PCI driver structure of pcmraid driver
+ * PCI driver structure of pmcraid driver
  */
 static struct pci_driver pmcraid_driver = {
        .name = PMCRAID_DRIVER_NAME,
index 5a02121..f3f399f 100644 (file)
@@ -49,6 +49,7 @@
 #define QEDF_ABORT_TIMEOUT     (10 * 1000)
 #define QEDF_CLEANUP_TIMEOUT   1
 #define QEDF_MAX_CDB_LEN       16
+#define QEDF_LL2_BUF_SIZE      2500    /* Buffer size required for LL2 Rx */
 
 #define UPSTREAM_REMOVE                1
 #define UPSTREAM_KEEP          1
index d905a30..b88bed9 100644 (file)
@@ -47,13 +47,13 @@ qedf_dbg_host_init(struct qedf_dbg_ctx *qedf,
  * @pf: the pf that is stopping
  **/
 void
-qedf_dbg_host_exit(struct qedf_dbg_ctx *qedf)
+qedf_dbg_host_exit(struct qedf_dbg_ctx *qedf_dbg)
 {
-       QEDF_INFO(qedf, QEDF_LOG_DEBUGFS, "Destroying debugfs host "
+       QEDF_INFO(qedf_dbg, QEDF_LOG_DEBUGFS, "Destroying debugfs host "
                   "entry\n");
        /* remove debugfs  entries of this PF */
-       debugfs_remove_recursive(qedf->bdf_dentry);
-       qedf->bdf_dentry = NULL;
+       debugfs_remove_recursive(qedf_dbg->bdf_dentry);
+       qedf_dbg->bdf_dentry = NULL;
 }
 
 /**
@@ -140,10 +140,10 @@ qedf_dbg_debug_cmd_read(struct file *filp, char __user *buffer, size_t count,
                        loff_t *ppos)
 {
        int cnt;
-       struct qedf_dbg_ctx *qedf =
+       struct qedf_dbg_ctx *qedf_dbg =
                                (struct qedf_dbg_ctx *)filp->private_data;
 
-       QEDF_INFO(qedf, QEDF_LOG_DEBUGFS, "entered\n");
+       QEDF_INFO(qedf_dbg, QEDF_LOG_DEBUGFS, "debug mask=0x%x\n", qedf_debug);
        cnt = sprintf(buffer, "debug mask = 0x%x\n", qedf_debug);
 
        cnt = min_t(int, count, cnt - *ppos);
@@ -158,7 +158,7 @@ qedf_dbg_debug_cmd_write(struct file *filp, const char __user *buffer,
        uint32_t val;
        void *kern_buf;
        int rval;
-       struct qedf_dbg_ctx *qedf =
+       struct qedf_dbg_ctx *qedf_dbg =
            (struct qedf_dbg_ctx *)filp->private_data;
 
        if (!count || *ppos)
@@ -178,7 +178,7 @@ qedf_dbg_debug_cmd_write(struct file *filp, const char __user *buffer,
        else
                qedf_debug = val;
 
-       QEDF_INFO(qedf, QEDF_LOG_DEBUGFS, "Setting debug=0x%x.\n", val);
+       QEDF_INFO(qedf_dbg, QEDF_LOG_DEBUGFS, "Setting debug=0x%x.\n", val);
        return count;
 }
 
index 5996f68..87e169d 100644 (file)
@@ -179,8 +179,11 @@ static void qedf_rrq_compl(struct qedf_els_cb_arg *cb_arg)
 
        orig_io_req = cb_arg->aborted_io_req;
 
-       if (!orig_io_req)
+       if (!orig_io_req) {
+               QEDF_ERR(&qedf->dbg_ctx,
+                        "Original io_req is NULL, rrq_req = %p.\n", rrq_req);
                goto out_free;
+       }
 
        if (rrq_req->event != QEDF_IOREQ_EV_ELS_TMO &&
            rrq_req->event != QEDF_IOREQ_EV_ELS_ERR_DETECT)
@@ -350,8 +353,10 @@ void qedf_restart_rport(struct qedf_rport *fcport)
        u32 port_id;
        unsigned long flags;
 
-       if (!fcport)
+       if (!fcport) {
+               QEDF_ERR(NULL, "fcport is NULL.\n");
                return;
+       }
 
        spin_lock_irqsave(&fcport->rport_lock, flags);
        if (test_bit(QEDF_RPORT_IN_RESET, &fcport->flags) ||
@@ -418,8 +423,11 @@ static void qedf_l2_els_compl(struct qedf_els_cb_arg *cb_arg)
         * If we are flushing the command just free the cb_arg as none of the
         * response data will be valid.
         */
-       if (els_req->event == QEDF_IOREQ_EV_ELS_FLUSH)
+       if (els_req->event == QEDF_IOREQ_EV_ELS_FLUSH) {
+               QEDF_ERR(NULL, "els_req xid=0x%x event is flush.\n",
+                        els_req->xid);
                goto free_arg;
+       }
 
        fcport = els_req->fcport;
        mp_req = &(els_req->mp_req);
@@ -532,8 +540,10 @@ static void qedf_srr_compl(struct qedf_els_cb_arg *cb_arg)
 
        orig_io_req = cb_arg->aborted_io_req;
 
-       if (!orig_io_req)
+       if (!orig_io_req) {
+               QEDF_ERR(NULL, "orig_io_req is NULL.\n");
                goto out_free;
+       }
 
        clear_bit(QEDF_CMD_SRR_SENT, &orig_io_req->flags);
 
@@ -547,8 +557,11 @@ static void qedf_srr_compl(struct qedf_els_cb_arg *cb_arg)
                   orig_io_req, orig_io_req->xid, srr_req->xid, refcount);
 
        /* If a SRR times out, simply free resources */
-       if (srr_req->event == QEDF_IOREQ_EV_ELS_TMO)
+       if (srr_req->event == QEDF_IOREQ_EV_ELS_TMO) {
+               QEDF_ERR(&qedf->dbg_ctx,
+                        "ELS timeout rec_xid=0x%x.\n", srr_req->xid);
                goto out_put;
+       }
 
        /* Normalize response data into struct fc_frame */
        mp_req = &(srr_req->mp_req);
@@ -721,8 +734,11 @@ void qedf_process_seq_cleanup_compl(struct qedf_ctx *qedf,
        cb_arg = io_req->cb_arg;
 
        /* If we timed out just free resources */
-       if (io_req->event == QEDF_IOREQ_EV_ELS_TMO || !cqe)
+       if (io_req->event == QEDF_IOREQ_EV_ELS_TMO || !cqe) {
+               QEDF_ERR(&qedf->dbg_ctx,
+                        "cqe is NULL or timeout event (0x%x)", io_req->event);
                goto free;
+       }
 
        /* Kill the timer we put on the request */
        cancel_delayed_work_sync(&io_req->timeout_work);
@@ -825,8 +841,10 @@ static void qedf_rec_compl(struct qedf_els_cb_arg *cb_arg)
 
        orig_io_req = cb_arg->aborted_io_req;
 
-       if (!orig_io_req)
+       if (!orig_io_req) {
+               QEDF_ERR(NULL, "orig_io_req is NULL.\n");
                goto out_free;
+       }
 
        if (rec_req->event != QEDF_IOREQ_EV_ELS_TMO &&
            rec_req->event != QEDF_IOREQ_EV_ELS_ERR_DETECT)
@@ -838,8 +856,12 @@ static void qedf_rec_compl(struct qedf_els_cb_arg *cb_arg)
                   orig_io_req, orig_io_req->xid, rec_req->xid, refcount);
 
        /* If a REC times out, free resources */
-       if (rec_req->event == QEDF_IOREQ_EV_ELS_TMO)
+       if (rec_req->event == QEDF_IOREQ_EV_ELS_TMO) {
+               QEDF_ERR(&qedf->dbg_ctx,
+                        "Got TMO event, orig_io_req %p orig_io_xid=0x%x.\n",
+                        orig_io_req, orig_io_req->xid);
                goto out_put;
+       }
 
        /* Normalize response data into struct fc_frame */
        mp_req = &(rec_req->mp_req);
index 362d2be..bb82f08 100644 (file)
@@ -23,8 +23,11 @@ void qedf_fcoe_send_vlan_req(struct qedf_ctx *qedf)
        int rc = -1;
 
        skb = dev_alloc_skb(sizeof(struct fip_vlan));
-       if (!skb)
+       if (!skb) {
+               QEDF_ERR(&qedf->dbg_ctx,
+                        "Failed to allocate skb.\n");
                return;
+       }
 
        eth_fr = (char *)skb->data;
        vlan = (struct fip_vlan *)eth_fr;
@@ -250,18 +253,24 @@ void qedf_fip_recv(struct qedf_ctx *qedf, struct sk_buff *skb)
                                        fc_wwpn_valid = true;
                                break;
                        case FIP_DT_VN_ID:
+                               fabric_id_valid = false;
                                vp = (struct fip_vn_desc *)desc;
-                               QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC,
-                                         "vx_port fd_fc_id=%x fd_mac=%pM.\n",
-                                         ntoh24(vp->fd_fc_id), vp->fd_mac);
-                               /* Check vx_port fabric ID */
-                               if (ntoh24(vp->fd_fc_id) !=
-                                   qedf->lport->port_id)
-                                       fabric_id_valid = false;
-                               /* Check vx_port MAC */
-                               if (!ether_addr_equal(vp->fd_mac,
-                                                     qedf->data_src_addr))
-                                       fabric_id_valid = false;
+
+                               QEDF_ERR(&qedf->dbg_ctx,
+                                        "CVL vx_port fd_fc_id=0x%x fd_mac=%pM fd_wwpn=%016llx.\n",
+                                        ntoh24(vp->fd_fc_id), vp->fd_mac,
+                                        get_unaligned_be64(&vp->fd_wwpn));
+                               /* Check for vx_port wwpn OR Check vx_port
+                                * fabric ID OR Check vx_port MAC
+                                */
+                               if ((get_unaligned_be64(&vp->fd_wwpn) ==
+                                       qedf->wwpn) ||
+                                  (ntoh24(vp->fd_fc_id) ==
+                                       qedf->lport->port_id) ||
+                                  (ether_addr_equal(vp->fd_mac,
+                                       qedf->data_src_addr))) {
+                                       fabric_id_valid = true;
+                               }
                                break;
                        default:
                                /* Ignore anything else */
index d881e82..e749a2d 100644 (file)
@@ -104,6 +104,8 @@ static void qedf_cmd_timeout(struct work_struct *work)
                qedf_process_seq_cleanup_compl(qedf, NULL, io_req);
                break;
        default:
+               QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO,
+                         "Hit default case, xid=0x%x.\n", io_req->xid);
                break;
        }
 }
@@ -122,8 +124,10 @@ void qedf_cmd_mgr_free(struct qedf_cmd_mgr *cmgr)
        num_ios = max_xid - min_xid + 1;
 
        /* Free fcoe_bdt_ctx structures */
-       if (!cmgr->io_bdt_pool)
+       if (!cmgr->io_bdt_pool) {
+               QEDF_ERR(&qedf->dbg_ctx, "io_bdt_pool is NULL.\n");
                goto free_cmd_pool;
+       }
 
        bd_tbl_sz = QEDF_MAX_BDS_PER_CMD * sizeof(struct scsi_sge);
        for (i = 0; i < num_ios; i++) {
@@ -226,8 +230,11 @@ struct qedf_cmd_mgr *qedf_cmd_mgr_alloc(struct qedf_ctx *qedf)
                io_req->sense_buffer = dma_alloc_coherent(&qedf->pdev->dev,
                    QEDF_SCSI_SENSE_BUFFERSIZE, &io_req->sense_buffer_dma,
                    GFP_KERNEL);
-               if (!io_req->sense_buffer)
+               if (!io_req->sense_buffer) {
+                       QEDF_ERR(&qedf->dbg_ctx,
+                                "Failed to alloc sense buffer.\n");
                        goto mem_err;
+               }
 
                /* Allocate task parameters to pass to f/w init funcions */
                io_req->task_params = kzalloc(sizeof(*io_req->task_params),
@@ -437,8 +444,12 @@ void qedf_release_cmd(struct kref *ref)
        struct qedf_rport *fcport = io_req->fcport;
        unsigned long flags;
 
-       if (io_req->cmd_type == QEDF_SCSI_CMD)
+       if (io_req->cmd_type == QEDF_SCSI_CMD) {
+               QEDF_WARN(&fcport->qedf->dbg_ctx,
+                         "Cmd released called without scsi_done called, io_req %p xid=0x%x.\n",
+                         io_req, io_req->xid);
                WARN_ON(io_req->sc_cmd);
+       }
 
        if (io_req->cmd_type == QEDF_ELS ||
            io_req->cmd_type == QEDF_TASK_MGMT_CMD)
@@ -447,8 +458,10 @@ void qedf_release_cmd(struct kref *ref)
        atomic_inc(&cmd_mgr->free_list_cnt);
        atomic_dec(&fcport->num_active_ios);
        atomic_set(&io_req->state, QEDF_CMD_ST_INACTIVE);
-       if (atomic_read(&fcport->num_active_ios) < 0)
+       if (atomic_read(&fcport->num_active_ios) < 0) {
                QEDF_WARN(&(fcport->qedf->dbg_ctx), "active_ios < 0.\n");
+               WARN_ON(1);
+       }
 
        /* Increment task retry identifier now that the request is released */
        io_req->task_retry_identifier++;
@@ -951,6 +964,9 @@ qedf_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc_cmd)
 
        if (test_bit(QEDF_UNLOADING, &qedf->flags) ||
            test_bit(QEDF_DBG_STOP_IO, &qedf->flags)) {
+               QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO,
+                         "Returning DNC as unloading or stop io, flags 0x%lx.\n",
+                         qedf->flags);
                sc_cmd->result = DID_NO_CONNECT << 16;
                sc_cmd->scsi_done(sc_cmd);
                return 0;
@@ -967,6 +983,9 @@ qedf_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc_cmd)
 
        rval = fc_remote_port_chkready(rport);
        if (rval) {
+               QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO,
+                         "fc_remote_port_chkready failed=0x%x for port_id=0x%06x.\n",
+                         rval, rport->port_id);
                sc_cmd->result = rval;
                sc_cmd->scsi_done(sc_cmd);
                return 0;
@@ -974,12 +993,14 @@ qedf_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc_cmd)
 
        /* Retry command if we are doing a qed drain operation */
        if (test_bit(QEDF_DRAIN_ACTIVE, &qedf->flags)) {
+               QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, "Drain active.\n");
                rc = SCSI_MLQUEUE_HOST_BUSY;
                goto exit_qcmd;
        }
 
        if (lport->state != LPORT_ST_READY ||
            atomic_read(&qedf->link_state) != QEDF_LINK_UP) {
+               QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, "Link down.\n");
                rc = SCSI_MLQUEUE_HOST_BUSY;
                goto exit_qcmd;
        }
@@ -1297,8 +1318,10 @@ void qedf_scsi_done(struct qedf_ctx *qedf, struct qedf_ioreq *io_req,
        struct scsi_cmnd *sc_cmd;
        int refcount;
 
-       if (!io_req)
+       if (!io_req) {
+               QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, "io_req is NULL\n");
                return;
+       }
 
        if (test_and_set_bit(QEDF_CMD_ERR_SCSI_DONE, &io_req->flags)) {
                QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO,
@@ -1414,8 +1437,12 @@ void qedf_process_warning_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe,
        u64 err_warn_bit_map;
        u8 err_warn = 0xff;
 
-       if (!cqe)
+       if (!cqe) {
+               QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO,
+                         "cqe is NULL for io_req %p xid=0x%x\n",
+                         io_req, io_req->xid);
                return;
+       }
 
        QEDF_ERR(&(io_req->fcport->qedf->dbg_ctx), "Warning CQE, "
                  "xid=0x%x\n", io_req->xid);
@@ -1477,8 +1504,11 @@ void qedf_process_error_detect(struct qedf_ctx *qedf, struct fcoe_cqe *cqe,
 {
        int rval;
 
-       if (!cqe)
+       if (!cqe) {
+               QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO,
+                         "cqe is NULL for io_req %p\n", io_req);
                return;
+       }
 
        QEDF_ERR(&(io_req->fcport->qedf->dbg_ctx), "Error detection CQE, "
                  "xid=0x%x\n", io_req->xid);
@@ -1543,8 +1573,10 @@ void qedf_flush_active_ios(struct qedf_rport *fcport, int lun)
        int wait_cnt = 100;
        int refcount = 0;
 
-       if (!fcport)
+       if (!fcport) {
+               QEDF_ERR(NULL, "fcport is NULL\n");
                return;
+       }
 
        /* Check that fcport is still offloaded */
        if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) {
@@ -1976,6 +2008,10 @@ void qedf_process_abts_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe,
        clear_bit(QEDF_CMD_IN_ABORT, &io_req->flags);
 
        if (io_req->sc_cmd) {
+               if (!io_req->return_scsi_cmd_on_abts)
+                       QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_SCSI_TM,
+                                 "Not call scsi_done for xid=0x%x.\n",
+                                 io_req->xid);
                if (io_req->return_scsi_cmd_on_abts)
                        qedf_scsi_done(qedf, io_req, DID_ERROR);
        }
@@ -2201,6 +2237,10 @@ int qedf_initiate_cleanup(struct qedf_ioreq *io_req,
        }
 
        if (io_req->sc_cmd) {
+               if (!io_req->return_scsi_cmd_on_abts)
+                       QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_SCSI_TM,
+                                 "Not call scsi_done for xid=0x%x.\n",
+                                 io_req->xid);
                if (io_req->return_scsi_cmd_on_abts)
                        qedf_scsi_done(qedf, io_req, DID_ERROR);
        }
@@ -2241,7 +2281,7 @@ static int qedf_execute_tmf(struct qedf_rport *fcport, struct scsi_cmnd *sc_cmd,
        u16 sqe_idx;
 
        if (!sc_cmd) {
-               QEDF_ERR(&(qedf->dbg_ctx), "invalid arg\n");
+               QEDF_ERR(&qedf->dbg_ctx, "sc_cmd is NULL\n");
                return FAILED;
        }
 
@@ -2363,8 +2403,8 @@ int qedf_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags)
 
        QEDF_ERR(NULL,
                 "tm_flags 0x%x sc_cmd %p op = 0x%02x target_id = 0x%x lun=%d\n",
-                tm_flags, sc_cmd, sc_cmd->cmnd[0], rport->scsi_target_id,
-                (int)sc_cmd->device->lun);
+                tm_flags, sc_cmd, sc_cmd->cmd_len ? sc_cmd->cmnd[0] : 0xff,
+                rport->scsi_target_id, (int)sc_cmd->device->lun);
 
        if (!rdata || !kref_get_unless_zero(&rdata->kref)) {
                QEDF_ERR(NULL, "stale rport\n");
@@ -2515,6 +2555,11 @@ void qedf_process_unsol_compl(struct qedf_ctx *qedf, uint16_t que_idx,
        fh = (struct fc_frame_header *)fc_frame_header_get(fp);
        memcpy(fh, (void *)bdq_addr, pktlen);
 
+       QEDF_WARN(&qedf->dbg_ctx,
+                 "Processing Unsolicated frame, src=%06x dest=%06x r_ctl=0x%x type=0x%x cmd=%02x\n",
+                 ntoh24(fh->fh_s_id), ntoh24(fh->fh_d_id), fh->fh_r_ctl,
+                 fh->fh_type, fc_frame_payload_op(fp));
+
        /* Initialize the frame so libfc sees it as a valid frame */
        crc = fcoe_fc_crc(fp);
        fc_frame_init(fp);
index 4254272..1659d35 100644 (file)
@@ -27,6 +27,7 @@ const struct qed_fcoe_ops *qed_ops;
 
 static int qedf_probe(struct pci_dev *pdev, const struct pci_device_id *id);
 static void qedf_remove(struct pci_dev *pdev);
+static void qedf_shutdown(struct pci_dev *pdev);
 
 /*
  * Driver module parameters.
@@ -110,16 +111,18 @@ static struct kmem_cache *qedf_io_work_cache;
 
 void qedf_set_vlan_id(struct qedf_ctx *qedf, int vlan_id)
 {
-       qedf->vlan_id = vlan_id;
-       qedf->vlan_id |= qedf->prio << VLAN_PRIO_SHIFT;
-       QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Setting vlan_id=%04x "
-                  "prio=%d.\n", vlan_id, qedf->prio);
+       int vlan_id_tmp = 0;
+
+       vlan_id_tmp = vlan_id  | (qedf->prio << VLAN_PRIO_SHIFT);
+       qedf->vlan_id = vlan_id_tmp;
+       QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC,
+                 "Setting vlan_id=0x%04x prio=%d.\n",
+                 vlan_id_tmp, qedf->prio);
 }
 
 /* Returns true if we have a valid vlan, false otherwise */
 static bool qedf_initiate_fipvlan_req(struct qedf_ctx *qedf)
 {
-       int rc;
 
        while (qedf->fipvlan_retries--) {
                /* This is to catch if link goes down during fipvlan retries */
@@ -128,20 +131,25 @@ static bool qedf_initiate_fipvlan_req(struct qedf_ctx *qedf)
                        return false;
                }
 
-               if (qedf->vlan_id > 0)
+               if (test_bit(QEDF_UNLOADING, &qedf->flags)) {
+                       QEDF_ERR(&qedf->dbg_ctx, "Driver unloading.\n");
+                       return false;
+               }
+
+               if (qedf->vlan_id > 0) {
+                       QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC,
+                                 "vlan = 0x%x already set, calling ctlr_link_up.\n",
+                                 qedf->vlan_id);
+                       if (atomic_read(&qedf->link_state) == QEDF_LINK_UP)
+                               fcoe_ctlr_link_up(&qedf->ctlr);
                        return true;
+               }
 
                QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC,
                           "Retry %d.\n", qedf->fipvlan_retries);
                init_completion(&qedf->fipvlan_compl);
                qedf_fcoe_send_vlan_req(qedf);
-               rc = wait_for_completion_timeout(&qedf->fipvlan_compl,
-                   1 * HZ);
-               if (rc > 0 &&
-                   (atomic_read(&qedf->link_state) == QEDF_LINK_UP)) {
-                       fcoe_ctlr_link_up(&qedf->ctlr);
-                       return true;
-               }
+               wait_for_completion_timeout(&qedf->fipvlan_compl, 1 * HZ);
        }
 
        return false;
@@ -162,6 +170,8 @@ static void qedf_handle_link_update(struct work_struct *work)
                        return;
 
                if (atomic_read(&qedf->link_state) != QEDF_LINK_UP) {
+                       QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC,
+                                 "Link is down, resetting vlan_id.\n");
                        qedf->vlan_id = 0;
                        return;
                }
@@ -311,8 +321,10 @@ int qedf_send_flogi(struct qedf_ctx *qedf)
 
        lport = qedf->lport;
 
-       if (!lport->tt.elsct_send)
+       if (!lport->tt.elsct_send) {
+               QEDF_ERR(&qedf->dbg_ctx, "tt.elsct_send not set.\n");
                return -EINVAL;
+       }
 
        fp = fc_frame_alloc(lport, sizeof(struct fc_els_flogi));
        if (!fp) {
@@ -330,11 +342,6 @@ int qedf_send_flogi(struct qedf_ctx *qedf)
        return 0;
 }
 
-struct qedf_tmp_rdata_item {
-       struct fc_rport_priv *rdata;
-       struct list_head list;
-};
-
 /*
  * This function is called if link_down_tmo is in use.  If we get a link up and
  * link_down_tmo has not expired then use just FLOGI/ADISC to recover our
@@ -344,9 +351,8 @@ static void qedf_link_recovery(struct work_struct *work)
 {
        struct qedf_ctx *qedf =
            container_of(work, struct qedf_ctx, link_recovery.work);
-       struct qedf_rport *fcport;
+       struct fc_lport *lport = qedf->lport;
        struct fc_rport_priv *rdata;
-       struct qedf_tmp_rdata_item *rdata_item, *tmp_rdata_item;
        bool rc;
        int retries = 30;
        int rval, i;
@@ -413,33 +419,14 @@ static void qedf_link_recovery(struct work_struct *work)
         * Call lport->tt.rport_login which will cause libfc to send an
         * ADISC since the rport is in state ready.
         */
-       rcu_read_lock();
-       list_for_each_entry_rcu(fcport, &qedf->fcports, peers) {
-               rdata = fcport->rdata;
-               if (rdata == NULL)
-                       continue;
-               rdata_item = kzalloc(sizeof(struct qedf_tmp_rdata_item),
-                   GFP_ATOMIC);
-               if (!rdata_item)
-                       continue;
+       mutex_lock(&lport->disc.disc_mutex);
+       list_for_each_entry_rcu(rdata, &lport->disc.rports, peers) {
                if (kref_get_unless_zero(&rdata->kref)) {
-                       rdata_item->rdata = rdata;
-                       list_add(&rdata_item->list, &rdata_login_list);
-               } else
-                       kfree(rdata_item);
-       }
-       rcu_read_unlock();
-       /*
-        * Do the fc_rport_login outside of the rcu lock so we don't take a
-        * mutex in an atomic context.
-        */
-       list_for_each_entry_safe(rdata_item, tmp_rdata_item, &rdata_login_list,
-           list) {
-               list_del(&rdata_item->list);
-               fc_rport_login(rdata_item->rdata);
-               kref_put(&rdata_item->rdata->kref, fc_rport_destroy);
-               kfree(rdata_item);
+                       fc_rport_login(rdata);
+                       kref_put(&rdata->kref, fc_rport_destroy);
+               }
        }
+       mutex_unlock(&lport->disc.disc_mutex);
 }
 
 static void qedf_update_link_speed(struct qedf_ctx *qedf,
@@ -467,6 +454,9 @@ static void qedf_update_link_speed(struct qedf_ctx *qedf,
        case 100000:
                lport->link_speed = FC_PORTSPEED_100GBIT;
                break;
+       case 20000:
+               lport->link_speed = FC_PORTSPEED_20GBIT;
+               break;
        default:
                lport->link_speed = FC_PORTSPEED_UNKNOWN;
                break;
@@ -476,16 +466,40 @@ static void qedf_update_link_speed(struct qedf_ctx *qedf,
         * Set supported link speed by querying the supported
         * capabilities of the link.
         */
-       if (link->supported_caps & SUPPORTED_10000baseKR_Full)
+       if ((link->supported_caps & QED_LM_10000baseT_Full_BIT) ||
+           (link->supported_caps & QED_LM_10000baseKX4_Full_BIT) ||
+           (link->supported_caps & QED_LM_10000baseR_FEC_BIT) ||
+           (link->supported_caps & QED_LM_10000baseCR_Full_BIT) ||
+           (link->supported_caps & QED_LM_10000baseSR_Full_BIT) ||
+           (link->supported_caps & QED_LM_10000baseLR_Full_BIT) ||
+           (link->supported_caps & QED_LM_10000baseLRM_Full_BIT) ||
+           (link->supported_caps & QED_LM_10000baseKR_Full_BIT)) {
                lport->link_supported_speeds |= FC_PORTSPEED_10GBIT;
-       if (link->supported_caps & SUPPORTED_25000baseKR_Full)
+       }
+       if ((link->supported_caps & QED_LM_25000baseKR_Full_BIT) ||
+           (link->supported_caps & QED_LM_25000baseCR_Full_BIT) ||
+           (link->supported_caps & QED_LM_25000baseSR_Full_BIT)) {
                lport->link_supported_speeds |= FC_PORTSPEED_25GBIT;
-       if (link->supported_caps & SUPPORTED_40000baseLR4_Full)
+       }
+       if ((link->supported_caps & QED_LM_40000baseLR4_Full_BIT) ||
+           (link->supported_caps & QED_LM_40000baseKR4_Full_BIT) ||
+           (link->supported_caps & QED_LM_40000baseCR4_Full_BIT) ||
+           (link->supported_caps & QED_LM_40000baseSR4_Full_BIT)) {
                lport->link_supported_speeds |= FC_PORTSPEED_40GBIT;
-       if (link->supported_caps & SUPPORTED_50000baseKR2_Full)
+       }
+       if ((link->supported_caps & QED_LM_50000baseKR2_Full_BIT) ||
+           (link->supported_caps & QED_LM_50000baseCR2_Full_BIT) ||
+           (link->supported_caps & QED_LM_50000baseSR2_Full_BIT)) {
                lport->link_supported_speeds |= FC_PORTSPEED_50GBIT;
-       if (link->supported_caps & SUPPORTED_100000baseKR4_Full)
+       }
+       if ((link->supported_caps & QED_LM_100000baseKR4_Full_BIT) ||
+           (link->supported_caps & QED_LM_100000baseSR4_Full_BIT) ||
+           (link->supported_caps & QED_LM_100000baseCR4_Full_BIT) ||
+           (link->supported_caps & QED_LM_100000baseLR4_ER4_Full_BIT)) {
                lport->link_supported_speeds |= FC_PORTSPEED_100GBIT;
+       }
+       if (link->supported_caps & QED_LM_20000baseKR2_Full_BIT)
+               lport->link_supported_speeds |= FC_PORTSPEED_20GBIT;
        fc_host_supported_speeds(lport->host) = lport->link_supported_speeds;
 }
 
@@ -493,6 +507,16 @@ static void qedf_link_update(void *dev, struct qed_link_output *link)
 {
        struct qedf_ctx *qedf = (struct qedf_ctx *)dev;
 
+       /*
+        * Prevent race where we're removing the module and we get link update
+        * for qed.
+        */
+       if (test_bit(QEDF_UNLOADING, &qedf->flags)) {
+               QEDF_ERR(&qedf->dbg_ctx,
+                        "Ignore link update, driver getting unload.\n");
+               return;
+       }
+
        if (link->link_up) {
                if (atomic_read(&qedf->link_state) == QEDF_LINK_UP) {
                        QEDF_INFO((&qedf->dbg_ctx), QEDF_LOG_DISC,
@@ -2340,12 +2364,14 @@ static void qedf_recv_frame(struct qedf_ctx *qedf,
        fr_dev(fp) = lport;
        fr_sof(fp) = hp->fcoe_sof;
        if (skb_copy_bits(skb, fr_len, &crc_eof, sizeof(crc_eof))) {
+               QEDF_INFO(NULL, QEDF_LOG_LL2, "skb_copy_bits failed.\n");
                kfree_skb(skb);
                return;
        }
        fr_eof(fp) = crc_eof.fcoe_eof;
        fr_crc(fp) = crc_eof.fcoe_crc32;
        if (pskb_trim(skb, fr_len)) {
+               QEDF_INFO(NULL, QEDF_LOG_LL2, "pskb_trim failed.\n");
                kfree_skb(skb);
                return;
        }
@@ -2406,9 +2432,9 @@ static void qedf_recv_frame(struct qedf_ctx *qedf,
         * empty then this is not addressed to our port so simply drop it.
         */
        if (lport->port_id != ntoh24(fh->fh_d_id) && !vn_port) {
-               QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2,
-                   "Dropping frame due to destination mismatch: lport->port_id=%x fh->d_id=%x.\n",
-                   lport->port_id, ntoh24(fh->fh_d_id));
+               QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_LL2,
+                         "Dropping frame due to destination mismatch: lport->port_id=0x%x fh->d_id=0x%x.\n",
+                         lport->port_id, ntoh24(fh->fh_d_id));
                kfree_skb(skb);
                return;
        }
@@ -2417,6 +2443,8 @@ static void qedf_recv_frame(struct qedf_ctx *qedf,
        if ((fh->fh_type == FC_TYPE_BLS) && (f_ctl & FC_FC_SEQ_CTX) &&
            (f_ctl & FC_FC_EX_CTX)) {
                /* Drop incoming ABTS response that has both SEQ/EX CTX set */
+               QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_LL2,
+                         "Dropping ABTS response as both SEQ/EX CTX set.\n");
                kfree_skb(skb);
                return;
        }
@@ -2560,8 +2588,9 @@ static int qedf_alloc_and_init_sb(struct qedf_ctx *qedf,
            sizeof(struct status_block_e4), &sb_phys, GFP_KERNEL);
 
        if (!sb_virt) {
-               QEDF_ERR(&(qedf->dbg_ctx), "Status block allocation failed "
-                         "for id = %d.\n", sb_id);
+               QEDF_ERR(&qedf->dbg_ctx,
+                        "Status block allocation failed for id = %d.\n",
+                        sb_id);
                return -ENOMEM;
        }
 
@@ -2569,8 +2598,9 @@ static int qedf_alloc_and_init_sb(struct qedf_ctx *qedf,
            sb_id, QED_SB_TYPE_STORAGE);
 
        if (ret) {
-               QEDF_ERR(&(qedf->dbg_ctx), "Status block initialization "
-                         "failed for id = %d.\n", sb_id);
+               QEDF_ERR(&qedf->dbg_ctx,
+                        "Status block initialization failed (0x%x) for id = %d.\n",
+                        ret, sb_id);
                return ret;
        }
 
@@ -2654,13 +2684,18 @@ void qedf_process_cqe(struct qedf_ctx *qedf, struct fcoe_cqe *cqe)
        io_req = &qedf->cmd_mgr->cmds[xid];
 
        /* Completion not for a valid I/O anymore so just return */
-       if (!io_req)
+       if (!io_req) {
+               QEDF_ERR(&qedf->dbg_ctx,
+                        "io_req is NULL for xid=0x%x.\n", xid);
                return;
+       }
 
        fcport = io_req->fcport;
 
        if (fcport == NULL) {
-               QEDF_ERR(&(qedf->dbg_ctx), "fcport is NULL.\n");
+               QEDF_ERR(&qedf->dbg_ctx,
+                        "fcport is NULL for xid=0x%x io_req=%p.\n",
+                        xid, io_req);
                return;
        }
 
@@ -2669,7 +2704,8 @@ void qedf_process_cqe(struct qedf_ctx *qedf, struct fcoe_cqe *cqe)
         * isn't valid and shouldn't be taken. We should just return.
         */
        if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) {
-               QEDF_ERR(&(qedf->dbg_ctx), "Session not offloaded yet.\n");
+               QEDF_ERR(&qedf->dbg_ctx,
+                        "Session not offloaded yet, fcport = %p.\n", fcport);
                return;
        }
 
@@ -2881,6 +2917,7 @@ static int qedf_alloc_global_queues(struct qedf_ctx *qedf)
         */
        if (!qedf->p_cpuq) {
                status = 1;
+               QEDF_ERR(&qedf->dbg_ctx, "p_cpuq is NULL.\n");
                goto mem_alloc_failure;
        }
 
@@ -2896,8 +2933,10 @@ static int qedf_alloc_global_queues(struct qedf_ctx *qedf)
 
        /* Allocate DMA coherent buffers for BDQ */
        rc = qedf_alloc_bdq(qedf);
-       if (rc)
+       if (rc) {
+               QEDF_ERR(&qedf->dbg_ctx, "Unable to allocate bdq.\n");
                goto mem_alloc_failure;
+       }
 
        /* Allocate a CQ and an associated PBL for each MSI-X vector */
        for (i = 0; i < qedf->num_queues; i++) {
@@ -3107,6 +3146,7 @@ static struct pci_driver qedf_pci_driver = {
        .id_table = qedf_pci_tbl,
        .probe = qedf_probe,
        .remove = qedf_remove,
+       .shutdown = qedf_shutdown,
 };
 
 static int __qedf_probe(struct pci_dev *pdev, int mode)
@@ -3209,6 +3249,7 @@ static int __qedf_probe(struct pci_dev *pdev, int mode)
        qed_params.is_vf = is_vf;
        qedf->cdev = qed_ops->common->probe(pdev, &qed_params);
        if (!qedf->cdev) {
+               QEDF_ERR(&qedf->dbg_ctx, "common probe failed.\n");
                rc = -ENODEV;
                goto err1;
        }
@@ -3277,8 +3318,10 @@ static int __qedf_probe(struct pci_dev *pdev, int mode)
 
        /* Setup interrupts */
        rc = qedf_setup_int(qedf);
-       if (rc)
+       if (rc) {
+               QEDF_ERR(&qedf->dbg_ctx, "Setup interrupts failed.\n");
                goto err3;
+       }
 
        rc = qed_ops->start(qedf->cdev, &qedf->tasks);
        if (rc) {
@@ -3360,7 +3403,7 @@ static int __qedf_probe(struct pci_dev *pdev, int mode)
        }
 
        memset(&params, 0, sizeof(params));
-       params.mtu = 9000;
+       params.mtu = QEDF_LL2_BUF_SIZE;
        ether_addr_copy(params.ll2_mac_address, qedf->mac);
 
        /* Start LL2 processing thread */
@@ -3719,6 +3762,11 @@ void qedf_get_protocol_tlv_data(void *dev, void *data)
        fcoe->scsi_tsk_full = qedf->task_set_fulls;
 }
 
+static void qedf_shutdown(struct pci_dev *pdev) /* qedf_pci_driver .shutdown */
+{
+       __qedf_remove(pdev, QEDF_MODE_NORMAL); /* remove in normal mode */
+}
+
 /* Generic TLV data callback */
 void qedf_get_generic_tlv_data(void *dev, struct qed_generic_tlvs *data)
 {
@@ -3845,7 +3893,7 @@ static void __exit qedf_cleanup(void)
 }
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("QLogic QEDF 25/40/50/100Gb FCoE Driver");
+MODULE_DESCRIPTION("QLogic FastLinQ 4xxxx FCoE Module");
 MODULE_AUTHOR("QLogic Corporation");
 MODULE_VERSION(QEDF_VERSION);
 module_init(qedf_init);
index e57533d..b0e37af 100644 (file)
@@ -4,9 +4,9 @@
  *  Copyright (c) 2016-2018 Cavium Inc.
  */
 
-#define QEDF_VERSION           "8.37.25.20"
+#define QEDF_VERSION           "8.42.3.0"
 #define QEDF_DRIVER_MAJOR_VER          8
-#define QEDF_DRIVER_MINOR_VER          37
-#define QEDF_DRIVER_REV_VER            25
-#define QEDF_DRIVER_ENG_VER            20
+#define QEDF_DRIVER_MINOR_VER          42
+#define QEDF_DRIVER_REV_VER            3
+#define QEDF_DRIVER_ENG_VER            0
 
index 6b7b390..8190c2a 100644 (file)
@@ -382,7 +382,7 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj,
                ha->optrom_region_size = size;
 
                ha->optrom_state = QLA_SREADING;
-               ha->optrom_buffer = vmalloc(ha->optrom_region_size);
+               ha->optrom_buffer = vzalloc(ha->optrom_region_size);
                if (ha->optrom_buffer == NULL) {
                        ql_log(ql_log_warn, vha, 0x7062,
                            "Unable to allocate memory for optrom retrieval "
@@ -404,7 +404,6 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj,
                    "Reading flash region -- 0x%x/0x%x.\n",
                    ha->optrom_region_start, ha->optrom_region_size);
 
-               memset(ha->optrom_buffer, 0, ha->optrom_region_size);
                ha->isp_ops->read_optrom(vha, ha->optrom_buffer,
                    ha->optrom_region_start, ha->optrom_region_size);
                break;
@@ -457,7 +456,7 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj,
                ha->optrom_region_size = size;
 
                ha->optrom_state = QLA_SWRITING;
-               ha->optrom_buffer = vmalloc(ha->optrom_region_size);
+               ha->optrom_buffer = vzalloc(ha->optrom_region_size);
                if (ha->optrom_buffer == NULL) {
                        ql_log(ql_log_warn, vha, 0x7066,
                            "Unable to allocate memory for optrom update "
@@ -472,7 +471,6 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj,
                    "Staging flash region write -- 0x%x/0x%x.\n",
                    ha->optrom_region_start, ha->optrom_region_size);
 
-               memset(ha->optrom_buffer, 0, ha->optrom_region_size);
                break;
        case 3:
                if (ha->optrom_state != QLA_SWRITING) {
@@ -726,7 +724,8 @@ qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj,
                        break;
                } else {
                        /* Make sure FC side is not in reset */
-                       qla2x00_wait_for_hba_online(vha);
+                       WARN_ON_ONCE(qla2x00_wait_for_hba_online(vha) !=
+                                    QLA_SUCCESS);
 
                        /* Issue MPI reset */
                        scsi_block_requests(vha->host);
@@ -1126,7 +1125,8 @@ qla2x00_pci_info_show(struct device *dev, struct device_attribute *attr,
        char pci_info[30];
 
        return scnprintf(buf, PAGE_SIZE, "%s\n",
-           vha->hw->isp_ops->pci_info_str(vha, pci_info));
+                        vha->hw->isp_ops->pci_info_str(vha, pci_info,
+                                                       sizeof(pci_info)));
 }
 
 static ssize_t
index 5441557..28d587a 100644 (file)
 #include <linux/bsg-lib.h>
 
 /* BSG support for ELS/CT pass through */
-void
-qla2x00_bsg_job_done(void *ptr, int res)
+void qla2x00_bsg_job_done(srb_t *sp, int res)
 {
-       srb_t *sp = ptr;
        struct bsg_job *bsg_job = sp->u.bsg_job;
        struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 
@@ -25,10 +23,8 @@ qla2x00_bsg_job_done(void *ptr, int res)
        sp->free(sp);
 }
 
-void
-qla2x00_bsg_sp_free(void *ptr)
+void qla2x00_bsg_sp_free(srb_t *sp)
 {
-       srb_t *sp = ptr;
        struct qla_hw_data *ha = sp->vha->hw;
        struct bsg_job *bsg_job = sp->u.bsg_job;
        struct fc_bsg_request *bsg_request = bsg_job->request;
@@ -341,6 +337,8 @@ qla2x00_process_els(struct bsg_job *bsg_job)
                dma_map_sg(&ha->pdev->dev, bsg_job->request_payload.sg_list,
                bsg_job->request_payload.sg_cnt, DMA_TO_DEVICE);
        if (!req_sg_cnt) {
+               dma_unmap_sg(&ha->pdev->dev, bsg_job->request_payload.sg_list,
+                   bsg_job->request_payload.sg_cnt, DMA_TO_DEVICE);
                rval = -ENOMEM;
                goto done_free_fcport;
        }
@@ -348,6 +346,8 @@ qla2x00_process_els(struct bsg_job *bsg_job)
        rsp_sg_cnt = dma_map_sg(&ha->pdev->dev, bsg_job->reply_payload.sg_list,
                bsg_job->reply_payload.sg_cnt, DMA_FROM_DEVICE);
         if (!rsp_sg_cnt) {
+               dma_unmap_sg(&ha->pdev->dev, bsg_job->reply_payload.sg_list,
+                   bsg_job->reply_payload.sg_cnt, DMA_FROM_DEVICE);
                rval = -ENOMEM;
                goto done_free_fcport;
        }
@@ -1778,8 +1778,8 @@ qla24xx_process_bidir_cmd(struct bsg_job *bsg_job)
        uint16_t nextlid = 0;
        uint32_t tot_dsds;
        srb_t *sp = NULL;
-       uint32_t req_data_len = 0;
-       uint32_t rsp_data_len = 0;
+       uint32_t req_data_len;
+       uint32_t rsp_data_len;
 
        /* Check the type of the adapter */
        if (!IS_BIDI_CAPABLE(ha)) {
@@ -1884,6 +1884,9 @@ qla24xx_process_bidir_cmd(struct bsg_job *bsg_job)
                goto done_unmap_sg;
        }
 
+       req_data_len = bsg_job->request_payload.payload_len;
+       rsp_data_len = bsg_job->reply_payload.payload_len;
+
        if (req_data_len != rsp_data_len) {
                rval = EXT_STATUS_BUSY;
                ql_log(ql_log_warn, vha, 0x70aa,
@@ -1891,10 +1894,6 @@ qla24xx_process_bidir_cmd(struct bsg_job *bsg_job)
                goto done_unmap_sg;
        }
 
-       req_data_len = bsg_job->request_payload.payload_len;
-       rsp_data_len = bsg_job->reply_payload.payload_len;
-
-
        /* Alloc SRB structure */
        sp = qla2x00_get_sp(vha, &(vha->bidir_fcport), GFP_KERNEL);
        if (!sp) {
index 9e80646..30afc59 100644 (file)
@@ -2743,7 +2743,8 @@ ql_dump_regs(uint level, scsi_qla_host_t *vha, uint id)
 
 
 void
-ql_dump_buffer(uint level, scsi_qla_host_t *vha, uint id, void *buf, uint size)
+ql_dump_buffer(uint level, scsi_qla_host_t *vha, uint id, const void *buf,
+              uint size)
 {
        uint cnt;
 
index bad2b12..873a6ae 100644 (file)
 #include <scsi/scsi_transport_fc.h>
 #include <scsi/scsi_bsg_fc.h>
 
+/* Big endian Fibre Channel S_ID (source ID) or D_ID (destination ID). */
+typedef struct {
+       uint8_t domain; /* byte 0: most significant byte of the 24-bit ID */
+       uint8_t area;   /* byte 1 */
+       uint8_t al_pa;  /* byte 2: least significant byte */
+} be_id_t;
+
+/* Little endian Fibre Channel S_ID (source ID) or D_ID (destination ID). */
+typedef struct {
+       uint8_t al_pa;  /* byte 0: least significant byte of the 24-bit ID */
+       uint8_t area;   /* byte 1 */
+       uint8_t domain; /* byte 2: most significant byte */
+} le_id_t;
+
 #include "qla_bsg.h"
 #include "qla_dsd.h"
 #include "qla_nx.h"
 #define RD_REG_BYTE_RELAXED(addr)      readb_relaxed(addr)
 #define RD_REG_WORD_RELAXED(addr)      readw_relaxed(addr)
 #define RD_REG_DWORD_RELAXED(addr)     readl_relaxed(addr)
-#define WRT_REG_BYTE(addr, data)       writeb(data,addr)
-#define WRT_REG_WORD(addr, data)       writew(data,addr)
-#define WRT_REG_DWORD(addr, data)      writel(data,addr)
+#define WRT_REG_BYTE(addr, data)       writeb(data, addr)
+#define WRT_REG_WORD(addr, data)       writew(data, addr)
+#define WRT_REG_DWORD(addr, data)      writel(data, addr)
 
 /*
  * ISP83XX specific remote register addresses
  * 133Mhz slot.
  */
 #define RD_REG_WORD_PIO(addr)          (inw((unsigned long)addr))
-#define WRT_REG_WORD_PIO(addr, data)   (outw(data,(unsigned long)addr))
+#define WRT_REG_WORD_PIO(addr, data)   (outw(data, (unsigned long)addr))
 
 /*
  * Fibre Channel device definitions.
@@ -303,7 +317,8 @@ struct srb_cmd {
        uint32_t request_sense_length;
        uint32_t fw_sense_length;
        uint8_t *request_sense_ptr;
-       void *ctx;
+       struct ct6_dsd *ct6_ctx;
+       struct crc_context *crc_ctx;
 };
 
 /*
@@ -343,6 +358,51 @@ typedef union {
 } port_id_t;
 #define INVALID_PORT_ID        0xFFFFFF
 
+static inline le_id_t be_id_to_le(be_id_t id)
+{
+       /* Assign by field name; only the in-memory byte order changes. */
+       le_id_t swapped = {
+               .al_pa  = id.al_pa,
+               .area   = id.area,
+               .domain = id.domain,
+       };
+       return swapped;
+}
+
+static inline be_id_t le_id_to_be(le_id_t id)
+{
+       /* Assign by field name; only the in-memory byte order changes. */
+       be_id_t swapped = {
+               .domain = id.domain,
+               .area   = id.area,
+               .al_pa  = id.al_pa,
+       };
+       return swapped;
+}
+
+static inline port_id_t be_to_port_id(be_id_t id)
+{
+       /* Fill every byte of .b; the reserved byte is explicitly zero. */
+       port_id_t res = { .b = {
+               .domain = id.domain,
+               .area   = id.area,
+               .al_pa  = id.al_pa,
+               .rsvd_1 = 0,
+       } };
+       return res;
+}
+
+static inline be_id_t port_id_to_be_id(port_id_t port_id)
+{
+       /* b.rsvd_1 has no counterpart in the 3-byte ID and is not copied. */
+       be_id_t out = {
+               .domain = port_id.b.domain,
+               .area   = port_id.b.area,
+               .al_pa  = port_id.b.al_pa,
+       };
+       return out;
+}
+
 struct els_logo_payload {
        uint8_t opcode;
        uint8_t rsvd[3];
@@ -395,7 +455,7 @@ struct srb_iocb {
                        struct els_logo_payload *els_logo_pyld;
                        dma_addr_t els_logo_pyld_dma;
                } els_logo;
-               struct {
+               struct els_plogi {
 #define ELS_DCMD_PLOGI 0x3
                        uint32_t flags;
                        uint32_t els_cmd;
@@ -537,6 +597,7 @@ typedef struct srb {
        wait_queue_head_t nvme_ls_waitq;
        struct fc_port *fcport;
        struct scsi_qla_host *vha;
+       unsigned int start_timer:1;
        uint32_t handle;
        uint16_t flags;
        uint16_t type;
@@ -554,14 +615,22 @@ typedef struct srb {
                struct bsg_job *bsg_job;
                struct srb_cmd scmd;
        } u;
-       void (*done)(void *, int);
-       void (*free)(void *);
+       /*
+        * Report completion status @res and call sp_put(@sp). @res is
+        * an NVMe status code, a SCSI result (e.g. DID_OK << 16) or a
+        * QLA_* status value.
+        */
+       void (*done)(struct srb *sp, int res);
+       /* Stop the timer and free @sp. Only used by the FCP code. */
+       void (*free)(struct srb *sp);
+       /*
+        * Call nvme_private->fd->done() and free @sp. Only used by the NVMe
+        * code.
+        */
        void (*put_fn)(struct kref *kref);
 } srb_t;
 
 #define GET_CMD_SP(sp) (sp->u.scmd.cmd)
-#define SET_CMD_SP(sp, cmd) (sp->u.scmd.cmd = cmd)
-#define GET_CMD_CTX_SP(sp) (sp->u.scmd.ctx)
 
 #define GET_CMD_SENSE_LEN(sp) \
        (sp->u.scmd.request_sense_length)
@@ -921,6 +990,11 @@ struct mbx_cmd_32 {
 #define MBS_LINK_DOWN_ERROR            0x400B
 #define MBS_DIAG_ECHO_TEST_ERROR       0x400C
 
+static inline bool qla2xxx_is_valid_mbs(unsigned int mbs)
+{
+       return mbs >= MBS_COMMAND_COMPLETE && mbs <= MBS_DIAG_ECHO_TEST_ERROR;
+}
+
 /*
  * ISP mailbox asynchronous event status codes
  */
@@ -1851,7 +1925,7 @@ struct crc_context {
                        uint16_t        reserved_2;
                        uint16_t        reserved_3;
                        uint32_t        reserved_4;
-                       struct dsd64    data_dsd;
+                       struct dsd64    data_dsd[1];
                        uint32_t        reserved_5[2];
                        uint32_t        reserved_6;
                } nobundling;
@@ -1861,7 +1935,7 @@ struct crc_context {
                        uint16_t        reserved_1;
                        __le16  dseg_count;     /* Data segment count */
                        uint32_t        reserved_2;
-                       struct dsd64    data_dsd;
+                       struct dsd64    data_dsd[1];
                        struct dsd64    dif_dsd;
                } bundling;
        } u;
@@ -2289,22 +2363,6 @@ enum login_state {       /* FW control Target side */
        DSC_LS_LOGO_PEND,
 };
 
-enum fcport_mgt_event {
-       FCME_RELOGIN = 1,
-       FCME_RSCN,
-       FCME_PLOGI_DONE,        /* Initiator side sent LLIOCB */
-       FCME_PRLI_DONE,
-       FCME_GNL_DONE,
-       FCME_GPSC_DONE,
-       FCME_GPDB_DONE,
-       FCME_GPNID_DONE,
-       FCME_GFFID_DONE,
-       FCME_ADISC_DONE,
-       FCME_GNNID_DONE,
-       FCME_GFPNID_DONE,
-       FCME_ELS_PLOGI_DONE,
-};
-
 enum rscn_addr_format {
        RSCN_PORT_ADDR,
        RSCN_AREA_ADDR,
@@ -2422,7 +2480,6 @@ typedef struct fc_port {
 #define QLA_FCPORT_FOUND       2
 
 struct event_arg {
-       enum fcport_mgt_event   event;
        fc_port_t               *fcport;
        srb_t                   *sp;
        port_id_t               id;
@@ -2745,7 +2802,7 @@ struct ct_sns_req {
                /* GA_NXT, GPN_ID, GNN_ID, GFT_ID, GFPN_ID */
                struct {
                        uint8_t reserved;
-                       uint8_t port_id[3];
+                       be_id_t port_id;
                } port_id;
 
                struct {
@@ -2764,13 +2821,13 @@ struct ct_sns_req {
 
                struct {
                        uint8_t reserved;
-                       uint8_t port_id[3];
+                       be_id_t port_id;
                        uint8_t fc4_types[32];
                } rft_id;
 
                struct {
                        uint8_t reserved;
-                       uint8_t port_id[3];
+                       be_id_t port_id;
                        uint16_t reserved2;
                        uint8_t fc4_feature;
                        uint8_t fc4_type;
@@ -2778,7 +2835,7 @@ struct ct_sns_req {
 
                struct {
                        uint8_t reserved;
-                       uint8_t port_id[3];
+                       be_id_t port_id;
                        uint8_t node_name[8];
                } rnn_id;
 
@@ -2865,7 +2922,7 @@ struct ct_rsp_hdr {
 
 struct ct_sns_gid_pt_data {
        uint8_t control_byte;
-       uint8_t port_id[3];
+       be_id_t port_id;
 };
 
 /* It's the same for both GPN_FT and GNN_FT */
@@ -2895,7 +2952,7 @@ struct ct_sns_rsp {
        union {
                struct {
                        uint8_t port_type;
-                       uint8_t port_id[3];
+                       be_id_t port_id;
                        uint8_t port_name[8];
                        uint8_t sym_port_name_len;
                        uint8_t sym_port_name[255];
@@ -3111,7 +3168,7 @@ struct isp_operations {
        void (*update_fw_options) (struct scsi_qla_host *);
        int (*load_risc) (struct scsi_qla_host *, uint32_t *);
 
-       char * (*pci_info_str) (struct scsi_qla_host *, char *);
+       char * (*pci_info_str)(struct scsi_qla_host *, char *, size_t);
        char * (*fw_version_str)(struct scsi_qla_host *, char *, size_t);
 
        irq_handler_t intr_handler;
@@ -3850,7 +3907,7 @@ struct qla_hw_data {
 
        /* NVRAM configuration data */
 #define MAX_NVRAM_SIZE  4096
-#define VPD_OFFSET      MAX_NVRAM_SIZE / 2
+#define VPD_OFFSET      (MAX_NVRAM_SIZE / 2)
        uint16_t        nvram_size;
        uint16_t        nvram_base;
        void            *nvram;
@@ -4628,6 +4685,7 @@ struct secure_flash_update_block_pk {
 #define QLA_SUSPENDED                  0x106
 #define QLA_BUSY                       0x107
 #define QLA_ALREADY_REGISTERED         0x109
+#define QLA_OS_TIMER_EXPIRED           0x10a
 
 #define NVRAM_DELAY()          udelay(10)
 
index a432cae..0a6fb35 100644 (file)
@@ -57,10 +57,9 @@ qla2x00_dfs_tgt_port_database_show(struct seq_file *s, void *unused)
 {
        scsi_qla_host_t *vha = s->private;
        struct qla_hw_data *ha = vha->hw;
-       struct gid_list_info *gid_list;
+       struct gid_list_info *gid_list, *gid;
        dma_addr_t gid_list_dma;
        fc_port_t fc_port;
-       char *id_iter;
        int rc, i;
        uint16_t entries, loop_id;
        struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
@@ -82,13 +81,11 @@ qla2x00_dfs_tgt_port_database_show(struct seq_file *s, void *unused)
                if (rc != QLA_SUCCESS)
                        goto out_free_id_list;
 
-               id_iter = (char *)gid_list;
+               gid = gid_list;
 
                seq_puts(s, "Port Name  Port ID         Loop ID\n");
 
                for (i = 0; i < entries; i++) {
-                       struct gid_list_info *gid =
-                           (struct gid_list_info *)id_iter;
                        loop_id = le16_to_cpu(gid->loop_id);
                        memset(&fc_port, 0, sizeof(fc_port_t));
 
@@ -99,7 +96,7 @@ qla2x00_dfs_tgt_port_database_show(struct seq_file *s, void *unused)
                                fc_port.port_name, fc_port.d_id.b.domain,
                                fc_port.d_id.b.area, fc_port.d_id.b.al_pa,
                                fc_port.loop_id);
-                       id_iter += ha->gid_list_info_size;
+                       gid = (void *)gid + ha->gid_list_info_size;
                }
 out_free_id_list:
                dma_free_coherent(&ha->pdev->dev, qla2x00_gid_list_size(ha),
index 7479924..2078805 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _QLA_DSD_H_
 #define _QLA_DSD_H_
 
+#include <asm/unaligned.h>
+
 /* 32-bit data segment descriptor (8 bytes) */
 struct dsd32 {
        __le32 address;
index df079a8..732bb87 100644 (file)
@@ -761,13 +761,13 @@ struct els_entry_24xx {
 #define ECF_CLR_PASSTHRU_PEND  BIT_12
 #define ECF_INCL_FRAME_HDR     BIT_11
 
-       uint32_t rx_byte_count;
-       uint32_t tx_byte_count;
+       __le32   rx_byte_count;
+       __le32   tx_byte_count;
 
        __le64   tx_address __packed;   /* Data segment 0 address. */
-       uint32_t tx_len;                /* Data segment 0 length. */
+       __le32   tx_len;                /* Data segment 0 length. */
        __le64   rx_address __packed;   /* Data segment 1 address. */
-       uint32_t rx_len;                /* Data segment 1 length. */
+       __le32   rx_len;                /* Data segment 1 length. */
 };
 
 struct els_sts_entry_24xx {
index f9669fd..d11416d 100644 (file)
@@ -45,6 +45,8 @@ extern int qla2x00_local_device_login(scsi_qla_host_t *, fc_port_t *);
 
 extern int qla24xx_els_dcmd_iocb(scsi_qla_host_t *, int, port_id_t);
 extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *, bool);
+extern void qla2x00_els_dcmd2_free(scsi_qla_host_t *vha,
+                                  struct els_plogi *els_plogi);
 
 extern void qla2x00_update_fcports(scsi_qla_host_t *);
 
@@ -96,7 +98,11 @@ extern int qla2x00_init_rings(scsi_qla_host_t *);
 extern struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *,
        int, int, bool);
 extern int qla2xxx_delete_qpair(struct scsi_qla_host *, struct qla_qpair *);
-void qla2x00_fcport_event_handler(scsi_qla_host_t *, struct event_arg *);
+void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea);
+void qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha,
+                                    struct event_arg *ea);
+void qla24xx_handle_relogin_event(scsi_qla_host_t *vha,
+                                 struct event_arg *ea);
 int qla24xx_async_gpdb(struct scsi_qla_host *, fc_port_t *, u8);
 int qla24xx_async_prli(struct scsi_qla_host *, fc_port_t *);
 int qla24xx_async_notify_ack(scsi_qla_host_t *, fc_port_t *,
@@ -213,9 +219,9 @@ extern int qla2x00_post_uevent_work(struct scsi_qla_host *, u32);
 
 extern int qla2x00_post_uevent_work(struct scsi_qla_host *, u32);
 extern void qla2x00_disable_board_on_pci_error(struct work_struct *);
-extern void qla2x00_sp_compl(void *, int);
-extern void qla2xxx_qpair_sp_free_dma(void *);
-extern void qla2xxx_qpair_sp_compl(void *, int);
+extern void qla2x00_sp_compl(srb_t *sp, int);
+extern void qla2xxx_qpair_sp_free_dma(srb_t *sp);
+extern void qla2xxx_qpair_sp_compl(srb_t *sp, int);
 extern void qla24xx_sched_upd_fcport(fc_port_t *);
 void qla2x00_handle_login_done_event(struct scsi_qla_host *, fc_port_t *,
        uint16_t *);
@@ -244,7 +250,7 @@ extern void qla2x00_do_dpc_all_vps(scsi_qla_host_t *);
 extern int qla24xx_vport_create_req_sanity_check(struct fc_vport *);
 extern scsi_qla_host_t *qla24xx_create_vhost(struct fc_vport *);
 
-extern void qla2x00_sp_free_dma(void *);
+extern void qla2x00_sp_free_dma(srb_t *sp);
 extern char *qla2x00_get_fw_version_str(struct scsi_qla_host *, char *);
 
 extern void qla2x00_mark_device_lost(scsi_qla_host_t *, fc_port_t *, int, int);
@@ -272,6 +278,7 @@ extern void qla2x00_build_scsi_iocbs_32(srb_t *, cmd_entry_t *, uint16_t);
 extern void qla2x00_build_scsi_iocbs_64(srb_t *, cmd_entry_t *, uint16_t);
 extern void qla24xx_build_scsi_iocbs(srb_t *, struct cmd_type_7 *,
        uint16_t, struct req_que *);
+extern uint32_t qla2xxx_get_next_handle(struct req_que *req);
 extern int qla2x00_start_scsi(srb_t *sp);
 extern int qla24xx_start_scsi(srb_t *sp);
 int qla2x00_marker(struct scsi_qla_host *, struct qla_qpair *,
@@ -554,7 +561,7 @@ fc_port_t *qla2x00_find_fcport_by_nportid(scsi_qla_host_t *, port_id_t *, u8);
  * Global Function Prototypes in qla_sup.c source file.
  */
 extern void qla2x00_release_nvram_protection(scsi_qla_host_t *);
-extern uint32_t *qla24xx_read_flash_data(scsi_qla_host_t *, uint32_t *,
+extern int qla24xx_read_flash_data(scsi_qla_host_t *, uint32_t *,
     uint32_t, uint32_t);
 extern uint8_t *qla2x00_read_nvram_data(scsi_qla_host_t *, void *, uint32_t,
     uint32_t);
@@ -630,7 +637,7 @@ extern ulong qla27xx_fwdt_template_size(void *);
 
 extern void qla2xxx_dump_post_process(scsi_qla_host_t *, int);
 extern void ql_dump_regs(uint, scsi_qla_host_t *, uint);
-extern void ql_dump_buffer(uint, scsi_qla_host_t *, uint, void *, uint);
+extern void ql_dump_buffer(uint, scsi_qla_host_t *, uint, const void *, uint);
 /*
  * Global Function Prototypes in qla_gs.c source file.
  */
@@ -732,7 +739,7 @@ extern int qlafx00_initialize_adapter(struct scsi_qla_host *);
 extern int qlafx00_soft_reset(scsi_qla_host_t *);
 extern int qlafx00_chip_diag(scsi_qla_host_t *);
 extern void qlafx00_config_rings(struct scsi_qla_host *);
-extern char *qlafx00_pci_info_str(struct scsi_qla_host *, char *);
+extern char *qlafx00_pci_info_str(struct scsi_qla_host *, char *, size_t);
 extern char *qlafx00_fw_version_str(struct scsi_qla_host *, char *, size_t);
 extern irqreturn_t qlafx00_intr_handler(int, void *);
 extern void qlafx00_enable_intrs(struct qla_hw_data *);
@@ -790,10 +797,10 @@ extern int qla82xx_restart_isp(scsi_qla_host_t *);
 
 /* IOCB related functions */
 extern int qla82xx_start_scsi(srb_t *);
-extern void qla2x00_sp_free(void *);
+extern void qla2x00_sp_free(srb_t *sp);
 extern void qla2x00_sp_timeout(struct timer_list *);
-extern void qla2x00_bsg_job_done(void *, int);
-extern void qla2x00_bsg_sp_free(void *);
+extern void qla2x00_bsg_job_done(srb_t *sp, int);
+extern void qla2x00_bsg_sp_free(srb_t *sp);
 extern void qla2x00_start_iocbs(struct scsi_qla_host *, struct req_que *);
 
 /* Interrupt related */
@@ -822,8 +829,8 @@ extern int qla82xx_device_state_handler(scsi_qla_host_t *);
 extern void qla8xxx_dev_failed_handler(scsi_qla_host_t *);
 extern void qla82xx_clear_qsnt_ready(scsi_qla_host_t *);
 
-extern void qla2x00_set_model_info(scsi_qla_host_t *, uint8_t *,
-                                  size_t, char *);
+extern void qla2x00_set_model_info(scsi_qla_host_t *, uint8_t *, size_t,
+                                  const char *);
 extern int qla82xx_mbx_intr_enable(scsi_qla_host_t *);
 extern int qla82xx_mbx_intr_disable(scsi_qla_host_t *);
 extern void qla82xx_start_iocbs(scsi_qla_host_t *);
index 9f58e59..dc0e366 100644 (file)
@@ -226,9 +226,7 @@ qla2x00_ga_nxt(scsi_qla_host_t *vha, fc_port_t *fcport)
        ct_rsp = &ha->ct_sns->p.rsp;
 
        /* Prepare CT arguments -- port_id */
-       ct_req->req.port_id.port_id[0] = fcport->d_id.b.domain;
-       ct_req->req.port_id.port_id[1] = fcport->d_id.b.area;
-       ct_req->req.port_id.port_id[2] = fcport->d_id.b.al_pa;
+       ct_req->req.port_id.port_id = port_id_to_be_id(fcport->d_id);
 
        /* Execute MS IOCB */
        rval = qla2x00_issue_iocb(vha, ha->ms_iocb, ha->ms_iocb_dma,
@@ -242,9 +240,7 @@ qla2x00_ga_nxt(scsi_qla_host_t *vha, fc_port_t *fcport)
                rval = QLA_FUNCTION_FAILED;
        } else {
                /* Populate fc_port_t entry. */
-               fcport->d_id.b.domain = ct_rsp->rsp.ga_nxt.port_id[0];
-               fcport->d_id.b.area = ct_rsp->rsp.ga_nxt.port_id[1];
-               fcport->d_id.b.al_pa = ct_rsp->rsp.ga_nxt.port_id[2];
+               fcport->d_id = be_to_port_id(ct_rsp->rsp.ga_nxt.port_id);
 
                memcpy(fcport->node_name, ct_rsp->rsp.ga_nxt.node_name,
                    WWN_SIZE);
@@ -337,9 +333,7 @@ qla2x00_gid_pt(scsi_qla_host_t *vha, sw_info_t *list)
                /* Set port IDs in switch info list. */
                for (i = 0; i < ha->max_fibre_devices; i++) {
                        gid_data = &ct_rsp->rsp.gid_pt.entries[i];
-                       list[i].d_id.b.domain = gid_data->port_id[0];
-                       list[i].d_id.b.area = gid_data->port_id[1];
-                       list[i].d_id.b.al_pa = gid_data->port_id[2];
+                       list[i].d_id = be_to_port_id(gid_data->port_id);
                        memset(list[i].fabric_port_name, 0, WWN_SIZE);
                        list[i].fp_speed = PORT_SPEED_UNKNOWN;
 
@@ -403,9 +397,7 @@ qla2x00_gpn_id(scsi_qla_host_t *vha, sw_info_t *list)
                ct_rsp = &ha->ct_sns->p.rsp;
 
                /* Prepare CT arguments -- port_id */
-               ct_req->req.port_id.port_id[0] = list[i].d_id.b.domain;
-               ct_req->req.port_id.port_id[1] = list[i].d_id.b.area;
-               ct_req->req.port_id.port_id[2] = list[i].d_id.b.al_pa;
+               ct_req->req.port_id.port_id = port_id_to_be_id(list[i].d_id);
 
                /* Execute MS IOCB */
                rval = qla2x00_issue_iocb(vha, ha->ms_iocb, ha->ms_iocb_dma,
@@ -472,9 +464,7 @@ qla2x00_gnn_id(scsi_qla_host_t *vha, sw_info_t *list)
                ct_rsp = &ha->ct_sns->p.rsp;
 
                /* Prepare CT arguments -- port_id */
-               ct_req->req.port_id.port_id[0] = list[i].d_id.b.domain;
-               ct_req->req.port_id.port_id[1] = list[i].d_id.b.area;
-               ct_req->req.port_id.port_id[2] = list[i].d_id.b.al_pa;
+               ct_req->req.port_id.port_id = port_id_to_be_id(list[i].d_id);
 
                /* Execute MS IOCB */
                rval = qla2x00_issue_iocb(vha, ha->ms_iocb, ha->ms_iocb_dma,
@@ -509,9 +499,8 @@ qla2x00_gnn_id(scsi_qla_host_t *vha, sw_info_t *list)
        return (rval);
 }
 
-static void qla2x00_async_sns_sp_done(void *s, int rc)
+static void qla2x00_async_sns_sp_done(srb_t *sp, int rc)
 {
-       struct srb *sp = s;
        struct scsi_qla_host *vha = sp->vha;
        struct ct_sns_pkt *ct_sns;
        struct qla_work_evt *e;
@@ -639,9 +628,7 @@ static int qla_async_rftid(scsi_qla_host_t *vha, port_id_t *d_id)
        ct_req = qla2x00_prep_ct_req(ct_sns, RFT_ID_CMD, RFT_ID_RSP_SIZE);
 
        /* Prepare CT arguments -- port_id, FC-4 types */
-       ct_req->req.rft_id.port_id[0] = vha->d_id.b.domain;
-       ct_req->req.rft_id.port_id[1] = vha->d_id.b.area;
-       ct_req->req.rft_id.port_id[2] = vha->d_id.b.al_pa;
+       ct_req->req.rft_id.port_id = port_id_to_be_id(vha->d_id);
        ct_req->req.rft_id.fc4_types[2] = 0x01;         /* FCP-3 */
 
        if (vha->flags.nvme_enabled)
@@ -737,9 +724,7 @@ static int qla_async_rffid(scsi_qla_host_t *vha, port_id_t *d_id,
        ct_req = qla2x00_prep_ct_req(ct_sns, RFF_ID_CMD, RFF_ID_RSP_SIZE);
 
        /* Prepare CT arguments -- port_id, FC-4 feature, FC-4 type */
-       ct_req->req.rff_id.port_id[0] = d_id->b.domain;
-       ct_req->req.rff_id.port_id[1] = d_id->b.area;
-       ct_req->req.rff_id.port_id[2] = d_id->b.al_pa;
+       ct_req->req.rff_id.port_id = port_id_to_be_id(*d_id);
        ct_req->req.rff_id.fc4_feature = fc4feature;
        ct_req->req.rff_id.fc4_type = fc4type;          /* SCSI - FCP */
 
@@ -830,9 +815,7 @@ static int qla_async_rnnid(scsi_qla_host_t *vha, port_id_t *d_id,
        ct_req = qla2x00_prep_ct_req(ct_sns, RNN_ID_CMD, RNN_ID_RSP_SIZE);
 
        /* Prepare CT arguments -- port_id, node_name */
-       ct_req->req.rnn_id.port_id[0] = vha->d_id.b.domain;
-       ct_req->req.rnn_id.port_id[1] = vha->d_id.b.area;
-       ct_req->req.rnn_id.port_id[2] = vha->d_id.b.al_pa;
+       ct_req->req.rnn_id.port_id = port_id_to_be_id(vha->d_id);
        memcpy(ct_req->req.rnn_id.node_name, vha->node_name, WWN_SIZE);
 
        sp->u.iocb_cmd.u.ctarg.req_size = RNN_ID_REQ_SIZE;
@@ -1479,7 +1462,7 @@ qla24xx_prep_ms_fdmi_iocb(scsi_qla_host_t *vha, uint32_t req_size,
        return ct_pkt;
 }
 
-static inline ms_iocb_entry_t *
+static void
 qla2x00_update_ms_fdmi_iocb(scsi_qla_host_t *vha, uint32_t req_size)
 {
        struct qla_hw_data *ha = vha->hw;
@@ -1493,8 +1476,6 @@ qla2x00_update_ms_fdmi_iocb(scsi_qla_host_t *vha, uint32_t req_size)
                ms_pkt->req_bytecount = cpu_to_le32(req_size);
                ms_pkt->req_dsd.length = ms_pkt->req_bytecount;
        }
-
-       return ms_pkt;
 }
 
 /**
@@ -1557,7 +1538,7 @@ qla2x00_fdmi_rhba(scsi_qla_host_t *vha)
        /* Attributes */
        ct_req->req.rhba.attrs.count =
            cpu_to_be32(FDMI_HBA_ATTR_COUNT);
-       entries = ct_req->req.rhba.hba_identifier;
+       entries = &ct_req->req;
 
        /* Nodename. */
        eiter = entries + size;
@@ -1766,7 +1747,7 @@ qla2x00_fdmi_rpa(scsi_qla_host_t *vha)
 
        /* Attributes */
        ct_req->req.rpa.attrs.count = cpu_to_be32(FDMI_PORT_ATTR_COUNT);
-       entries = ct_req->req.rpa.port_name;
+       entries = &ct_req->req;
 
        /* FC4 types. */
        eiter = entries + size;
@@ -1979,7 +1960,7 @@ qla2x00_fdmiv2_rhba(scsi_qla_host_t *vha)
 
        /* Attributes */
        ct_req->req.rhba2.attrs.count = cpu_to_be32(FDMIV2_HBA_ATTR_COUNT);
-       entries = ct_req->req.rhba2.hba_identifier;
+       entries = &ct_req->req;
 
        /* Nodename. */
        eiter = entries + size;
@@ -2338,7 +2319,7 @@ qla2x00_fdmiv2_rpa(scsi_qla_host_t *vha)
 
        /* Attributes */
        ct_req->req.rpa2.attrs.count = cpu_to_be32(FDMIV2_PORT_ATTR_COUNT);
-       entries = ct_req->req.rpa2.port_name;
+       entries = &ct_req->req;
 
        /* FC4 types. */
        eiter = entries + size;
@@ -2730,9 +2711,7 @@ qla2x00_gfpn_id(scsi_qla_host_t *vha, sw_info_t *list)
                ct_rsp = &ha->ct_sns->p.rsp;
 
                /* Prepare CT arguments -- port_id */
-               ct_req->req.port_id.port_id[0] = list[i].d_id.b.domain;
-               ct_req->req.port_id.port_id[1] = list[i].d_id.b.area;
-               ct_req->req.port_id.port_id[2] = list[i].d_id.b.al_pa;
+               ct_req->req.port_id.port_id = port_id_to_be_id(list[i].d_id);
 
                /* Execute MS IOCB */
                rval = qla2x00_issue_iocb(vha, ha->ms_iocb, ha->ms_iocb_dma,
@@ -2936,9 +2915,7 @@ qla2x00_gff_id(scsi_qla_host_t *vha, sw_info_t *list)
                ct_rsp = &ha->ct_sns->p.rsp;
 
                /* Prepare CT arguments -- port_id */
-               ct_req->req.port_id.port_id[0] = list[i].d_id.b.domain;
-               ct_req->req.port_id.port_id[1] = list[i].d_id.b.area;
-               ct_req->req.port_id.port_id[2] = list[i].d_id.b.al_pa;
+               ct_req->req.port_id.port_id = port_id_to_be_id(list[i].d_id);
 
                /* Execute MS IOCB */
                rval = qla2x00_issue_iocb(vha, ha->ms_iocb, ha->ms_iocb_dma,
@@ -3011,9 +2988,8 @@ void qla24xx_handle_gpsc_event(scsi_qla_host_t *vha, struct event_arg *ea)
        qla_post_iidma_work(vha, fcport);
 }
 
-static void qla24xx_async_gpsc_sp_done(void *s, int res)
+static void qla24xx_async_gpsc_sp_done(srb_t *sp, int res)
 {
-       struct srb *sp = s;
        struct scsi_qla_host *vha = sp->vha;
        struct qla_hw_data *ha = vha->hw;
        fc_port_t *fcport = sp->fcport;
@@ -3055,11 +3031,10 @@ static void qla24xx_async_gpsc_sp_done(void *s, int res)
                    be16_to_cpu(ct_rsp->rsp.gpsc.speed));
        }
        memset(&ea, 0, sizeof(ea));
-       ea.event = FCME_GPSC_DONE;
        ea.rc = res;
        ea.fcport = fcport;
        ea.sp = sp;
-       qla2x00_fcport_event_handler(vha, &ea);
+       qla24xx_handle_gpsc_event(vha, &ea);
 
 done:
        sp->free(sp);
@@ -3144,17 +3119,7 @@ void qla24xx_sp_unmap(scsi_qla_host_t *vha, srb_t *sp)
 
        switch (sp->type) {
        case SRB_ELS_DCMD:
-               if (c->u.els_plogi.els_plogi_pyld)
-                       dma_free_coherent(&vha->hw->pdev->dev,
-                           c->u.els_plogi.tx_size,
-                           c->u.els_plogi.els_plogi_pyld,
-                           c->u.els_plogi.els_plogi_pyld_dma);
-
-               if (c->u.els_plogi.els_resp_pyld)
-                       dma_free_coherent(&vha->hw->pdev->dev,
-                           c->u.els_plogi.rx_size,
-                           c->u.els_plogi.els_resp_pyld,
-                           c->u.els_plogi.els_resp_pyld_dma);
+               qla2x00_els_dcmd2_free(vha, &c->u.els_plogi);
                break;
        case SRB_CT_PTHRU_CMD:
        default:
@@ -3280,9 +3245,8 @@ void qla24xx_handle_gpnid_event(scsi_qla_host_t *vha, struct event_arg *ea)
        }
 }
 
-static void qla2x00_async_gpnid_sp_done(void *s, int res)
+static void qla2x00_async_gpnid_sp_done(srb_t *sp, int res)
 {
-       struct srb *sp = s;
        struct scsi_qla_host *vha = sp->vha;
        struct ct_sns_req *ct_req =
            (struct ct_sns_req *)sp->u.iocb_cmd.u.ctarg.req;
@@ -3295,22 +3259,19 @@ static void qla2x00_async_gpnid_sp_done(void *s, int res)
        if (res)
                ql_dbg(ql_dbg_disc, vha, 0x2066,
                    "Async done-%s fail res %x rscn gen %d ID %3phC. %8phC\n",
-                   sp->name, res, sp->gen1, ct_req->req.port_id.port_id,
+                   sp->name, res, sp->gen1, &ct_req->req.port_id.port_id,
                    ct_rsp->rsp.gpn_id.port_name);
        else
                ql_dbg(ql_dbg_disc, vha, 0x2066,
                    "Async done-%s good rscn gen %d ID %3phC. %8phC\n",
-                   sp->name, sp->gen1, ct_req->req.port_id.port_id,
+                   sp->name, sp->gen1, &ct_req->req.port_id.port_id,
                    ct_rsp->rsp.gpn_id.port_name);
 
        memset(&ea, 0, sizeof(ea));
        memcpy(ea.port_name, ct_rsp->rsp.gpn_id.port_name, WWN_SIZE);
        ea.sp = sp;
-       ea.id.b.domain = ct_req->req.port_id.port_id[0];
-       ea.id.b.area = ct_req->req.port_id.port_id[1];
-       ea.id.b.al_pa = ct_req->req.port_id.port_id[2];
+       ea.id = be_to_port_id(ct_req->req.port_id.port_id);
        ea.rc = res;
-       ea.event = FCME_GPNID_DONE;
 
        spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
        list_del(&sp->elem);
@@ -3329,25 +3290,22 @@ static void qla2x00_async_gpnid_sp_done(void *s, int res)
                return;
        }
 
-       qla2x00_fcport_event_handler(vha, &ea);
+       qla24xx_handle_gpnid_event(vha, &ea);
 
        e = qla2x00_alloc_work(vha, QLA_EVT_UNMAP);
        if (!e) {
                /* please ignore kernel warning. otherwise, we have mem leak. */
-               if (sp->u.iocb_cmd.u.ctarg.req) {
-                       dma_free_coherent(&vha->hw->pdev->dev,
-                               sp->u.iocb_cmd.u.ctarg.req_allocated_size,
-                               sp->u.iocb_cmd.u.ctarg.req,
-                               sp->u.iocb_cmd.u.ctarg.req_dma);
-                       sp->u.iocb_cmd.u.ctarg.req = NULL;
-               }
-               if (sp->u.iocb_cmd.u.ctarg.rsp) {
-                       dma_free_coherent(&vha->hw->pdev->dev,
-                               sp->u.iocb_cmd.u.ctarg.rsp_allocated_size,
-                               sp->u.iocb_cmd.u.ctarg.rsp,
-                               sp->u.iocb_cmd.u.ctarg.rsp_dma);
-                       sp->u.iocb_cmd.u.ctarg.rsp = NULL;
-               }
+               dma_free_coherent(&vha->hw->pdev->dev,
+                                 sp->u.iocb_cmd.u.ctarg.req_allocated_size,
+                                 sp->u.iocb_cmd.u.ctarg.req,
+                                 sp->u.iocb_cmd.u.ctarg.req_dma);
+               sp->u.iocb_cmd.u.ctarg.req = NULL;
+
+               dma_free_coherent(&vha->hw->pdev->dev,
+                                 sp->u.iocb_cmd.u.ctarg.rsp_allocated_size,
+                                 sp->u.iocb_cmd.u.ctarg.rsp,
+                                 sp->u.iocb_cmd.u.ctarg.rsp_dma);
+               sp->u.iocb_cmd.u.ctarg.rsp = NULL;
 
                sp->free(sp);
                return;
@@ -3419,9 +3377,7 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id)
        ct_req = qla2x00_prep_ct_req(ct_sns, GPN_ID_CMD, GPN_ID_RSP_SIZE);
 
        /* GPN_ID req */
-       ct_req->req.port_id.port_id[0] = id->b.domain;
-       ct_req->req.port_id.port_id[1] = id->b.area;
-       ct_req->req.port_id.port_id[2] = id->b.al_pa;
+       ct_req->req.port_id.port_id = port_id_to_be_id(*id);
 
        sp->u.iocb_cmd.u.ctarg.req_size = GPN_ID_REQ_SIZE;
        sp->u.iocb_cmd.u.ctarg.rsp_size = GPN_ID_RSP_SIZE;
@@ -3432,7 +3388,7 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id)
 
        ql_dbg(ql_dbg_disc, vha, 0x2067,
            "Async-%s hdl=%x ID %3phC.\n", sp->name,
-           sp->handle, ct_req->req.port_id.port_id);
+           sp->handle, &ct_req->req.port_id.port_id);
 
        rval = qla2x00_start_sp(sp);
        if (rval != QLA_SUCCESS)
@@ -3467,54 +3423,52 @@ done:
 
 void qla24xx_handle_gffid_event(scsi_qla_host_t *vha, struct event_arg *ea)
 {
-       fc_port_t *fcport = ea->fcport;
+       fc_port_t *fcport = ea->fcport;
 
-       qla24xx_post_gnl_work(vha, fcport);
+       qla24xx_post_gnl_work(vha, fcport);
 }
 
-void qla24xx_async_gffid_sp_done(void *s, int res)
+void qla24xx_async_gffid_sp_done(srb_t *sp, int res)
 {
-       struct srb *sp = s;
-       struct scsi_qla_host *vha = sp->vha;
-       fc_port_t *fcport = sp->fcport;
-       struct ct_sns_rsp *ct_rsp;
-       struct event_arg ea;
-
-       ql_dbg(ql_dbg_disc, vha, 0x2133,
-          "Async done-%s res %x ID %x. %8phC\n",
-          sp->name, res, fcport->d_id.b24, fcport->port_name);
-
-       fcport->flags &= ~FCF_ASYNC_SENT;
-       ct_rsp = &fcport->ct_desc.ct_sns->p.rsp;
-       /*
-       * FC-GS-7, 5.2.3.12 FC-4 Features - format
-       * The format of the FC-4 Features object, as defined by the FC-4,
-       * Shall be an array of 4-bit values, one for each type code value
-       */
-       if (!res) {
-              if (ct_rsp->rsp.gff_id.fc4_features[GFF_FCP_SCSI_OFFSET] & 0xf) {
-                      /* w1 b00:03 */
-                      fcport->fc4_type =
-                          ct_rsp->rsp.gff_id.fc4_features[GFF_FCP_SCSI_OFFSET];
-                      fcport->fc4_type &= 0xf;
-              }
+       struct scsi_qla_host *vha = sp->vha;
+       fc_port_t *fcport = sp->fcport;
+       struct ct_sns_rsp *ct_rsp;
+       struct event_arg ea;
 
-              if (ct_rsp->rsp.gff_id.fc4_features[GFF_NVME_OFFSET] & 0xf) {
-                      /* w5 [00:03]/28h */
-                      fcport->fc4f_nvme =
-                          ct_rsp->rsp.gff_id.fc4_features[GFF_NVME_OFFSET];
-                      fcport->fc4f_nvme &= 0xf;
+       ql_dbg(ql_dbg_disc, vha, 0x2133,
+              "Async done-%s res %x ID %x. %8phC\n",
+              sp->name, res, fcport->d_id.b24, fcport->port_name);
+
+       fcport->flags &= ~FCF_ASYNC_SENT;
+       ct_rsp = &fcport->ct_desc.ct_sns->p.rsp;
+       /*
+        * FC-GS-7, 5.2.3.12 FC-4 Features - format
+        * The format of the FC-4 Features object, as defined by the FC-4,
+        * Shall be an array of 4-bit values, one for each type code value
+        */
+       if (!res) {
+               if (ct_rsp->rsp.gff_id.fc4_features[GFF_FCP_SCSI_OFFSET] & 0xf) {
+                       /* w1 b00:03 */
+                       fcport->fc4_type =
+                           ct_rsp->rsp.gff_id.fc4_features[GFF_FCP_SCSI_OFFSET];
+                       fcport->fc4_type &= 0xf;
               }
-       }
 
-       memset(&ea, 0, sizeof(ea));
-       ea.sp = sp;
-       ea.fcport = sp->fcport;
-       ea.rc = res;
-       ea.event = FCME_GFFID_DONE;
+               if (ct_rsp->rsp.gff_id.fc4_features[GFF_NVME_OFFSET] & 0xf) {
+                       /* w5 [00:03]/28h */
+                       fcport->fc4f_nvme =
+                           ct_rsp->rsp.gff_id.fc4_features[GFF_NVME_OFFSET];
+                       fcport->fc4f_nvme &= 0xf;
+               }
+       }
+
+       memset(&ea, 0, sizeof(ea));
+       ea.sp = sp;
+       ea.fcport = sp->fcport;
+       ea.rc = res;
 
-       qla2x00_fcport_event_handler(vha, &ea);
-       sp->free(sp);
+       qla24xx_handle_gffid_event(vha, &ea);
+       sp->free(sp);
 }
 
 /* Get FC4 Feature with Nport ID. */
@@ -3674,7 +3628,6 @@ void qla24xx_async_gnnft_done(scsi_qla_host_t *vha, srb_t *sp)
                list_for_each_entry(fcport, &vha->vp_fcports, list) {
                        if (memcmp(rp->port_name, fcport->port_name, WWN_SIZE))
                                continue;
-                       fcport->scan_needed = 0;
                        fcport->scan_state = QLA_FCPORT_FOUND;
                        found = true;
                        /*
@@ -3683,10 +3636,12 @@ void qla24xx_async_gnnft_done(scsi_qla_host_t *vha, srb_t *sp)
                        if ((fcport->flags & FCF_FABRIC_DEVICE) == 0) {
                                qla2x00_clear_loop_id(fcport);
                                fcport->flags |= FCF_FABRIC_DEVICE;
-                       } else if (fcport->d_id.b24 != rp->id.b24) {
+                       } else if (fcport->d_id.b24 != rp->id.b24 ||
+                               fcport->scan_needed) {
                                qlt_schedule_sess_for_deletion(fcport);
                        }
                        fcport->d_id.b24 = rp->id.b24;
+                       fcport->scan_needed = 0;
                        break;
                }
 
@@ -3898,9 +3853,8 @@ static void qla2x00_find_free_fcp_nvme_slot(struct scsi_qla_host *vha,
        }
 }
 
-static void qla2x00_async_gpnft_gnnft_sp_done(void *s, int res)
+static void qla2x00_async_gpnft_gnnft_sp_done(srb_t *sp, int res)
 {
-       struct srb *sp = s;
        struct scsi_qla_host *vha = sp->vha;
        struct ct_sns_req *ct_req =
                (struct ct_sns_req *)sp->u.iocb_cmd.u.ctarg.req;
@@ -4053,9 +4007,6 @@ static int qla24xx_async_gnnft(scsi_qla_host_t *vha, struct srb *sp,
 
        rval = qla2x00_start_sp(sp);
        if (rval != QLA_SUCCESS) {
-               spin_lock_irqsave(&vha->work_lock, flags);
-               vha->scan.scan_flags &= ~SF_SCANNING;
-               spin_unlock_irqrestore(&vha->work_lock, flags);
                goto done_free_sp;
        }
 
@@ -4079,6 +4030,17 @@ done_free_sp:
 
        sp->free(sp);
 
+       spin_lock_irqsave(&vha->work_lock, flags);
+       vha->scan.scan_flags &= ~SF_SCANNING;
+       if (vha->scan.scan_flags == 0) {
+               ql_dbg(ql_dbg_disc, vha, 0xffff,
+                   "%s: schedule\n", __func__);
+               vha->scan.scan_flags |= SF_QUEUED;
+               schedule_delayed_work(&vha->scan.scan_work, 5);
+       }
+       spin_unlock_irqrestore(&vha->work_lock, flags);
+
+
        return rval;
 } /* GNNFT */
 
@@ -4152,7 +4114,7 @@ int qla24xx_async_gpnft(scsi_qla_host_t *vha, u8 fc4_type, srb_t *sp)
                                                                rspsz,
                                                                &sp->u.iocb_cmd.u.ctarg.rsp_dma,
                                                                GFP_KERNEL);
-               sp->u.iocb_cmd.u.ctarg.rsp_allocated_size = sizeof(struct ct_sns_pkt);
+               sp->u.iocb_cmd.u.ctarg.rsp_allocated_size = rspsz;
                if (!sp->u.iocb_cmd.u.ctarg.rsp) {
                        ql_log(ql_log_warn, vha, 0xffff,
                            "Failed to allocate ct_sns request.\n");
@@ -4208,9 +4170,6 @@ int qla24xx_async_gpnft(scsi_qla_host_t *vha, u8 fc4_type, srb_t *sp)
 
        rval = qla2x00_start_sp(sp);
        if (rval != QLA_SUCCESS) {
-               spin_lock_irqsave(&vha->work_lock, flags);
-               vha->scan.scan_flags &= ~SF_SCANNING;
-               spin_unlock_irqrestore(&vha->work_lock, flags);
                goto done_free_sp;
        }
 
@@ -4234,6 +4193,17 @@ done_free_sp:
 
        sp->free(sp);
 
+       spin_lock_irqsave(&vha->work_lock, flags);
+       vha->scan.scan_flags &= ~SF_SCANNING;
+       if (vha->scan.scan_flags == 0) {
+               ql_dbg(ql_dbg_disc, vha, 0xffff,
+                   "%s: schedule\n", __func__);
+               vha->scan.scan_flags |= SF_QUEUED;
+               schedule_delayed_work(&vha->scan.scan_work, 5);
+       }
+       spin_unlock_irqrestore(&vha->work_lock, flags);
+
+
        return rval;
 }
 
@@ -4261,9 +4231,8 @@ void qla24xx_handle_gnnid_event(scsi_qla_host_t *vha, struct event_arg *ea)
        qla24xx_post_gnl_work(vha, ea->fcport);
 }
 
-static void qla2x00_async_gnnid_sp_done(void *s, int res)
+static void qla2x00_async_gnnid_sp_done(srb_t *sp, int res)
 {
-       struct srb *sp = s;
        struct scsi_qla_host *vha = sp->vha;
        fc_port_t *fcport = sp->fcport;
        u8 *node_name = fcport->ct_desc.ct_sns->p.rsp.rsp.gnn_id.node_name;
@@ -4279,13 +4248,12 @@ static void qla2x00_async_gnnid_sp_done(void *s, int res)
        ea.fcport = fcport;
        ea.sp = sp;
        ea.rc = res;
-       ea.event = FCME_GNNID_DONE;
 
        ql_dbg(ql_dbg_disc, vha, 0x204f,
            "Async done-%s res %x, WWPN %8phC %8phC\n",
            sp->name, res, fcport->port_name, fcport->node_name);
 
-       qla2x00_fcport_event_handler(vha, &ea);
+       qla24xx_handle_gnnid_event(vha, &ea);
 
        sp->free(sp);
 }
@@ -4318,9 +4286,7 @@ int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport)
            GNN_ID_RSP_SIZE);
 
        /* GNN_ID req */
-       ct_req->req.port_id.port_id[0] = fcport->d_id.b.domain;
-       ct_req->req.port_id.port_id[1] = fcport->d_id.b.area;
-       ct_req->req.port_id.port_id[2] = fcport->d_id.b.al_pa;
+       ct_req->req.port_id.port_id = port_id_to_be_id(fcport->d_id);
 
 
        /* req & rsp use the same buffer */
@@ -4396,9 +4362,8 @@ void qla24xx_handle_gfpnid_event(scsi_qla_host_t *vha, struct event_arg *ea)
        qla24xx_post_gpsc_work(vha, fcport);
 }
 
-static void qla2x00_async_gfpnid_sp_done(void *s, int res)
+static void qla2x00_async_gfpnid_sp_done(srb_t *sp, int res)
 {
-       struct srb *sp = s;
        struct scsi_qla_host *vha = sp->vha;
        fc_port_t *fcport = sp->fcport;
        u8 *fpn = fcport->ct_desc.ct_sns->p.rsp.rsp.gfpn_id.port_name;
@@ -4413,13 +4378,12 @@ static void qla2x00_async_gfpnid_sp_done(void *s, int res)
        ea.fcport = fcport;
        ea.sp = sp;
        ea.rc = res;
-       ea.event = FCME_GFPNID_DONE;
 
        ql_dbg(ql_dbg_disc, vha, 0x204f,
            "Async done-%s res %x, WWPN %8phC %8phC\n",
            sp->name, res, fcport->port_name, fcport->fabric_port_name);
 
-       qla2x00_fcport_event_handler(vha, &ea);
+       qla24xx_handle_gfpnid_event(vha, &ea);
 
        sp->free(sp);
 }
@@ -4450,9 +4414,7 @@ int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport)
            GFPN_ID_RSP_SIZE);
 
        /* GFPN_ID req */
-       ct_req->req.port_id.port_id[0] = fcport->d_id.b.domain;
-       ct_req->req.port_id.port_id[1] = fcport->d_id.b.area;
-       ct_req->req.port_id.port_id[2] = fcport->d_id.b.al_pa;
+       ct_req->req.port_id.port_id = port_id_to_be_id(fcport->d_id);
 
 
        /* req & rsp use the same buffer */
index d4c3bae..643d232 100644 (file)
@@ -37,8 +37,8 @@ static struct qla_chip_state_84xx *qla84xx_get_chip(struct scsi_qla_host *);
 static int qla84xx_init_chip(scsi_qla_host_t *);
 static int qla25xx_init_queues(struct qla_hw_data *);
 static int qla24xx_post_prli_work(struct scsi_qla_host*, fc_port_t *);
-static void qla24xx_handle_plogi_done_event(struct scsi_qla_host *,
-    struct event_arg *);
+static void qla24xx_handle_gpdb_event(scsi_qla_host_t *vha,
+                                     struct event_arg *ea);
 static void qla24xx_handle_prli_done_event(struct scsi_qla_host *,
     struct event_arg *);
 static void __qla24xx_handle_gpdb_event(scsi_qla_host_t *, struct event_arg *);
@@ -63,10 +63,8 @@ qla2x00_sp_timeout(struct timer_list *t)
        iocb->timeout(sp);
 }
 
-void
-qla2x00_sp_free(void *ptr)
+void qla2x00_sp_free(srb_t *sp)
 {
-       srb_t *sp = ptr;
        struct srb_iocb *iocb = &sp->u.iocb_cmd;
 
        del_timer(&iocb->timer);
@@ -99,22 +97,33 @@ static void qla24xx_abort_iocb_timeout(void *data)
 {
        srb_t *sp = data;
        struct srb_iocb *abt = &sp->u.iocb_cmd;
+       struct qla_qpair *qpair = sp->qpair;
+       u32 handle;
+       unsigned long flags;
+
+       spin_lock_irqsave(qpair->qp_lock_ptr, flags);
+       for (handle = 1; handle < qpair->req->num_outstanding_cmds; handle++) {
+               /* removing the abort */
+               if (qpair->req->outstanding_cmds[handle] == sp) {
+                       qpair->req->outstanding_cmds[handle] = NULL;
+                       break;
+               }
+       }
+       spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
 
        abt->u.abt.comp_status = CS_TIMEOUT;
-       sp->done(sp, QLA_FUNCTION_TIMEOUT);
+       sp->done(sp, QLA_OS_TIMER_EXPIRED);
 }
 
-static void qla24xx_abort_sp_done(void *ptr, int res)
+static void qla24xx_abort_sp_done(srb_t *sp, int res)
 {
-       srb_t *sp = ptr;
        struct srb_iocb *abt = &sp->u.iocb_cmd;
 
-       if (del_timer(&sp->u.iocb_cmd.timer)) {
-               if (sp->flags & SRB_WAKEUP_ON_COMP)
-                       complete(&abt->u.abt.comp);
-               else
-                       sp->free(sp);
-       }
+       del_timer(&sp->u.iocb_cmd.timer);
+       if (sp->flags & SRB_WAKEUP_ON_COMP)
+               complete(&abt->u.abt.comp);
+       else
+               sp->free(sp);
 }
 
 static int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
@@ -127,7 +136,7 @@ static int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
        sp = qla2xxx_get_qpair_sp(cmd_sp->vha, cmd_sp->qpair, cmd_sp->fcport,
                                  GFP_ATOMIC);
        if (!sp)
-               goto done;
+               return rval;
 
        abt_iocb = &sp->u.iocb_cmd;
        sp->type = SRB_ABT_CMD;
@@ -151,20 +160,18 @@ static int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
               cmd_sp->type);
 
        rval = qla2x00_start_sp(sp);
-       if (rval != QLA_SUCCESS)
-               goto done_free_sp;
+       if (rval != QLA_SUCCESS) {
+               sp->free(sp);
+               return rval;
+       }
 
        if (wait) {
                wait_for_completion(&abt_iocb->u.abt.comp);
                rval = abt_iocb->u.abt.comp_status == CS_COMPLETE ?
                        QLA_SUCCESS : QLA_FUNCTION_FAILED;
-       } else {
-               goto done;
+               sp->free(sp);
        }
 
-done_free_sp:
-       sp->free(sp);
-done:
        return rval;
 }
 
@@ -234,13 +241,15 @@ qla2x00_async_iocb_timeout(void *data)
                        sp->done(sp, QLA_FUNCTION_TIMEOUT);
                }
                break;
+       default:
+               WARN_ON_ONCE(true);
+               sp->done(sp, QLA_FUNCTION_TIMEOUT);
+               break;
        }
 }
 
-static void
-qla2x00_async_login_sp_done(void *ptr, int res)
+static void qla2x00_async_login_sp_done(srb_t *sp, int res)
 {
-       srb_t *sp = ptr;
        struct scsi_qla_host *vha = sp->vha;
        struct srb_iocb *lio = &sp->u.iocb_cmd;
        struct event_arg ea;
@@ -252,14 +261,13 @@ qla2x00_async_login_sp_done(void *ptr, int res)
 
        if (!test_bit(UNLOADING, &vha->dpc_flags)) {
                memset(&ea, 0, sizeof(ea));
-               ea.event = FCME_PLOGI_DONE;
                ea.fcport = sp->fcport;
                ea.data[0] = lio->u.logio.data[0];
                ea.data[1] = lio->u.logio.data[1];
                ea.iop[0] = lio->u.logio.iop[0];
                ea.iop[1] = lio->u.logio.iop[1];
                ea.sp = sp;
-               qla2x00_fcport_event_handler(vha, &ea);
+               qla24xx_handle_plogi_done_event(vha, &ea);
        }
 
        sp->free(sp);
@@ -289,8 +297,13 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport,
        struct srb_iocb *lio;
        int rval = QLA_FUNCTION_FAILED;
 
-       if (!vha->flags.online)
-               goto done;
+       if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT) ||
+           fcport->loop_id == FC_NO_LOOP_ID) {
+               ql_log(ql_log_warn, vha, 0xffff,
+                   "%s: %8phC - not sending command.\n",
+                   __func__, fcport->port_name);
+               return rval;
+       }
 
        sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
        if (!sp)
@@ -341,11 +354,8 @@ done:
        return rval;
 }
 
-static void
-qla2x00_async_logout_sp_done(void *ptr, int res)
+static void qla2x00_async_logout_sp_done(srb_t *sp, int res)
 {
-       srb_t *sp = ptr;
-
        sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
        sp->fcport->login_gen++;
        qlt_logo_completion_handler(sp->fcport, res);
@@ -359,9 +369,6 @@ qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport)
        struct srb_iocb *lio;
        int rval = QLA_FUNCTION_FAILED;
 
-       if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT))
-               return rval;
-
        fcport->flags |= FCF_ASYNC_SENT;
        sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
        if (!sp)
@@ -405,10 +412,8 @@ qla2x00_async_prlo_done(struct scsi_qla_host *vha, fc_port_t *fcport,
        qlt_logo_completion_handler(fcport, data[0]);
 }
 
-static void
-qla2x00_async_prlo_sp_done(void *s, int res)
+static void qla2x00_async_prlo_sp_done(srb_t *sp, int res)
 {
-       srb_t *sp = (srb_t *)s;
        struct srb_iocb *lio = &sp->u.iocb_cmd;
        struct scsi_qla_host *vha = sp->vha;
 
@@ -469,6 +474,9 @@ void qla24xx_handle_adisc_event(scsi_qla_host_t *vha, struct event_arg *ea)
            fcport->fw_login_state, ea->rc, fcport->login_gen, ea->sp->gen2,
            fcport->rscn_gen, ea->sp->gen1, fcport->loop_id);
 
+       WARN_ONCE(!qla2xxx_is_valid_mbs(ea->data[0]), "mbs: %#x\n",
+                 ea->data[0]);
+
        if (ea->data[0] != MBS_COMMAND_COMPLETE) {
                ql_dbg(ql_dbg_disc, vha, 0x2066,
                    "%s %8phC: adisc fail: post delete\n",
@@ -511,10 +519,8 @@ static int qla_post_els_plogi_work(struct scsi_qla_host *vha, fc_port_t *fcport)
        return qla2x00_post_work(vha, e);
 }
 
-static void
-qla2x00_async_adisc_sp_done(void *ptr, int res)
+static void qla2x00_async_adisc_sp_done(srb_t *sp, int res)
 {
-       srb_t *sp = ptr;
        struct scsi_qla_host *vha = sp->vha;
        struct event_arg ea;
        struct srb_iocb *lio = &sp->u.iocb_cmd;
@@ -526,7 +532,6 @@ qla2x00_async_adisc_sp_done(void *ptr, int res)
        sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
 
        memset(&ea, 0, sizeof(ea));
-       ea.event = FCME_ADISC_DONE;
        ea.rc = res;
        ea.data[0] = lio->u.logio.data[0];
        ea.data[1] = lio->u.logio.data[1];
@@ -535,7 +540,7 @@ qla2x00_async_adisc_sp_done(void *ptr, int res)
        ea.fcport = sp->fcport;
        ea.sp = sp;
 
-       qla2x00_fcport_event_handler(vha, &ea);
+       qla24xx_handle_adisc_event(vha, &ea);
 
        sp->free(sp);
 }
@@ -803,6 +808,15 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha,
                        fcport->fw_login_state = current_login_state;
                        fcport->d_id = id;
                        switch (current_login_state) {
+                       case DSC_LS_PRLI_PEND:
+                               /*
+                                * In the middle of PRLI. Let it finish.
+                                * Allow relogin code to recheck state again
+                                * with GNL. Push disc_state back to DELETED
+                                * so GNL can go out again
+                                */
+                               fcport->disc_state = DSC_DELETED;
+                               break;
                        case DSC_LS_PRLI_COMP:
                                if ((e->prli_svc_param_word_3[0] & BIT_4) == 0)
                                        fcport->port_type = FCT_INITIATOR;
@@ -917,10 +931,8 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha,
        }
 } /* gnl_event */
 
-static void
-qla24xx_async_gnl_sp_done(void *s, int res)
+static void qla24xx_async_gnl_sp_done(srb_t *sp, int res)
 {
-       struct srb *sp = s;
        struct scsi_qla_host *vha = sp->vha;
        unsigned long flags;
        struct fc_port *fcport = NULL, *tf;
@@ -943,7 +955,6 @@ qla24xx_async_gnl_sp_done(void *s, int res)
        memset(&ea, 0, sizeof(ea));
        ea.sp = sp;
        ea.rc = res;
-       ea.event = FCME_GNL_DONE;
 
        if (sp->u.iocb_cmd.u.mbx.in_mb[1] >=
            sizeof(struct get_name_list_extended)) {
@@ -982,7 +993,7 @@ qla24xx_async_gnl_sp_done(void *s, int res)
                spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
                ea.fcport = fcport;
 
-               qla2x00_fcport_event_handler(vha, &ea);
+               qla24xx_handle_gnl_done_event(vha, &ea);
        }
 
        /* create new fcport if fw has knowledge of new sessions */
@@ -1107,10 +1118,8 @@ int qla24xx_post_gnl_work(struct scsi_qla_host *vha, fc_port_t *fcport)
        return qla2x00_post_work(vha, e);
 }
 
-static
-void qla24xx_async_gpdb_sp_done(void *s, int res)
+static void qla24xx_async_gpdb_sp_done(srb_t *sp, int res)
 {
-       struct srb *sp = s;
        struct scsi_qla_host *vha = sp->vha;
        struct qla_hw_data *ha = vha->hw;
        fc_port_t *fcport = sp->fcport;
@@ -1129,11 +1138,10 @@ void qla24xx_async_gpdb_sp_done(void *s, int res)
 
        fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
        memset(&ea, 0, sizeof(ea));
-       ea.event = FCME_GPDB_DONE;
        ea.fcport = fcport;
        ea.sp = sp;
 
-       qla2x00_fcport_event_handler(vha, &ea);
+       qla24xx_handle_gpdb_event(vha, &ea);
 
        dma_pool_free(ha->s_dma_pool, sp->u.iocb_cmd.u.mbx.in,
                sp->u.iocb_cmd.u.mbx.in_dma);
@@ -1154,10 +1162,8 @@ static int qla24xx_post_prli_work(struct scsi_qla_host *vha, fc_port_t *fcport)
        return qla2x00_post_work(vha, e);
 }
 
-static void
-qla2x00_async_prli_sp_done(void *ptr, int res)
+static void qla2x00_async_prli_sp_done(srb_t *sp, int res)
 {
-       srb_t *sp = ptr;
        struct scsi_qla_host *vha = sp->vha;
        struct srb_iocb *lio = &sp->u.iocb_cmd;
        struct event_arg ea;
@@ -1170,7 +1176,6 @@ qla2x00_async_prli_sp_done(void *ptr, int res)
 
        if (!test_bit(UNLOADING, &vha->dpc_flags)) {
                memset(&ea, 0, sizeof(ea));
-               ea.event = FCME_PRLI_DONE;
                ea.fcport = sp->fcport;
                ea.data[0] = lio->u.logio.data[0];
                ea.data[1] = lio->u.logio.data[1];
@@ -1178,7 +1183,7 @@ qla2x00_async_prli_sp_done(void *ptr, int res)
                ea.iop[1] = lio->u.logio.iop[1];
                ea.sp = sp;
 
-               qla2x00_fcport_event_handler(vha, &ea);
+               qla24xx_handle_prli_done_event(vha, &ea);
        }
 
        sp->free(sp);
@@ -1262,8 +1267,13 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt)
        struct port_database_24xx *pd;
        struct qla_hw_data *ha = vha->hw;
 
-       if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT))
+       if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT) ||
+           fcport->loop_id == FC_NO_LOOP_ID) {
+               ql_log(ql_log_warn, vha, 0xffff,
+                   "%s: %8phC - not sending command.\n",
+                   __func__, fcport->port_name);
                return rval;
+       }
 
        fcport->disc_state = DSC_GPDB;
 
@@ -1473,7 +1483,7 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport)
        u64 wwn;
        u16 sec;
 
-       ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0x20d8,
+       ql_dbg(ql_dbg_disc, vha, 0x20d8,
            "%s %8phC DS %d LS %d P %d fl %x confl %p rscn %d|%d login %d lid %d scan %d\n",
            __func__, fcport->port_name, fcport->disc_state,
            fcport->fw_login_state, fcport->login_pause, fcport->flags,
@@ -1484,6 +1494,7 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport)
                return 0;
 
        if ((fcport->loop_id != FC_NO_LOOP_ID) &&
+           qla_dual_mode_enabled(vha) &&
            ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) ||
             (fcport->fw_login_state == DSC_LS_PRLI_PEND)))
                return 0;
@@ -1636,12 +1647,34 @@ int qla24xx_post_newsess_work(struct scsi_qla_host *vha, port_id_t *id,
        return qla2x00_post_work(vha, e);
 }
 
-static
+void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea)
+{
+       fc_port_t *fcport;
+       unsigned long flags;
+
+       fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1);
+       if (fcport) {
+               fcport->scan_needed = 1;
+               fcport->rscn_gen++;
+       }
+
+       spin_lock_irqsave(&vha->work_lock, flags);
+       if (vha->scan.scan_flags == 0) {
+               ql_dbg(ql_dbg_disc, vha, 0xffff, "%s: schedule\n", __func__);
+               vha->scan.scan_flags |= SF_QUEUED;
+               schedule_delayed_work(&vha->scan.scan_work, 5);
+       }
+       spin_unlock_irqrestore(&vha->work_lock, flags);
+}
+
 void qla24xx_handle_relogin_event(scsi_qla_host_t *vha,
        struct event_arg *ea)
 {
        fc_port_t *fcport = ea->fcport;
 
+       if (test_bit(UNLOADING, &vha->dpc_flags))
+               return;
+
        ql_dbg(ql_dbg_disc, vha, 0x2102,
            "%s %8phC DS %d LS %d P %d del %d cnfl %p rscn %d|%d login %d|%d fl %x\n",
            __func__, fcport->port_name, fcport->disc_state,
@@ -1651,110 +1684,16 @@ void qla24xx_handle_relogin_event(scsi_qla_host_t *vha,
            fcport->last_login_gen, fcport->login_gen,
            fcport->flags);
 
-       if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) ||
-           (fcport->fw_login_state == DSC_LS_PRLI_PEND))
-               return;
-
-       if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) {
-               if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline)) {
-                       set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
-                       return;
-               }
-       }
-
        if (fcport->last_rscn_gen != fcport->rscn_gen) {
-               ql_dbg(ql_dbg_disc, vha, 0x20e9, "%s %d %8phC post gidpn\n",
+               ql_dbg(ql_dbg_disc, vha, 0x20e9, "%s %d %8phC post gnl\n",
                    __func__, __LINE__, fcport->port_name);
-
+               qla24xx_post_gnl_work(vha, fcport);
                return;
        }
 
        qla24xx_fcport_handle_login(vha, fcport);
 }
 
-
-static void qla_handle_els_plogi_done(scsi_qla_host_t *vha,
-                                     struct event_arg *ea)
-{
-       ql_dbg(ql_dbg_disc, vha, 0x2118,
-           "%s %d %8phC post PRLI\n",
-           __func__, __LINE__, ea->fcport->port_name);
-       qla24xx_post_prli_work(vha, ea->fcport);
-}
-
-void qla2x00_fcport_event_handler(scsi_qla_host_t *vha, struct event_arg *ea)
-{
-       fc_port_t *fcport;
-
-       switch (ea->event) {
-       case FCME_RELOGIN:
-               if (test_bit(UNLOADING, &vha->dpc_flags))
-                       return;
-
-               qla24xx_handle_relogin_event(vha, ea);
-               break;
-       case FCME_RSCN:
-               if (test_bit(UNLOADING, &vha->dpc_flags))
-                       return;
-               {
-                       unsigned long flags;
-
-                       fcport = qla2x00_find_fcport_by_nportid
-                               (vha, &ea->id, 1);
-                       if (fcport) {
-                               fcport->scan_needed = 1;
-                               fcport->rscn_gen++;
-                       }
-
-                       spin_lock_irqsave(&vha->work_lock, flags);
-                       if (vha->scan.scan_flags == 0) {
-                               ql_dbg(ql_dbg_disc, vha, 0xffff,
-                                   "%s: schedule\n", __func__);
-                               vha->scan.scan_flags |= SF_QUEUED;
-                               schedule_delayed_work(&vha->scan.scan_work, 5);
-                       }
-                       spin_unlock_irqrestore(&vha->work_lock, flags);
-               }
-               break;
-       case FCME_GNL_DONE:
-               qla24xx_handle_gnl_done_event(vha, ea);
-               break;
-       case FCME_GPSC_DONE:
-               qla24xx_handle_gpsc_event(vha, ea);
-               break;
-       case FCME_PLOGI_DONE:   /* Initiator side sent LLIOCB */
-               qla24xx_handle_plogi_done_event(vha, ea);
-               break;
-       case FCME_PRLI_DONE:
-               qla24xx_handle_prli_done_event(vha, ea);
-               break;
-       case FCME_GPDB_DONE:
-               qla24xx_handle_gpdb_event(vha, ea);
-               break;
-       case FCME_GPNID_DONE:
-               qla24xx_handle_gpnid_event(vha, ea);
-               break;
-       case FCME_GFFID_DONE:
-               qla24xx_handle_gffid_event(vha, ea);
-               break;
-       case FCME_ADISC_DONE:
-               qla24xx_handle_adisc_event(vha, ea);
-               break;
-       case FCME_GNNID_DONE:
-               qla24xx_handle_gnnid_event(vha, ea);
-               break;
-       case FCME_GFPNID_DONE:
-               qla24xx_handle_gfpnid_event(vha, ea);
-               break;
-       case FCME_ELS_PLOGI_DONE:
-               qla_handle_els_plogi_done(vha, ea);
-               break;
-       default:
-               BUG_ON(1);
-               break;
-       }
-}
-
 /*
  * RSCN(s) came in for this fcport, but the RSCN(s) was not able
  * to be consumed by the fcport
@@ -1772,10 +1711,9 @@ void qla_rscn_replay(fc_port_t *fcport)
 
        if (fcport->scan_needed) {
                memset(&ea, 0, sizeof(ea));
-               ea.event = FCME_RSCN;
                ea.id = fcport->d_id;
                ea.id.b.rsvd_1 = RSCN_PORT_ADDR;
-               qla2x00_fcport_event_handler(fcport->vha, &ea);
+               qla2x00_handle_rscn(fcport->vha, &ea);
        }
 }
 
@@ -1789,10 +1727,8 @@ qla2x00_tmf_iocb_timeout(void *data)
        complete(&tmf->u.tmf.comp);
 }
 
-static void
-qla2x00_tmf_sp_done(void *ptr, int res)
+static void qla2x00_tmf_sp_done(srb_t *sp, int res)
 {
-       srb_t *sp = ptr;
        struct srb_iocb *tmf = &sp->u.iocb_cmd;
 
        complete(&tmf->u.tmf.comp);
@@ -1890,6 +1826,9 @@ qla24xx_async_abort_command(srb_t *sp)
 static void
 qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
 {
+       WARN_ONCE(!qla2xxx_is_valid_mbs(ea->data[0]), "mbs: %#x\n",
+                 ea->data[0]);
+
        switch (ea->data[0]) {
        case MBS_COMMAND_COMPLETE:
                ql_dbg(ql_dbg_disc, vha, 0x2118,
@@ -1929,7 +1868,7 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
        }
 }
 
-static void
+void
 qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
 {
        port_id_t cid;  /* conflict Nport id */
@@ -1953,8 +1892,11 @@ qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
                return;
        }
 
-       if (fcport->disc_state == DSC_DELETE_PEND)
+       if ((fcport->disc_state == DSC_DELETE_PEND) ||
+           (fcport->disc_state == DSC_DELETED)) {
+               set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
                return;
+       }
 
        if (ea->sp->gen2 != fcport->login_gen) {
                /* target side must have changed it. */
@@ -1972,6 +1914,9 @@ qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
                return;
        }
 
+       WARN_ONCE(!qla2xxx_is_valid_mbs(ea->data[0]), "mbs: %#x\n",
+                 ea->data[0]);
+
        switch (ea->data[0]) {
        case MBS_COMMAND_COMPLETE:
                /*
@@ -2266,6 +2211,10 @@ qla2x00_initialize_adapter(scsi_qla_host_t *vha)
        if (qla_ini_mode_enabled(vha) || qla_dual_mode_enabled(vha))
                rval = qla2x00_init_rings(vha);
 
+       /* No point in continuing if firmware initialization failed. */
+       if (rval != QLA_SUCCESS)
+               return rval;
+
        ha->flags.chip_reset_done = 1;
 
        if (rval == QLA_SUCCESS && IS_QLA84XX(ha)) {
@@ -3082,103 +3031,113 @@ qla24xx_chip_diag(scsi_qla_host_t *vha)
 }
 
 static void
-qla2x00_alloc_offload_mem(scsi_qla_host_t *vha)
+qla2x00_init_fce_trace(scsi_qla_host_t *vha)
 {
        int rval;
        dma_addr_t tc_dma;
        void *tc;
        struct qla_hw_data *ha = vha->hw;
 
-       if (ha->eft) {
+       if (!IS_FWI2_CAPABLE(ha))
+               return;
+
+       if (!IS_QLA25XX(ha) && !IS_QLA81XX(ha) && !IS_QLA83XX(ha) &&
+           !IS_QLA27XX(ha) && !IS_QLA28XX(ha))
+               return;
+
+       if (ha->fce) {
                ql_dbg(ql_dbg_init, vha, 0x00bd,
-                   "%s: Offload Mem is already allocated.\n",
-                   __func__);
+                      "%s: FCE Mem is already allocated.\n",
+                      __func__);
                return;
        }
 
-       if (IS_FWI2_CAPABLE(ha)) {
-               /* Allocate memory for Fibre Channel Event Buffer. */
-               if (!IS_QLA25XX(ha) && !IS_QLA81XX(ha) && !IS_QLA83XX(ha) &&
-                   !IS_QLA27XX(ha) && !IS_QLA28XX(ha))
-                       goto try_eft;
+       /* Allocate memory for Fibre Channel Event Buffer. */
+       tc = dma_alloc_coherent(&ha->pdev->dev, FCE_SIZE, &tc_dma,
+                               GFP_KERNEL);
+       if (!tc) {
+               ql_log(ql_log_warn, vha, 0x00be,
+                      "Unable to allocate (%d KB) for FCE.\n",
+                      FCE_SIZE / 1024);
+               return;
+       }
 
-               if (ha->fce)
-                       dma_free_coherent(&ha->pdev->dev,
-                           FCE_SIZE, ha->fce, ha->fce_dma);
+       rval = qla2x00_enable_fce_trace(vha, tc_dma, FCE_NUM_BUFFERS,
+                                       ha->fce_mb, &ha->fce_bufs);
+       if (rval) {
+               ql_log(ql_log_warn, vha, 0x00bf,
+                      "Unable to initialize FCE (%d).\n", rval);
+               dma_free_coherent(&ha->pdev->dev, FCE_SIZE, tc, tc_dma);
+               return;
+       }
 
-               /* Allocate memory for Fibre Channel Event Buffer. */
-               tc = dma_alloc_coherent(&ha->pdev->dev, FCE_SIZE, &tc_dma,
-                                       GFP_KERNEL);
-               if (!tc) {
-                       ql_log(ql_log_warn, vha, 0x00be,
-                           "Unable to allocate (%d KB) for FCE.\n",
-                           FCE_SIZE / 1024);
-                       goto try_eft;
-               }
-
-               rval = qla2x00_enable_fce_trace(vha, tc_dma, FCE_NUM_BUFFERS,
-                   ha->fce_mb, &ha->fce_bufs);
-               if (rval) {
-                       ql_log(ql_log_warn, vha, 0x00bf,
-                           "Unable to initialize FCE (%d).\n", rval);
-                       dma_free_coherent(&ha->pdev->dev, FCE_SIZE, tc,
-                           tc_dma);
-                       ha->flags.fce_enabled = 0;
-                       goto try_eft;
-               }
-               ql_dbg(ql_dbg_init, vha, 0x00c0,
-                   "Allocate (%d KB) for FCE...\n", FCE_SIZE / 1024);
-
-               ha->flags.fce_enabled = 1;
-               ha->fce_dma = tc_dma;
-               ha->fce = tc;
-
-try_eft:
-               if (ha->eft)
-                       dma_free_coherent(&ha->pdev->dev,
-                           EFT_SIZE, ha->eft, ha->eft_dma);
+       ql_dbg(ql_dbg_init, vha, 0x00c0,
+              "Allocated (%d KB) for FCE...\n", FCE_SIZE / 1024);
 
-               /* Allocate memory for Extended Trace Buffer. */
-               tc = dma_alloc_coherent(&ha->pdev->dev, EFT_SIZE, &tc_dma,
-                                       GFP_KERNEL);
-               if (!tc) {
-                       ql_log(ql_log_warn, vha, 0x00c1,
-                           "Unable to allocate (%d KB) for EFT.\n",
-                           EFT_SIZE / 1024);
-                       goto eft_err;
-               }
+       ha->flags.fce_enabled = 1;
+       ha->fce_dma = tc_dma;
+       ha->fce = tc;
+}
 
-               rval = qla2x00_enable_eft_trace(vha, tc_dma, EFT_NUM_BUFFERS);
-               if (rval) {
-                       ql_log(ql_log_warn, vha, 0x00c2,
-                           "Unable to initialize EFT (%d).\n", rval);
-                       dma_free_coherent(&ha->pdev->dev, EFT_SIZE, tc,
-                           tc_dma);
-                       goto eft_err;
-               }
-               ql_dbg(ql_dbg_init, vha, 0x00c3,
-                   "Allocated (%d KB) EFT ...\n", EFT_SIZE / 1024);
+static void
+qla2x00_init_eft_trace(scsi_qla_host_t *vha)
+{
+       int rval;
+       dma_addr_t tc_dma;
+       void *tc;
+       struct qla_hw_data *ha = vha->hw;
+
+       if (!IS_FWI2_CAPABLE(ha))
+               return;
 
-               ha->eft_dma = tc_dma;
-               ha->eft = tc;
+       if (ha->eft) {
+               ql_dbg(ql_dbg_init, vha, 0x00bd,
+                   "%s: EFT Mem is already allocated.\n",
+                   __func__);
+               return;
        }
 
-eft_err:
-       return;
+       /* Allocate memory for Extended Trace Buffer. */
+       tc = dma_alloc_coherent(&ha->pdev->dev, EFT_SIZE, &tc_dma,
+                               GFP_KERNEL);
+       if (!tc) {
+               ql_log(ql_log_warn, vha, 0x00c1,
+                      "Unable to allocate (%d KB) for EFT.\n",
+                      EFT_SIZE / 1024);
+               return;
+       }
+
+       rval = qla2x00_enable_eft_trace(vha, tc_dma, EFT_NUM_BUFFERS);
+       if (rval) {
+               ql_log(ql_log_warn, vha, 0x00c2,
+                      "Unable to initialize EFT (%d).\n", rval);
+               dma_free_coherent(&ha->pdev->dev, EFT_SIZE, tc, tc_dma);
+               return;
+       }
+
+       ql_dbg(ql_dbg_init, vha, 0x00c3,
+              "Allocated (%d KB) EFT ...\n", EFT_SIZE / 1024);
+
+       ha->eft_dma = tc_dma;
+       ha->eft = tc;
+}
+
+static void
+qla2x00_alloc_offload_mem(scsi_qla_host_t *vha)
+{
+       qla2x00_init_fce_trace(vha);
+       qla2x00_init_eft_trace(vha);
 }
 
 void
 qla2x00_alloc_fw_dump(scsi_qla_host_t *vha)
 {
-       int rval;
        uint32_t dump_size, fixed_size, mem_size, req_q_size, rsp_q_size,
            eft_size, fce_size, mq_size;
        struct qla_hw_data *ha = vha->hw;
        struct req_que *req = ha->req_q_map[0];
        struct rsp_que *rsp = ha->rsp_q_map[0];
        struct qla2xxx_fw_dump *fw_dump;
-       dma_addr_t tc_dma;
-       void *tc;
 
        dump_size = fixed_size = mem_size = eft_size = fce_size = mq_size = 0;
        req_q_size = rsp_q_size = 0;
@@ -3216,37 +3175,13 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha)
                }
                if (ha->tgt.atio_ring)
                        mq_size += ha->tgt.atio_q_length * sizeof(request_t);
-               /* Allocate memory for Fibre Channel Event Buffer. */
-               if (!IS_QLA25XX(ha) && !IS_QLA81XX(ha) && !IS_QLA83XX(ha) &&
-                   !IS_QLA27XX(ha) && !IS_QLA28XX(ha))
-                       goto try_eft;
 
-               fce_size = sizeof(struct qla2xxx_fce_chain) + FCE_SIZE;
-try_eft:
+               qla2x00_init_fce_trace(vha);
+               if (ha->fce)
+                       fce_size = sizeof(struct qla2xxx_fce_chain) + FCE_SIZE;
+               qla2x00_init_eft_trace(vha);
                if (ha->eft)
-                       dma_free_coherent(&ha->pdev->dev,
-                           EFT_SIZE, ha->eft, ha->eft_dma);
-
-               /* Allocate memory for Extended Trace Buffer. */
-               tc = dma_alloc_coherent(&ha->pdev->dev, EFT_SIZE, &tc_dma,
-                                        GFP_KERNEL);
-               if (!tc) {
-                       ql_log(ql_log_warn, vha, 0x00c1,
-                           "Unable to allocate (%d KB) for EFT.\n",
-                           EFT_SIZE / 1024);
-                       goto allocate;
-               }
-
-               rval = qla2x00_enable_eft_trace(vha, tc_dma, EFT_NUM_BUFFERS);
-               if (rval) {
-                       ql_log(ql_log_warn, vha, 0x00c2,
-                           "Unable to initialize EFT (%d).\n", rval);
-                       dma_free_coherent(&ha->pdev->dev, EFT_SIZE, tc,
-                           tc_dma);
-               }
-               ql_dbg(ql_dbg_init, vha, 0x00c3,
-                   "Allocated (%d KB) EFT ...\n", EFT_SIZE / 1024);
-               eft_size = EFT_SIZE;
+                       eft_size = EFT_SIZE;
        }
 
        if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
@@ -3268,24 +3203,22 @@ try_eft:
                            j, fwdt->dump_size);
                        dump_size += fwdt->dump_size;
                }
-               goto allocate;
+       } else {
+               req_q_size = req->length * sizeof(request_t);
+               rsp_q_size = rsp->length * sizeof(response_t);
+               dump_size = offsetof(struct qla2xxx_fw_dump, isp);
+               dump_size += fixed_size + mem_size + req_q_size + rsp_q_size
+                       + eft_size;
+               ha->chain_offset = dump_size;
+               dump_size += mq_size + fce_size;
+               if (ha->exchoffld_buf)
+                       dump_size += sizeof(struct qla2xxx_offld_chain) +
+                               ha->exchoffld_size;
+               if (ha->exlogin_buf)
+                       dump_size += sizeof(struct qla2xxx_offld_chain) +
+                               ha->exlogin_size;
        }
 
-       req_q_size = req->length * sizeof(request_t);
-       rsp_q_size = rsp->length * sizeof(response_t);
-       dump_size = offsetof(struct qla2xxx_fw_dump, isp);
-       dump_size += fixed_size + mem_size + req_q_size + rsp_q_size + eft_size;
-       ha->chain_offset = dump_size;
-       dump_size += mq_size + fce_size;
-
-       if (ha->exchoffld_buf)
-               dump_size += sizeof(struct qla2xxx_offld_chain) +
-                       ha->exchoffld_size;
-       if (ha->exlogin_buf)
-               dump_size += sizeof(struct qla2xxx_offld_chain) +
-                       ha->exlogin_size;
-
-allocate:
        if (!ha->fw_dump_len || dump_size > ha->fw_dump_alloc_len) {
 
                ql_dbg(ql_dbg_init, vha, 0x00c5,
@@ -4400,7 +4333,7 @@ qla2x00_configure_hba(scsi_qla_host_t *vha)
 
 inline void
 qla2x00_set_model_info(scsi_qla_host_t *vha, uint8_t *model, size_t len,
-       char *def)
+                      const char *def)
 {
        char *st, *en;
        uint16_t index;
@@ -4412,7 +4345,7 @@ qla2x00_set_model_info(scsi_qla_host_t *vha, uint8_t *model, size_t len,
        if (len > sizeof(zero))
                len = sizeof(zero);
        if (memcmp(model, &zero, len) != 0) {
-               strncpy(ha->model_number, model, len);
+               memcpy(ha->model_number, model, len);
                st = en = ha->model_number;
                en += len - 1;
                while (en > st) {
@@ -4425,21 +4358,23 @@ qla2x00_set_model_info(scsi_qla_host_t *vha, uint8_t *model, size_t len,
                if (use_tbl &&
                    ha->pdev->subsystem_vendor == PCI_VENDOR_ID_QLOGIC &&
                    index < QLA_MODEL_NAMES)
-                       strncpy(ha->model_desc,
+                       strlcpy(ha->model_desc,
                            qla2x00_model_name[index * 2 + 1],
-                           sizeof(ha->model_desc) - 1);
+                           sizeof(ha->model_desc));
        } else {
                index = (ha->pdev->subsystem_device & 0xff);
                if (use_tbl &&
                    ha->pdev->subsystem_vendor == PCI_VENDOR_ID_QLOGIC &&
                    index < QLA_MODEL_NAMES) {
-                       strcpy(ha->model_number,
-                           qla2x00_model_name[index * 2]);
-                       strncpy(ha->model_desc,
+                       strlcpy(ha->model_number,
+                               qla2x00_model_name[index * 2],
+                               sizeof(ha->model_number));
+                       strlcpy(ha->model_desc,
                            qla2x00_model_name[index * 2 + 1],
-                           sizeof(ha->model_desc) - 1);
+                           sizeof(ha->model_desc));
                } else {
-                       strcpy(ha->model_number, def);
+                       strlcpy(ha->model_number, def,
+                               sizeof(ha->model_number));
                }
        }
        if (IS_FWI2_CAPABLE(ha))
@@ -5044,7 +4979,7 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha)
 
        uint16_t        index;
        uint16_t        entries;
-       char            *id_iter;
+       struct gid_list_info *gid;
        uint16_t        loop_id;
        uint8_t         domain, area, al_pa;
        struct qla_hw_data *ha = vha->hw;
@@ -5119,18 +5054,16 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha)
        new_fcport->flags &= ~FCF_FABRIC_DEVICE;
 
        /* Add devices to port list. */
-       id_iter = (char *)ha->gid_list;
+       gid = ha->gid_list;
        for (index = 0; index < entries; index++) {
-               domain = ((struct gid_list_info *)id_iter)->domain;
-               area = ((struct gid_list_info *)id_iter)->area;
-               al_pa = ((struct gid_list_info *)id_iter)->al_pa;
+               domain = gid->domain;
+               area = gid->area;
+               al_pa = gid->al_pa;
                if (IS_QLA2100(ha) || IS_QLA2200(ha))
-                       loop_id = (uint16_t)
-                           ((struct gid_list_info *)id_iter)->loop_id_2100;
+                       loop_id = gid->loop_id_2100;
                else
-                       loop_id = le16_to_cpu(
-                           ((struct gid_list_info *)id_iter)->loop_id);
-               id_iter += ha->gid_list_info_size;
+                       loop_id = le16_to_cpu(gid->loop_id);
+               gid = (void *)gid + ha->gid_list_info_size;
 
                /* Bypass reserved domain fields. */
                if ((domain & 0xf0) == 0xf0)
@@ -5355,7 +5288,7 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport)
            "%s %8phN. rport %p is %s mode\n",
            __func__, fcport->port_name, rport,
            (fcport->port_type == FCT_TARGET) ? "tgt" :
-           ((fcport->port_type & FCT_NVME) ? "nvme" :"ini"));
+           ((fcport->port_type & FCT_NVME) ? "nvme" : "ini"));
 
        fc_remote_port_rolechg(rport, rport_ids.roles);
 }
@@ -6596,7 +6529,8 @@ qla2x00_quiesce_io(scsi_qla_host_t *vha)
                                        LOOP_DOWN_TIME);
        }
        /* Wait for pending cmds to complete */
-       qla2x00_eh_wait_for_pending_commands(vha, 0, 0, WAIT_HOST);
+       WARN_ON_ONCE(qla2x00_eh_wait_for_pending_commands(vha, 0, 0, WAIT_HOST)
+                    != QLA_SUCCESS);
 }
 
 void
@@ -6684,8 +6618,10 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
        }
 
        /* Clear all async request states across all VPs. */
-       list_for_each_entry(fcport, &vha->vp_fcports, list)
+       list_for_each_entry(fcport, &vha->vp_fcports, list) {
                fcport->flags &= ~(FCF_LOGIN_NEEDED | FCF_ASYNC_SENT);
+               fcport->scan_state = 0;
+       }
        spin_lock_irqsave(&ha->vport_slock, flags);
        list_for_each_entry(vp, &ha->vp_list, list) {
                atomic_inc(&vp->vref_count);
@@ -7519,8 +7455,12 @@ qla27xx_get_active_image(struct scsi_qla_host *vha,
                goto check_sec_image;
        }
 
-       qla24xx_read_flash_data(vha, (void *)(&pri_image_status),
-           ha->flt_region_img_status_pri, sizeof(pri_image_status) >> 2);
+       if (qla24xx_read_flash_data(vha, (void *)(&pri_image_status),
+           ha->flt_region_img_status_pri, sizeof(pri_image_status) >> 2) !=
+           QLA_SUCCESS) {
+               WARN_ON_ONCE(true);
+               goto check_sec_image;
+       }
        qla27xx_print_image(vha, "Primary image", &pri_image_status);
 
        if (qla27xx_check_image_status_signature(&pri_image_status)) {
@@ -8274,7 +8214,7 @@ qla81xx_nvram_config(scsi_qla_host_t *vha)
                    active_regions.aux.vpd_nvram == QLA27XX_PRIMARY_IMAGE ?
                    "primary" : "secondary");
        }
-       qla24xx_read_flash_data(vha, ha->vpd, faddr, ha->vpd_size >> 2);
+       ha->isp_ops->read_optrom(vha, ha->vpd, faddr << 2, ha->vpd_size);
 
        /* Get NVRAM data into cache and calculate checksum. */
        faddr = ha->flt_region_nvram;
@@ -8286,7 +8226,7 @@ qla81xx_nvram_config(scsi_qla_host_t *vha)
            "Loading %s nvram image.\n",
            active_regions.aux.vpd_nvram == QLA27XX_PRIMARY_IMAGE ?
            "primary" : "secondary");
-       qla24xx_read_flash_data(vha, ha->nvram, faddr, ha->nvram_size >> 2);
+       ha->isp_ops->read_optrom(vha, ha->nvram, faddr << 2, ha->nvram_size);
 
        dptr = (uint32_t *)nv;
        for (cnt = 0, chksum = 0; cnt < ha->nvram_size >> 2; cnt++, dptr++)
index bf063c6..0c3d907 100644 (file)
@@ -152,6 +152,18 @@ qla2x00_chip_is_down(scsi_qla_host_t *vha)
        return (qla2x00_reset_active(vha) || !vha->hw->flags.fw_started);
 }
 
+static void qla2xxx_init_sp(srb_t *sp, scsi_qla_host_t *vha,
+                           struct qla_qpair *qpair, fc_port_t *fcport)
+{
+       memset(sp, 0, sizeof(*sp));
+       sp->fcport = fcport;
+       sp->iocbs = 1;
+       sp->vha = vha;
+       sp->qpair = qpair;
+       sp->cmd_type = TYPE_SRB;
+       INIT_LIST_HEAD(&sp->elem);
+}
+
 static inline srb_t *
 qla2xxx_get_qpair_sp(scsi_qla_host_t *vha, struct qla_qpair *qpair,
     fc_port_t *fcport, gfp_t flag)
@@ -164,19 +176,9 @@ qla2xxx_get_qpair_sp(scsi_qla_host_t *vha, struct qla_qpair *qpair,
                return NULL;
 
        sp = mempool_alloc(qpair->srb_mempool, flag);
-       if (!sp)
-               goto done;
-
-       memset(sp, 0, sizeof(*sp));
-       sp->fcport = fcport;
-       sp->iocbs = 1;
-       sp->vha = vha;
-       sp->qpair = qpair;
-       sp->cmd_type = TYPE_SRB;
-       INIT_LIST_HEAD(&sp->elem);
-
-done:
-       if (!sp)
+       if (sp)
+               qla2xxx_init_sp(sp, vha, qpair, fcport);
+       else
                QLA_QPAIR_MARK_NOT_BUSY(qpair);
        return sp;
 }
index 9312b19..e92e52a 100644 (file)
@@ -292,6 +292,26 @@ void qla2x00_build_scsi_iocbs_64(srb_t *sp, cmd_entry_t *cmd_pkt,
        }
 }
 
+/*
+ * Find the first handle that is not in use, starting from
+ * req->current_outstanding_cmd + 1. The caller must hold the lock that is
+ * associated with @req.
+ */
+uint32_t qla2xxx_get_next_handle(struct req_que *req)
+{
+       uint32_t index, handle = req->current_outstanding_cmd;
+
+       for (index = 1; index < req->num_outstanding_cmds; index++) {
+               handle++;
+               if (handle == req->num_outstanding_cmds)
+                       handle = 1;
+               if (!req->outstanding_cmds[handle])
+                       return handle;
+       }
+
+       return 0;
+}
+
 /**
  * qla2x00_start_scsi() - Send a SCSI command to the ISP
  * @sp: command to send to the ISP
@@ -306,7 +326,6 @@ qla2x00_start_scsi(srb_t *sp)
        scsi_qla_host_t *vha;
        struct scsi_cmnd *cmd;
        uint32_t        *clr_ptr;
-       uint32_t        index;
        uint32_t        handle;
        cmd_entry_t     *cmd_pkt;
        uint16_t        cnt;
@@ -339,16 +358,8 @@ qla2x00_start_scsi(srb_t *sp)
        /* Acquire ring specific lock */
        spin_lock_irqsave(&ha->hardware_lock, flags);
 
-       /* Check for room in outstanding command list. */
-       handle = req->current_outstanding_cmd;
-       for (index = 1; index < req->num_outstanding_cmds; index++) {
-               handle++;
-               if (handle == req->num_outstanding_cmds)
-                       handle = 1;
-               if (!req->outstanding_cmds[handle])
-                       break;
-       }
-       if (index == req->num_outstanding_cmds)
+       handle = qla2xxx_get_next_handle(req);
+       if (handle == 0)
                goto queuing_error;
 
        /* Map the sg table so we have an accurate count of sg entries needed */
@@ -610,7 +621,7 @@ qla24xx_build_scsi_type_6_iocbs(srb_t *sp, struct cmd_type_6 *cmd_pkt,
        }
 
        cur_seg = scsi_sglist(cmd);
-       ctx = GET_CMD_CTX_SP(sp);
+       ctx = sp->u.scmd.ct6_ctx;
 
        while (tot_dsds) {
                avail_dsds = (tot_dsds > QLA_DSDS_PER_IOCB) ?
@@ -943,8 +954,7 @@ alloc_and_fill:
 
                        if (sp) {
                                list_add_tail(&dsd_ptr->list,
-                                   &((struct crc_context *)
-                                           sp->u.scmd.ctx)->dsd_list);
+                                             &sp->u.scmd.crc_ctx->dsd_list);
 
                                sp->flags |= SRB_CRC_CTX_DSD_VALID;
                        } else {
@@ -1041,8 +1051,7 @@ qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp,
 
                        if (sp) {
                                list_add_tail(&dsd_ptr->list,
-                                   &((struct crc_context *)
-                                           sp->u.scmd.ctx)->dsd_list);
+                                             &sp->u.scmd.crc_ctx->dsd_list);
 
                                sp->flags |= SRB_CRC_CTX_DSD_VALID;
                        } else {
@@ -1088,7 +1097,7 @@ qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
 
                sgl = scsi_prot_sglist(cmd);
                vha = sp->vha;
-               difctx = sp->u.scmd.ctx;
+               difctx = sp->u.scmd.crc_ctx;
                direction_to_device = cmd->sc_data_direction == DMA_TO_DEVICE;
                ql_dbg(ql_dbg_tgt + ql_dbg_verbose, vha, 0xe021,
                  "%s: scsi_cmnd: %p, crc_ctx: %p, sp: %p\n",
@@ -1364,6 +1373,7 @@ qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
        cur_dsd++;
        return 0;
 }
+
 /**
  * qla24xx_build_scsi_crc_2_iocbs() - Build IOCB command utilizing Command
  *                                                     Type 6 IOCB types.
@@ -1427,7 +1437,7 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
                bundling = 0;
 
        /* Allocate CRC context from global pool */
-       crc_ctx_pkt = sp->u.scmd.ctx =
+       crc_ctx_pkt = sp->u.scmd.crc_ctx =
            dma_pool_zalloc(ha->dl_dma_pool, GFP_ATOMIC, &crc_ctx_dma);
 
        if (!crc_ctx_pkt)
@@ -1515,7 +1525,7 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
        }
 
        if (!bundling) {
-               cur_dsd = &crc_ctx_pkt->u.nobundling.data_dsd;
+               cur_dsd = &crc_ctx_pkt->u.nobundling.data_dsd[0];
        } else {
                /*
                 * Configure Bundling if we need to fetch interlaving
@@ -1525,7 +1535,7 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
                crc_ctx_pkt->u.bundling.dif_byte_count = cpu_to_le32(dif_bytes);
                crc_ctx_pkt->u.bundling.dseg_count = cpu_to_le16(tot_dsds -
                                                        tot_prot_dsds);
-               cur_dsd = &crc_ctx_pkt->u.bundling.data_dsd;
+               cur_dsd = &crc_ctx_pkt->u.bundling.data_dsd[0];
        }
 
        /* Finish the common fields of CRC pkt */
@@ -1583,7 +1593,6 @@ qla24xx_start_scsi(srb_t *sp)
        int             nseg;
        unsigned long   flags;
        uint32_t        *clr_ptr;
-       uint32_t        index;
        uint32_t        handle;
        struct cmd_type_7 *cmd_pkt;
        uint16_t        cnt;
@@ -1611,16 +1620,8 @@ qla24xx_start_scsi(srb_t *sp)
        /* Acquire ring specific lock */
        spin_lock_irqsave(&ha->hardware_lock, flags);
 
-       /* Check for room in outstanding command list. */
-       handle = req->current_outstanding_cmd;
-       for (index = 1; index < req->num_outstanding_cmds; index++) {
-               handle++;
-               if (handle == req->num_outstanding_cmds)
-                       handle = 1;
-               if (!req->outstanding_cmds[handle])
-                       break;
-       }
-       if (index == req->num_outstanding_cmds)
+       handle = qla2xxx_get_next_handle(req);
+       if (handle == 0)
                goto queuing_error;
 
        /* Map the sg table so we have an accurate count of sg entries needed */
@@ -1723,7 +1724,6 @@ qla24xx_dif_start_scsi(srb_t *sp)
        int                     nseg;
        unsigned long           flags;
        uint32_t                *clr_ptr;
-       uint32_t                index;
        uint32_t                handle;
        uint16_t                cnt;
        uint16_t                req_cnt = 0;
@@ -1764,17 +1764,8 @@ qla24xx_dif_start_scsi(srb_t *sp)
        /* Acquire ring specific lock */
        spin_lock_irqsave(&ha->hardware_lock, flags);
 
-       /* Check for room in outstanding command list. */
-       handle = req->current_outstanding_cmd;
-       for (index = 1; index < req->num_outstanding_cmds; index++) {
-               handle++;
-               if (handle == req->num_outstanding_cmds)
-                       handle = 1;
-               if (!req->outstanding_cmds[handle])
-                       break;
-       }
-
-       if (index == req->num_outstanding_cmds)
+       handle = qla2xxx_get_next_handle(req);
+       if (handle == 0)
                goto queuing_error;
 
        /* Compute number of required data segments */
@@ -1919,7 +1910,6 @@ qla2xxx_start_scsi_mq(srb_t *sp)
        int             nseg;
        unsigned long   flags;
        uint32_t        *clr_ptr;
-       uint32_t        index;
        uint32_t        handle;
        struct cmd_type_7 *cmd_pkt;
        uint16_t        cnt;
@@ -1950,16 +1940,8 @@ qla2xxx_start_scsi_mq(srb_t *sp)
                vha->marker_needed = 0;
        }
 
-       /* Check for room in outstanding command list. */
-       handle = req->current_outstanding_cmd;
-       for (index = 1; index < req->num_outstanding_cmds; index++) {
-               handle++;
-               if (handle == req->num_outstanding_cmds)
-                       handle = 1;
-               if (!req->outstanding_cmds[handle])
-                       break;
-       }
-       if (index == req->num_outstanding_cmds)
+       handle = qla2xxx_get_next_handle(req);
+       if (handle == 0)
                goto queuing_error;
 
        /* Map the sg table so we have an accurate count of sg entries needed */
@@ -2063,7 +2045,6 @@ qla2xxx_dif_start_scsi_mq(srb_t *sp)
        int                     nseg;
        unsigned long           flags;
        uint32_t                *clr_ptr;
-       uint32_t                index;
        uint32_t                handle;
        uint16_t                cnt;
        uint16_t                req_cnt = 0;
@@ -2118,17 +2099,8 @@ qla2xxx_dif_start_scsi_mq(srb_t *sp)
                vha->marker_needed = 0;
        }
 
-       /* Check for room in outstanding command list. */
-       handle = req->current_outstanding_cmd;
-       for (index = 1; index < req->num_outstanding_cmds; index++) {
-               handle++;
-               if (handle == req->num_outstanding_cmds)
-                       handle = 1;
-               if (!req->outstanding_cmds[handle])
-                       break;
-       }
-
-       if (index == req->num_outstanding_cmds)
+       handle = qla2xxx_get_next_handle(req);
+       if (handle == 0)
                goto queuing_error;
 
        /* Compute number of required data segments */
@@ -2275,7 +2247,7 @@ __qla2x00_alloc_iocbs(struct qla_qpair *qpair, srb_t *sp)
        struct qla_hw_data *ha = vha->hw;
        struct req_que *req = qpair->req;
        device_reg_t *reg = ISP_QUE_REG(ha, req->id);
-       uint32_t index, handle;
+       uint32_t handle;
        request_t *pkt;
        uint16_t cnt, req_cnt;
 
@@ -2315,16 +2287,8 @@ __qla2x00_alloc_iocbs(struct qla_qpair *qpair, srb_t *sp)
                goto queuing_error;
 
        if (sp) {
-               /* Check for room in outstanding command list. */
-               handle = req->current_outstanding_cmd;
-               for (index = 1; index < req->num_outstanding_cmds; index++) {
-                       handle++;
-                       if (handle == req->num_outstanding_cmds)
-                               handle = 1;
-                       if (!req->outstanding_cmds[handle])
-                               break;
-               }
-               if (index == req->num_outstanding_cmds) {
+               handle = qla2xxx_get_next_handle(req);
+               if (handle == 0) {
                        ql_log(ql_log_warn, vha, 0x700b,
                            "No room on outstanding cmd array.\n");
                        goto queuing_error;
@@ -2540,13 +2504,11 @@ void qla2x00_init_timer(srb_t *sp, unsigned long tmo)
        sp->free = qla2x00_sp_free;
        if (IS_QLAFX00(sp->vha->hw) && sp->type == SRB_FXIOCB_DCMD)
                init_completion(&sp->u.iocb_cmd.u.fxiocb.fxiocb_comp);
-       add_timer(&sp->u.iocb_cmd.timer);
+       sp->start_timer = 1;
 }
 
-static void
-qla2x00_els_dcmd_sp_free(void *data)
+static void qla2x00_els_dcmd_sp_free(srb_t *sp)
 {
-       srb_t *sp = data;
        struct srb_iocb *elsio = &sp->u.iocb_cmd;
 
        kfree(sp->fcport);
@@ -2576,10 +2538,8 @@ qla2x00_els_dcmd_iocb_timeout(void *data)
        complete(&lio->u.els_logo.comp);
 }
 
-static void
-qla2x00_els_dcmd_sp_done(void *ptr, int res)
+static void qla2x00_els_dcmd_sp_done(srb_t *sp, int res)
 {
-       srb_t *sp = ptr;
        fc_port_t *fcport = sp->fcport;
        struct srb_iocb *lio = &sp->u.iocb_cmd;
        struct scsi_qla_host *vha = sp->vha;
@@ -2699,16 +2659,16 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb)
        els_iocb->s_id[0] = vha->d_id.b.al_pa;
        els_iocb->s_id[1] = vha->d_id.b.area;
        els_iocb->s_id[2] = vha->d_id.b.domain;
-       els_iocb->control_flags = 0;
 
        if (elsio->u.els_logo.els_cmd == ELS_DCMD_PLOGI) {
+               els_iocb->control_flags = 0;
                els_iocb->tx_byte_count = els_iocb->tx_len =
-                       sizeof(struct els_plogi_payload);
+                       cpu_to_le32(sizeof(struct els_plogi_payload));
                put_unaligned_le64(elsio->u.els_plogi.els_plogi_pyld_dma,
                                   &els_iocb->tx_address);
                els_iocb->rx_dsd_count = 1;
                els_iocb->rx_byte_count = els_iocb->rx_len =
-                       sizeof(struct els_plogi_payload);
+                       cpu_to_le32(sizeof(struct els_plogi_payload));
                put_unaligned_le64(elsio->u.els_plogi.els_resp_pyld_dma,
                                   &els_iocb->rx_address);
 
@@ -2717,7 +2677,9 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb)
                ql_dump_buffer(ql_log_info, vha, 0x0109,
                    (uint8_t *)els_iocb, 0x70);
        } else {
-               els_iocb->tx_byte_count = sizeof(struct els_logo_payload);
+               els_iocb->control_flags = 1 << 13;
+               els_iocb->tx_byte_count =
+                       cpu_to_le32(sizeof(struct els_logo_payload));
                put_unaligned_le64(elsio->u.els_logo.els_logo_pyld_dma,
                                   &els_iocb->tx_address);
                els_iocb->tx_len = cpu_to_le32(sizeof(struct els_logo_payload));
@@ -2755,10 +2717,23 @@ qla2x00_els_dcmd2_iocb_timeout(void *data)
        sp->done(sp, QLA_FUNCTION_TIMEOUT);
 }
 
-static void
-qla2x00_els_dcmd2_sp_done(void *ptr, int res)
+void qla2x00_els_dcmd2_free(scsi_qla_host_t *vha, struct els_plogi *els_plogi)
+{
+       if (els_plogi->els_plogi_pyld)
+               dma_free_coherent(&vha->hw->pdev->dev,
+                                 els_plogi->tx_size,
+                                 els_plogi->els_plogi_pyld,
+                                 els_plogi->els_plogi_pyld_dma);
+
+       if (els_plogi->els_resp_pyld)
+               dma_free_coherent(&vha->hw->pdev->dev,
+                                 els_plogi->rx_size,
+                                 els_plogi->els_resp_pyld,
+                                 els_plogi->els_resp_pyld_dma);
+}
+
+static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res)
 {
-       srb_t *sp = ptr;
        fc_port_t *fcport = sp->fcport;
        struct srb_iocb *lio = &sp->u.iocb_cmd;
        struct scsi_qla_host *vha = sp->vha;
@@ -2780,26 +2755,16 @@ qla2x00_els_dcmd2_sp_done(void *ptr, int res)
                } else {
                        memset(&ea, 0, sizeof(ea));
                        ea.fcport = fcport;
-                       ea.rc = res;
-                       ea.event = FCME_ELS_PLOGI_DONE;
-                       qla2x00_fcport_event_handler(vha, &ea);
+                       ea.data[0] = MBS_COMMAND_COMPLETE;
+                       ea.sp = sp;
+                       qla24xx_handle_plogi_done_event(vha, &ea);
                }
 
                e = qla2x00_alloc_work(vha, QLA_EVT_UNMAP);
                if (!e) {
                        struct srb_iocb *elsio = &sp->u.iocb_cmd;
 
-                       if (elsio->u.els_plogi.els_plogi_pyld)
-                               dma_free_coherent(&sp->vha->hw->pdev->dev,
-                                   elsio->u.els_plogi.tx_size,
-                                   elsio->u.els_plogi.els_plogi_pyld,
-                                   elsio->u.els_plogi.els_plogi_pyld_dma);
-
-                       if (elsio->u.els_plogi.els_resp_pyld)
-                               dma_free_coherent(&sp->vha->hw->pdev->dev,
-                                   elsio->u.els_plogi.rx_size,
-                                   elsio->u.els_plogi.els_resp_pyld,
-                                   elsio->u.els_plogi.els_resp_pyld_dma);
+                       qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi);
                        sp->free(sp);
                        return;
                }
@@ -2899,18 +2864,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
 
 out:
        fcport->flags &= ~(FCF_ASYNC_SENT);
-       if (elsio->u.els_plogi.els_plogi_pyld)
-               dma_free_coherent(&sp->vha->hw->pdev->dev,
-                   elsio->u.els_plogi.tx_size,
-                   elsio->u.els_plogi.els_plogi_pyld,
-                   elsio->u.els_plogi.els_plogi_pyld_dma);
-
-       if (elsio->u.els_plogi.els_resp_pyld)
-               dma_free_coherent(&sp->vha->hw->pdev->dev,
-                   elsio->u.els_plogi.rx_size,
-                   elsio->u.els_plogi.els_resp_pyld,
-                   elsio->u.els_plogi.els_resp_pyld_dma);
-
+       qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi);
        sp->free(sp);
 done:
        return rval;
@@ -3115,7 +3069,6 @@ qla82xx_start_scsi(srb_t *sp)
        unsigned long   flags;
        struct scsi_cmnd *cmd;
        uint32_t        *clr_ptr;
-       uint32_t        index;
        uint32_t        handle;
        uint16_t        cnt;
        uint16_t        req_cnt;
@@ -3155,16 +3108,8 @@ qla82xx_start_scsi(srb_t *sp)
        /* Acquire ring specific lock */
        spin_lock_irqsave(&ha->hardware_lock, flags);
 
-       /* Check for room in outstanding command list. */
-       handle = req->current_outstanding_cmd;
-       for (index = 1; index < req->num_outstanding_cmds; index++) {
-               handle++;
-               if (handle == req->num_outstanding_cmds)
-                       handle = 1;
-               if (!req->outstanding_cmds[handle])
-                       break;
-       }
-       if (index == req->num_outstanding_cmds)
+       handle = qla2xxx_get_next_handle(req);
+       if (handle == 0)
                goto queuing_error;
 
        /* Map the sg table so we have an accurate count of sg entries needed */
@@ -3235,7 +3180,7 @@ sufficient_dsds:
                                goto queuing_error;
                }
 
-               ctx = sp->u.scmd.ctx =
+               ctx = sp->u.scmd.ct6_ctx =
                    mempool_alloc(ha->ctx_mempool, GFP_ATOMIC);
                if (!ctx) {
                        ql_log(ql_log_fatal, vha, 0x3010,
@@ -3431,9 +3376,9 @@ queuing_error:
        if (tot_dsds)
                scsi_dma_unmap(cmd);
 
-       if (sp->u.scmd.ctx) {
-               mempool_free(sp->u.scmd.ctx, ha->ctx_mempool);
-               sp->u.scmd.ctx = NULL;
+       if (sp->u.scmd.crc_ctx) {
+               mempool_free(sp->u.scmd.crc_ctx, ha->ctx_mempool);
+               sp->u.scmd.crc_ctx = NULL;
        }
        spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
@@ -3668,6 +3613,9 @@ qla2x00_start_sp(srb_t *sp)
                break;
        }
 
+       if (sp->start_timer)
+               add_timer(&sp->u.iocb_cmd.timer);
+
        wmb();
        qla2x00_start_iocbs(vha, qp->req);
 done:
@@ -3769,7 +3717,6 @@ qla2x00_start_bidir(srb_t *sp, struct scsi_qla_host *vha, uint32_t tot_dsds)
        struct qla_hw_data *ha = vha->hw;
        unsigned long flags;
        uint32_t handle;
-       uint32_t index;
        uint16_t req_cnt;
        uint16_t cnt;
        uint32_t *clr_ptr;
@@ -3794,17 +3741,8 @@ qla2x00_start_bidir(srb_t *sp, struct scsi_qla_host *vha, uint32_t tot_dsds)
        /* Acquire ring specific lock */
        spin_lock_irqsave(&ha->hardware_lock, flags);
 
-       /* Check for room in outstanding command list. */
-       handle = req->current_outstanding_cmd;
-       for (index = 1; index < req->num_outstanding_cmds; index++) {
-               handle++;
-               if (handle == req->num_outstanding_cmds)
-                       handle = 1;
-               if (!req->outstanding_cmds[handle])
-                       break;
-       }
-
-       if (index == req->num_outstanding_cmds) {
+       handle = qla2xxx_get_next_handle(req);
+       if (handle == 0) {
                rval = EXT_STATUS_BUSY;
                goto queuing_error;
        }
index 78aec50..4c26630 100644 (file)
@@ -776,7 +776,6 @@ skip_rio:
        case MBA_LOOP_INIT_ERR:
                ql_log(ql_log_warn, vha, 0x5090,
                    "LOOP INIT ERROR (%x).\n", mb[1]);
-               ha->isp_ops->fw_dump(vha, 1);
                set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
                break;
 
@@ -1119,10 +1118,9 @@ global_port_update:
                        struct event_arg ea;
 
                        memset(&ea, 0, sizeof(ea));
-                       ea.event = FCME_RSCN;
                        ea.id.b24 = rscn_entry;
                        ea.id.b.rsvd_1 = rscn_entry >> 24;
-                       qla2x00_fcport_event_handler(vha, &ea);
+                       qla2x00_handle_rscn(vha, &ea);
                        qla2x00_post_aen_work(vha, FCH_EVT_RSCN, rscn_entry);
                }
                break;
@@ -1514,7 +1512,7 @@ qla2x00_ct_entry(scsi_qla_host_t *vha, struct req_que *req,
                    if (comp_status == CS_DATA_UNDERRUN) {
                            res = DID_OK << 16;
                            bsg_reply->reply_payload_rcv_len =
-                               le16_to_cpu(((sts_entry_t *)pkt)->rsp_info_len);
+                               le16_to_cpu(pkt->rsp_info_len);
 
                            ql_log(ql_log_warn, vha, 0x5048,
                                "CT pass-through-%s error comp_status=0x%x total_byte=0x%x.\n",
@@ -2257,11 +2255,8 @@ qla25xx_process_bidir_status_iocb(scsi_qla_host_t *vha, void *pkt,
        struct bsg_job *bsg_job = NULL;
        struct fc_bsg_request *bsg_request;
        struct fc_bsg_reply *bsg_reply;
-       sts_entry_t *sts;
-       struct sts_entry_24xx *sts24;
-
-       sts = (sts_entry_t *) pkt;
-       sts24 = (struct sts_entry_24xx *) pkt;
+       sts_entry_t *sts = pkt;
+       struct sts_entry_24xx *sts24 = pkt;
 
        /* Validate handle. */
        if (index >= req->num_outstanding_cmds) {
@@ -2407,8 +2402,8 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
        srb_t           *sp;
        fc_port_t       *fcport;
        struct scsi_cmnd *cp;
-       sts_entry_t *sts;
-       struct sts_entry_24xx *sts24;
+       sts_entry_t *sts = pkt;
+       struct sts_entry_24xx *sts24 = pkt;
        uint16_t        comp_status;
        uint16_t        scsi_status;
        uint16_t        ox_id;
@@ -2426,8 +2421,6 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
        uint16_t state_flags = 0;
        uint16_t retry_delay = 0;
 
-       sts = (sts_entry_t *) pkt;
-       sts24 = (struct sts_entry_24xx *) pkt;
        if (IS_FWI2_CAPABLE(ha)) {
                comp_status = le16_to_cpu(sts24->comp_status);
                scsi_status = le16_to_cpu(sts24->scsi_status) & SS_MASK;
@@ -2727,7 +2720,7 @@ check_scsi_status:
                                "Port to be marked lost on fcport=%02x%02x%02x, current "
                                "port state= %s comp_status %x.\n", fcport->d_id.b.domain,
                                fcport->d_id.b.area, fcport->d_id.b.al_pa,
-                               port_state_str[atomic_read(&fcport->state)],
+                               port_state_str[FCS_ONLINE],
                                comp_status);
 
                        qla2x00_mark_device_lost(fcport->vha, fcport, 1, 1);
@@ -2844,6 +2837,8 @@ qla2x00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt)
        if (sense_len == 0) {
                rsp->status_srb = NULL;
                sp->done(sp, cp->result);
+       } else {
+               WARN_ON_ONCE(true);
        }
 }
 
@@ -3471,10 +3466,8 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp)
                    ha->msix_count, ret);
                goto msix_out;
        } else if (ret < ha->msix_count) {
-               ql_log(ql_log_warn, vha, 0x00c6,
-                   "MSI-X: Failed to enable support "
-                    "with %d vectors, using %d vectors.\n",
-                   ha->msix_count, ret);
+               ql_log(ql_log_info, vha, 0x00c6,
+                   "MSI-X: Using %d vectors\n", ret);
                ha->msix_count = ret;
                /* Recalculate queue values */
                if (ha->mqiobase && (ql2xmqsupport || ql2xnvmeenable)) {
index 133f5f6..4c858e2 100644 (file)
@@ -253,21 +253,9 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
        if ((!abort_active && io_lock_on) || IS_NOPOLLING_TYPE(ha)) {
                set_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
 
-               if (IS_P3P_TYPE(ha)) {
-                       if (RD_REG_DWORD(&reg->isp82.hint) &
-                               HINT_MBX_INT_PENDING) {
-                               ha->flags.mbox_busy = 0;
-                               spin_unlock_irqrestore(&ha->hardware_lock,
-                                       flags);
-
-                               atomic_dec(&ha->num_pend_mbx_stage2);
-                               ql_dbg(ql_dbg_mbx, vha, 0x1010,
-                                   "Pending mailbox timeout, exiting.\n");
-                               rval = QLA_FUNCTION_TIMEOUT;
-                               goto premature_exit;
-                       }
+               if (IS_P3P_TYPE(ha))
                        WRT_REG_DWORD(&reg->isp82.hint, HINT_MBX_INT_PENDING);
-               else if (IS_FWI2_CAPABLE(ha))
+               else if (IS_FWI2_CAPABLE(ha))
                        WRT_REG_DWORD(&reg->isp24.hccr, HCCRX_SET_HOST_INT);
                else
                        WRT_REG_WORD(&reg->isp.hccr, HCCR_SET_HOST_INT);
@@ -394,8 +382,12 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
                        goto premature_exit;
                }
 
-               if (ha->mailbox_out[0] != MBS_COMMAND_COMPLETE)
+               if (ha->mailbox_out[0] != MBS_COMMAND_COMPLETE) {
+                       ql_dbg(ql_dbg_mbx, vha, 0x11ff,
+                              "mb_out[0] = %#x <> %#x\n", ha->mailbox_out[0],
+                              MBS_COMMAND_COMPLETE);
                        rval = QLA_FUNCTION_FAILED;
+               }
 
                /* Load return mailbox registers. */
                iptr2 = mcp->mb;
@@ -6213,10 +6205,8 @@ qla26xx_dport_diagnostics(scsi_qla_host_t *vha,
        return rval;
 }
 
-static void qla2x00_async_mb_sp_done(void *s, int res)
+static void qla2x00_async_mb_sp_done(srb_t *sp, int res)
 {
-       struct srb *sp = s;
-
        sp->u.iocb_cmd.u.mbx.rc = res;
 
        complete(&sp->u.iocb_cmd.u.mbx.comp);
index b2977e4..1a9a11a 100644 (file)
@@ -901,10 +901,8 @@ failed:
        return 0;
 }
 
-static void qla_ctrlvp_sp_done(void *s, int res)
+static void qla_ctrlvp_sp_done(srb_t *sp, int res)
 {
-       struct srb *sp = s;
-
        if (sp->comp)
                complete(sp->comp);
        /* don't free sp here. Let the caller do the free */
index 942ee13..605b59c 100644 (file)
@@ -10,7 +10,6 @@
 #include <linux/pci.h>
 #include <linux/ratelimit.h>
 #include <linux/vmalloc.h>
-#include <linux/bsg-lib.h>
 #include <scsi/scsi_tcq.h>
 #include <linux/utsname.h>
 
@@ -149,7 +148,8 @@ qlafx00_mailbox_command(scsi_qla_host_t *vha, struct mbx_cmd_32 *mcp)
                QLAFX00_SET_HST_INTR(ha, ha->mbx_intr_code);
                spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
-               wait_for_completion_timeout(&ha->mbx_intr_comp, mcp->tov * HZ);
+               WARN_ON_ONCE(wait_for_completion_timeout(&ha->mbx_intr_comp,
+                                                        mcp->tov * HZ) != 0);
        } else {
                ql_dbg(ql_dbg_mbx, vha, 0x112c,
                    "Cmd=%x Polling Mode.\n", command);
@@ -688,14 +688,12 @@ qlafx00_config_rings(struct scsi_qla_host *vha)
 }
 
 char *
-qlafx00_pci_info_str(struct scsi_qla_host *vha, char *str)
+qlafx00_pci_info_str(struct scsi_qla_host *vha, char *str, size_t str_len)
 {
        struct qla_hw_data *ha = vha->hw;
 
-       if (pci_is_pcie(ha->pdev)) {
-               strcpy(str, "PCIe iSA");
-               return str;
-       }
+       if (pci_is_pcie(ha->pdev))
+               strlcpy(str, "PCIe iSA", str_len);
        return str;
 }
 
@@ -1799,10 +1797,8 @@ qla2x00_fxdisc_iocb_timeout(void *data)
        complete(&lio->u.fxiocb.fxiocb_comp);
 }
 
-static void
-qla2x00_fxdisc_sp_done(void *ptr, int res)
+static void qla2x00_fxdisc_sp_done(srb_t *sp, int res)
 {
-       srb_t *sp = ptr;
        struct srb_iocb *lio = &sp->u.iocb_cmd;
 
        complete(&lio->u.fxiocb.fxiocb_comp);
@@ -1881,22 +1877,22 @@ qlafx00_fx_disc(scsi_qla_host_t *vha, fc_port_t *fcport, uint16_t fx_type)
                        phost_info = &preg_hsi->hsi;
                        memset(preg_hsi, 0, sizeof(struct register_host_info));
                        phost_info->os_type = OS_TYPE_LINUX;
-                       strncpy(phost_info->sysname,
-                           p_sysid->sysname, SYSNAME_LENGTH);
-                       strncpy(phost_info->nodename,
-                           p_sysid->nodename, NODENAME_LENGTH);
+                       strlcpy(phost_info->sysname, p_sysid->sysname,
+                               sizeof(phost_info->sysname));
+                       strlcpy(phost_info->nodename, p_sysid->nodename,
+                               sizeof(phost_info->nodename));
                        if (!strcmp(phost_info->nodename, "(none)"))
                                ha->mr.host_info_resend = true;
-                       strncpy(phost_info->release,
-                           p_sysid->release, RELEASE_LENGTH);
-                       strncpy(phost_info->version,
-                           p_sysid->version, VERSION_LENGTH);
-                       strncpy(phost_info->machine,
-                           p_sysid->machine, MACHINE_LENGTH);
-                       strncpy(phost_info->domainname,
-                           p_sysid->domainname, DOMNAME_LENGTH);
-                       strncpy(phost_info->hostdriver,
-                           QLA2XXX_VERSION, VERSION_LENGTH);
+                       strlcpy(phost_info->release, p_sysid->release,
+                               sizeof(phost_info->release));
+                       strlcpy(phost_info->version, p_sysid->version,
+                               sizeof(phost_info->version));
+                       strlcpy(phost_info->machine, p_sysid->machine,
+                               sizeof(phost_info->machine));
+                       strlcpy(phost_info->domainname, p_sysid->domainname,
+                               sizeof(phost_info->domainname));
+                       strlcpy(phost_info->hostdriver, QLA2XXX_VERSION,
+                               sizeof(phost_info->hostdriver));
                        preg_hsi->utc = (uint64_t)ktime_get_real_seconds();
                        ql_dbg(ql_dbg_init, vha, 0x0149,
                            "ISP%04X: Host registration with firmware\n",
@@ -1941,8 +1937,10 @@ qlafx00_fx_disc(scsi_qla_host_t *vha, fc_port_t *fcport, uint16_t fx_type)
        if (fx_type == FXDISC_GET_CONFIG_INFO) {
                struct config_info_data *pinfo =
                    (struct config_info_data *) fdisc->u.fxiocb.rsp_addr;
-               strcpy(vha->hw->model_number, pinfo->model_num);
-               strcpy(vha->hw->model_desc, pinfo->model_description);
+               strlcpy(vha->hw->model_number, pinfo->model_num,
+                       ARRAY_SIZE(vha->hw->model_number));
+               strlcpy(vha->hw->model_desc, pinfo->model_description,
+                       ARRAY_SIZE(vha->hw->model_desc));
                memcpy(&vha->hw->mr.symbolic_name, pinfo->symbolic_name,
                    sizeof(vha->hw->mr.symbolic_name));
                memcpy(&vha->hw->mr.serial_num, pinfo->serial_num,
@@ -2541,6 +2539,8 @@ check_scsi_status:
 
        if (rsp->status_srb == NULL)
                sp->done(sp, res);
+       else
+               WARN_ON_ONCE(true);
 }
 
 /**
@@ -2618,6 +2618,8 @@ qlafx00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt)
        if (sense_len == 0) {
                rsp->status_srb = NULL;
                sp->done(sp, cp->result);
+       } else {
+               WARN_ON_ONCE(true);
        }
 }
 
@@ -3073,7 +3075,6 @@ qlafx00_start_scsi(srb_t *sp)
 {
        int             nseg;
        unsigned long   flags;
-       uint32_t        index;
        uint32_t        handle;
        uint16_t        cnt;
        uint16_t        req_cnt;
@@ -3097,16 +3098,8 @@ qlafx00_start_scsi(srb_t *sp)
        /* Acquire ring specific lock */
        spin_lock_irqsave(&ha->hardware_lock, flags);
 
-       /* Check for room in outstanding command list. */
-       handle = req->current_outstanding_cmd;
-       for (index = 1; index < req->num_outstanding_cmds; index++) {
-               handle++;
-               if (handle == req->num_outstanding_cmds)
-                       handle = 1;
-               if (!req->outstanding_cmds[handle])
-                       break;
-       }
-       if (index == req->num_outstanding_cmds)
+       handle = qla2xxx_get_next_handle(req);
+       if (handle == 0)
                goto queuing_error;
 
        /* Map the sg table so we have an accurate count of sg entries needed */
index 963094b..6cc19e0 100644 (file)
@@ -180,10 +180,9 @@ static void qla_nvme_ls_complete(struct work_struct *work)
        kref_put(&priv->sp->cmd_kref, qla_nvme_release_ls_cmd_kref);
 }
 
-static void qla_nvme_sp_ls_done(void *ptr, int res)
+static void qla_nvme_sp_ls_done(srb_t *sp, int res)
 {
-       srb_t *sp = ptr;
-       struct nvme_private *priv;
+       struct nvme_private *priv = sp->priv;
 
        if (WARN_ON_ONCE(kref_read(&sp->cmd_kref) == 0))
                return;
@@ -191,17 +190,15 @@ static void qla_nvme_sp_ls_done(void *ptr, int res)
        if (res)
                res = -EINVAL;
 
-       priv = (struct nvme_private *)sp->priv;
        priv->comp_status = res;
        INIT_WORK(&priv->ls_work, qla_nvme_ls_complete);
        schedule_work(&priv->ls_work);
 }
 
 /* it assumed that QPair lock is held. */
-static void qla_nvme_sp_done(void *ptr, int res)
+static void qla_nvme_sp_done(srb_t *sp, int res)
 {
-       srb_t *sp = ptr;
-       struct nvme_private *priv = (struct nvme_private *)sp->priv;
+       struct nvme_private *priv = sp->priv;
 
        priv->comp_status = res;
        kref_put(&sp->cmd_kref, qla_nvme_release_fcp_cmd_kref);
@@ -222,7 +219,7 @@ static void qla_nvme_abort_work(struct work_struct *work)
               "%s called for sp=%p, hndl=%x on fcport=%p deleted=%d\n",
               __func__, sp, sp->handle, fcport, fcport->deleted);
 
-       if (!ha->flags.fw_started && (fcport && fcport->deleted))
+       if (!ha->flags.fw_started && fcport->deleted)
                goto out;
 
        if (ha->flags.host_shutting_down) {
@@ -267,7 +264,6 @@ static void qla_nvme_ls_abort(struct nvme_fc_local_port *lport,
        schedule_work(&priv->abort_work);
 }
 
-
 static int qla_nvme_ls_req(struct nvme_fc_local_port *lport,
     struct nvme_fc_remote_port *rport, struct nvmefc_ls_req *fd)
 {
@@ -357,7 +353,6 @@ static inline int qla2x00_start_nvme_mq(srb_t *sp)
 {
        unsigned long   flags;
        uint32_t        *clr_ptr;
-       uint32_t        index;
        uint32_t        handle;
        struct cmd_nvme *cmd_pkt;
        uint16_t        cnt, i;
@@ -381,17 +376,8 @@ static inline int qla2x00_start_nvme_mq(srb_t *sp)
        /* Acquire qpair specific lock */
        spin_lock_irqsave(&qpair->qp_lock, flags);
 
-       /* Check for room in outstanding command list. */
-       handle = req->current_outstanding_cmd;
-       for (index = 1; index < req->num_outstanding_cmds; index++) {
-               handle++;
-               if (handle == req->num_outstanding_cmds)
-                       handle = 1;
-               if (!req->outstanding_cmds[handle])
-                       break;
-       }
-
-       if (index == req->num_outstanding_cmds) {
+       handle = qla2xxx_get_next_handle(req);
+       if (handle == 0) {
                rval = -EBUSY;
                goto queuing_error;
        }
@@ -653,7 +639,9 @@ void qla_nvme_unregister_remote_port(struct fc_port *fcport)
            "%s: unregister remoteport on %p %8phN\n",
            __func__, fcport, fcport->port_name);
 
-       nvme_fc_set_remoteport_devloss(fcport->nvme_remote_port, 0);
+       if (test_bit(PFLG_DRIVER_REMOVING, &fcport->vha->pci_flags))
+               nvme_fc_set_remoteport_devloss(fcport->nvme_remote_port, 0);
+
        init_completion(&fcport->nvme_del_done);
        ret = nvme_fc_unregister_remoteport(fcport->nvme_remote_port);
        if (ret)
index 67bb4a2..ef91290 100644 (file)
@@ -7,7 +7,6 @@
 #ifndef __QLA_NVME_H
 #define __QLA_NVME_H
 
-#include <linux/blk-mq.h>
 #include <uapi/scsi/fc/fc_fs.h>
 #include <uapi/scsi/fc/fc_els.h>
 #include <linux/nvme-fc-driver.h>
@@ -119,7 +118,7 @@ struct pt_ls4_rx_unsol {
        uint32_t exchange_address;
        uint8_t d_id[3];
        uint8_t r_ctl;
-       uint8_t s_id[3];
+       be_id_t s_id;
        uint8_t cs_ctl;
        uint8_t f_ctl[3];
        uint8_t type;
@@ -144,5 +143,5 @@ int  qla_nvme_register_remote(struct scsi_qla_host *, struct fc_port *);
 void qla_nvme_delete(struct scsi_qla_host *);
 void qla24xx_nvme_ls4_iocb(struct scsi_qla_host *, struct pt_ls4_request *,
     struct req_que *);
-void qla24xx_async_gffid_sp_done(void *, int);
+void qla24xx_async_gffid_sp_done(struct srb *sp, int);
 #endif
index c760ae3..2b2028f 100644 (file)
@@ -1977,7 +1977,7 @@ qla82xx_check_rcvpeg_state(struct qla_hw_data *ha)
        } while (--retries);
 
        ql_log(ql_log_fatal, vha, 0x00ac,
-           "Rcv Peg initializatin failed: 0x%x.\n", val);
+           "Rcv Peg initialization failed: 0x%x.\n", val);
        read_lock(&ha->hw_lock);
        qla82xx_wr_32(ha, CRB_RCVPEG_STATE, PHAN_INITIALIZE_FAILED);
        read_unlock(&ha->hw_lock);
@@ -1985,7 +1985,7 @@ qla82xx_check_rcvpeg_state(struct qla_hw_data *ha)
 }
 
 /* ISR related functions */
-static struct qla82xx_legacy_intr_set legacy_intr[] = \
+static struct qla82xx_legacy_intr_set legacy_intr[] =
        QLA82XX_LEGACY_INTR_CONFIG;
 
 /*
@@ -2287,7 +2287,9 @@ qla82xx_disable_intrs(struct qla_hw_data *ha)
 {
        scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev);
 
-       qla82xx_mbx_intr_disable(vha);
+       if (ha->interrupts_on)
+               qla82xx_mbx_intr_disable(vha);
+
        spin_lock_irq(&ha->hardware_lock);
        if (IS_QLA8044(ha))
                qla8044_wr_reg(ha, LEG_INTR_MASK_OFFSET, 1);
@@ -3286,7 +3288,7 @@ qla82xx_device_state_handler(scsi_qla_host_t *vha)
                case QLA8XXX_DEV_NEED_QUIESCENT:
                        qla82xx_need_qsnt_handler(vha);
                        /* Reset timeout value after quiescence handler */
-                       dev_init_timeout = jiffies + (ha->fcoe_dev_init_timeout\
+                       dev_init_timeout = jiffies + (ha->fcoe_dev_init_timeout
                                                         * HZ);
                        break;
                case QLA8XXX_DEV_QUIESCENT:
@@ -3301,7 +3303,7 @@ qla82xx_device_state_handler(scsi_qla_host_t *vha)
                        qla82xx_idc_lock(ha);
 
                        /* Reset timeout value after quiescence handler */
-                       dev_init_timeout = jiffies + (ha->fcoe_dev_init_timeout\
+                       dev_init_timeout = jiffies + (ha->fcoe_dev_init_timeout
                                                         * HZ);
                        break;
                case QLA8XXX_DEV_FAILED:
@@ -3686,7 +3688,7 @@ qla82xx_chip_reset_cleanup(scsi_qla_host_t *vha)
                        for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
                                sp = req->outstanding_cmds[cnt];
                                if (sp) {
-                                       if ((!sp->u.scmd.ctx ||
+                                       if ((!sp->u.scmd.crc_ctx ||
                                            (sp->flags &
                                                SRB_FCP_CMND_DMA_VALID)) &&
                                                !ha->flags.isp82xx_fw_hung) {
@@ -3710,10 +3712,12 @@ qla82xx_chip_reset_cleanup(scsi_qla_host_t *vha)
 
                /* Wait for pending cmds (physical and virtual) to complete */
                if (qla2x00_eh_wait_for_pending_commands(vha, 0, 0,
-                   WAIT_HOST)) {
+                   WAIT_HOST) == QLA_SUCCESS) {
                        ql_dbg(ql_dbg_init, vha, 0x00b3,
                            "Done wait for "
                            "pending commands.\n");
+               } else {
+                       WARN_ON_ONCE(true);
                }
        }
 }
@@ -4232,7 +4236,7 @@ qla82xx_md_collect(scsi_qla_host_t *vha)
                goto md_failed;
        }
 
-       entry_hdr = (qla82xx_md_entry_hdr_t *) \
+       entry_hdr = (qla82xx_md_entry_hdr_t *)
            (((uint8_t *)ha->md_tmplt_hdr) + tmplt_hdr->first_entry_offset);
 
        /* Walk through the entry headers */
@@ -4339,7 +4343,7 @@ qla82xx_md_collect(scsi_qla_host_t *vha)
                data_collected = (uint8_t *)data_ptr -
                    (uint8_t *)ha->md_dump;
 skip_nxt_entry:
-               entry_hdr = (qla82xx_md_entry_hdr_t *) \
+               entry_hdr = (qla82xx_md_entry_hdr_t *)
                    (((uint8_t *)entry_hdr) + entry_hdr->entry_size);
        }
 
index 3c7beef..230abee 100644 (file)
 #define QLA82XX_ADDR_QDR_NET           (0x0000000300000000ULL)
 #define QLA82XX_P3_ADDR_QDR_NET_MAX    (0x0000000303ffffffULL)
 
-#define QLA82XX_PCI_CRBSPACE           (unsigned long)0x06000000
-#define QLA82XX_PCI_DIRECT_CRB         (unsigned long)0x04400000
-#define QLA82XX_PCI_CAMQM              (unsigned long)0x04800000
-#define QLA82XX_PCI_CAMQM_MAX          (unsigned long)0x04ffffff
-#define QLA82XX_PCI_DDR_NET            (unsigned long)0x00000000
-#define QLA82XX_PCI_QDR_NET            (unsigned long)0x04000000
-#define QLA82XX_PCI_QDR_NET_MAX                (unsigned long)0x043fffff
+#define QLA82XX_PCI_CRBSPACE           0x06000000UL
+#define QLA82XX_PCI_DIRECT_CRB         0x04400000UL
+#define QLA82XX_PCI_CAMQM              0x04800000UL
+#define QLA82XX_PCI_CAMQM_MAX          0x04ffffffUL
+#define QLA82XX_PCI_DDR_NET            0x00000000UL
+#define QLA82XX_PCI_QDR_NET            0x04000000UL
+#define QLA82XX_PCI_QDR_NET_MAX                0x043fffffUL
 
 /*
  *   Register offsets for MN
index 369ac04..c056f46 100644 (file)
@@ -2810,7 +2810,7 @@ error:
 
 #define ISP8044_PEX_DMA_ENGINE_INDEX           8
 #define ISP8044_PEX_DMA_BASE_ADDRESS           0x77320000
-#define ISP8044_PEX_DMA_NUM_OFFSET             0x10000
+#define ISP8044_PEX_DMA_NUM_OFFSET             0x10000UL
 #define ISP8044_PEX_DMA_CMD_ADDR_LOW           0x0
 #define ISP8044_PEX_DMA_CMD_ADDR_HIGH          0x04
 #define ISP8044_PEX_DMA_CMD_STS_AND_CNTRL      0x08
index 98e60a3..73db01e 100644 (file)
@@ -69,7 +69,7 @@ MODULE_PARM_DESC(ql2xplogiabsentdevice,
                "a Fabric scan.  This is needed for several broken switches. "
                "Default is 0 - no PLOGI. 1 - perform PLOGI.");
 
-int ql2xloginretrycount = 0;
+int ql2xloginretrycount;
 module_param(ql2xloginretrycount, int, S_IRUGO);
 MODULE_PARM_DESC(ql2xloginretrycount,
                "Specify an alternate value for the NVRAM login retry count.");
@@ -234,7 +234,7 @@ MODULE_PARM_DESC(ql2xmdenable,
                "0 - MiniDump disabled. "
                "1 (Default) - MiniDump enabled.");
 
-int ql2xexlogins = 0;
+int ql2xexlogins;
 module_param(ql2xexlogins, uint, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(ql2xexlogins,
                 "Number of extended Logins. "
@@ -250,7 +250,7 @@ module_param(ql2xiniexchg, uint, 0644);
 MODULE_PARM_DESC(ql2xiniexchg,
        "Number of initiator exchanges.");
 
-int ql2xfwholdabts = 0;
+int ql2xfwholdabts;
 module_param(ql2xfwholdabts, int, S_IRUGO);
 MODULE_PARM_DESC(ql2xfwholdabts,
                "Allow FW to hold status IOCB until ABTS rsp received. "
@@ -536,80 +536,70 @@ static void qla2x00_free_queues(struct qla_hw_data *ha)
 }
 
 static char *
-qla2x00_pci_info_str(struct scsi_qla_host *vha, char *str)
+qla2x00_pci_info_str(struct scsi_qla_host *vha, char *str, size_t str_len)
 {
        struct qla_hw_data *ha = vha->hw;
-       static char *pci_bus_modes[] = {
+       static const char *const pci_bus_modes[] = {
                "33", "66", "100", "133",
        };
        uint16_t pci_bus;
 
-       strcpy(str, "PCI");
        pci_bus = (ha->pci_attr & (BIT_9 | BIT_10)) >> 9;
        if (pci_bus) {
-               strcat(str, "-X (");
-               strcat(str, pci_bus_modes[pci_bus]);
+               snprintf(str, str_len, "PCI-X (%s MHz)",
+                        pci_bus_modes[pci_bus]);
        } else {
                pci_bus = (ha->pci_attr & BIT_8) >> 8;
-               strcat(str, " (");
-               strcat(str, pci_bus_modes[pci_bus]);
+               snprintf(str, str_len, "PCI (%s MHz)", pci_bus_modes[pci_bus]);
        }
-       strcat(str, " MHz)");
 
-       return (str);
+       return str;
 }
 
 static char *
-qla24xx_pci_info_str(struct scsi_qla_host *vha, char *str)
+qla24xx_pci_info_str(struct scsi_qla_host *vha, char *str, size_t str_len)
 {
-       static char *pci_bus_modes[] = { "33", "66", "100", "133", };
+       static const char *const pci_bus_modes[] = {
+               "33", "66", "100", "133",
+       };
        struct qla_hw_data *ha = vha->hw;
        uint32_t pci_bus;
 
        if (pci_is_pcie(ha->pdev)) {
-               char lwstr[6];
                uint32_t lstat, lspeed, lwidth;
+               const char *speed_str;
 
                pcie_capability_read_dword(ha->pdev, PCI_EXP_LNKCAP, &lstat);
                lspeed = lstat & PCI_EXP_LNKCAP_SLS;
                lwidth = (lstat & PCI_EXP_LNKCAP_MLW) >> 4;
 
-               strcpy(str, "PCIe (");
                switch (lspeed) {
                case 1:
-                       strcat(str, "2.5GT/s ");
+                       speed_str = "2.5GT/s";
                        break;
                case 2:
-                       strcat(str, "5.0GT/s ");
+                       speed_str = "5.0GT/s";
                        break;
                case 3:
-                       strcat(str, "8.0GT/s ");
+                       speed_str = "8.0GT/s";
                        break;
                default:
-                       strcat(str, "<unknown> ");
+                       speed_str = "<unknown>";
                        break;
                }
-               snprintf(lwstr, sizeof(lwstr), "x%d)", lwidth);
-               strcat(str, lwstr);
+               snprintf(str, str_len, "PCIe (%s x%d)", speed_str, lwidth);
 
                return str;
        }
 
-       strcpy(str, "PCI");
        pci_bus = (ha->pci_attr & CSRX_PCIX_BUS_MODE_MASK) >> 8;
-       if (pci_bus == 0 || pci_bus == 8) {
-               strcat(str, " (");
-               strcat(str, pci_bus_modes[pci_bus >> 3]);
-       } else {
-               strcat(str, "-X ");
-               if (pci_bus & BIT_2)
-                       strcat(str, "Mode 2");
-               else
-                       strcat(str, "Mode 1");
-               strcat(str, " (");
-               strcat(str, pci_bus_modes[pci_bus & ~BIT_2]);
-       }
-       strcat(str, " MHz)");
+       if (pci_bus == 0 || pci_bus == 8)
+               snprintf(str, str_len, "PCI (%s MHz)",
+                        pci_bus_modes[pci_bus >> 3]);
+       else
+               snprintf(str, str_len, "PCI-X Mode %d (%s MHz)",
+                        pci_bus & 4 ? 2 : 1,
+                        pci_bus_modes[pci_bus & 3]);
 
        return str;
 }
@@ -662,13 +652,10 @@ qla24xx_fw_version_str(struct scsi_qla_host *vha, char *str, size_t size)
        return str;
 }
 
-void
-qla2x00_sp_free_dma(void *ptr)
+void qla2x00_sp_free_dma(srb_t *sp)
 {
-       srb_t *sp = ptr;
        struct qla_hw_data *ha = sp->vha->hw;
        struct scsi_cmnd *cmd = GET_CMD_SP(sp);
-       void *ctx = GET_CMD_CTX_SP(sp);
 
        if (sp->flags & SRB_DMA_VALID) {
                scsi_dma_unmap(cmd);
@@ -681,24 +668,21 @@ qla2x00_sp_free_dma(void *ptr)
                sp->flags &= ~SRB_CRC_PROT_DMA_VALID;
        }
 
-       if (!ctx)
-               return;
-
        if (sp->flags & SRB_CRC_CTX_DSD_VALID) {
                /* List assured to be having elements */
-               qla2x00_clean_dsd_pool(ha, ctx);
+               qla2x00_clean_dsd_pool(ha, sp->u.scmd.crc_ctx);
                sp->flags &= ~SRB_CRC_CTX_DSD_VALID;
        }
 
        if (sp->flags & SRB_CRC_CTX_DMA_VALID) {
-               struct crc_context *ctx0 = ctx;
+               struct crc_context *ctx0 = sp->u.scmd.crc_ctx;
 
                dma_pool_free(ha->dl_dma_pool, ctx0, ctx0->crc_ctx_dma);
                sp->flags &= ~SRB_CRC_CTX_DMA_VALID;
        }
 
        if (sp->flags & SRB_FCP_CMND_DMA_VALID) {
-               struct ct6_dsd *ctx1 = ctx;
+               struct ct6_dsd *ctx1 = sp->u.scmd.ct6_ctx;
 
                dma_pool_free(ha->fcp_cmnd_dma_pool, ctx1->fcp_cmnd,
                    ctx1->fcp_cmnd_dma);
@@ -709,10 +693,8 @@ qla2x00_sp_free_dma(void *ptr)
        }
 }
 
-void
-qla2x00_sp_compl(void *ptr, int res)
+void qla2x00_sp_compl(srb_t *sp, int res)
 {
-       srb_t *sp = ptr;
        struct scsi_cmnd *cmd = GET_CMD_SP(sp);
        struct completion *comp = sp->comp;
 
@@ -727,16 +709,12 @@ qla2x00_sp_compl(void *ptr, int res)
        cmd->scsi_done(cmd);
        if (comp)
                complete(comp);
-       qla2x00_rel_sp(sp);
 }
 
-void
-qla2xxx_qpair_sp_free_dma(void *ptr)
+void qla2xxx_qpair_sp_free_dma(srb_t *sp)
 {
-       srb_t *sp = (srb_t *)ptr;
        struct scsi_cmnd *cmd = GET_CMD_SP(sp);
        struct qla_hw_data *ha = sp->fcport->vha->hw;
-       void *ctx = GET_CMD_CTX_SP(sp);
 
        if (sp->flags & SRB_DMA_VALID) {
                scsi_dma_unmap(cmd);
@@ -749,17 +727,14 @@ qla2xxx_qpair_sp_free_dma(void *ptr)
                sp->flags &= ~SRB_CRC_PROT_DMA_VALID;
        }
 
-       if (!ctx)
-               return;
-
        if (sp->flags & SRB_CRC_CTX_DSD_VALID) {
                /* List assured to be having elements */
-               qla2x00_clean_dsd_pool(ha, ctx);
+               qla2x00_clean_dsd_pool(ha, sp->u.scmd.crc_ctx);
                sp->flags &= ~SRB_CRC_CTX_DSD_VALID;
        }
 
        if (sp->flags & SRB_DIF_BUNDL_DMA_VALID) {
-               struct crc_context *difctx = ctx;
+               struct crc_context *difctx = sp->u.scmd.crc_ctx;
                struct dsd_dma *dif_dsd, *nxt_dsd;
 
                list_for_each_entry_safe(dif_dsd, nxt_dsd,
@@ -795,7 +770,7 @@ qla2xxx_qpair_sp_free_dma(void *ptr)
        }
 
        if (sp->flags & SRB_FCP_CMND_DMA_VALID) {
-               struct ct6_dsd *ctx1 = ctx;
+               struct ct6_dsd *ctx1 = sp->u.scmd.ct6_ctx;
 
                dma_pool_free(ha->fcp_cmnd_dma_pool, ctx1->fcp_cmnd,
                    ctx1->fcp_cmnd_dma);
@@ -807,17 +782,15 @@ qla2xxx_qpair_sp_free_dma(void *ptr)
        }
 
        if (sp->flags & SRB_CRC_CTX_DMA_VALID) {
-               struct crc_context *ctx0 = ctx;
+               struct crc_context *ctx0 = sp->u.scmd.crc_ctx;
 
-               dma_pool_free(ha->dl_dma_pool, ctx, ctx0->crc_ctx_dma);
+               dma_pool_free(ha->dl_dma_pool, ctx0, ctx0->crc_ctx_dma);
                sp->flags &= ~SRB_CRC_CTX_DMA_VALID;
        }
 }
 
-void
-qla2xxx_qpair_sp_compl(void *ptr, int res)
+void qla2xxx_qpair_sp_compl(srb_t *sp, int res)
 {
-       srb_t *sp = ptr;
        struct scsi_cmnd *cmd = GET_CMD_SP(sp);
        struct completion *comp = sp->comp;
 
@@ -832,7 +805,6 @@ qla2xxx_qpair_sp_compl(void *ptr, int res)
        cmd->scsi_done(cmd);
        if (comp)
                complete(comp);
-       qla2xxx_rel_qpair_sp(sp->qpair, sp);
 }
 
 static int
@@ -845,9 +817,6 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
        struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
        srb_t *sp;
        int rval;
-       struct qla_qpair *qpair = NULL;
-       uint32_t tag;
-       uint16_t hwq;
 
        if (unlikely(test_bit(UNLOADING, &base_vha->dpc_flags)) ||
            WARN_ON_ONCE(!rport)) {
@@ -856,6 +825,10 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
        }
 
        if (ha->mqenable) {
+               uint32_t tag;
+               uint16_t hwq;
+               struct qla_qpair *qpair = NULL;
+
                tag = blk_mq_unique_tag(cmd->request);
                hwq = blk_mq_unique_tag_to_hwq(tag);
                qpair = ha->queue_pair_map[hwq];
@@ -925,9 +898,8 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
        else
                goto qc24_target_busy;
 
-       sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC);
-       if (!sp)
-               goto qc24_host_busy;
+       sp = scsi_cmd_priv(cmd);
+       qla2xxx_init_sp(sp, vha, vha->hw->base_qpair, fcport);
 
        sp->u.scmd.cmd = cmd;
        sp->type = SRB_SCSI_CMD;
@@ -948,9 +920,6 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 qc24_host_busy_free_sp:
        sp->free(sp);
 
-qc24_host_busy:
-       return SCSI_MLQUEUE_HOST_BUSY;
-
 qc24_target_busy:
        return SCSI_MLQUEUE_TARGET_BUSY;
 
@@ -1011,9 +980,8 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd,
        else
                goto qc24_target_busy;
 
-       sp = qla2xxx_get_qpair_sp(vha, qpair, fcport, GFP_ATOMIC);
-       if (!sp)
-               goto qc24_host_busy;
+       sp = scsi_cmd_priv(cmd);
+       qla2xxx_init_sp(sp, vha, qpair, fcport);
 
        sp->u.scmd.cmd = cmd;
        sp->type = SRB_SCSI_CMD;
@@ -1037,9 +1005,6 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd,
 qc24_host_busy_free_sp:
        sp->free(sp);
 
-qc24_host_busy:
-       return SCSI_MLQUEUE_HOST_BUSY;
-
 qc24_target_busy:
        return SCSI_MLQUEUE_TARGET_BUSY;
 
@@ -1058,8 +1023,8 @@ qc24_fail_command:
  *    cmd = Scsi Command to wait on.
  *
  * Return:
- *    Not Found : 0
- *    Found : 1
+ *    Completed in time : QLA_SUCCESS
+ *    Did not complete in time : QLA_FUNCTION_FAILED
  */
 static int
 qla2x00_eh_wait_on_command(struct scsi_cmnd *cmd)
@@ -1269,14 +1234,13 @@ static int
 qla2xxx_eh_abort(struct scsi_cmnd *cmd)
 {
        scsi_qla_host_t *vha = shost_priv(cmd->device->host);
+       DECLARE_COMPLETION_ONSTACK(comp);
        srb_t *sp;
        int ret;
        unsigned int id;
        uint64_t lun;
-       unsigned long flags;
        int rval;
        struct qla_hw_data *ha = vha->hw;
-       struct qla_qpair *qpair;
 
        if (qla2x00_isp_reg_stat(ha)) {
                ql_log(ql_log_info, vha, 0x8042,
@@ -1288,28 +1252,14 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
        if (ret != 0)
                return ret;
 
-       sp = (srb_t *) CMD_SP(cmd);
-       if (!sp)
-               return SUCCESS;
-
-       qpair = sp->qpair;
-       if (!qpair)
-               return SUCCESS;
+       sp = scsi_cmd_priv(cmd);
 
-       spin_lock_irqsave(qpair->qp_lock_ptr, flags);
-       if (sp->type != SRB_SCSI_CMD || GET_CMD_SP(sp) != cmd) {
-               /* there's a chance an interrupt could clear
-                  the ptr as part of done & free */
-               spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
+       if (sp->fcport && sp->fcport->deleted)
                return SUCCESS;
-       }
 
-       if (sp_get(sp)){
-               /* ref_count is already 0 */
-               spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
+       /* Return if the command has already finished. */
+       if (sp_get(sp))
                return SUCCESS;
-       }
-       spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
 
        id = cmd->device->id;
        lun = cmd->device->lun;
@@ -1331,6 +1281,23 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
                sp->done(sp, DID_ABORT << 16);
                ret = SUCCESS;
                break;
+       case QLA_FUNCTION_PARAMETER_ERROR: {
+               /* Wait for the command completion. */
+               uint32_t ratov = ha->r_a_tov/10;
+               uint32_t ratov_j = msecs_to_jiffies(4 * ratov * 1000);
+
+               WARN_ON_ONCE(sp->comp);
+               sp->comp = &comp;
+               if (!wait_for_completion_timeout(&comp, ratov_j)) {
+                       ql_dbg(ql_dbg_taskm, vha, 0xffff,
+                           "%s: Abort wait timer (4 * R_A_TOV[%d]) expired\n",
+                           __func__, ha->r_a_tov);
+                       ret = FAILED;
+               } else {
+                       ret = SUCCESS;
+               }
+               break;
+       }
        default:
                /*
                 * Either abort failed or abort and completion raced. Let
@@ -1340,6 +1307,8 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
                break;
        }
 
+       sp->comp = NULL;
+       atomic_dec(&sp->ref_count);
        ql_log(ql_log_info, vha, 0x801c,
            "Abort command issued nexus=%ld:%d:%llu -- %x.\n",
            vha->host_no, id, lun, ret);
@@ -1347,6 +1316,9 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
        return ret;
 }
 
+/*
+ * Returns: QLA_SUCCESS or QLA_FUNCTION_FAILED.
+ */
 int
 qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t,
        uint64_t l, enum nexus_wait_type type)
@@ -1420,6 +1392,9 @@ __qla2xxx_eh_generic_reset(char *name, enum nexus_wait_type type,
        if (err != 0)
                return err;
 
+       if (fcport->deleted)
+               return SUCCESS;
+
        ql_log(ql_log_info, vha, 0x8009,
            "%s RESET ISSUED nexus=%ld:%d:%llu cmd=%p.\n", name, vha->host_no,
            cmd->device->id, cmd->device->lun, cmd);
@@ -1534,6 +1509,9 @@ qla2xxx_eh_bus_reset(struct scsi_cmnd *cmd)
                return ret;
        ret = FAILED;
 
+       if (qla2x00_chip_is_down(vha))
+               return ret;
+
        ql_log(ql_log_info, vha, 0x8012,
            "BUS RESET ISSUED nexus=%ld:%d:%llu.\n", vha->host_no, id, lun);
 
@@ -1746,6 +1724,8 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res,
                spin_lock_irqsave(qp->qp_lock_ptr, *flags);
                sp->comp = NULL;
        }
+
+       atomic_dec(&sp->ref_count);
 }
 
 static void
@@ -1800,8 +1780,13 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res)
        int que;
        struct qla_hw_data *ha = vha->hw;
 
+       /* Continue only if initialization complete. */
+       if (!ha->base_qpair)
+               return;
        __qla2x00_abort_all_cmds(ha->base_qpair, res);
 
+       if (!ha->queue_pair_map)
+               return;
        for (que = 0; que < ha->max_qpairs; que++) {
                if (!ha->queue_pair_map[que])
                        continue;
@@ -2477,7 +2462,7 @@ static struct isp_operations qla27xx_isp_ops = {
        .config_rings           = qla24xx_config_rings,
        .reset_adapter          = qla24xx_reset_adapter,
        .nvram_config           = qla81xx_nvram_config,
-       .update_fw_options      = qla81xx_update_fw_options,
+       .update_fw_options      = qla24xx_update_fw_options,
        .load_risc              = qla81xx_load_risc,
        .pci_info_str           = qla24xx_pci_info_str,
        .fw_version_str         = qla24xx_fw_version_str,
@@ -3154,6 +3139,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
                ql_log(ql_log_fatal, base_vha, 0x003d,
                    "Failed to allocate memory for queue pointers..."
                    "aborting.\n");
+               ret = -ENODEV;
                goto probe_failed;
        }
 
@@ -3418,7 +3404,8 @@ skip_dpc:
            "QLogic %s - %s.\n", ha->model_number, ha->model_desc);
        ql_log(ql_log_info, base_vha, 0x00fc,
            "ISP%04X: %s @ %s hdma%c host#=%ld fw=%s.\n",
-           pdev->device, ha->isp_ops->pci_info_str(base_vha, pci_info),
+           pdev->device, ha->isp_ops->pci_info_str(base_vha, pci_info,
+                                                      sizeof(pci_info)),
            pci_name(pdev), ha->flags.enable_64bit_addressing ? '+' : '-',
            base_vha->host_no,
            ha->isp_ops->fw_version_str(base_vha, fw_str, sizeof(fw_str)));
@@ -4598,6 +4585,7 @@ qla2x00_free_fw_dump(struct qla_hw_data *ha)
 
        ha->fce = NULL;
        ha->fce_dma = 0;
+       ha->flags.fce_enabled = 0;
        ha->eft = NULL;
        ha->eft_dma = 0;
        ha->fw_dumped = 0;
@@ -4716,7 +4704,7 @@ qla2x00_mem_free(struct qla_hw_data *ha)
        mempool_destroy(ha->ctx_mempool);
        ha->ctx_mempool = NULL;
 
-       if (ql2xenabledif) {
+       if (ql2xenabledif && ha->dif_bundl_pool) {
                struct dsd_dma *dsd, *nxt;
 
                list_for_each_entry_safe(dsd, nxt, &ha->pool.unusable.head,
@@ -4739,8 +4727,7 @@ qla2x00_mem_free(struct qla_hw_data *ha)
                }
        }
 
-       if (ha->dif_bundl_pool)
-               dma_pool_destroy(ha->dif_bundl_pool);
+       dma_pool_destroy(ha->dif_bundl_pool);
        ha->dif_bundl_pool = NULL;
 
        qlt_mem_free(ha);
@@ -4812,7 +4799,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht,
        if (!vha->gnl.l) {
                ql_log(ql_log_fatal, vha, 0xd04a,
                    "Alloc failed for name list.\n");
-               scsi_remove_host(vha->host);
+               scsi_host_put(vha->host);
                return NULL;
        }
 
@@ -4825,7 +4812,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht,
                dma_free_coherent(&ha->pdev->dev, vha->gnl.size,
                    vha->gnl.l, vha->gnl.ldma);
                vha->gnl.l = NULL;
-               scsi_remove_host(vha->host);
+               scsi_host_put(vha->host);
                return NULL;
        }
        INIT_DELAYED_WORK(&vha->scan.scan_work, qla_scan_work_fn);
@@ -5054,8 +5041,10 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e)
                                   "%s %8phC mem alloc fail.\n",
                                   __func__, e->u.new_sess.port_name);
 
-                       if (pla)
+                       if (pla) {
+                               list_del(&pla->list);
                                kmem_cache_free(qla_tgt_plogi_cachep, pla);
+                       }
                        return;
                }
 
@@ -5086,6 +5075,7 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e)
        if (fcport) {
                fcport->id_changed = 1;
                fcport->scan_state = QLA_FCPORT_FOUND;
+               fcport->chip_reset = vha->hw->base_qpair->chip_reset;
                memcpy(fcport->node_name, e->u.new_sess.node_name, WWN_SIZE);
 
                if (pla) {
@@ -5165,8 +5155,10 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e)
 
        if (free_fcport) {
                qla2x00_free_fcport(fcport);
-               if (pla)
+               if (pla) {
+                       list_del(&pla->list);
                        kmem_cache_free(qla_tgt_plogi_cachep, pla);
+               }
        }
 }
 
@@ -5346,9 +5338,8 @@ void qla2x00_relogin(struct scsi_qla_host *vha)
                        } else {
                                if (vha->hw->current_topology != ISP_CFG_NL) {
                                        memset(&ea, 0, sizeof(ea));
-                                       ea.event = FCME_RELOGIN;
                                        ea.fcport = fcport;
-                                       qla2x00_fcport_event_handler(vha, &ea);
+                                       qla24xx_handle_relogin_event(vha, &ea);
                                } else if (vha->hw->current_topology ==
                                    ISP_CFG_NL) {
                                        fcport->login_retry--;
@@ -5686,7 +5677,6 @@ exit:
 void
 qla83xx_idc_lock(scsi_qla_host_t *base_vha, uint16_t requester_id)
 {
-       uint16_t options = (requester_id << 15) | BIT_6;
        uint32_t data;
        uint32_t lock_owner;
        struct qla_hw_data *ha = base_vha->hw;
@@ -5719,22 +5709,6 @@ retry_lock:
        }
 
        return;
-
-       /* XXX: IDC-lock implementation using access-control mbx */
-retry_lock2:
-       if (qla83xx_access_control(base_vha, options, 0, 0, NULL)) {
-               ql_dbg(ql_dbg_p3p, base_vha, 0xb072,
-                   "Failed to acquire IDC lock. retrying...\n");
-               /* Retry/Perform IDC-Lock recovery */
-               if (qla83xx_idc_lock_recovery(base_vha) == QLA_SUCCESS) {
-                       qla83xx_wait_logic();
-                       goto retry_lock2;
-               } else
-                       ql_log(ql_log_warn, base_vha, 0xb076,
-                           "IDC Lock recovery FAILED.\n");
-       }
-
-       return;
 }
 
 void
@@ -7156,6 +7130,7 @@ struct scsi_host_template qla2xxx_driver_template = {
 
        .supported_mode         = MODE_INITIATOR,
        .track_queue_depth      = 1,
+       .cmd_size               = sizeof(srb_t),
 };
 
 static const struct pci_error_handlers qla2xxx_err_handler = {
index 1eb8238..f2d5115 100644 (file)
@@ -473,22 +473,24 @@ qla24xx_read_flash_dword(struct qla_hw_data *ha, uint32_t addr, uint32_t *data)
        return QLA_FUNCTION_TIMEOUT;
 }
 
-uint32_t *
+int
 qla24xx_read_flash_data(scsi_qla_host_t *vha, uint32_t *dwptr, uint32_t faddr,
     uint32_t dwords)
 {
        ulong i;
+       int ret = QLA_SUCCESS;
        struct qla_hw_data *ha = vha->hw;
 
        /* Dword reads to flash. */
        faddr =  flash_data_addr(ha, faddr);
        for (i = 0; i < dwords; i++, faddr++, dwptr++) {
-               if (qla24xx_read_flash_dword(ha, faddr, dwptr))
+               ret = qla24xx_read_flash_dword(ha, faddr, dwptr);
+               if (ret != QLA_SUCCESS)
                        break;
                cpu_to_le32s(dwptr);
        }
 
-       return dwptr;
+       return ret;
 }
 
 static int
@@ -680,8 +682,8 @@ qla2xxx_get_flt_info(scsi_qla_host_t *vha, uint32_t flt_addr)
 
        ha->flt_region_flt = flt_addr;
        wptr = (uint16_t *)ha->flt;
-       qla24xx_read_flash_data(vha, (void *)flt, flt_addr,
-           (sizeof(struct qla_flt_header) + FLT_REGIONS_SIZE) >> 2);
+       ha->isp_ops->read_optrom(vha, (void *)flt, flt_addr << 2,
+           (sizeof(struct qla_flt_header) + FLT_REGIONS_SIZE));
 
        if (le16_to_cpu(*wptr) == 0xffff)
                goto no_flash_data;
@@ -948,11 +950,11 @@ qla2xxx_get_fdt_info(scsi_qla_host_t *vha)
        struct req_que *req = ha->req_q_map[0];
        uint16_t cnt, chksum;
        uint16_t *wptr = (void *)req->ring;
-       struct qla_fdt_layout *fdt = (void *)req->ring;
+       struct qla_fdt_layout *fdt = (struct qla_fdt_layout *)req->ring;
        uint8_t man_id, flash_id;
        uint16_t mid = 0, fid = 0;
 
-       qla24xx_read_flash_data(vha, (void *)fdt, ha->flt_region_fdt,
+       ha->isp_ops->read_optrom(vha, fdt, ha->flt_region_fdt << 2,
            OPTROM_BURST_DWORDS);
        if (le16_to_cpu(*wptr) == 0xffff)
                goto no_flash_data;
index 1c1f63b..0ffda61 100644 (file)
@@ -188,18 +188,17 @@ static inline int qlt_issue_marker(struct scsi_qla_host *vha, int vha_locked)
 
 static inline
 struct scsi_qla_host *qlt_find_host_by_d_id(struct scsi_qla_host *vha,
-       uint8_t *d_id)
+                                           be_id_t d_id)
 {
        struct scsi_qla_host *host;
-       uint32_t key = 0;
+       uint32_t key;
 
-       if ((vha->d_id.b.area == d_id[1]) && (vha->d_id.b.domain == d_id[0]) &&
-           (vha->d_id.b.al_pa == d_id[2]))
+       if (vha->d_id.b.area == d_id.area &&
+           vha->d_id.b.domain == d_id.domain &&
+           vha->d_id.b.al_pa == d_id.al_pa)
                return vha;
 
-       key  = (uint32_t)d_id[0] << 16;
-       key |= (uint32_t)d_id[1] <<  8;
-       key |= (uint32_t)d_id[2];
+       key = be_to_port_id(d_id).b24;
 
        host = btree_lookup32(&vha->hw->tgt.host_map, key);
        if (!host)
@@ -357,9 +356,9 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha,
                        ql_dbg(ql_dbg_tgt, vha, 0xe03e,
                            "qla_target(%d): Received ATIO_TYPE7 "
                            "with unknown d_id %x:%x:%x\n", vha->vp_idx,
-                           atio->u.isp24.fcp_hdr.d_id[0],
-                           atio->u.isp24.fcp_hdr.d_id[1],
-                           atio->u.isp24.fcp_hdr.d_id[2]);
+                           atio->u.isp24.fcp_hdr.d_id.domain,
+                           atio->u.isp24.fcp_hdr.d_id.area,
+                           atio->u.isp24.fcp_hdr.d_id.al_pa);
 
 
                        qlt_queue_unknown_atio(vha, atio, ha_locked);
@@ -560,10 +559,8 @@ static int qla24xx_post_nack_work(struct scsi_qla_host *vha, fc_port_t *fcport,
        return qla2x00_post_work(vha, e);
 }
 
-static
-void qla2x00_async_nack_sp_done(void *s, int res)
+static void qla2x00_async_nack_sp_done(srb_t *sp, int res)
 {
-       struct srb *sp = (struct srb *)s;
        struct scsi_qla_host *vha = sp->vha;
        unsigned long flags;
 
@@ -789,6 +786,8 @@ qlt_plogi_ack_find_add(struct scsi_qla_host *vha, port_id_t *id,
 {
        struct qlt_plogi_ack_t *pla;
 
+       lockdep_assert_held(&vha->hw->hardware_lock);
+
        list_for_each_entry(pla, &vha->plogi_ack_list, list) {
                if (pla->id.b24 == id->b24) {
                        ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0x210d,
@@ -1209,7 +1208,6 @@ static void qla24xx_chk_fcp_state(struct fc_port *sess)
                sess->logout_on_delete = 0;
                sess->logo_ack_needed = 0;
                sess->fw_login_state = DSC_LS_PORT_UNAVAIL;
-               sess->scan_state = 0;
        }
 }
 
@@ -1284,13 +1282,12 @@ static void qlt_clear_tgt_db(struct qla_tgt *tgt)
        /* At this point tgt could be already dead */
 }
 
-static int qla24xx_get_loop_id(struct scsi_qla_host *vha, const uint8_t *s_id,
+static int qla24xx_get_loop_id(struct scsi_qla_host *vha, be_id_t s_id,
        uint16_t *loop_id)
 {
        struct qla_hw_data *ha = vha->hw;
        dma_addr_t gid_list_dma;
-       struct gid_list_info *gid_list;
-       char *id_iter;
+       struct gid_list_info *gid_list, *gid;
        int res, rc, i;
        uint16_t entries;
 
@@ -1313,19 +1310,17 @@ static int qla24xx_get_loop_id(struct scsi_qla_host *vha, const uint8_t *s_id,
                goto out_free_id_list;
        }
 
-       id_iter = (char *)gid_list;
+       gid = gid_list;
        res = -ENOENT;
        for (i = 0; i < entries; i++) {
-               struct gid_list_info *gid = (struct gid_list_info *)id_iter;
-
-               if ((gid->al_pa == s_id[2]) &&
-                   (gid->area == s_id[1]) &&
-                   (gid->domain == s_id[0])) {
+               if (gid->al_pa == s_id.al_pa &&
+                   gid->area == s_id.area &&
+                   gid->domain == s_id.domain) {
                        *loop_id = le16_to_cpu(gid->loop_id);
                        res = 0;
                        break;
                }
-               id_iter += ha->gid_list_info_size;
+               gid = (void *)gid + ha->gid_list_info_size;
        }
 
 out_free_id_list:
@@ -1582,11 +1577,10 @@ static void qlt_release(struct qla_tgt *tgt)
        struct qla_qpair_hint *h;
        struct qla_hw_data *ha = vha->hw;
 
-       if ((vha->vha_tgt.qla_tgt != NULL) && !tgt->tgt_stop &&
-           !tgt->tgt_stopped)
+       if (!tgt->tgt_stop && !tgt->tgt_stopped)
                qlt_stop_phase1(tgt);
 
-       if ((vha->vha_tgt.qla_tgt != NULL) && !tgt->tgt_stopped)
+       if (!tgt->tgt_stopped)
                qlt_stop_phase2(tgt);
 
        for (i = 0; i < vha->hw->max_qpairs + 1; i++) {
@@ -1772,12 +1766,8 @@ static int qlt_build_abts_resp_iocb(struct qla_tgt_mgmt_cmd *mcmd)
        resp->fcp_hdr_le.f_ctl[1] = *p++;
        resp->fcp_hdr_le.f_ctl[2] = *p;
 
-       resp->fcp_hdr_le.d_id[0] = abts->fcp_hdr_le.s_id[0];
-       resp->fcp_hdr_le.d_id[1] = abts->fcp_hdr_le.s_id[1];
-       resp->fcp_hdr_le.d_id[2] = abts->fcp_hdr_le.s_id[2];
-       resp->fcp_hdr_le.s_id[0] = abts->fcp_hdr_le.d_id[0];
-       resp->fcp_hdr_le.s_id[1] = abts->fcp_hdr_le.d_id[1];
-       resp->fcp_hdr_le.s_id[2] = abts->fcp_hdr_le.d_id[2];
+       resp->fcp_hdr_le.d_id = abts->fcp_hdr_le.s_id;
+       resp->fcp_hdr_le.s_id = abts->fcp_hdr_le.d_id;
 
        resp->exchange_addr_to_abort = abts->exchange_addr_to_abort;
        if (mcmd->fc_tm_rsp == FCP_TMF_CMPL) {
@@ -1848,19 +1838,11 @@ static void qlt_24xx_send_abts_resp(struct qla_qpair *qpair,
        resp->fcp_hdr_le.f_ctl[1] = *p++;
        resp->fcp_hdr_le.f_ctl[2] = *p;
        if (ids_reversed) {
-               resp->fcp_hdr_le.d_id[0] = abts->fcp_hdr_le.d_id[0];
-               resp->fcp_hdr_le.d_id[1] = abts->fcp_hdr_le.d_id[1];
-               resp->fcp_hdr_le.d_id[2] = abts->fcp_hdr_le.d_id[2];
-               resp->fcp_hdr_le.s_id[0] = abts->fcp_hdr_le.s_id[0];
-               resp->fcp_hdr_le.s_id[1] = abts->fcp_hdr_le.s_id[1];
-               resp->fcp_hdr_le.s_id[2] = abts->fcp_hdr_le.s_id[2];
+               resp->fcp_hdr_le.d_id = abts->fcp_hdr_le.d_id;
+               resp->fcp_hdr_le.s_id = abts->fcp_hdr_le.s_id;
        } else {
-               resp->fcp_hdr_le.d_id[0] = abts->fcp_hdr_le.s_id[0];
-               resp->fcp_hdr_le.d_id[1] = abts->fcp_hdr_le.s_id[1];
-               resp->fcp_hdr_le.d_id[2] = abts->fcp_hdr_le.s_id[2];
-               resp->fcp_hdr_le.s_id[0] = abts->fcp_hdr_le.d_id[0];
-               resp->fcp_hdr_le.s_id[1] = abts->fcp_hdr_le.d_id[1];
-               resp->fcp_hdr_le.s_id[2] = abts->fcp_hdr_le.d_id[2];
+               resp->fcp_hdr_le.d_id = abts->fcp_hdr_le.s_id;
+               resp->fcp_hdr_le.s_id = abts->fcp_hdr_le.d_id;
        }
        resp->exchange_addr_to_abort = abts->exchange_addr_to_abort;
        if (status == FCP_TMF_CMPL) {
@@ -1927,18 +1909,14 @@ static void qlt_24xx_retry_term_exchange(struct scsi_qla_host *vha,
        tmp = (CTIO7_FLAGS_STATUS_MODE_1 | CTIO7_FLAGS_TERMINATE);
 
        if (mcmd) {
-               ctio->initiator_id[0] = entry->fcp_hdr_le.s_id[0];
-               ctio->initiator_id[1] = entry->fcp_hdr_le.s_id[1];
-               ctio->initiator_id[2] = entry->fcp_hdr_le.s_id[2];
+               ctio->initiator_id = entry->fcp_hdr_le.s_id;
 
                if (mcmd->flags & QLA24XX_MGMT_ABORT_IO_ATTR_VALID)
                        tmp |= (mcmd->abort_io_attr << 9);
                else if (qpair->retry_term_cnt & 1)
                        tmp |= (0x4 << 9);
        } else {
-               ctio->initiator_id[0] = entry->fcp_hdr_le.d_id[0];
-               ctio->initiator_id[1] = entry->fcp_hdr_le.d_id[1];
-               ctio->initiator_id[2] = entry->fcp_hdr_le.d_id[2];
+               ctio->initiator_id = entry->fcp_hdr_le.d_id;
 
                if (qpair->retry_term_cnt & 1)
                        tmp |= (0x4 << 9);
@@ -1972,8 +1950,7 @@ static void qlt_24xx_retry_term_exchange(struct scsi_qla_host *vha,
  * XXX does not go through the list of other port (which may have cmds
  *     for the same lun)
  */
-static void abort_cmds_for_lun(struct scsi_qla_host *vha,
-                               u64 lun, uint8_t *s_id)
+static void abort_cmds_for_lun(struct scsi_qla_host *vha, u64 lun, be_id_t s_id)
 {
        struct qla_tgt_sess_op *op;
        struct qla_tgt_cmd *cmd;
@@ -2149,7 +2126,7 @@ static void qlt_24xx_handle_abts(struct scsi_qla_host *vha,
        struct qla_hw_data *ha = vha->hw;
        struct fc_port *sess;
        uint32_t tag = abts->exchange_addr_to_abort;
-       uint8_t s_id[3];
+       be_id_t s_id;
        int rc;
        unsigned long flags;
 
@@ -2173,13 +2150,11 @@ static void qlt_24xx_handle_abts(struct scsi_qla_host *vha,
 
        ql_dbg(ql_dbg_tgt_mgt, vha, 0xf011,
            "qla_target(%d): task abort (s_id=%x:%x:%x, "
-           "tag=%d, param=%x)\n", vha->vp_idx, abts->fcp_hdr_le.s_id[2],
-           abts->fcp_hdr_le.s_id[1], abts->fcp_hdr_le.s_id[0], tag,
+           "tag=%d, param=%x)\n", vha->vp_idx, abts->fcp_hdr_le.s_id.domain,
+           abts->fcp_hdr_le.s_id.area, abts->fcp_hdr_le.s_id.al_pa, tag,
            le32_to_cpu(abts->fcp_hdr_le.parameter));
 
-       s_id[0] = abts->fcp_hdr_le.s_id[2];
-       s_id[1] = abts->fcp_hdr_le.s_id[1];
-       s_id[2] = abts->fcp_hdr_le.s_id[0];
+       s_id = le_id_to_be(abts->fcp_hdr_le.s_id);
 
        spin_lock_irqsave(&ha->tgt.sess_lock, flags);
        sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, s_id);
@@ -2243,9 +2218,7 @@ static void qlt_24xx_send_task_mgmt_ctio(struct qla_qpair *qpair,
        ctio->nport_handle = mcmd->sess->loop_id;
        ctio->timeout = cpu_to_le16(QLA_TGT_TIMEOUT);
        ctio->vp_index = ha->vp_idx;
-       ctio->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2];
-       ctio->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1];
-       ctio->initiator_id[2] = atio->u.isp24.fcp_hdr.s_id[0];
+       ctio->initiator_id = be_id_to_le(atio->u.isp24.fcp_hdr.s_id);
        ctio->exchange_addr = atio->u.isp24.exchange_addr;
        temp = (atio->u.isp24.attr << 9)|
                CTIO7_FLAGS_STATUS_MODE_1 | CTIO7_FLAGS_SEND_STATUS;
@@ -2302,9 +2275,7 @@ void qlt_send_resp_ctio(struct qla_qpair *qpair, struct qla_tgt_cmd *cmd,
        ctio->nport_handle = cmd->sess->loop_id;
        ctio->timeout = cpu_to_le16(QLA_TGT_TIMEOUT);
        ctio->vp_index = vha->vp_idx;
-       ctio->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2];
-       ctio->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1];
-       ctio->initiator_id[2] = atio->u.isp24.fcp_hdr.s_id[0];
+       ctio->initiator_id = be_id_to_le(atio->u.isp24.fcp_hdr.s_id);
        ctio->exchange_addr = atio->u.isp24.exchange_addr;
        temp = (atio->u.isp24.attr << 9) |
            CTIO7_FLAGS_STATUS_MODE_1 | CTIO7_FLAGS_SEND_STATUS;
@@ -2605,9 +2576,7 @@ static int qlt_24xx_build_ctio_pkt(struct qla_qpair *qpair,
        pkt->handle |= CTIO_COMPLETION_HANDLE_MARK;
        pkt->nport_handle = cpu_to_le16(prm->cmd->loop_id);
        pkt->timeout = cpu_to_le16(QLA_TGT_TIMEOUT);
-       pkt->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2];
-       pkt->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1];
-       pkt->initiator_id[2] = atio->u.isp24.fcp_hdr.s_id[0];
+       pkt->initiator_id = be_id_to_le(atio->u.isp24.fcp_hdr.s_id);
        pkt->exchange_addr = atio->u.isp24.exchange_addr;
        temp = atio->u.isp24.attr << 9;
        pkt->u.status0.flags |= cpu_to_le16(temp);
@@ -3120,9 +3089,7 @@ qlt_build_ctio_crc2_pkt(struct qla_qpair *qpair, struct qla_tgt_prm *prm)
        pkt->handle |= CTIO_COMPLETION_HANDLE_MARK;
        pkt->nport_handle = cpu_to_le16(prm->cmd->loop_id);
        pkt->timeout = cpu_to_le16(QLA_TGT_TIMEOUT);
-       pkt->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2];
-       pkt->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1];
-       pkt->initiator_id[2] = atio->u.isp24.fcp_hdr.s_id[0];
+       pkt->initiator_id = be_id_to_le(atio->u.isp24.fcp_hdr.s_id);
        pkt->exchange_addr   = atio->u.isp24.exchange_addr;
 
        /* silence compile warning */
@@ -3164,7 +3131,7 @@ qlt_build_ctio_crc2_pkt(struct qla_qpair *qpair, struct qla_tgt_prm *prm)
        pkt->crc_context_len = CRC_CONTEXT_LEN_FW;
 
        if (!bundling) {
-               cur_dsd = &crc_ctx_pkt->u.nobundling.data_dsd;
+               cur_dsd = &crc_ctx_pkt->u.nobundling.data_dsd[0];
        } else {
                /*
                 * Configure Bundling if we need to fetch interlaving
@@ -3174,7 +3141,7 @@ qlt_build_ctio_crc2_pkt(struct qla_qpair *qpair, struct qla_tgt_prm *prm)
                crc_ctx_pkt->u.bundling.dif_byte_count = cpu_to_le32(dif_bytes);
                crc_ctx_pkt->u.bundling.dseg_count =
                        cpu_to_le16(prm->tot_dsds - prm->prot_seg_cnt);
-               cur_dsd = &crc_ctx_pkt->u.bundling.data_dsd;
+               cur_dsd = &crc_ctx_pkt->u.bundling.data_dsd[0];
        }
 
        /* Finish the common fields of CRC pkt */
@@ -3239,7 +3206,8 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
        if (!qpair->fw_started || (cmd->reset_count != qpair->chip_reset) ||
            (cmd->sess && cmd->sess->deleted)) {
                cmd->state = QLA_TGT_STATE_PROCESSED;
-               return 0;
+               res = 0;
+               goto free;
        }
 
        ql_dbg_qp(ql_dbg_tgt, qpair, 0xe018,
@@ -3250,9 +3218,8 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
 
        res = qlt_pre_xmit_response(cmd, &prm, xmit_type, scsi_status,
            &full_req_cnt);
-       if (unlikely(res != 0)) {
-               return res;
-       }
+       if (unlikely(res != 0))
+               goto free;
 
        spin_lock_irqsave(qpair->qp_lock_ptr, flags);
 
@@ -3272,7 +3239,8 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
                        vha->flags.online, qla2x00_reset_active(vha),
                        cmd->reset_count, qpair->chip_reset);
                spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
-               return 0;
+               res = 0;
+               goto free;
        }
 
        /* Does F/W have an IOCBs for this request */
@@ -3375,6 +3343,8 @@ out_unmap_unlock:
        qlt_unmap_sg(vha, cmd);
        spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
 
+free:
+       vha->hw->tgt.tgt_ops->free_cmd(cmd);
        return res;
 }
 EXPORT_SYMBOL(qlt_xmit_response);
@@ -3672,9 +3642,7 @@ static int __qlt_send_term_exchange(struct qla_qpair *qpair,
        ctio24->nport_handle = CTIO7_NHANDLE_UNRECOGNIZED;
        ctio24->timeout = cpu_to_le16(QLA_TGT_TIMEOUT);
        ctio24->vp_index = vha->vp_idx;
-       ctio24->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2];
-       ctio24->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1];
-       ctio24->initiator_id[2] = atio->u.isp24.fcp_hdr.s_id[0];
+       ctio24->initiator_id = be_id_to_le(atio->u.isp24.fcp_hdr.s_id);
        ctio24->exchange_addr = atio->u.isp24.exchange_addr;
        temp = (atio->u.isp24.attr << 9) | CTIO7_FLAGS_STATUS_MODE_1 |
                CTIO7_FLAGS_TERMINATE;
@@ -4107,8 +4075,6 @@ static inline int qlt_get_fcp_task_attr(struct scsi_qla_host *vha,
        return fcp_task_attr;
 }
 
-static struct fc_port *qlt_make_local_sess(struct scsi_qla_host *,
-                                       uint8_t *);
 /*
  * Process context for I/O path into tcm_qla2xxx code
  */
@@ -4352,9 +4318,7 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha,
                return -ENODEV;
        }
 
-       id.b.al_pa = atio->u.isp24.fcp_hdr.s_id[2];
-       id.b.area = atio->u.isp24.fcp_hdr.s_id[1];
-       id.b.domain = atio->u.isp24.fcp_hdr.s_id[0];
+       id = be_to_port_id(atio->u.isp24.fcp_hdr.s_id);
        if (IS_SW_RESV_ADDR(id))
                return -EBUSY;
 
@@ -4716,6 +4680,8 @@ static int qlt_handle_login(struct scsi_qla_host *vha,
        struct qlt_plogi_ack_t *pla;
        unsigned long flags;
 
+       lockdep_assert_held(&vha->hw->hardware_lock);
+
        wwn = wwn_to_u64(iocb->u.isp24.port_name);
 
        port_id.b.domain = iocb->u.isp24.port_id[2];
@@ -4799,8 +4765,10 @@ static int qlt_handle_login(struct scsi_qla_host *vha,
                            __func__, sess->port_name, sec);
                }
 
-               if (!conflict_sess)
+               if (!conflict_sess) {
+                       list_del(&pla->list);
                        kmem_cache_free(qla_tgt_plogi_cachep, pla);
+               }
 
                qlt_send_term_imm_notif(vha, iocb, 1);
                goto out;
@@ -4889,6 +4857,8 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
        int res = 0;
        unsigned long flags;
 
+       lockdep_assert_held(&ha->hardware_lock);
+
        wwn = wwn_to_u64(iocb->u.isp24.port_name);
 
        port_id.b.domain = iocb->u.isp24.port_id[2];
@@ -5165,6 +5135,8 @@ static void qlt_handle_imm_notify(struct scsi_qla_host *vha,
        int send_notify_ack = 1;
        uint16_t status;
 
+       lockdep_assert_held(&ha->hardware_lock);
+
        status = le16_to_cpu(iocb->u.isp2x.status);
        switch (status) {
        case IMM_NTFY_LIP_RESET:
@@ -5302,10 +5274,7 @@ static int __qlt_send_busy(struct qla_qpair *qpair,
        u16 temp;
        port_id_t id;
 
-       id.b.al_pa = atio->u.isp24.fcp_hdr.s_id[2];
-       id.b.area = atio->u.isp24.fcp_hdr.s_id[1];
-       id.b.domain = atio->u.isp24.fcp_hdr.s_id[0];
-       id.b.rsvd_1 = 0;
+       id = be_to_port_id(atio->u.isp24.fcp_hdr.s_id);
 
        spin_lock_irqsave(&ha->tgt.sess_lock, flags);
        sess = qla2x00_find_fcport_by_nportid(vha, &id, 1);
@@ -5333,9 +5302,7 @@ static int __qlt_send_busy(struct qla_qpair *qpair,
        ctio24->nport_handle = sess->loop_id;
        ctio24->timeout = cpu_to_le16(QLA_TGT_TIMEOUT);
        ctio24->vp_index = vha->vp_idx;
-       ctio24->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2];
-       ctio24->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1];
-       ctio24->initiator_id[2] = atio->u.isp24.fcp_hdr.s_id[0];
+       ctio24->initiator_id = be_id_to_le(atio->u.isp24.fcp_hdr.s_id);
        ctio24->exchange_addr = atio->u.isp24.exchange_addr;
        temp = (atio->u.isp24.attr << 9) |
                CTIO7_FLAGS_STATUS_MODE_1 | CTIO7_FLAGS_SEND_STATUS |
@@ -5767,7 +5734,7 @@ static void qlt_handle_abts_completion(struct scsi_qla_host *vha,
                            entry->error_subcode2);
                        ha->tgt.tgt_ops->free_mcmd(mcmd);
                }
-       } else {
+       } else if (mcmd) {
                ha->tgt.tgt_ops->free_mcmd(mcmd);
        }
 }
@@ -6121,21 +6088,21 @@ static fc_port_t *qlt_get_port_database(struct scsi_qla_host *vha,
 
 /* Must be called under tgt_mutex */
 static struct fc_port *qlt_make_local_sess(struct scsi_qla_host *vha,
-       uint8_t *s_id)
+                                          be_id_t s_id)
 {
        struct fc_port *sess = NULL;
        fc_port_t *fcport = NULL;
        int rc, global_resets;
        uint16_t loop_id = 0;
 
-       if ((s_id[0] == 0xFF) && (s_id[1] == 0xFC)) {
+       if (s_id.domain == 0xFF && s_id.area == 0xFC) {
                /*
                 * This is Domain Controller, so it should be
                 * OK to drop SCSI commands from it.
                 */
                ql_dbg(ql_dbg_tgt_mgt, vha, 0xf042,
                    "Unable to find initiator with S_ID %x:%x:%x",
-                   s_id[0], s_id[1], s_id[2]);
+                   s_id.domain, s_id.area, s_id.al_pa);
                return NULL;
        }
 
@@ -6152,13 +6119,12 @@ retry:
                ql_log(ql_log_info, vha, 0xf071,
                    "qla_target(%d): Unable to find "
                    "initiator with S_ID %x:%x:%x",
-                   vha->vp_idx, s_id[0], s_id[1],
-                   s_id[2]);
+                   vha->vp_idx, s_id.domain, s_id.area, s_id.al_pa);
 
                if (rc == -ENOENT) {
                        qlt_port_logo_t logo;
 
-                       sid_to_portid(s_id, &logo.id);
+                       logo.id = be_to_port_id(s_id);
                        logo.cmd_count = 1;
                        qlt_send_first_logo(vha, &logo);
                }
@@ -6197,8 +6163,7 @@ static void qlt_abort_work(struct qla_tgt *tgt,
        struct qla_hw_data *ha = vha->hw;
        struct fc_port *sess = NULL;
        unsigned long flags = 0, flags2 = 0;
-       uint32_t be_s_id;
-       uint8_t s_id[3];
+       be_id_t s_id;
        int rc;
 
        spin_lock_irqsave(&ha->tgt.sess_lock, flags2);
@@ -6206,12 +6171,9 @@ static void qlt_abort_work(struct qla_tgt *tgt,
        if (tgt->tgt_stop)
                goto out_term2;
 
-       s_id[0] = prm->abts.fcp_hdr_le.s_id[2];
-       s_id[1] = prm->abts.fcp_hdr_le.s_id[1];
-       s_id[2] = prm->abts.fcp_hdr_le.s_id[0];
+       s_id = le_id_to_be(prm->abts.fcp_hdr_le.s_id);
 
-       sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha,
-           (unsigned char *)&be_s_id);
+       sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, s_id);
        if (!sess) {
                spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
 
@@ -6248,9 +6210,6 @@ static void qlt_abort_work(struct qla_tgt *tgt,
 out_term2:
        spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
 
-       if (sess)
-               ha->tgt.tgt_ops->put_sess(sess);
-
 out_term:
        spin_lock_irqsave(&ha->hardware_lock, flags);
        qlt_24xx_send_abts_resp(ha->base_qpair, &prm->abts,
@@ -6266,7 +6225,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
        struct qla_hw_data *ha = vha->hw;
        struct fc_port *sess;
        unsigned long flags;
-       uint8_t *s_id = NULL; /* to hide compiler warnings */
+       be_id_t s_id;
        int rc;
        u64 unpacked_lun;
        int fn;
@@ -6495,22 +6454,10 @@ void qlt_remove_target_resources(struct qla_hw_data *ha)
 static void qlt_lport_dump(struct scsi_qla_host *vha, u64 wwpn,
        unsigned char *b)
 {
-       int i;
-
-       pr_debug("qla2xxx HW vha->node_name: ");
-       for (i = 0; i < WWN_SIZE; i++)
-               pr_debug("%02x ", vha->node_name[i]);
-       pr_debug("\n");
-       pr_debug("qla2xxx HW vha->port_name: ");
-       for (i = 0; i < WWN_SIZE; i++)
-               pr_debug("%02x ", vha->port_name[i]);
-       pr_debug("\n");
-
-       pr_debug("qla2xxx passed configfs WWPN: ");
+       pr_debug("qla2xxx HW vha->node_name: %8phC\n", vha->node_name);
+       pr_debug("qla2xxx HW vha->port_name: %8phC\n", vha->port_name);
        put_unaligned_be64(wwpn, b);
-       for (i = 0; i < WWN_SIZE; i++)
-               pr_debug("%02x ", b[i]);
-       pr_debug("\n");
+       pr_debug("qla2xxx passed configfs WWPN: %8phC\n", b);
 }
 
 /**
@@ -6671,6 +6618,8 @@ qlt_enable_vha(struct scsi_qla_host *vha)
        if (vha->qlini_mode == QLA2XXX_INI_MODE_ENABLED)
                return;
 
+       if (ha->tgt.num_act_qpairs > ha->max_qpairs)
+               ha->tgt.num_act_qpairs = ha->max_qpairs;
        spin_lock_irqsave(&ha->hardware_lock, flags);
        tgt->tgt_stopped = 0;
        qlt_set_mode(vha);
@@ -6685,7 +6634,8 @@ qlt_enable_vha(struct scsi_qla_host *vha)
        } else {
                set_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags);
                qla2xxx_wake_dpc(base_vha);
-               qla2x00_wait_for_hba_online(base_vha);
+               WARN_ON_ONCE(qla2x00_wait_for_hba_online(base_vha) !=
+                            QLA_SUCCESS);
        }
        mutex_unlock(&ha->optrom_mutex);
 }
@@ -6716,7 +6666,9 @@ static void qlt_disable_vha(struct scsi_qla_host *vha)
 
        set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
        qla2xxx_wake_dpc(vha);
-       qla2x00_wait_for_hba_online(vha);
+       if (qla2x00_wait_for_hba_online(vha) != QLA_SUCCESS)
+               ql_dbg(ql_dbg_tgt, vha, 0xe081,
+                      "qla2x00_wait_for_hba_online() failed\n");
 }
 
 /*
@@ -6815,7 +6767,7 @@ qlt_24xx_process_atio_queue(struct scsi_qla_host *vha, uint8_t ha_locked)
                         */
                        ql_log(ql_log_warn, vha, 0xd03c,
                            "corrupted fcp frame SID[%3phN] OXID[%04x] EXCG[%x] %64phN\n",
-                           pkt->u.isp24.fcp_hdr.s_id,
+                           &pkt->u.isp24.fcp_hdr.s_id,
                            be16_to_cpu(pkt->u.isp24.fcp_hdr.ox_id),
                            le32_to_cpu(pkt->u.isp24.exchange_addr), pkt);
 
index b8d244f..d006f0a 100644 (file)
@@ -247,9 +247,9 @@ struct ctio_to_2xxx {
 
 struct fcp_hdr {
        uint8_t  r_ctl;
-       uint8_t  d_id[3];
+       be_id_t  d_id;
        uint8_t  cs_ctl;
-       uint8_t  s_id[3];
+       be_id_t  s_id;
        uint8_t  type;
        uint8_t  f_ctl[3];
        uint8_t  seq_id;
@@ -261,9 +261,9 @@ struct fcp_hdr {
 } __packed;
 
 struct fcp_hdr_le {
-       uint8_t  d_id[3];
+       le_id_t  d_id;
        uint8_t  r_ctl;
-       uint8_t  s_id[3];
+       le_id_t  s_id;
        uint8_t  cs_ctl;
        uint8_t  f_ctl[3];
        uint8_t  type;
@@ -402,7 +402,7 @@ struct ctio7_to_24xx {
        uint16_t dseg_count;                /* Data segment count. */
        uint8_t  vp_index;
        uint8_t  add_flags;
-       uint8_t  initiator_id[3];
+       le_id_t  initiator_id;
        uint8_t  reserved;
        uint32_t exchange_addr;
        union {
@@ -498,7 +498,7 @@ struct ctio_crc2_to_fw {
        uint8_t  add_flags;             /* additional flags */
 #define CTIO_CRC2_AF_DIF_DSD_ENA BIT_3
 
-       uint8_t  initiator_id[3];       /* initiator ID */
+       le_id_t  initiator_id;          /* initiator ID */
        uint8_t  reserved1;
        uint32_t exchange_addr;         /* rcv exchange address */
        uint16_t reserved2;
@@ -682,7 +682,7 @@ struct qla_tgt_func_tmpl {
        struct fc_port *(*find_sess_by_loop_id)(struct scsi_qla_host *,
                                                const uint16_t);
        struct fc_port *(*find_sess_by_s_id)(struct scsi_qla_host *,
-                                               const uint8_t *);
+                                            const be_id_t);
        void (*clear_nacl_from_fcport_map)(struct fc_port *);
        void (*put_sess)(struct fc_port *);
        void (*shutdown_sess)(struct fc_port *);
@@ -912,7 +912,7 @@ struct qla_tgt_cmd {
        uint8_t scsi_status, sense_key, asc, ascq;
 
        struct crc_context *ctx;
-       uint8_t         *cdb;
+       const uint8_t   *cdb;
        uint64_t        lba;
        uint16_t        a_guard, e_guard, a_app_tag, e_app_tag;
        uint32_t        a_ref_tag, e_ref_tag;
@@ -1030,22 +1030,11 @@ static inline bool qla_dual_mode_enabled(struct scsi_qla_host *ha)
        return (ha->host->active_mode == MODE_DUAL);
 }
 
-static inline uint32_t sid_to_key(const uint8_t *s_id)
+static inline uint32_t sid_to_key(const be_id_t s_id)
 {
-       uint32_t key;
-
-       key = (((unsigned long)s_id[0] << 16) |
-              ((unsigned long)s_id[1] << 8) |
-              (unsigned long)s_id[2]);
-       return key;
-}
-
-static inline void sid_to_portid(const uint8_t *s_id, port_id_t *p)
-{
-       memset(p, 0, sizeof(*p));
-       p->b.domain = s_id[0];
-       p->b.area = s_id[1];
-       p->b.al_pa = s_id[2];
+       return s_id.domain << 16 |
+               s_id.area << 8 |
+               s_id.al_pa;
 }
 
 /*
index de696a0..294d77c 100644 (file)
@@ -429,7 +429,7 @@ qla27xx_fwdt_entry_t266(struct scsi_qla_host *vha,
        ql_dbg(ql_dbg_misc, vha, 0xd20a,
            "%s: reset risc [%lx]\n", __func__, *len);
        if (buf)
-               qla24xx_soft_reset(vha->hw);
+               WARN_ON_ONCE(qla24xx_soft_reset(vha->hw) != QLA_SUCCESS);
 
        return qla27xx_next_entry(ent);
 }
@@ -860,8 +860,9 @@ qla27xx_driver_info(struct qla27xx_fwdt_template *tmp)
 {
        uint8_t v[] = { 0, 0, 0, 0, 0, 0 };
 
-       sscanf(qla2x00_version_str, "%hhu.%hhu.%hhu.%hhu.%hhu.%hhu",
-           v+0, v+1, v+2, v+3, v+4, v+5);
+       WARN_ON_ONCE(sscanf(qla2x00_version_str,
+                           "%hhu.%hhu.%hhu.%hhu.%hhu.%hhu",
+                           v+0, v+1, v+2, v+3, v+4, v+5) != 6);
 
        tmp->driver_info[0] = v[3] << 24 | v[2] << 16 | v[1] << 8 | v[0];
        tmp->driver_info[1] = v[5] << 8 | v[4];
index cd6bdf7..a8f2a95 100644 (file)
@@ -7,7 +7,7 @@
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "10.01.00.16-k"
+#define QLA2XXX_VERSION      "10.01.00.19-k"
 
 #define QLA_DRIVER_MAJOR_VER   10
 #define QLA_DRIVER_MINOR_VER   1
index d15412d..042a243 100644 (file)
@@ -620,6 +620,7 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd)
 {
        struct qla_tgt_cmd *cmd = container_of(se_cmd,
                                struct qla_tgt_cmd, se_cmd);
+       struct scsi_qla_host *vha = cmd->vha;
 
        if (cmd->aborted) {
                /* Cmd can loop during Q-full.  tcm_qla2xxx_aborted_task
@@ -632,6 +633,7 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd)
                        cmd->se_cmd.transport_state,
                        cmd->se_cmd.t_state,
                        cmd->se_cmd.se_cmd_flags);
+               vha->hw->tgt.tgt_ops->free_cmd(cmd);
                return 0;
        }
 
@@ -659,6 +661,7 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd)
 {
        struct qla_tgt_cmd *cmd = container_of(se_cmd,
                                struct qla_tgt_cmd, se_cmd);
+       struct scsi_qla_host *vha = cmd->vha;
        int xmit_type = QLA_TGT_XMIT_STATUS;
 
        if (cmd->aborted) {
@@ -672,6 +675,7 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd)
                    cmd, kref_read(&cmd->se_cmd.cmd_kref),
                    cmd->se_cmd.transport_state, cmd->se_cmd.t_state,
                    cmd->se_cmd.se_cmd_flags);
+               vha->hw->tgt.tgt_ops->free_cmd(cmd);
                return 0;
        }
        cmd->bufflen = se_cmd->data_length;
@@ -1136,9 +1140,8 @@ static struct se_portal_group *tcm_qla2xxx_npiv_make_tpg(struct se_wwn *wwn,
 /*
  * Expected to be called with struct qla_hw_data->tgt.sess_lock held
  */
-static struct fc_port *tcm_qla2xxx_find_sess_by_s_id(
-       scsi_qla_host_t *vha,
-       const uint8_t *s_id)
+static struct fc_port *tcm_qla2xxx_find_sess_by_s_id(scsi_qla_host_t *vha,
+                                                    const be_id_t s_id)
 {
        struct tcm_qla2xxx_lport *lport;
        struct se_node_acl *se_nacl;
@@ -1181,7 +1184,7 @@ static void tcm_qla2xxx_set_sess_by_s_id(
        struct tcm_qla2xxx_nacl *nacl,
        struct se_session *se_sess,
        struct fc_port *fc_port,
-       uint8_t *s_id)
+       be_id_t s_id)
 {
        u32 key;
        void *slot;
@@ -1348,14 +1351,9 @@ static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *lport,
                struct tcm_qla2xxx_nacl *nacl, struct fc_port *sess)
 {
        struct se_session *se_sess = sess->se_sess;
-       unsigned char be_sid[3];
-
-       be_sid[0] = sess->d_id.b.domain;
-       be_sid[1] = sess->d_id.b.area;
-       be_sid[2] = sess->d_id.b.al_pa;
 
        tcm_qla2xxx_set_sess_by_s_id(lport, NULL, nacl, se_sess,
-                               sess, be_sid);
+                                    sess, port_id_to_be_id(sess->d_id));
        tcm_qla2xxx_set_sess_by_loop_id(lport, NULL, nacl, se_sess,
                                sess, sess->loop_id);
 }
@@ -1401,19 +1399,14 @@ static int tcm_qla2xxx_session_cb(struct se_portal_group *se_tpg,
        struct fc_port *qlat_sess = p;
        uint16_t loop_id = qlat_sess->loop_id;
        unsigned long flags;
-       unsigned char be_sid[3];
-
-       be_sid[0] = qlat_sess->d_id.b.domain;
-       be_sid[1] = qlat_sess->d_id.b.area;
-       be_sid[2] = qlat_sess->d_id.b.al_pa;
 
        /*
         * And now setup se_nacl and session pointers into HW lport internal
         * mappings for fabric S_ID and LOOP_ID.
         */
        spin_lock_irqsave(&ha->tgt.sess_lock, flags);
-       tcm_qla2xxx_set_sess_by_s_id(lport, se_nacl, nacl,
-                                    se_sess, qlat_sess, be_sid);
+       tcm_qla2xxx_set_sess_by_s_id(lport, se_nacl, nacl, se_sess, qlat_sess,
+                                    port_id_to_be_id(qlat_sess->d_id));
        tcm_qla2xxx_set_sess_by_loop_id(lport, se_nacl, nacl,
                                        se_sess, qlat_sess, loop_id);
        spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
index 9335849..d539bee 100644 (file)
@@ -200,10 +200,15 @@ static int qlogicpti_mbox_command(struct qlogicpti *qpti, u_short param[], int f
        /* Write mailbox command registers. */
        switch (mbox_param[param[0]] >> 4) {
        case 6: sbus_writew(param[5], qpti->qregs + MBOX5);
+               /* Fall through */
        case 5: sbus_writew(param[4], qpti->qregs + MBOX4);
+               /* Fall through */
        case 4: sbus_writew(param[3], qpti->qregs + MBOX3);
+               /* Fall through */
        case 3: sbus_writew(param[2], qpti->qregs + MBOX2);
+               /* Fall through */
        case 2: sbus_writew(param[1], qpti->qregs + MBOX1);
+               /* Fall through */
        case 1: sbus_writew(param[0], qpti->qregs + MBOX0);
        }
 
@@ -254,10 +259,15 @@ static int qlogicpti_mbox_command(struct qlogicpti *qpti, u_short param[], int f
        /* Read back output parameters. */
        switch (mbox_param[param[0]] & 0xf) {
        case 6: param[5] = sbus_readw(qpti->qregs + MBOX5);
+               /* Fall through */
        case 5: param[4] = sbus_readw(qpti->qregs + MBOX4);
+               /* Fall through */
        case 4: param[3] = sbus_readw(qpti->qregs + MBOX3);
+               /* Fall through */
        case 3: param[2] = sbus_readw(qpti->qregs + MBOX2);
+               /* Fall through */
        case 2: param[1] = sbus_readw(qpti->qregs + MBOX1);
+               /* Fall through */
        case 1: param[0] = sbus_readw(qpti->qregs + MBOX0);
        }
 
index c5a8756..c19ea7a 100644 (file)
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/bitops.h>
 #include <linux/seq_file.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_dbg.h>
@@ -18,9 +19,7 @@ static int scsi_flags_show(struct seq_file *m, const unsigned long flags,
        bool sep = false;
        int i;
 
-       for (i = 0; i < sizeof(flags) * BITS_PER_BYTE; i++) {
-               if (!(flags & BIT(i)))
-                       continue;
+       for_each_set_bit(i, &flags, BITS_PER_LONG) {
                if (sep)
                        seq_puts(m, "|");
                sep = true;
index 4e88d7e..dc210b9 100644 (file)
@@ -1678,10 +1678,11 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
                blk_mq_start_request(req);
        }
 
+       cmd->flags &= SCMD_PRESERVED_FLAGS;
        if (sdev->simple_tags)
                cmd->flags |= SCMD_TAGGED;
-       else
-               cmd->flags &= ~SCMD_TAGGED;
+       if (bd->last)
+               cmd->flags |= SCMD_LAST;
 
        scsi_init_cmd_errh(cmd);
        cmd->scsi_done = scsi_mq_done;
@@ -1821,10 +1822,37 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(__scsi_init_queue);
 
+static const struct blk_mq_ops scsi_mq_ops_no_commit = {
+       .get_budget     = scsi_mq_get_budget,
+       .put_budget     = scsi_mq_put_budget,
+       .queue_rq       = scsi_queue_rq,
+       .complete       = scsi_softirq_done,
+       .timeout        = scsi_timeout,
+#ifdef CONFIG_BLK_DEBUG_FS
+       .show_rq        = scsi_show_rq,
+#endif
+       .init_request   = scsi_mq_init_request,
+       .exit_request   = scsi_mq_exit_request,
+       .initialize_rq_fn = scsi_initialize_rq,
+       .busy           = scsi_mq_lld_busy,
+       .map_queues     = scsi_map_queues,
+};
+
+
+static void scsi_commit_rqs(struct blk_mq_hw_ctx *hctx)
+{
+       struct request_queue *q = hctx->queue;
+       struct scsi_device *sdev = q->queuedata;
+       struct Scsi_Host *shost = sdev->host;
+
+       shost->hostt->commit_rqs(shost, hctx->queue_num);
+}
+
 static const struct blk_mq_ops scsi_mq_ops = {
        .get_budget     = scsi_mq_get_budget,
        .put_budget     = scsi_mq_put_budget,
        .queue_rq       = scsi_queue_rq,
+       .commit_rqs     = scsi_commit_rqs,
        .complete       = scsi_softirq_done,
        .timeout        = scsi_timeout,
 #ifdef CONFIG_BLK_DEBUG_FS
@@ -1861,7 +1889,10 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
                        sizeof(struct scatterlist) * SCSI_INLINE_PROT_SG_CNT;
 
        memset(&shost->tag_set, 0, sizeof(shost->tag_set));
-       shost->tag_set.ops = &scsi_mq_ops;
+       if (shost->hostt->commit_rqs)
+               shost->tag_set.ops = &scsi_mq_ops;
+       else
+               shost->tag_set.ops = &scsi_mq_ops_no_commit;
        shost->tag_set.nr_hw_queues = shost->nr_hw_queues ? : 1;
        shost->tag_set.queue_depth = shost->can_queue;
        shost->tag_set.cmd_size = cmd_size;
@@ -2691,6 +2722,14 @@ void scsi_start_queue(struct scsi_device *sdev)
 int scsi_internal_device_unblock_nowait(struct scsi_device *sdev,
                                        enum scsi_device_state new_state)
 {
+       switch (new_state) {
+       case SDEV_RUNNING:
+       case SDEV_TRANSPORT_OFFLINE:
+               break;
+       default:
+               return -EINVAL;
+       }
+
        /*
         * Try to transition the scsi device to SDEV_RUNNING or one of the
         * offlined states and goose the device queue if successful.
@@ -2748,7 +2787,12 @@ static int scsi_internal_device_unblock(struct scsi_device *sdev,
 static void
 device_block(struct scsi_device *sdev, void *data)
 {
-       scsi_internal_device_block(sdev);
+       int ret;
+
+       ret = scsi_internal_device_block(sdev);
+
+       WARN_ONCE(ret, "scsi_internal_device_block(%s) failed: ret = %d\n",
+                 dev_name(&sdev->sdev_gendev), ret);
 }
 
 static int
index 39b8cc4..c6ed0b1 100644 (file)
 #include <scsi/scsi_eh.h>
 #include <scsi/scsi_dbg.h>
 
-#define SCSI_LOG_SPOOLSIZE 4096
-
-#if (SCSI_LOG_SPOOLSIZE / SCSI_LOG_BUFSIZE) > BITS_PER_LONG
-#warning SCSI logging bitmask too large
-#endif
-
-struct scsi_log_buf {
-       char buffer[SCSI_LOG_SPOOLSIZE];
-       unsigned long map;
-};
-
-static DEFINE_PER_CPU(struct scsi_log_buf, scsi_format_log);
-
 static char *scsi_log_reserve_buffer(size_t *len)
 {
-       struct scsi_log_buf *buf;
-       unsigned long map_bits = sizeof(buf->buffer) / SCSI_LOG_BUFSIZE;
-       unsigned long idx = 0;
-
-       preempt_disable();
-       buf = this_cpu_ptr(&scsi_format_log);
-       idx = find_first_zero_bit(&buf->map, map_bits);
-       if (likely(idx < map_bits)) {
-               while (test_and_set_bit(idx, &buf->map)) {
-                       idx = find_next_zero_bit(&buf->map, map_bits, idx);
-                       if (idx >= map_bits)
-                               break;
-               }
-       }
-       if (WARN_ON(idx >= map_bits)) {
-               preempt_enable();
-               return NULL;
-       }
-       *len = SCSI_LOG_BUFSIZE;
-       return buf->buffer + idx * SCSI_LOG_BUFSIZE;
+       *len = 128;
+       return kmalloc(*len, GFP_ATOMIC);
 }
 
 static void scsi_log_release_buffer(char *bufptr)
 {
-       struct scsi_log_buf *buf;
-       unsigned long idx;
-       int ret;
-
-       buf = this_cpu_ptr(&scsi_format_log);
-       if (bufptr >= buf->buffer &&
-           bufptr < buf->buffer + SCSI_LOG_SPOOLSIZE) {
-               idx = (bufptr - buf->buffer) / SCSI_LOG_BUFSIZE;
-               ret = test_and_clear_bit(idx, &buf->map);
-               WARN_ON(!ret);
-       }
-       preempt_enable();
+       kfree(bufptr);
 }
 
 static inline const char *scmd_name(const struct scsi_cmnd *scmd)
index 4b92555..50928bc 100644 (file)
@@ -1211,9 +1211,6 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
        dix = scsi_prot_sg_count(cmd);
        dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type);
 
-       if (write && dix)
-               t10_pi_prepare(cmd->request, sdkp->protection_type);
-
        if (dif || dix)
                protect = sd_setup_protect_cmnd(cmd, dix, dif);
        else
@@ -1981,6 +1978,7 @@ static int sd_done(struct scsi_cmnd *SCpnt)
                        sd_printk(KERN_INFO, sdkp,
                                "Unaligned partial completion (resid=%u, sector_sz=%u)\n",
                                resid, sector_size);
+                       scsi_print_command(SCpnt);
                        resid = min(scsi_bufflen(SCpnt),
                                    round_up(resid, sector_size));
                        scsi_set_resid(SCpnt, resid);
@@ -2054,11 +2052,6 @@ static int sd_done(struct scsi_cmnd *SCpnt)
                                           "sd_done: completed %d of %d bytes\n",
                                           good_bytes, scsi_bufflen(SCpnt)));
 
-       if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt) &&
-           good_bytes)
-               t10_pi_complete(SCpnt->request, sdkp->protection_type,
-                               good_bytes / scsi_prot_interval(SCpnt));
-
        return good_bytes;
 }
 
index 97e159c..bc65068 100644 (file)
@@ -1,6 +1,8 @@
 #
 # Kernel configuration file for the SMARTPQI
 #
+# Copyright (c) 2019 Microchip Technology Inc. and its subsidiaries
+# Copyright (c) 2017-2018 Microsemi Corporation
 # Copyright (c) 2016 Microsemi Corporation
 # Copyright (c) 2016 PMC-Sierra, Inc.
 #  (mailto:esc.storagedev@microsemi.com)
index e8e7688..79d2af3 100644 (file)
@@ -822,6 +822,7 @@ union pqi_reset_register {
 #define PQI_HBA_BUS                    2
 #define PQI_EXTERNAL_RAID_VOLUME_BUS   3
 #define PQI_MAX_BUS                    PQI_EXTERNAL_RAID_VOLUME_BUS
+#define PQI_VSEP_CISS_BTL              379
 
 struct report_lun_header {
        __be32  list_length;
@@ -930,6 +931,9 @@ struct pqi_scsi_dev {
        u8      active_path_index;
        u8      path_map;
        u8      bay;
+       u8      box_index;
+       u8      phys_box_on_bus;
+       u8      phy_connected_dev_type;
        u8      box[8];
        u16     phys_connector[8];
        bool    raid_bypass_configured; /* RAID bypass configured */
@@ -1073,6 +1077,9 @@ struct pqi_ctrl_info {
        unsigned int    ctrl_id;
        struct pci_dev  *pci_dev;
        char            firmware_version[11];
+       char            serial_number[17];
+       char            model[17];
+       char            vendor[9];
        void __iomem    *iomem_base;
        struct pqi_ctrl_registers __iomem *registers;
        struct pqi_device_registers __iomem *pqi_registers;
@@ -1224,12 +1231,21 @@ struct bmic_identify_controller {
        __le16  extended_logical_unit_count;
        u8      reserved1[34];
        __le16  firmware_build_number;
-       u8      reserved2[100];
+       u8      reserved2[8];
+       u8      vendor_id[8];
+       u8      product_id[16];
+       u8      reserved3[68];
        u8      controller_mode;
-       u8      reserved3[32];
+       u8      reserved4[32];
+};
+
+struct bmic_sense_subsystem_info {
+       u8      reserved[44];
+       u8      ctrl_serial_number[16];
 };
 
 #define SA_EXPANDER_SMP_DEVICE         0x05
+#define SA_CONTROLLER_DEVICE           0x07
 /*SCSI Invalid Device Type for SAS devices*/
 #define PQI_SAS_SCSI_INVALID_DEVTYPE   0xff
 
index 8fd5ffc..ea5409b 100644 (file)
 #define BUILD_TIMESTAMP
 #endif
 
-#define DRIVER_VERSION         "1.2.6-015"
+#define DRIVER_VERSION         "1.2.8-026"
 #define DRIVER_MAJOR           1
 #define DRIVER_MINOR           2
-#define DRIVER_RELEASE         6
-#define DRIVER_REVISION                15
+#define DRIVER_RELEASE         8
+#define DRIVER_REVISION                26
 
 #define DRIVER_NAME            "Microsemi PQI Driver (v" \
                                DRIVER_VERSION BUILD_TIMESTAMP ")"
@@ -145,6 +145,18 @@ MODULE_PARM_DESC(lockup_action, "Action to take when controller locked up.\n"
        "\t\tSupported: none, reboot, panic\n"
        "\t\tDefault: none");
 
+static int pqi_expose_ld_first;
+module_param_named(expose_ld_first,
+       pqi_expose_ld_first, int, 0644);
+MODULE_PARM_DESC(expose_ld_first,
+       "Expose logical drives before physical drives.");
+
+static int pqi_hide_vsep;
+module_param_named(hide_vsep,
+       pqi_hide_vsep, int, 0644);
+MODULE_PARM_DESC(hide_vsep,
+       "Hide the virtual SEP for direct attached drives.");
+
 static char *raid_levels[] = {
        "RAID-0",
        "RAID-4",
@@ -472,6 +484,7 @@ static int pqi_build_raid_path_request(struct pqi_ctrl_info *ctrl_info,
                /* fall through */
        case BMIC_IDENTIFY_CONTROLLER:
        case BMIC_IDENTIFY_PHYSICAL_DEVICE:
+       case BMIC_SENSE_SUBSYSTEM_INFORMATION:
                request->data_direction = SOP_READ_FLAG;
                cdb[0] = BMIC_READ;
                cdb[6] = cmd;
@@ -600,6 +613,14 @@ static inline int pqi_identify_controller(struct pqi_ctrl_info *ctrl_info,
                        buffer, sizeof(*buffer));
 }
 
+static inline int pqi_sense_subsystem_info(struct  pqi_ctrl_info *ctrl_info,
+               struct bmic_sense_subsystem_info *sense_info)
+{
+       return pqi_send_ctrl_raid_request(ctrl_info,
+                       BMIC_SENSE_SUBSYSTEM_INFORMATION,
+                       sense_info, sizeof(*sense_info));
+}
+
 static inline int pqi_scsi_inquiry(struct pqi_ctrl_info *ctrl_info,
        u8 *scsi3addr, u16 vpd_page, void *buffer, size_t buffer_length)
 {
@@ -1392,7 +1413,9 @@ static void pqi_get_physical_disk_info(struct pqi_ctrl_info *ctrl_info,
                device->queue_depth = PQI_PHYSICAL_DISK_DEFAULT_MAX_QUEUE_DEPTH;
                return;
        }
-
+       device->box_index = id_phys->box_index;
+       device->phys_box_on_bus = id_phys->phys_box_on_bus;
+       device->phy_connected_dev_type = id_phys->phy_connected_dev_type[0];
        device->queue_depth =
                get_unaligned_le16(&id_phys->current_queue_depth_limit);
        device->device_type = id_phys->device_type;
@@ -1719,6 +1742,10 @@ static void pqi_scsi_update_device(struct pqi_scsi_dev *existing_device,
        existing_device->active_path_index = new_device->active_path_index;
        existing_device->path_map = new_device->path_map;
        existing_device->bay = new_device->bay;
+       existing_device->box_index = new_device->box_index;
+       existing_device->phys_box_on_bus = new_device->phys_box_on_bus;
+       existing_device->phy_connected_dev_type =
+               new_device->phy_connected_dev_type;
        memcpy(existing_device->box, new_device->box,
                sizeof(existing_device->box));
        memcpy(existing_device->phys_connector, new_device->phys_connector,
@@ -1945,6 +1972,11 @@ static inline bool pqi_skip_device(u8 *scsi3addr)
        return false;
 }
 
+static inline void pqi_mask_device(u8 *scsi3addr)
+{
+       scsi3addr[3] |= 0xc0;
+}
+
 static inline bool pqi_is_device_with_sas_address(struct pqi_scsi_dev *device)
 {
        if (!device->is_physical_device)
@@ -1988,6 +2020,8 @@ static int pqi_update_scsi_devices(struct pqi_ctrl_info *ctrl_info)
        unsigned int num_valid_devices;
        bool is_physical_device;
        u8 *scsi3addr;
+       unsigned int physical_index;
+       unsigned int logical_index;
        static char *out_of_memory_msg =
                "failed to allocate memory, device discovery stopped";
 
@@ -2023,6 +2057,21 @@ static int pqi_update_scsi_devices(struct pqi_ctrl_info *ctrl_info)
                        rc = -ENOMEM;
                        goto out;
                }
+               if (pqi_hide_vsep) {
+                       int i;
+
+                       for (i = num_physicals - 1; i >= 0; i--) {
+                               phys_lun_ext_entry =
+                                               &physdev_list->lun_entries[i];
+                               if (CISS_GET_DRIVE_NUMBER(
+                                       phys_lun_ext_entry->lunid) ==
+                                               PQI_VSEP_CISS_BTL) {
+                                       pqi_mask_device(
+                                               phys_lun_ext_entry->lunid);
+                                       break;
+                               }
+                       }
+               }
        }
 
        num_new_devices = num_physicals + num_logicals;
@@ -2050,19 +2099,23 @@ static int pqi_update_scsi_devices(struct pqi_ctrl_info *ctrl_info)
 
        device = NULL;
        num_valid_devices = 0;
+       physical_index = 0;
+       logical_index = 0;
 
        for (i = 0; i < num_new_devices; i++) {
 
-               if (i < num_physicals) {
+               if ((!pqi_expose_ld_first && i < num_physicals) ||
+                       (pqi_expose_ld_first && i >= num_logicals)) {
                        is_physical_device = true;
-                       phys_lun_ext_entry = &physdev_list->lun_entries[i];
+                       phys_lun_ext_entry =
+                               &physdev_list->lun_entries[physical_index++];
                        log_lun_ext_entry = NULL;
                        scsi3addr = phys_lun_ext_entry->lunid;
                } else {
                        is_physical_device = false;
                        phys_lun_ext_entry = NULL;
                        log_lun_ext_entry =
-                               &logdev_list->lun_entries[i - num_physicals];
+                               &logdev_list->lun_entries[logical_index++];
                        scsi3addr = log_lun_ext_entry->lunid;
                }
 
@@ -2122,11 +2175,10 @@ static int pqi_update_scsi_devices(struct pqi_ctrl_info *ctrl_info)
                                        device->aio_handle =
                                                phys_lun_ext_entry->aio_handle;
                        }
-                       if (device->devtype == TYPE_DISK ||
-                               device->devtype == TYPE_ZBC) {
+
                                pqi_get_physical_disk_info(ctrl_info,
                                        device, id_phys);
-                       }
+
                } else {
                        memcpy(device->volume_id, log_lun_ext_entry->volume_id,
                                sizeof(device->volume_id));
@@ -2184,18 +2236,20 @@ static void pqi_remove_all_scsi_devices(struct pqi_ctrl_info *ctrl_info)
 
 static int pqi_scan_scsi_devices(struct pqi_ctrl_info *ctrl_info)
 {
-       int rc;
+       int rc = 0;
 
        if (pqi_ctrl_offline(ctrl_info))
                return -ENXIO;
 
-       mutex_lock(&ctrl_info->scan_mutex);
-
-       rc = pqi_update_scsi_devices(ctrl_info);
-       if (rc)
+       if (!mutex_trylock(&ctrl_info->scan_mutex)) {
                pqi_schedule_rescan_worker_delayed(ctrl_info);
-
-       mutex_unlock(&ctrl_info->scan_mutex);
+               rc = -EINPROGRESS;
+       } else {
+               rc = pqi_update_scsi_devices(ctrl_info);
+               if (rc)
+                       pqi_schedule_rescan_worker_delayed(ctrl_info);
+               mutex_unlock(&ctrl_info->scan_mutex);
+       }
 
        return rc;
 }
@@ -6091,23 +6145,65 @@ static int pqi_ioctl(struct scsi_device *sdev, unsigned int cmd,
        return rc;
 }
 
-static ssize_t pqi_version_show(struct device *dev,
+static ssize_t pqi_firmware_version_show(struct device *dev,
+       struct device_attribute *attr, char *buffer)
+{
+       struct Scsi_Host *shost;
+       struct pqi_ctrl_info *ctrl_info;
+
+       shost = class_to_shost(dev);
+       ctrl_info = shost_to_hba(shost);
+
+       return snprintf(buffer, PAGE_SIZE, "%s\n", ctrl_info->firmware_version);
+}
+
+static ssize_t pqi_driver_version_show(struct device *dev,
+       struct device_attribute *attr, char *buffer)
+{
+       struct Scsi_Host *shost;
+       struct pqi_ctrl_info *ctrl_info;
+
+       shost = class_to_shost(dev);
+       ctrl_info = shost_to_hba(shost);
+
+       return snprintf(buffer, PAGE_SIZE,
+               "%s\n", DRIVER_VERSION BUILD_TIMESTAMP);
+}
+
+static ssize_t pqi_serial_number_show(struct device *dev,
        struct device_attribute *attr, char *buffer)
 {
-       ssize_t count = 0;
        struct Scsi_Host *shost;
        struct pqi_ctrl_info *ctrl_info;
 
        shost = class_to_shost(dev);
        ctrl_info = shost_to_hba(shost);
 
-       count += snprintf(buffer + count, PAGE_SIZE - count,
-               "  driver: %s\n", DRIVER_VERSION BUILD_TIMESTAMP);
+       return snprintf(buffer, PAGE_SIZE, "%s\n", ctrl_info->serial_number);
+}
+
+static ssize_t pqi_model_show(struct device *dev,
+       struct device_attribute *attr, char *buffer)
+{
+       struct Scsi_Host *shost;
+       struct pqi_ctrl_info *ctrl_info;
 
-       count += snprintf(buffer + count, PAGE_SIZE - count,
-               "firmware: %s\n", ctrl_info->firmware_version);
+       shost = class_to_shost(dev);
+       ctrl_info = shost_to_hba(shost);
 
-       return count;
+       return snprintf(buffer, PAGE_SIZE, "%s\n", ctrl_info->model);
+}
+
+static ssize_t pqi_vendor_show(struct device *dev,
+       struct device_attribute *attr, char *buffer)
+{
+       struct Scsi_Host *shost;
+       struct pqi_ctrl_info *ctrl_info;
+
+       shost = class_to_shost(dev);
+       ctrl_info = shost_to_hba(shost);
+
+       return snprintf(buffer, PAGE_SIZE, "%s\n", ctrl_info->vendor);
 }
 
 static ssize_t pqi_host_rescan_store(struct device *dev,
@@ -6160,13 +6256,21 @@ static ssize_t pqi_lockup_action_store(struct device *dev,
        return -EINVAL;
 }
 
-static DEVICE_ATTR(version, 0444, pqi_version_show, NULL);
+static DEVICE_ATTR(driver_version, 0444, pqi_driver_version_show, NULL);
+static DEVICE_ATTR(firmware_version, 0444, pqi_firmware_version_show, NULL);
+static DEVICE_ATTR(model, 0444, pqi_model_show, NULL);
+static DEVICE_ATTR(serial_number, 0444, pqi_serial_number_show, NULL);
+static DEVICE_ATTR(vendor, 0444, pqi_vendor_show, NULL);
 static DEVICE_ATTR(rescan, 0200, NULL, pqi_host_rescan_store);
 static DEVICE_ATTR(lockup_action, 0644,
        pqi_lockup_action_show, pqi_lockup_action_store);
 
 static struct device_attribute *pqi_shost_attrs[] = {
-       &dev_attr_version,
+       &dev_attr_driver_version,
+       &dev_attr_firmware_version,
+       &dev_attr_model,
+       &dev_attr_serial_number,
+       &dev_attr_vendor,
        &dev_attr_rescan,
        &dev_attr_lockup_action,
        NULL
@@ -6558,7 +6662,30 @@ static int pqi_reset(struct pqi_ctrl_info *ctrl_info)
        return rc;
 }
 
-static int pqi_get_ctrl_firmware_version(struct pqi_ctrl_info *ctrl_info)
+static int pqi_get_ctrl_serial_number(struct pqi_ctrl_info *ctrl_info)
+{
+       int rc;
+       struct bmic_sense_subsystem_info *sense_info;
+
+       sense_info = kzalloc(sizeof(*sense_info), GFP_KERNEL);
+       if (!sense_info)
+               return -ENOMEM;
+
+       rc = pqi_sense_subsystem_info(ctrl_info, sense_info);
+       if (rc)
+               goto out;
+
+       memcpy(ctrl_info->serial_number, sense_info->ctrl_serial_number,
+               sizeof(sense_info->ctrl_serial_number));
+       ctrl_info->serial_number[sizeof(sense_info->ctrl_serial_number)] = '\0';
+
+out:
+       kfree(sense_info);
+
+       return rc;
+}
+
+static int pqi_get_ctrl_product_details(struct pqi_ctrl_info *ctrl_info)
 {
        int rc;
        struct bmic_identify_controller *identify;
@@ -6579,6 +6706,14 @@ static int pqi_get_ctrl_firmware_version(struct pqi_ctrl_info *ctrl_info)
                sizeof(ctrl_info->firmware_version),
                "-%u", get_unaligned_le16(&identify->firmware_build_number));
 
+       memcpy(ctrl_info->model, identify->product_id,
+               sizeof(identify->product_id));
+       ctrl_info->model[sizeof(identify->product_id)] = '\0';
+
+       memcpy(ctrl_info->vendor, identify->vendor_id,
+               sizeof(identify->vendor_id));
+       ctrl_info->vendor[sizeof(identify->vendor_id)] = '\0';
+
 out:
        kfree(identify);
 
@@ -7098,10 +7233,17 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info)
        if (rc)
                return rc;
 
-       rc = pqi_get_ctrl_firmware_version(ctrl_info);
+       rc = pqi_get_ctrl_product_details(ctrl_info);
        if (rc) {
                dev_err(&ctrl_info->pci_dev->dev,
-                       "error obtaining firmware version\n");
+                       "error obtaining product details\n");
+               return rc;
+       }
+
+       rc = pqi_get_ctrl_serial_number(ctrl_info);
+       if (rc) {
+               dev_err(&ctrl_info->pci_dev->dev,
+                       "error obtaining ctrl serial number\n");
                return rc;
        }
 
@@ -7241,10 +7383,10 @@ static int pqi_ctrl_init_resume(struct pqi_ctrl_info *ctrl_info)
                return rc;
        }
 
-       rc = pqi_get_ctrl_firmware_version(ctrl_info);
+       rc = pqi_get_ctrl_product_details(ctrl_info);
        if (rc) {
                dev_err(&ctrl_info->pci_dev->dev,
-                       "error obtaining firmware version\n");
+                       "error obtaining product detail\n");
                return rc;
        }
 
@@ -8024,6 +8166,10 @@ static const struct pci_device_id pqi_pci_id_table[] = {
        },
        {
                PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+                              0x1bd4, 0x004f)
+       },
+       {
+               PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
                               0x19e5, 0xd227)
        },
        {
@@ -8088,6 +8234,14 @@ static const struct pci_device_id pqi_pci_id_table[] = {
        },
        {
                PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+                              PCI_VENDOR_ID_ADAPTEC2, 0x0808)
+       },
+       {
+               PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+                              PCI_VENDOR_ID_ADAPTEC2, 0x0809)
+       },
+       {
+               PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
                               PCI_VENDOR_ID_ADAPTEC2, 0x0900)
        },
        {
@@ -8244,6 +8398,26 @@ static const struct pci_device_id pqi_pci_id_table[] = {
        },
        {
                PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+                              0x1d8d, 0x0800)
+       },
+       {
+               PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+                              0x1d8d, 0x0908)
+       },
+       {
+               PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+                              0x1d8d, 0x0806)
+       },
+       {
+               PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+                              0x1d8d, 0x0916)
+       },
+       {
+               PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+                              PCI_VENDOR_ID_GIGABYTE, 0x1000)
+       },
+       {
+               PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
                               PCI_ANY_ID, PCI_ANY_ID)
        },
        { 0 }
index 5cca1b9..6776dfc 100644 (file)
@@ -312,12 +312,110 @@ static int pqi_sas_get_linkerrors(struct sas_phy *phy)
 static int pqi_sas_get_enclosure_identifier(struct sas_rphy *rphy,
        u64 *identifier)
 {
-       return 0;
+
+       int rc;
+       unsigned long flags;
+       struct Scsi_Host *shost;
+       struct pqi_ctrl_info *ctrl_info;
+       struct pqi_scsi_dev *found_device;
+       struct pqi_scsi_dev *device;
+
+       if (!rphy)
+               return -ENODEV;
+
+       shost = rphy_to_shost(rphy);
+       ctrl_info = shost_to_hba(shost);
+       spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags);
+       found_device = pqi_find_device_by_sas_rphy(ctrl_info, rphy);
+
+       if (!found_device) {
+               rc = -ENODEV;
+               goto out;
+       }
+
+       if (found_device->devtype == TYPE_ENCLOSURE) {
+               *identifier = get_unaligned_be64(&found_device->wwid);
+               rc = 0;
+               goto out;
+       }
+
+       if (found_device->box_index == 0xff ||
+               found_device->phys_box_on_bus == 0 ||
+               found_device->bay == 0xff) {
+               rc = -EINVAL;
+               goto out;
+       }
+
+       list_for_each_entry(device, &ctrl_info->scsi_device_list,
+               scsi_device_list_entry) {
+               if (device->devtype == TYPE_ENCLOSURE &&
+                       device->box_index == found_device->box_index &&
+                       device->phys_box_on_bus ==
+                               found_device->phys_box_on_bus &&
+                       memcmp(device->phys_connector,
+                               found_device->phys_connector, 2) == 0) {
+                       *identifier =
+                               get_unaligned_be64(&device->wwid);
+                       rc = 0;
+                       goto out;
+               }
+       }
+
+       if (found_device->phy_connected_dev_type != SA_CONTROLLER_DEVICE) {
+               rc = -EINVAL;
+               goto out;
+       }
+
+       list_for_each_entry(device, &ctrl_info->scsi_device_list,
+               scsi_device_list_entry) {
+               if (device->devtype == TYPE_ENCLOSURE &&
+                       CISS_GET_DRIVE_NUMBER(device->scsi3addr) ==
+                               PQI_VSEP_CISS_BTL) {
+                       *identifier = get_unaligned_be64(&device->wwid);
+                       rc = 0;
+                       goto out;
+               }
+       }
+
+       rc = -EINVAL;
+out:
+       spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
+
+       return rc;
+
 }
 
 static int pqi_sas_get_bay_identifier(struct sas_rphy *rphy)
 {
-       return -ENXIO;
+
+       int rc;
+       unsigned long flags;
+       struct pqi_ctrl_info *ctrl_info;
+       struct pqi_scsi_dev *device;
+       struct Scsi_Host *shost;
+
+       if (!rphy)
+               return -ENODEV;
+
+       shost = rphy_to_shost(rphy);
+       ctrl_info = shost_to_hba(shost);
+       spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags);
+       device = pqi_find_device_by_sas_rphy(ctrl_info, rphy);
+
+       if (!device) {
+               rc = -ENODEV;
+               goto out;
+       }
+
+       if (device->bay == 0xff)
+               rc = -EINVAL;
+       else
+               rc = device->bay;
+
+out:
+       spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
+
+       return rc;
 }
 
 static int pqi_sas_phy_reset(struct sas_phy *phy, int hard_reset)
index 3d80ab6..955e4c9 100644 (file)
@@ -397,10 +397,12 @@ static int sun3scsi_dma_finish(int write_flag)
                case CSR_LEFT_3:
                        *vaddr = (dregs->bpack_lo & 0xff00) >> 8;
                        vaddr--;
+                       /* Fall through */
 
                case CSR_LEFT_2:
                        *vaddr = (dregs->bpack_hi & 0x00ff);
                        vaddr--;
+                       /* Fall through */
 
                case CSR_LEFT_1:
                        *vaddr = (dregs->bpack_hi & 0xff00) >> 8;
index dd3f07b..9dc17f1 100644 (file)
@@ -648,7 +648,7 @@ static int sym_read_T93C46_nvram(struct sym_device *np, Tekram_nvram *nvram)
 {
        u_char gpcntl, gpreg;
        u_char old_gpcntl, old_gpreg;
-       int retv = 1;
+       int retv;
 
        /* save current state of GPCNTL and GPREG */
        old_gpreg       = INB(np, nc_gpreg);
index 86dbb72..b2af04c 100644 (file)
@@ -62,23 +62,47 @@ static int cdns_ufs_set_hclkdiv(struct ufs_hba *hba)
 }
 
 /**
- * Sets clocks used by the controller
+ * Called before and after HCE enable bit is set.
  * @hba: host controller instance
- * @on: if true, enable clocks, otherwise disable
  * @status: notify stage (pre, post change)
  *
  * Return zero for success and non-zero for failure
  */
-static int cdns_ufs_setup_clocks(struct ufs_hba *hba, bool on,
-                                enum ufs_notify_change_status status)
+static int cdns_ufs_hce_enable_notify(struct ufs_hba *hba,
+                                     enum ufs_notify_change_status status)
 {
-       if ((!on) || (status == PRE_CHANGE))
+       if (status != PRE_CHANGE)
                return 0;
 
        return cdns_ufs_set_hclkdiv(hba);
 }
 
 /**
+ * Called before and after Link startup is carried out.
+ * @hba: host controller instance
+ * @status: notify stage (pre, post change)
+ *
+ * Return zero for success and non-zero for failure
+ */
+static int cdns_ufs_link_startup_notify(struct ufs_hba *hba,
+                                       enum ufs_notify_change_status status)
+{
+       if (status != PRE_CHANGE)
+               return 0;
+
+       /*
+        * Some UFS devices have issues if LCC is enabled.
+        * So we are setting PA_Local_TX_LCC_Enable to 0
+        * before link startup which will make sure that both host
+        * and device TX LCC are disabled once link startup is
+        * completed.
+        */
+       ufshcd_dme_set(hba, UIC_ARG_MIB(PA_LOCAL_TX_LCC_ENABLE), 0);
+
+       return 0;
+}
+
+/**
  * cdns_ufs_init - performs additional ufs initialization
  * @hba: host controller instance
  *
@@ -114,13 +138,15 @@ static int cdns_ufs_m31_16nm_phy_initialization(struct ufs_hba *hba)
 
 static const struct ufs_hba_variant_ops cdns_ufs_pltfm_hba_vops = {
        .name = "cdns-ufs-pltfm",
-       .setup_clocks = cdns_ufs_setup_clocks,
+       .hce_enable_notify = cdns_ufs_hce_enable_notify,
+       .link_startup_notify = cdns_ufs_link_startup_notify,
 };
 
 static const struct ufs_hba_variant_ops cdns_ufs_m31_16nm_pltfm_hba_vops = {
        .name = "cdns-ufs-pltfm",
        .init = cdns_ufs_init,
-       .setup_clocks = cdns_ufs_setup_clocks,
+       .hce_enable_notify = cdns_ufs_hce_enable_notify,
+       .link_startup_notify = cdns_ufs_link_startup_notify,
        .phy_initialization = cdns_ufs_m31_16nm_phy_initialization,
 };
 
index f4d1dca..6bbb167 100644 (file)
@@ -447,13 +447,11 @@ static int ufs_hisi_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 
 static int ufs_hisi_get_resource(struct ufs_hisi_host *host)
 {
-       struct resource *mem_res;
        struct device *dev = host->hba->dev;
        struct platform_device *pdev = to_platform_device(dev);
 
        /* get resource of ufs sys ctrl */
-       mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       host->ufs_sys_ctrl = devm_ioremap_resource(dev, mem_res);
+       host->ufs_sys_ctrl = devm_platform_ioremap_resource(pdev, 1);
        if (IS_ERR(host->ufs_sys_ctrl))
                return PTR_ERR(host->ufs_sys_ctrl);
 
index ee4b1da..a5b7148 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/phy/phy.h>
+#include <linux/gpio/consumer.h>
 #include <linux/reset-controller.h>
 
 #include "ufshcd.h"
@@ -800,7 +801,6 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba,
                                struct ufs_pa_layer_attr *dev_max_params,
                                struct ufs_pa_layer_attr *dev_req_params)
 {
-       u32 val;
        struct ufs_qcom_host *host = ufshcd_get_variant(hba);
        struct ufs_dev_params ufs_qcom_cap;
        int ret = 0;
@@ -869,8 +869,6 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba,
                        ret = -EINVAL;
                }
 
-               val = ~(MAX_U32 << dev_req_params->lane_tx);
-
                /* cache the power mode parameters to use internally */
                memcpy(&host->dev_req_params,
                                dev_req_params, sizeof(*dev_req_params));
@@ -1140,6 +1138,15 @@ static int ufs_qcom_init(struct ufs_hba *hba)
                }
        }
 
+       host->device_reset = devm_gpiod_get_optional(dev, "reset",
+                                                    GPIOD_OUT_HIGH);
+       if (IS_ERR(host->device_reset)) {
+               err = PTR_ERR(host->device_reset);
+               if (err != -EPROBE_DEFER)
+                       dev_err(dev, "failed to acquire reset gpio: %d\n", err);
+               goto out_variant_clear;
+       }
+
        err = ufs_qcom_bus_register(host);
        if (err)
                goto out_variant_clear;
@@ -1546,12 +1553,37 @@ static void ufs_qcom_dump_dbg_regs(struct ufs_hba *hba)
 }
 
 /**
+ * ufs_qcom_device_reset() - toggle the (optional) device reset line
+ * @hba: per-adapter instance
+ *
+ * Toggles the (optional) reset line to reset the attached device.
+ */
+static void ufs_qcom_device_reset(struct ufs_hba *hba)
+{
+       struct ufs_qcom_host *host = ufshcd_get_variant(hba);
+
+       /* reset gpio is optional */
+       if (!host->device_reset)
+               return;
+
+       /*
+        * The UFS device shall detect reset pulses of 1us, sleep for 10us to
+        * be on the safe side.
+        */
+       gpiod_set_value_cansleep(host->device_reset, 1);
+       usleep_range(10, 15);
+
+       gpiod_set_value_cansleep(host->device_reset, 0);
+       usleep_range(10, 15);
+}
+
+/**
  * struct ufs_hba_qcom_vops - UFS QCOM specific variant operations
  *
  * The variant operations configure the necessary controller and PHY
  * handshake during initialization.
  */
-static struct ufs_hba_variant_ops ufs_hba_qcom_vops = {
+static const struct ufs_hba_variant_ops ufs_hba_qcom_vops = {
        .name                   = "qcom",
        .init                   = ufs_qcom_init,
        .exit                   = ufs_qcom_exit,
@@ -1565,6 +1597,7 @@ static struct ufs_hba_variant_ops ufs_hba_qcom_vops = {
        .suspend                = ufs_qcom_suspend,
        .resume                 = ufs_qcom_resume,
        .dbg_register_dump      = ufs_qcom_dump_dbg_regs,
+       .device_reset           = ufs_qcom_device_reset,
 };
 
 /**
index 001915d..d401f17 100644 (file)
@@ -195,6 +195,8 @@ struct ufs_qcom_testbus {
        u8 select_minor;
 };
 
+struct gpio_desc;
+
 struct ufs_qcom_host {
        /*
         * Set this capability if host controller supports the QUniPro mode
@@ -232,6 +234,8 @@ struct ufs_qcom_host {
        struct ufs_qcom_testbus testbus;
 
        struct reset_controller_dev rcdev;
+
+       struct gpio_desc *device_reset;
 };
 
 static inline u32
index f478685..969a36b 100644 (file)
@@ -571,9 +571,10 @@ static ssize_t _name##_show(struct device *dev,                            \
        int ret;                                                        \
        int desc_len = QUERY_DESC_MAX_SIZE;                             \
        u8 *desc_buf;                                                   \
+                                                                       \
        desc_buf = kzalloc(QUERY_DESC_MAX_SIZE, GFP_ATOMIC);            \
-       if (!desc_buf)                                                  \
-               return -ENOMEM;                                         \
+       if (!desc_buf)                                                  \
+               return -ENOMEM;                                         \
        ret = ufshcd_query_descriptor_retry(hba,                        \
                UPIU_QUERY_OPCODE_READ_DESC, QUERY_DESC_IDN_DEVICE,     \
                0, 0, desc_buf, &desc_len);                             \
@@ -582,14 +583,13 @@ static ssize_t _name##_show(struct device *dev,                           \
                goto out;                                               \
        }                                                               \
        index = desc_buf[DEVICE_DESC_PARAM##_pname];                    \
-       memset(desc_buf, 0, QUERY_DESC_MAX_SIZE);                       \
-       if (ufshcd_read_string_desc(hba, index, desc_buf,               \
-               QUERY_DESC_MAX_SIZE, true)) {                           \
-               ret = -EINVAL;                                          \
+       kfree(desc_buf);                                                \
+       desc_buf = NULL;                                                \
+       ret = ufshcd_read_string_desc(hba, index, &desc_buf,            \
+                                     SD_ASCII_STD);                    \
+       if (ret < 0)                                                    \
                goto out;                                               \
-       }                                                               \
-       ret = snprintf(buf, PAGE_SIZE, "%s\n",                          \
-               desc_buf + QUERY_DESC_HDR_SIZE);                        \
+       ret = snprintf(buf, PAGE_SIZE, "%s\n", desc_buf);               \
 out:                                                                   \
        kfree(desc_buf);                                                \
        return ret;                                                     \
index 99a9c4d..3327981 100644 (file)
@@ -541,7 +541,7 @@ struct ufs_dev_info {
  */
 struct ufs_dev_desc {
        u16 wmanufacturerid;
-       char model[MAX_MODEL_LEN + 1];
+       u8 *model;
 };
 
 /**
index d7d521b..8d40dc9 100644 (file)
@@ -391,12 +391,10 @@ int ufshcd_pltfrm_init(struct platform_device *pdev,
 {
        struct ufs_hba *hba;
        void __iomem *mmio_base;
-       struct resource *mem_res;
        int irq, err;
        struct device *dev = &pdev->dev;
 
-       mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       mmio_base = devm_ioremap_resource(dev, mem_res);
+       mmio_base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(mmio_base)) {
                err = PTR_ERR(mmio_base);
                goto out;
index 029da74..034dd9c 100644 (file)
@@ -299,16 +299,6 @@ static void ufshcd_scsi_block_requests(struct ufs_hba *hba)
                scsi_block_requests(hba->host);
 }
 
-/* replace non-printable or non-ASCII characters with spaces */
-static inline void ufshcd_remove_non_printable(char *val)
-{
-       if (!val)
-               return;
-
-       if (*val < 0x20 || *val > 0x7e)
-               *val = ' ';
-}
-
 static void ufshcd_add_cmd_upiu_trace(struct ufs_hba *hba, unsigned int tag,
                const char *str)
 {
@@ -390,24 +380,25 @@ static void ufshcd_print_clk_freqs(struct ufs_hba *hba)
        }
 }
 
-static void ufshcd_print_uic_err_hist(struct ufs_hba *hba,
-               struct ufs_uic_err_reg_hist *err_hist, char *err_name)
+static void ufshcd_print_err_hist(struct ufs_hba *hba,
+                                 struct ufs_err_reg_hist *err_hist,
+                                 char *err_name)
 {
        int i;
        bool found = false;
 
-       for (i = 0; i < UIC_ERR_REG_HIST_LENGTH; i++) {
-               int p = (i + err_hist->pos) % UIC_ERR_REG_HIST_LENGTH;
+       for (i = 0; i < UFS_ERR_REG_HIST_LENGTH; i++) {
+               int p = (i + err_hist->pos) % UFS_ERR_REG_HIST_LENGTH;
 
                if (err_hist->reg[p] == 0)
                        continue;
-               dev_err(hba->dev, "%s[%d] = 0x%x at %lld us\n", err_name, i,
+               dev_err(hba->dev, "%s[%d] = 0x%x at %lld us\n", err_name, p,
                        err_hist->reg[p], ktime_to_us(err_hist->tstamp[p]));
                found = true;
        }
 
        if (!found)
-               dev_err(hba->dev, "No record of %s uic errors\n", err_name);
+               dev_err(hba->dev, "No record of %s errors\n", err_name);
 }
 
 static void ufshcd_print_host_regs(struct ufs_hba *hba)
@@ -423,11 +414,22 @@ static void ufshcd_print_host_regs(struct ufs_hba *hba)
                ktime_to_us(hba->ufs_stats.last_hibern8_exit_tstamp),
                hba->ufs_stats.hibern8_exit_cnt);
 
-       ufshcd_print_uic_err_hist(hba, &hba->ufs_stats.pa_err, "pa_err");
-       ufshcd_print_uic_err_hist(hba, &hba->ufs_stats.dl_err, "dl_err");
-       ufshcd_print_uic_err_hist(hba, &hba->ufs_stats.nl_err, "nl_err");
-       ufshcd_print_uic_err_hist(hba, &hba->ufs_stats.tl_err, "tl_err");
-       ufshcd_print_uic_err_hist(hba, &hba->ufs_stats.dme_err, "dme_err");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.pa_err, "pa_err");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.dl_err, "dl_err");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.nl_err, "nl_err");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.tl_err, "tl_err");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.dme_err, "dme_err");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.auto_hibern8_err,
+                             "auto_hibern8_err");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.fatal_err, "fatal_err");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.link_startup_err,
+                             "link_startup_fail");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.resume_err, "resume_fail");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.suspend_err,
+                             "suspend_fail");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.dev_reset, "dev_reset");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.host_reset, "host_reset");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.task_abort, "task_abort");
 
        ufshcd_print_clk_freqs(hba);
 
@@ -3199,7 +3201,7 @@ out:
 static inline int ufshcd_read_desc(struct ufs_hba *hba,
                                   enum desc_idn desc_id,
                                   int desc_index,
-                                  u8 *buf,
+                                  void *buf,
                                   u32 size)
 {
        return ufshcd_read_desc_param(hba, desc_id, desc_index, 0, buf, size);
@@ -3218,48 +3220,77 @@ static int ufshcd_read_device_desc(struct ufs_hba *hba, u8 *buf, u32 size)
 }
 
 /**
+ * struct uc_string_id - unicode string
+ *
+ * @len: size of this descriptor in bytes, including the header
+ * @type: descriptor type
+ * @uc: UTF-16 string characters
+ */
+struct uc_string_id {
+       u8 len;
+       u8 type;
+       wchar_t uc[0];
+} __packed;
+
+/* replace non-printable or non-ASCII characters with spaces */
+static inline char ufshcd_remove_non_printable(u8 ch)
+{
+       return (ch >= 0x20 && ch <= 0x7e) ? ch : ' ';
+}
+
+/**
  * ufshcd_read_string_desc - read string descriptor
  * @hba: pointer to adapter instance
  * @desc_index: descriptor index
- * @buf: pointer to buffer where descriptor would be read
- * @size: size of buf
+ * @buf: output; set to a newly allocated buffer holding the descriptor
+ *       contents (NULL if the string is empty), the caller must free it.
  * @ascii: if true convert from unicode to ascii characters
+ *         null terminated string.
  *
- * Return 0 in case of success, non-zero otherwise
+ * Return:
+ * *      string size on success.
+ * *      -ENOMEM: on allocation failure
+ * *      -EINVAL: on a wrong parameter
  */
-int ufshcd_read_string_desc(struct ufs_hba *hba, int desc_index,
-                           u8 *buf, u32 size, bool ascii)
+int ufshcd_read_string_desc(struct ufs_hba *hba, u8 desc_index,
+                           u8 **buf, bool ascii)
 {
-       int err = 0;
+       struct uc_string_id *uc_str;
+       u8 *str;
+       int ret;
 
-       err = ufshcd_read_desc(hba,
-                               QUERY_DESC_IDN_STRING, desc_index, buf, size);
+       if (!buf)
+               return -EINVAL;
 
-       if (err) {
-               dev_err(hba->dev, "%s: reading String Desc failed after %d retries. err = %d\n",
-                       __func__, QUERY_REQ_RETRIES, err);
+       uc_str = kzalloc(QUERY_DESC_MAX_SIZE, GFP_KERNEL);
+       if (!uc_str)
+               return -ENOMEM;
+
+       ret = ufshcd_read_desc(hba, QUERY_DESC_IDN_STRING,
+                              desc_index, uc_str,
+                              QUERY_DESC_MAX_SIZE);
+       if (ret < 0) {
+               dev_err(hba->dev, "Reading String Desc failed after %d retries. err = %d\n",
+                       QUERY_REQ_RETRIES, ret);
+               str = NULL;
+               goto out;
+       }
+
+       if (uc_str->len <= QUERY_DESC_HDR_SIZE) {
+               dev_dbg(hba->dev, "String Desc is of zero length\n");
+               str = NULL;
+               ret = 0;
                goto out;
        }
 
        if (ascii) {
-               int desc_len;
-               int ascii_len;
+               ssize_t ascii_len;
                int i;
-               char *buff_ascii;
-
-               desc_len = buf[0];
                /* remove header and divide by 2 to move from UTF16 to UTF8 */
-               ascii_len = (desc_len - QUERY_DESC_HDR_SIZE) / 2 + 1;
-               if (size < ascii_len + QUERY_DESC_HDR_SIZE) {
-                       dev_err(hba->dev, "%s: buffer allocated size is too small\n",
-                                       __func__);
-                       err = -ENOMEM;
-                       goto out;
-               }
-
-               buff_ascii = kmalloc(ascii_len, GFP_KERNEL);
-               if (!buff_ascii) {
-                       err = -ENOMEM;
+               ascii_len = (uc_str->len - QUERY_DESC_HDR_SIZE) / 2 + 1;
+               str = kzalloc(ascii_len, GFP_KERNEL);
+               if (!str) {
+                       ret = -ENOMEM;
                        goto out;
                }
 
@@ -3267,22 +3298,28 @@ int ufshcd_read_string_desc(struct ufs_hba *hba, int desc_index,
                 * the descriptor contains string in UTF16 format
                 * we need to convert to utf-8 so it can be displayed
                 */
-               utf16s_to_utf8s((wchar_t *)&buf[QUERY_DESC_HDR_SIZE],
-                               desc_len - QUERY_DESC_HDR_SIZE,
-                               UTF16_BIG_ENDIAN, buff_ascii, ascii_len);
+               ret = utf16s_to_utf8s(uc_str->uc,
+                                     uc_str->len - QUERY_DESC_HDR_SIZE,
+                                     UTF16_BIG_ENDIAN, str, ascii_len);
 
                /* replace non-printable or non-ASCII characters with spaces */
-               for (i = 0; i < ascii_len; i++)
-                       ufshcd_remove_non_printable(&buff_ascii[i]);
+               for (i = 0; i < ret; i++)
+                       str[i] = ufshcd_remove_non_printable(str[i]);
 
-               memset(buf + QUERY_DESC_HDR_SIZE, 0,
-                               size - QUERY_DESC_HDR_SIZE);
-               memcpy(buf + QUERY_DESC_HDR_SIZE, buff_ascii, ascii_len);
-               buf[QUERY_DESC_LENGTH_OFFSET] = ascii_len + QUERY_DESC_HDR_SIZE;
-               kfree(buff_ascii);
+               str[ret++] = '\0';
+
+       } else {
+               str = kmemdup(uc_str, uc_str->len, GFP_KERNEL);
+               if (!str) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+               ret = uc_str->len;
        }
 out:
-       return err;
+       *buf = str;
+       kfree(uc_str);
+       return ret;
 }
 
 /**
@@ -4214,12 +4251,6 @@ static int ufshcd_hba_execute_hce(struct ufs_hba *hba)
 {
        int retry;
 
-       /*
-        * msleep of 1 and 5 used in this function might result in msleep(20),
-        * but it was necessary to send the UFS FPGA to reset mode during
-        * development and testing of this driver. msleep can be changed to
-        * mdelay and retry count can be reduced based on the controller.
-        */
        if (!ufshcd_is_hba_active(hba))
                /* change controller state to "reset state" */
                ufshcd_hba_stop(hba, true);
@@ -4242,7 +4273,7 @@ static int ufshcd_hba_execute_hce(struct ufs_hba *hba)
         * instruction might be read back.
         * This delay can be changed based on the controller.
         */
-       msleep(1);
+       usleep_range(1000, 1100);
 
        /* wait for the host controller to complete initialization */
        retry = 10;
@@ -4254,7 +4285,7 @@ static int ufshcd_hba_execute_hce(struct ufs_hba *hba)
                                "Controller enable failed\n");
                        return -EIO;
                }
-               msleep(5);
+               usleep_range(5000, 5100);
        }
 
        /* enable UIC related interrupts */
@@ -4326,6 +4357,14 @@ static inline int ufshcd_disable_device_tx_lcc(struct ufs_hba *hba)
        return ufshcd_disable_tx_lcc(hba, true);
 }
 
+static void ufshcd_update_reg_hist(struct ufs_err_reg_hist *reg_hist,
+                                  u32 reg)
+{
+       reg_hist->reg[reg_hist->pos] = reg;
+       reg_hist->tstamp[reg_hist->pos] = ktime_get();
+       reg_hist->pos = (reg_hist->pos + 1) % UFS_ERR_REG_HIST_LENGTH;
+}
+
 /**
  * ufshcd_link_startup - Initialize unipro link startup
  * @hba: per adapter instance
@@ -4353,6 +4392,8 @@ link_startup:
 
                /* check if device is detected by inter-connect layer */
                if (!ret && !ufshcd_is_device_present(hba)) {
+                       ufshcd_update_reg_hist(&hba->ufs_stats.link_startup_err,
+                                              0);
                        dev_err(hba->dev, "%s: Device not present\n", __func__);
                        ret = -ENXIO;
                        goto out;
@@ -4363,13 +4404,19 @@ link_startup:
                 * but we can't be sure if the link is up until link startup
                 * succeeds. So reset the local Uni-Pro and try again.
                 */
-               if (ret && ufshcd_hba_enable(hba))
+               if (ret && ufshcd_hba_enable(hba)) {
+                       ufshcd_update_reg_hist(&hba->ufs_stats.link_startup_err,
+                                              (u32)ret);
                        goto out;
+               }
        } while (ret && retries--);
 
-       if (ret)
+       if (ret) {
                /* failed to get the link up... retire */
+               ufshcd_update_reg_hist(&hba->ufs_stats.link_startup_err,
+                                      (u32)ret);
                goto out;
+       }
 
        if (link_startup_again) {
                link_startup_again = false;
@@ -5345,14 +5392,6 @@ out:
        pm_runtime_put_sync(hba->dev);
 }
 
-static void ufshcd_update_uic_reg_hist(struct ufs_uic_err_reg_hist *reg_hist,
-               u32 reg)
-{
-       reg_hist->reg[reg_hist->pos] = reg;
-       reg_hist->tstamp[reg_hist->pos] = ktime_get();
-       reg_hist->pos = (reg_hist->pos + 1) % UIC_ERR_REG_HIST_LENGTH;
-}
-
 /**
  * ufshcd_update_uic_error - check and set fatal UIC error flags.
  * @hba: per-adapter instance
@@ -5371,13 +5410,13 @@ static void ufshcd_update_uic_error(struct ufs_hba *hba)
                 * must be checked but this error is handled separately.
                 */
                dev_dbg(hba->dev, "%s: UIC Lane error reported\n", __func__);
-               ufshcd_update_uic_reg_hist(&hba->ufs_stats.pa_err, reg);
+               ufshcd_update_reg_hist(&hba->ufs_stats.pa_err, reg);
        }
 
        /* PA_INIT_ERROR is fatal and needs UIC reset */
        reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER);
        if (reg)
-               ufshcd_update_uic_reg_hist(&hba->ufs_stats.dl_err, reg);
+               ufshcd_update_reg_hist(&hba->ufs_stats.dl_err, reg);
 
        if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT)
                hba->uic_error |= UFSHCD_UIC_DL_PA_INIT_ERROR;
@@ -5393,19 +5432,19 @@ static void ufshcd_update_uic_error(struct ufs_hba *hba)
        /* UIC NL/TL/DME errors needs software retry */
        reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_NETWORK_LAYER);
        if (reg) {
-               ufshcd_update_uic_reg_hist(&hba->ufs_stats.nl_err, reg);
+               ufshcd_update_reg_hist(&hba->ufs_stats.nl_err, reg);
                hba->uic_error |= UFSHCD_UIC_NL_ERROR;
        }
 
        reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_TRANSPORT_LAYER);
        if (reg) {
-               ufshcd_update_uic_reg_hist(&hba->ufs_stats.tl_err, reg);
+               ufshcd_update_reg_hist(&hba->ufs_stats.tl_err, reg);
                hba->uic_error |= UFSHCD_UIC_TL_ERROR;
        }
 
        reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DME);
        if (reg) {
-               ufshcd_update_uic_reg_hist(&hba->ufs_stats.dme_err, reg);
+               ufshcd_update_reg_hist(&hba->ufs_stats.dme_err, reg);
                hba->uic_error |= UFSHCD_UIC_DME_ERROR;
        }
 
@@ -5438,8 +5477,10 @@ static void ufshcd_check_errors(struct ufs_hba *hba)
 {
        bool queue_eh_work = false;
 
-       if (hba->errors & INT_FATAL_ERRORS)
+       if (hba->errors & INT_FATAL_ERRORS) {
+               ufshcd_update_reg_hist(&hba->ufs_stats.fatal_err, hba->errors);
                queue_eh_work = true;
+       }
 
        if (hba->errors & UIC_ERROR) {
                hba->uic_error = 0;
@@ -5454,6 +5495,8 @@ static void ufshcd_check_errors(struct ufs_hba *hba)
                        __func__, (hba->errors & UIC_HIBERNATE_ENTER) ?
                        "Enter" : "Exit",
                        hba->errors, ufshcd_get_upmcrs(hba));
+               ufshcd_update_reg_hist(&hba->ufs_stats.auto_hibern8_err,
+                                      hba->errors);
                queue_eh_work = true;
        }
 
@@ -5652,13 +5695,12 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba,
                memcpy(treq, hba->utmrdl_base_addr + free_slot, sizeof(*treq));
 
                ufshcd_add_tm_upiu_trace(hba, task_tag, "tm_complete");
-
-               spin_lock_irqsave(hba->host->host_lock, flags);
-               __clear_bit(free_slot, &hba->outstanding_tasks);
-               spin_unlock_irqrestore(hba->host->host_lock, flags);
-
        }
 
+       spin_lock_irqsave(hba->host->host_lock, flags);
+       __clear_bit(free_slot, &hba->outstanding_tasks);
+       spin_unlock_irqrestore(hba->host->host_lock, flags);
+
        clear_bit(free_slot, &hba->tm_condition);
        ufshcd_put_tm_slot(hba, free_slot);
        wake_up(&hba->tm_tag_wq);
@@ -5941,6 +5983,7 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
 
 out:
        hba->req_abort_count = 0;
+       ufshcd_update_reg_hist(&hba->ufs_stats.dev_reset, (u32)err);
        if (!err) {
                err = SUCCESS;
        } else {
@@ -6034,6 +6077,7 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
         */
        scsi_print_command(hba->lrb[tag].cmd);
        if (!hba->req_abort_count) {
+               ufshcd_update_reg_hist(&hba->ufs_stats.task_abort, 0);
                ufshcd_print_host_regs(hba);
                ufshcd_print_host_state(hba);
                ufshcd_print_pwr_info(hba);
@@ -6169,7 +6213,7 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
 out:
        if (err)
                dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err);
-
+       ufshcd_update_reg_hist(&hba->ufs_stats.host_reset, (u32)err);
        return err;
 }
 
@@ -6189,6 +6233,9 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba)
        int retries = MAX_HOST_RESET_RETRIES;
 
        do {
+               /* Reset the attached device */
+               ufshcd_vops_device_reset(hba);
+
                err = ufshcd_host_reset_and_restore(hba);
        } while (err && --retries);
 
@@ -6453,6 +6500,9 @@ static int ufs_get_device_desc(struct ufs_hba *hba,
        u8 model_index;
        u8 *desc_buf;
 
+       if (!dev_desc)
+               return -EINVAL;
+
        buff_len = max_t(size_t, hba->desc_size.dev_desc,
                         QUERY_DESC_MAX_SIZE + 1);
        desc_buf = kmalloc(buff_len, GFP_KERNEL);
@@ -6476,31 +6526,31 @@ static int ufs_get_device_desc(struct ufs_hba *hba,
                                     desc_buf[DEVICE_DESC_PARAM_MANF_ID + 1];
 
        model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME];
-
-       /* Zero-pad entire buffer for string termination. */
-       memset(desc_buf, 0, buff_len);
-
-       err = ufshcd_read_string_desc(hba, model_index, desc_buf,
-                                     QUERY_DESC_MAX_SIZE, true/*ASCII*/);
-       if (err) {
+       err = ufshcd_read_string_desc(hba, model_index,
+                                     &dev_desc->model, SD_ASCII_STD);
+       if (err < 0) {
                dev_err(hba->dev, "%s: Failed reading Product Name. err = %d\n",
                        __func__, err);
                goto out;
        }
 
-       desc_buf[QUERY_DESC_MAX_SIZE] = '\0';
-       strlcpy(dev_desc->model, (desc_buf + QUERY_DESC_HDR_SIZE),
-               min_t(u8, desc_buf[QUERY_DESC_LENGTH_OFFSET],
-                     MAX_MODEL_LEN));
-
-       /* Null terminate the model string */
-       dev_desc->model[MAX_MODEL_LEN] = '\0';
+       /*
+        * ufshcd_read_string_desc() returns the size of the string on
+        * success, so reset the error value to zero.
+        */
+       err = 0;
 
 out:
        kfree(desc_buf);
        return err;
 }
 
+static void ufs_put_device_desc(struct ufs_dev_desc *dev_desc)
+{
+       kfree(dev_desc->model);
+       dev_desc->model = NULL;
+}
+
 static void ufs_fixup_device_setup(struct ufs_hba *hba,
                                   struct ufs_dev_desc *dev_desc)
 {
@@ -6509,8 +6559,9 @@ static void ufs_fixup_device_setup(struct ufs_hba *hba,
        for (f = ufs_fixups; f->quirk; f++) {
                if ((f->card.wmanufacturerid == dev_desc->wmanufacturerid ||
                     f->card.wmanufacturerid == UFS_ANY_VENDOR) &&
-                   (STR_PRFX_EQUAL(f->card.model, dev_desc->model) ||
-                    !strcmp(f->card.model, UFS_ANY_MODEL)))
+                    ((dev_desc->model &&
+                      STR_PRFX_EQUAL(f->card.model, dev_desc->model)) ||
+                     !strcmp(f->card.model, UFS_ANY_MODEL)))
                        hba->dev_quirks |= f->quirk;
        }
 }
@@ -6681,17 +6732,8 @@ static void ufshcd_tune_unipro_params(struct ufs_hba *hba)
 
 static void ufshcd_clear_dbg_ufs_stats(struct ufs_hba *hba)
 {
-       int err_reg_hist_size = sizeof(struct ufs_uic_err_reg_hist);
-
        hba->ufs_stats.hibern8_exit_cnt = 0;
        hba->ufs_stats.last_hibern8_exit_tstamp = ktime_set(0, 0);
-
-       memset(&hba->ufs_stats.pa_err, 0, err_reg_hist_size);
-       memset(&hba->ufs_stats.dl_err, 0, err_reg_hist_size);
-       memset(&hba->ufs_stats.nl_err, 0, err_reg_hist_size);
-       memset(&hba->ufs_stats.tl_err, 0, err_reg_hist_size);
-       memset(&hba->ufs_stats.dme_err, 0, err_reg_hist_size);
-
        hba->req_abort_count = 0;
 }
 
@@ -6861,6 +6903,8 @@ static int ufshcd_probe_hba(struct ufs_hba *hba)
        }
 
        ufs_fixup_device_setup(hba, &card);
+       ufs_put_device_desc(&card);
+
        ufshcd_tune_unipro_params(hba);
 
        /* UFS device is also active now */
@@ -7823,6 +7867,8 @@ enable_gating:
        ufshcd_release(hba);
 out:
        hba->pm_op_in_progress = 0;
+       if (ret)
+               ufshcd_update_reg_hist(&hba->ufs_stats.suspend_err, (u32)ret);
        return ret;
 }
 
@@ -7925,6 +7971,8 @@ disable_irq_and_vops_clks:
        ufshcd_setup_clocks(hba, false);
 out:
        hba->pm_op_in_progress = 0;
+       if (ret)
+               ufshcd_update_reg_hist(&hba->ufs_stats.resume_err, (u32)ret);
        return ret;
 }
 
@@ -8324,6 +8372,9 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
                goto exit_gating;
        }
 
+       /* Reset the attached device */
+       ufshcd_vops_device_reset(hba);
+
        /* Host controller enable */
        err = ufshcd_hba_enable(hba);
        if (err) {
index 994d73d..c94cfda 100644 (file)
@@ -298,6 +298,7 @@ struct ufs_pwr_mode_info {
  * @resume: called during host controller PM callback
  * @dbg_register_dump: used to dump controller debug information
  * @phy_initialization: used to initialize phys
+ * @device_reset: called to issue a reset pulse on the UFS device
  */
 struct ufs_hba_variant_ops {
        const char *name;
@@ -326,6 +327,7 @@ struct ufs_hba_variant_ops {
        int     (*resume)(struct ufs_hba *, enum ufs_pm_op);
        void    (*dbg_register_dump)(struct ufs_hba *hba);
        int     (*phy_initialization)(struct ufs_hba *);
+       void    (*device_reset)(struct ufs_hba *hba);
 };
 
 /* clock gating state  */
@@ -412,17 +414,17 @@ struct ufs_init_prefetch {
        u32 icc_level;
 };
 
-#define UIC_ERR_REG_HIST_LENGTH 8
+#define UFS_ERR_REG_HIST_LENGTH 8
 /**
- * struct ufs_uic_err_reg_hist - keeps history of uic errors
+ * struct ufs_err_reg_hist - keeps history of errors
  * @pos: index to indicate cyclic buffer position
  * @reg: cyclic buffer for registers value
  * @tstamp: cyclic buffer for time stamp
  */
-struct ufs_uic_err_reg_hist {
+struct ufs_err_reg_hist {
        int pos;
-       u32 reg[UIC_ERR_REG_HIST_LENGTH];
-       ktime_t tstamp[UIC_ERR_REG_HIST_LENGTH];
+       u32 reg[UFS_ERR_REG_HIST_LENGTH];
+       ktime_t tstamp[UFS_ERR_REG_HIST_LENGTH];
 };
 
 /**
@@ -436,15 +438,37 @@ struct ufs_uic_err_reg_hist {
  * @nl_err: tracks nl-uic errors
  * @tl_err: tracks tl-uic errors
  * @dme_err: tracks dme errors
+ * @auto_hibern8_err: tracks auto-hibernate errors
+ * @fatal_err: tracks fatal errors
+ * @link_startup_err: tracks link-startup errors
+ * @resume_err: tracks resume errors
+ * @suspend_err: tracks suspend errors
+ * @dev_reset: tracks device reset events
+ * @host_reset: tracks host reset events
+ * @task_abort: tracks task abort events
  */
 struct ufs_stats {
        u32 hibern8_exit_cnt;
        ktime_t last_hibern8_exit_tstamp;
-       struct ufs_uic_err_reg_hist pa_err;
-       struct ufs_uic_err_reg_hist dl_err;
-       struct ufs_uic_err_reg_hist nl_err;
-       struct ufs_uic_err_reg_hist tl_err;
-       struct ufs_uic_err_reg_hist dme_err;
+
+       /* uic specific errors */
+       struct ufs_err_reg_hist pa_err;
+       struct ufs_err_reg_hist dl_err;
+       struct ufs_err_reg_hist nl_err;
+       struct ufs_err_reg_hist tl_err;
+       struct ufs_err_reg_hist dme_err;
+
+       /* fatal errors */
+       struct ufs_err_reg_hist auto_hibern8_err;
+       struct ufs_err_reg_hist fatal_err;
+       struct ufs_err_reg_hist link_startup_err;
+       struct ufs_err_reg_hist resume_err;
+       struct ufs_err_reg_hist suspend_err;
+
+       /* abnormal events */
+       struct ufs_err_reg_hist dev_reset;
+       struct ufs_err_reg_hist host_reset;
+       struct ufs_err_reg_hist task_abort;
 };
 
 /**
@@ -891,8 +915,11 @@ int ufshcd_query_attr(struct ufs_hba *hba, enum query_opcode opcode,
                      enum attr_idn idn, u8 index, u8 selector, u32 *attr_val);
 int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode,
        enum flag_idn idn, bool *flag_res);
-int ufshcd_read_string_desc(struct ufs_hba *hba, int desc_index,
-                           u8 *buf, u32 size, bool ascii);
+
+#define SD_ASCII_STD true
+#define SD_RAW false
+int ufshcd_read_string_desc(struct ufs_hba *hba, u8 desc_index,
+                           u8 **buf, bool ascii);
 
 int ufshcd_hold(struct ufs_hba *hba, bool async);
 void ufshcd_release(struct ufs_hba *hba);
@@ -1045,6 +1072,12 @@ static inline void ufshcd_vops_dbg_register_dump(struct ufs_hba *hba)
                hba->vops->dbg_register_dump(hba);
 }
 
+static inline void ufshcd_vops_device_reset(struct ufs_hba *hba)
+{
+       if (hba->vops && hba->vops->device_reset)
+               hba->vops->device_reset(hba);
+}
+
 extern struct ufs_pm_lvl_states ufs_pm_lvl_states[];
 
 /*
index 297e107..bfec84a 100644 (file)
@@ -30,6 +30,8 @@
 #include <linux/seqlock.h>
 #include <linux/blk-mq-virtio.h>
 
+#include "sd.h"
+
 #define VIRTIO_SCSI_MEMPOOL_SZ 64
 #define VIRTIO_SCSI_EVENT_LEN 8
 #define VIRTIO_SCSI_VQ_BASE 2
@@ -324,6 +326,36 @@ static void virtscsi_handle_param_change(struct virtio_scsi *vscsi,
        scsi_device_put(sdev);
 }
 
+static void virtscsi_rescan_hotunplug(struct virtio_scsi *vscsi)
+{
+       struct scsi_device *sdev;
+       struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
+       unsigned char scsi_cmd[MAX_COMMAND_SIZE];
+       int result, inquiry_len, inq_result_len = 256;
+       char *inq_result = kmalloc(inq_result_len, GFP_KERNEL);
+
+       shost_for_each_device(sdev, shost) {
+               inquiry_len = sdev->inquiry_len ? sdev->inquiry_len : 36;
+
+               memset(scsi_cmd, 0, sizeof(scsi_cmd));
+               scsi_cmd[0] = INQUIRY;
+               scsi_cmd[4] = (unsigned char) inquiry_len;
+
+               memset(inq_result, 0, inq_result_len);
+
+               result = scsi_execute_req(sdev, scsi_cmd, DMA_FROM_DEVICE,
+                                         inq_result, inquiry_len, NULL,
+                                         SD_TIMEOUT, SD_MAX_RETRIES, NULL);
+
+               if (result == 0 && inq_result[0] >> 5) {
+                       /* PQ indicates the LUN is not attached */
+                       scsi_remove_device(sdev);
+               }
+       }
+
+       kfree(inq_result);
+}
+
 static void virtscsi_handle_event(struct work_struct *work)
 {
        struct virtio_scsi_event_node *event_node =
@@ -335,6 +367,7 @@ static void virtscsi_handle_event(struct work_struct *work)
            cpu_to_virtio32(vscsi->vdev, VIRTIO_SCSI_T_EVENTS_MISSED)) {
                event->event &= ~cpu_to_virtio32(vscsi->vdev,
                                                   VIRTIO_SCSI_T_EVENTS_MISSED);
+               virtscsi_rescan_hotunplug(vscsi);
                scsi_scan_host(virtio_scsi_host(vscsi->vdev));
        }
 
@@ -369,14 +402,7 @@ static void virtscsi_event_done(struct virtqueue *vq)
        virtscsi_vq_done(vscsi, &vscsi->event_vq, virtscsi_complete_event);
 };
 
-/**
- * virtscsi_add_cmd - add a virtio_scsi_cmd to a virtqueue
- * @vq         : the struct virtqueue we're talking about
- * @cmd                : command structure
- * @req_size   : size of the request buffer
- * @resp_size  : size of the response buffer
- */
-static int virtscsi_add_cmd(struct virtqueue *vq,
+static int __virtscsi_add_cmd(struct virtqueue *vq,
                            struct virtio_scsi_cmd *cmd,
                            size_t req_size, size_t resp_size)
 {
@@ -421,17 +447,39 @@ static int virtscsi_add_cmd(struct virtqueue *vq,
        return virtqueue_add_sgs(vq, sgs, out_num, in_num, cmd, GFP_ATOMIC);
 }
 
-static int virtscsi_kick_cmd(struct virtio_scsi_vq *vq,
+/*
+ * virtscsi_kick_vq - notify the device about buffers already queued on @vq
+ * @vq: virtio-scsi queue wrapper (virtqueue plus its vq_lock)
+ *
+ * virtqueue_kick_prepare() runs under vq_lock; the notification itself is
+ * issued after the lock has been dropped.
+ */
+static void virtscsi_kick_vq(struct virtio_scsi_vq *vq)
+{
+       unsigned long irq_flags;
+       bool notify;
+
+       spin_lock_irqsave(&vq->vq_lock, irq_flags);
+       notify = virtqueue_kick_prepare(vq->vq);
+       spin_unlock_irqrestore(&vq->vq_lock, irq_flags);
+
+       if (!notify)
+               return;
+
+       virtqueue_notify(vq->vq);
+}
+
+/**
+ * virtscsi_add_cmd - add a virtio_scsi_cmd to a virtqueue, optionally kick it
+ * @vq         : the struct virtio_scsi_vq we're talking about
+ * @cmd                : command structure
+ * @req_size   : size of the request buffer
+ * @resp_size  : size of the response buffer
+ * @kick       : whether to kick the virtqueue immediately
+ */
+static int virtscsi_add_cmd(struct virtio_scsi_vq *vq,
                             struct virtio_scsi_cmd *cmd,
-                            size_t req_size, size_t resp_size)
+                            size_t req_size, size_t resp_size,
+                            bool kick)
 {
        unsigned long flags;
        int err;
        bool needs_kick = false;
 
        spin_lock_irqsave(&vq->vq_lock, flags);
-       err = virtscsi_add_cmd(vq->vq, cmd, req_size, resp_size);
-       if (!err)
+       err = __virtscsi_add_cmd(vq->vq, cmd, req_size, resp_size);
+       if (!err && kick)
                needs_kick = virtqueue_kick_prepare(vq->vq);
 
        spin_unlock_irqrestore(&vq->vq_lock, flags);
@@ -496,6 +544,7 @@ static int virtscsi_queuecommand(struct Scsi_Host *shost,
        struct virtio_scsi *vscsi = shost_priv(shost);
        struct virtio_scsi_vq *req_vq = virtscsi_pick_vq_mq(vscsi, sc);
        struct virtio_scsi_cmd *cmd = scsi_cmd_priv(sc);
+       bool kick;
        unsigned long flags;
        int req_size;
        int ret;
@@ -525,7 +574,8 @@ static int virtscsi_queuecommand(struct Scsi_Host *shost,
                req_size = sizeof(cmd->req.cmd);
        }
 
-       ret = virtscsi_kick_cmd(req_vq, cmd, req_size, sizeof(cmd->resp.cmd));
+       kick = (sc->flags & SCMD_LAST) != 0;
+       ret = virtscsi_add_cmd(req_vq, cmd, req_size, sizeof(cmd->resp.cmd), kick);
        if (ret == -EIO) {
                cmd->resp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET;
                spin_lock_irqsave(&req_vq->vq_lock, flags);
@@ -543,8 +593,8 @@ static int virtscsi_tmf(struct virtio_scsi *vscsi, struct virtio_scsi_cmd *cmd)
        int ret = FAILED;
 
        cmd->comp = &comp;
-       if (virtscsi_kick_cmd(&vscsi->ctrl_vq, cmd,
-                             sizeof cmd->req.tmf, sizeof cmd->resp.tmf) < 0)
+       if (virtscsi_add_cmd(&vscsi->ctrl_vq, cmd,
+                             sizeof cmd->req.tmf, sizeof cmd->resp.tmf, true) < 0)
                goto out;
 
        wait_for_completion(&comp);
@@ -658,6 +708,13 @@ static int virtscsi_map_queues(struct Scsi_Host *shost)
        return blk_mq_virtio_map_queues(qmap, vscsi->vdev, 2);
 }
 
+/*
+ * virtscsi_commit_rqs - kick the request virtqueue that backs @hwq
+ * @shost: SCSI host whose private data is the virtio_scsi instance
+ * @hwq: index into the req_vqs array
+ */
+static void virtscsi_commit_rqs(struct Scsi_Host *shost, u16 hwq)
+{
+       struct virtio_scsi *vscsi;
+       struct virtio_scsi_vq *req_vq;
+
+       vscsi = shost_priv(shost);
+       req_vq = &vscsi->req_vqs[hwq];
+       virtscsi_kick_vq(req_vq);
+}
+
 /*
  * The host guarantees to respond to each command, although I/O
  * latencies might be higher than on bare metal.  Reset the timer
@@ -675,6 +732,7 @@ static struct scsi_host_template virtscsi_host_template = {
        .this_id = -1,
        .cmd_size = sizeof(struct virtio_scsi_cmd),
        .queuecommand = virtscsi_queuecommand,
+       .commit_rqs = virtscsi_commit_rqs,
        .change_queue_depth = virtscsi_change_queue_depth,
        .eh_abort_handler = virtscsi_abort,
        .eh_device_reset_handler = virtscsi_device_reset,
index fb7b289..f81046f 100644 (file)
@@ -1854,6 +1854,7 @@ round_4(unsigned int x)
                case 1: --x;
                        break;
                case 2: ++x;
+                       /* fall through */
                case 3: ++x;
        }
        return x;
index 45b7e50..563894e 100644 (file)
@@ -7,6 +7,10 @@
 soundwire-bus-objs := bus_type.o bus.o slave.o mipi_disco.o stream.o
 obj-$(CONFIG_SOUNDWIRE) += soundwire-bus.o
 
+ifdef CONFIG_DEBUG_FS
+soundwire-bus-objs += debugfs.o
+endif
+
 #Cadence Objs
 soundwire-cadence-objs := cadence_master.o
 obj-$(CONFIG_SOUNDWIRE_CADENCE) += soundwire-cadence.o
index fe74583..fc53dbe 100644 (file)
@@ -49,6 +49,8 @@ int sdw_add_bus_master(struct sdw_bus *bus)
                }
        }
 
+       sdw_bus_debugfs_init(bus);
+
        /*
         * Device numbers in SoundWire are 0 through 15. Enumeration device
         * number (0), Broadcast device number (15), Group numbers (12 and
@@ -77,6 +79,8 @@ int sdw_add_bus_master(struct sdw_bus *bus)
         */
        if (IS_ENABLED(CONFIG_ACPI) && ACPI_HANDLE(bus->dev))
                ret = sdw_acpi_find_slaves(bus);
+       else if (IS_ENABLED(CONFIG_OF) && bus->dev->of_node)
+               ret = sdw_of_find_slaves(bus);
        else
                ret = -ENOTSUPP; /* No ACPI/DT so error out */
 
@@ -109,6 +113,8 @@ static int sdw_delete_slave(struct device *dev, void *data)
        struct sdw_slave *slave = dev_to_sdw_dev(dev);
        struct sdw_bus *bus = slave->bus;
 
+       sdw_slave_debugfs_exit(slave);
+
        mutex_lock(&bus->bus_lock);
 
        if (slave->dev_num) /* clear dev_num if assigned */
@@ -130,6 +136,8 @@ static int sdw_delete_slave(struct device *dev, void *data)
 void sdw_delete_bus_master(struct sdw_bus *bus)
 {
        device_for_each_child(bus->dev, NULL, sdw_delete_slave);
+
+       sdw_bus_debugfs_exit(bus);
 }
 EXPORT_SYMBOL(sdw_delete_bus_master);
 
@@ -470,7 +478,8 @@ static int sdw_assign_device_num(struct sdw_slave *slave)
 
        ret = sdw_write(slave, SDW_SCP_DEVNUMBER, dev_num);
        if (ret < 0) {
-               dev_err(&slave->dev, "Program device_num failed: %d\n", ret);
+               dev_err(&slave->dev, "Program device_num %d failed: %d\n",
+                       dev_num, ret);
                return ret;
        }
 
@@ -527,6 +536,7 @@ static int sdw_program_device_num(struct sdw_bus *bus)
        do {
                ret = sdw_transfer(bus, &msg);
                if (ret == -ENODATA) { /* end of device id reads */
+                       dev_dbg(bus->dev, "No more devices to enumerate\n");
                        ret = 0;
                        break;
                }
@@ -803,7 +813,7 @@ static int sdw_handle_port_interrupt(struct sdw_slave *slave,
 static int sdw_handle_slave_alerts(struct sdw_slave *slave)
 {
        struct sdw_slave_intr_status slave_intr;
-       u8 clear = 0, bit, port_status[15];
+       u8 clear = 0, bit, port_status[15] = {0};
        int port_num, stat, ret, count = 0;
        unsigned long port;
        bool slave_notify = false;
@@ -969,9 +979,15 @@ int sdw_handle_slave_status(struct sdw_bus *bus,
        int i, ret = 0;
 
        if (status[0] == SDW_SLAVE_ATTACHED) {
+               dev_dbg(bus->dev, "Slave attached, programming device number\n");
                ret = sdw_program_device_num(bus);
                if (ret)
                        dev_err(bus->dev, "Slave attach failed: %d\n", ret);
+               /*
+                * programming a device number will have side effects,
+                * so we deal with other devices at a later time
+                */
+               return ret;
        }
 
        /* Continue to check other slave statuses */
index 3048ca1..cb482da 100644 (file)
@@ -15,9 +15,26 @@ static inline int sdw_acpi_find_slaves(struct sdw_bus *bus)
 }
 #endif
 
+int sdw_of_find_slaves(struct sdw_bus *bus);
 void sdw_extract_slave_id(struct sdw_bus *bus,
                          u64 addr, struct sdw_slave_id *id);
 
+#ifdef CONFIG_DEBUG_FS
+void sdw_bus_debugfs_init(struct sdw_bus *bus);
+void sdw_bus_debugfs_exit(struct sdw_bus *bus);
+void sdw_slave_debugfs_init(struct sdw_slave *slave);
+void sdw_slave_debugfs_exit(struct sdw_slave *slave);
+void sdw_debugfs_init(void);
+void sdw_debugfs_exit(void);
+#else
+static inline void sdw_bus_debugfs_init(struct sdw_bus *bus) {}
+static inline void sdw_bus_debugfs_exit(struct sdw_bus *bus) {}
+static inline void sdw_slave_debugfs_init(struct sdw_slave *slave) {}
+static inline void sdw_slave_debugfs_exit(struct sdw_slave *slave) {}
+static inline void sdw_debugfs_init(void) {}
+static inline void sdw_debugfs_exit(void) {}
+#endif
+
 enum {
        SDW_MSG_FLAG_READ = 0,
        SDW_MSG_FLAG_WRITE,
@@ -49,8 +66,11 @@ struct sdw_msg {
 
 #define SDW_DOUBLE_RATE_FACTOR         2
 
-extern int rows[SDW_FRAME_ROWS];
-extern int cols[SDW_FRAME_COLS];
+extern int sdw_rows[SDW_FRAME_ROWS];
+extern int sdw_cols[SDW_FRAME_COLS];
+
+int sdw_find_row_index(int row);
+int sdw_find_col_index(int col);
 
 /**
  * sdw_port_runtime: Runtime port parameters for Master or Slave
index 2655602..4a465f5 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/pm_domain.h>
 #include <linux/soundwire/sdw.h>
 #include <linux/soundwire/sdw_type.h>
+#include "bus.h"
 
 /**
  * sdw_get_device_id - find the matching SoundWire device id
@@ -177,11 +178,13 @@ EXPORT_SYMBOL_GPL(sdw_unregister_driver);
 
 static int __init sdw_bus_init(void)
 {
+       sdw_debugfs_init();
        return bus_register(&sdw_bus_type);
 }
 
 static void __exit sdw_bus_exit(void)
 {
+       sdw_debugfs_exit();
        bus_unregister(&sdw_bus_type);
 }
 
index 60e8bde..502ed4e 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <linux/delay.h>
 #include <linux/device.h>
+#include <linux/debugfs.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
 #include "bus.h"
 #include "cadence_master.h"
 
+/*
+ * Optional override for the MCP interrupt mask: when non-zero it replaces
+ * the mask computed in _cdns_enable_interrupt().
+ * NOTE(review): the parameter is registered as "cnds_mcp_int_mask" (sic);
+ * the description has to use the same spelling to be attached to it.
+ */
+static int interrupt_mask;
+module_param_named(cnds_mcp_int_mask, interrupt_mask, int, 0444);
+MODULE_PARM_DESC(cnds_mcp_int_mask, "Cadence MCP IntMask");
+
 #define CDNS_MCP_CONFIG                                0x0
 
 #define CDNS_MCP_CONFIG_MCMD_RETRY             GENMASK(27, 24)
@@ -47,6 +52,8 @@
 #define CDNS_MCP_SSPSTAT                       0xC
 #define CDNS_MCP_FRAME_SHAPE                   0x10
 #define CDNS_MCP_FRAME_SHAPE_INIT              0x14
+#define CDNS_MCP_FRAME_SHAPE_COL_MASK          GENMASK(2, 0)
+#define CDNS_MCP_FRAME_SHAPE_ROW_OFFSET                3
 
 #define CDNS_MCP_CONFIG_UPDATE                 0x18
 #define CDNS_MCP_CONFIG_UPDATE_BIT             BIT(0)
@@ -56,6 +63,7 @@
 #define CDNS_MCP_SSP_CTRL1                     0x28
 #define CDNS_MCP_CLK_CTRL0                     0x30
 #define CDNS_MCP_CLK_CTRL1                     0x38
+#define CDNS_MCP_CLK_MCLKD_MASK                GENMASK(7, 0)
 
 #define CDNS_MCP_STAT                          0x40
 
 #define CDNS_MCP_INT_DPINT                     BIT(11)
 #define CDNS_MCP_INT_CTRL_CLASH                        BIT(10)
 #define CDNS_MCP_INT_DATA_CLASH                        BIT(9)
+#define CDNS_MCP_INT_PARITY                    BIT(8)
 #define CDNS_MCP_INT_CMD_ERR                   BIT(7)
+#define CDNS_MCP_INT_RX_NE                     BIT(3)
 #define CDNS_MCP_INT_RX_WL                     BIT(2)
 #define CDNS_MCP_INT_TXE                       BIT(1)
+#define CDNS_MCP_INT_TXF                       BIT(0)
 
 #define CDNS_MCP_INTSET                                0x4C
 
 #define CDNS_PDI_CONFIG_PORT                   GENMASK(4, 0)
 
 /* Driver defaults */
-
-#define CDNS_DEFAULT_CLK_DIVIDER               0
-#define CDNS_DEFAULT_FRAME_SHAPE               0x30
 #define CDNS_DEFAULT_SSP_INTERVAL              0x18
 #define CDNS_TX_TIMEOUT                                2000
 
@@ -224,6 +232,112 @@ static int cdns_clear_bit(struct sdw_cdns *cdns, int offset, u32 value)
 }
 
 /*
+ * debugfs
+ */
+#ifdef CONFIG_DEBUG_FS
+
+#define RD_BUF (2 * PAGE_SIZE)
+
+/*
+ * Append one "offset<TAB>value" line for register @reg to @buf at @pos.
+ * Returns the number of characters added, bounded by the RD_BUF space left.
+ */
+static ssize_t cdns_sprintf(struct sdw_cdns *cdns,
+                           char *buf, size_t pos, unsigned int reg)
+{
+       u32 value = cdns_readl(cdns, reg);
+
+       return scnprintf(buf + pos, RD_BUF - pos, "%4x\t%8x\n", reg, value);
+}
+
+/*
+ * cdns_reg_show - seq_file show callback dumping the Cadence registers
+ * @s: seq_file whose ->private is the struct sdw_cdns to dump
+ * @data: unused
+ *
+ * Formats the dump into a temporary RD_BUF-sized buffer (scnprintf() keeps
+ * every append within that bound) and emits it with a single seq_printf().
+ *
+ * Returns 0, or -ENOMEM if the temporary buffer cannot be allocated.
+ */
+static int cdns_reg_show(struct seq_file *s, void *data)
+{
+       struct sdw_cdns *cdns = s->private;
+       char *buf;
+       ssize_t ret;
+       int num_ports;
+       int i, j;
+
+       buf = kzalloc(RD_BUF, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       /* ret tracks the running write offset into buf */
+       ret = scnprintf(buf, RD_BUF, "Register  Value\n");
+       ret += scnprintf(buf + ret, RD_BUF - ret, "\nMCP Registers\n");
+       /* 8 MCP registers */
+       for (i = CDNS_MCP_CONFIG; i <= CDNS_MCP_PHYCTRL; i += sizeof(u32))
+               ret += cdns_sprintf(cdns, buf, ret, i);
+
+       ret += scnprintf(buf + ret, RD_BUF - ret,
+                        "\nStatus & Intr Registers\n");
+       /* 13 Status & Intr registers (offsets 0x70 and 0x74 not defined) */
+       for (i = CDNS_MCP_STAT; i <=  CDNS_MCP_FIFOSTAT; i += sizeof(u32))
+               ret += cdns_sprintf(cdns, buf, ret, i);
+
+       ret += scnprintf(buf + ret, RD_BUF - ret,
+                        "\nSSP & Clk ctrl Registers\n");
+       ret += cdns_sprintf(cdns, buf, ret, CDNS_MCP_SSP_CTRL0);
+       ret += cdns_sprintf(cdns, buf, ret, CDNS_MCP_SSP_CTRL1);
+       ret += cdns_sprintf(cdns, buf, ret, CDNS_MCP_CLK_CTRL0);
+       ret += cdns_sprintf(cdns, buf, ret, CDNS_MCP_CLK_CTRL1);
+
+       ret += scnprintf(buf + ret, RD_BUF - ret,
+                        "\nDPn B0 Registers\n");
+
+       /*
+        * in sdw_cdns_pdi_init() we filter out the Bulk PDIs,
+        * so the indices need to be corrected again
+        */
+       num_ports = cdns->num_ports + CDNS_PCM_PDI_OFFSET;
+
+       for (i = 0; i < num_ports; i++) {
+               ret += scnprintf(buf + ret, RD_BUF - ret,
+                                "\nDP-%d\n", i);
+               for (j = CDNS_DPN_B0_CONFIG(i);
+                    j < CDNS_DPN_B0_ASYNC_CTRL(i); j += sizeof(u32))
+                       ret += cdns_sprintf(cdns, buf, ret, j);
+       }
+
+       ret += scnprintf(buf + ret, RD_BUF - ret,
+                        "\nDPn B1 Registers\n");
+       for (i = 0; i < num_ports; i++) {
+               ret += scnprintf(buf + ret, RD_BUF - ret,
+                                "\nDP-%d\n", i);
+
+               for (j = CDNS_DPN_B1_CONFIG(i);
+                    j < CDNS_DPN_B1_ASYNC_CTRL(i); j += sizeof(u32))
+                       ret += cdns_sprintf(cdns, buf, ret, j);
+       }
+
+       ret += scnprintf(buf + ret, RD_BUF - ret,
+                        "\nDPn Control Registers\n");
+       for (i = 0; i < num_ports; i++)
+               ret += cdns_sprintf(cdns, buf, ret,
+                               CDNS_PORTCTRL + i * CDNS_PORT_OFFSET);
+
+       ret += scnprintf(buf + ret, RD_BUF - ret,
+                        "\nPDIn Config Registers\n");
+
+       /* number of PDI and ports is interchangeable */
+       for (i = 0; i < num_ports; i++)
+               ret += cdns_sprintf(cdns, buf, ret, CDNS_PDI_CONFIG(i));
+
+       /* hand the complete text to the seq_file, then drop the scratch buffer */
+       seq_printf(s, "%s", buf);
+       kfree(buf);
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(cdns_reg);
+
+/**
+ * sdw_cdns_debugfs_init() - Cadence debugfs init
+ * @cdns: Cadence instance
+ * @root: debugfs root
+ *
+ * Creates the owner-read-only (0400) "cdns-registers" file under @root,
+ * backed by cdns_reg_fops with @cdns as its private data.
+ */
+void sdw_cdns_debugfs_init(struct sdw_cdns *cdns, struct dentry *root)
+{
+       debugfs_create_file("cdns-registers", 0400, root, cdns, &cdns_reg_fops);
+}
+EXPORT_SYMBOL_GPL(sdw_cdns_debugfs_init);
+
+#endif /* CONFIG_DEBUG_FS */
+
+/*
  * IO Calls
  */
 static enum sdw_command_response
@@ -575,10 +689,14 @@ irqreturn_t sdw_cdns_irq(int irq, void *dev_id)
                }
        }
 
+       if (int_status & CDNS_MCP_INT_PARITY) {
+               /* Parity error detected by Master */
+               dev_err_ratelimited(cdns->dev, "Parity error\n");
+       }
+
        if (int_status & CDNS_MCP_INT_CTRL_CLASH) {
                /* Slave is driving bit slot during control word */
                dev_err_ratelimited(cdns->dev, "Bus clash for control word\n");
-               int_status |= CDNS_MCP_INT_CTRL_CLASH;
        }
 
        if (int_status & CDNS_MCP_INT_DATA_CLASH) {
@@ -587,7 +705,6 @@ irqreturn_t sdw_cdns_irq(int irq, void *dev_id)
                 * ownership of data bits or Slave gone bonkers
                 */
                dev_err_ratelimited(cdns->dev, "Bus clash for data word\n");
-               int_status |= CDNS_MCP_INT_DATA_CLASH;
        }
 
        if (int_status & CDNS_MCP_INT_SLAVE_MASK) {
@@ -644,10 +761,26 @@ static int _cdns_enable_interrupt(struct sdw_cdns *cdns)
        cdns_writel(cdns, CDNS_MCP_SLAVE_INTMASK1,
                    CDNS_MCP_SLAVE_INTMASK1_MASK);
 
-       mask = CDNS_MCP_INT_SLAVE_RSVD | CDNS_MCP_INT_SLAVE_ALERT |
-               CDNS_MCP_INT_SLAVE_ATTACH | CDNS_MCP_INT_SLAVE_NATTACH |
-               CDNS_MCP_INT_CTRL_CLASH | CDNS_MCP_INT_DATA_CLASH |
-               CDNS_MCP_INT_RX_WL | CDNS_MCP_INT_IRQ | CDNS_MCP_INT_DPINT;
+       /* enable detection of all slave state changes */
+       mask = CDNS_MCP_INT_SLAVE_MASK;
+
+       /* enable detection of bus issues */
+       mask |= CDNS_MCP_INT_CTRL_CLASH | CDNS_MCP_INT_DATA_CLASH |
+               CDNS_MCP_INT_PARITY;
+
+       /* no detection of port interrupts for now */
+
+       /* enable detection of RX fifo level */
+       mask |= CDNS_MCP_INT_RX_WL;
+
+       /*
+        * CDNS_MCP_INT_IRQ needs to be set otherwise all previous
+        * settings are irrelevant
+        */
+       mask |= CDNS_MCP_INT_IRQ;
+
+       if (interrupt_mask) /* parameter override */
+               mask = interrupt_mask;
 
        cdns_writel(cdns, CDNS_MCP_INTMASK, mask);
 
@@ -788,13 +921,30 @@ int sdw_cdns_pdi_init(struct sdw_cdns *cdns,
 }
 EXPORT_SYMBOL(sdw_cdns_pdi_init);
 
+/*
+ * Build a frame shape register value from a row and a column count: the
+ * row index is shifted up by CDNS_MCP_FRAME_SHAPE_ROW_OFFSET, the column
+ * index (masked with CDNS_MCP_FRAME_SHAPE_COL_MASK) fills the low bits.
+ */
+static u32 cdns_set_initial_frame_shape(int n_rows, int n_cols)
+{
+       int row_idx = sdw_find_row_index(n_rows);
+       int col_idx = sdw_find_col_index(n_cols);
+
+       col_idx &= CDNS_MCP_FRAME_SHAPE_COL_MASK;
+
+       return (row_idx << CDNS_MCP_FRAME_SHAPE_ROW_OFFSET) | col_idx;
+}
+
 /**
  * sdw_cdns_init() - Cadence initialization
  * @cdns: Cadence instance
  */
 int sdw_cdns_init(struct sdw_cdns *cdns)
 {
+       struct sdw_bus *bus = &cdns->bus;
+       struct sdw_master_prop *prop = &bus->prop;
        u32 val;
+       int divider;
        int ret;
 
        /* Exit clock stop */
@@ -806,12 +956,20 @@ int sdw_cdns_init(struct sdw_cdns *cdns)
        }
 
        /* Set clock divider */
-       val = cdns_readl(cdns, CDNS_MCP_CLK_CTRL0);
-       val |= CDNS_DEFAULT_CLK_DIVIDER;
-       cdns_writel(cdns, CDNS_MCP_CLK_CTRL0, val);
+       divider = (prop->mclk_freq / prop->max_clk_freq) - 1;
 
-       /* Set the default frame shape */
-       cdns_writel(cdns, CDNS_MCP_FRAME_SHAPE_INIT, CDNS_DEFAULT_FRAME_SHAPE);
+       cdns_updatel(cdns, CDNS_MCP_CLK_CTRL0,
+                    CDNS_MCP_CLK_MCLKD_MASK, divider);
+       cdns_updatel(cdns, CDNS_MCP_CLK_CTRL1,
+                    CDNS_MCP_CLK_MCLKD_MASK, divider);
+
+       /*
+        * Frame shape changes after initialization have to be done
+        * with the bank switch mechanism
+        */
+       val = cdns_set_initial_frame_shape(prop->default_row,
+                                          prop->default_col);
+       cdns_writel(cdns, CDNS_MCP_FRAME_SHAPE_INIT, val);
 
        /* Set SSP interval to default value */
        cdns_writel(cdns, CDNS_MCP_SSP_CTRL0, CDNS_DEFAULT_SSP_INTERVAL);
@@ -851,8 +1009,9 @@ EXPORT_SYMBOL(sdw_cdns_init);
 
 int cdns_bus_conf(struct sdw_bus *bus, struct sdw_bus_params *params)
 {
+       struct sdw_master_prop *prop = &bus->prop;
        struct sdw_cdns *cdns = bus_to_cdns(bus);
-       int mcp_clkctrl_off, mcp_clkctrl;
+       int mcp_clkctrl_off;
        int divider;
 
        if (!params->curr_dr_freq) {
@@ -860,16 +1019,16 @@ int cdns_bus_conf(struct sdw_bus *bus, struct sdw_bus_params *params)
                return -EINVAL;
        }
 
-       divider = (params->max_dr_freq / params->curr_dr_freq) - 1;
+       divider = prop->mclk_freq * SDW_DOUBLE_RATE_FACTOR /
+               params->curr_dr_freq;
+       divider--; /* divider is 1/(N+1) */
 
        if (params->next_bank)
                mcp_clkctrl_off = CDNS_MCP_CLK_CTRL1;
        else
                mcp_clkctrl_off = CDNS_MCP_CLK_CTRL0;
 
-       mcp_clkctrl = cdns_readl(cdns, mcp_clkctrl_off);
-       mcp_clkctrl |= divider;
-       cdns_writel(cdns, mcp_clkctrl_off, mcp_clkctrl);
+       cdns_updatel(cdns, mcp_clkctrl_off, CDNS_MCP_CLK_MCLKD_MASK, divider);
 
        return 0;
 }
@@ -1170,19 +1329,5 @@ int sdw_cdns_alloc_stream(struct sdw_cdns *cdns,
 }
 EXPORT_SYMBOL(sdw_cdns_alloc_stream);
 
-void sdw_cdns_shutdown(struct snd_pcm_substream *substream,
-                      struct snd_soc_dai *dai)
-{
-       struct sdw_cdns_dma_data *dma;
-
-       dma = snd_soc_dai_get_dma_data(dai, substream);
-       if (!dma)
-               return;
-
-       snd_soc_dai_set_dma_data(dai, substream, NULL);
-       kfree(dma);
-}
-EXPORT_SYMBOL(sdw_cdns_shutdown);
-
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("Cadence Soundwire Library");
index fe2af62..0b72b70 100644 (file)
@@ -163,6 +163,10 @@ int sdw_cdns_pdi_init(struct sdw_cdns *cdns,
                      struct sdw_cdns_stream_config config);
 int sdw_cdns_enable_interrupt(struct sdw_cdns *cdns);
 
+#ifdef CONFIG_DEBUG_FS
+void sdw_cdns_debugfs_init(struct sdw_cdns *cdns, struct dentry *root);
+#endif
+
 int sdw_cdns_get_stream(struct sdw_cdns *cdns,
                        struct sdw_cdns_streams *stream,
                        u32 ch, u32 dir);
@@ -172,8 +176,6 @@ int sdw_cdns_alloc_stream(struct sdw_cdns *cdns,
 void sdw_cdns_config_stream(struct sdw_cdns *cdns, struct sdw_cdns_port *port,
                            u32 ch, u32 dir, struct sdw_cdns_pdi *pdi);
 
-void sdw_cdns_shutdown(struct snd_pcm_substream *substream,
-                      struct snd_soc_dai *dai);
 int sdw_cdns_pcm_set_stream(struct snd_soc_dai *dai,
                            void *stream, int direction);
 int sdw_cdns_pdm_set_stream(struct snd_soc_dai *dai,
diff --git a/drivers/soundwire/debugfs.c b/drivers/soundwire/debugfs.c
new file mode 100644 (file)
index 0000000..fb1140e
--- /dev/null
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2017-2019 Intel Corporation.
+
+#include <linux/device.h>
+#include <linux/debugfs.h>
+#include <linux/mod_devicetable.h>
+#include <linux/slab.h>
+#include <linux/soundwire/sdw.h>
+#include <linux/soundwire/sdw_registers.h>
+#include "bus.h"
+
+static struct dentry *sdw_debugfs_root;
+
+/*
+ * Create the per-link "master-<link_id>" directory under the SoundWire
+ * debugfs root and remember it in bus->debugfs. Does nothing while
+ * sdw_debugfs_root is NULL.
+ */
+void sdw_bus_debugfs_init(struct sdw_bus *bus)
+{
+       char dir_name[16];
+
+       if (sdw_debugfs_root) {
+               /* create the debugfs master-N */
+               snprintf(dir_name, sizeof(dir_name), "master-%d",
+                        bus->link_id);
+               bus->debugfs = debugfs_create_dir(dir_name, sdw_debugfs_root);
+       }
+}
+
+/*
+ * Remove the directory stored in bus->debugfs together with everything
+ * below it.
+ */
+void sdw_bus_debugfs_exit(struct sdw_bus *bus)
+{
+       debugfs_remove_recursive(bus->debugfs);
+}
+
+#define RD_BUF (3 * PAGE_SIZE)
+
+/*
+ * Append one line for Slave register @reg to @buf at offset @pos: the value
+ * in hex, or "XX" when sdw_read() returns a negative error code.
+ * Returns the number of characters added.
+ */
+static ssize_t sdw_sprintf(struct sdw_slave *slave,
+                          char *buf, size_t pos, unsigned int reg)
+{
+       char *dst = buf + pos;
+       size_t room = RD_BUF - pos;
+       int val = sdw_read(slave, reg);
+
+       if (val >= 0)
+               return scnprintf(dst, room, "%3x\t%2x\n", reg, val);
+
+       return scnprintf(dst, room, "%3x\tXX\n", reg);
+}
+
+/*
+ * sdw_slave_reg_show - seq_file show callback dumping a Slave's registers
+ * @s_file: seq_file whose ->private is the struct sdw_slave to dump
+ * @data: unused
+ *
+ * Reads the DP0, SCP and DP1..14 registers through sdw_sprintf(), collects
+ * the text in a temporary RD_BUF-sized buffer and emits it with a single
+ * seq_printf().
+ *
+ * Returns 0, or -ENOMEM if the temporary buffer cannot be allocated.
+ */
+static int sdw_slave_reg_show(struct seq_file *s_file, void *data)
+{
+       struct sdw_slave *slave = s_file->private;
+       char *buf;
+       ssize_t ret;
+       int i, j;
+
+       buf = kzalloc(RD_BUF, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       /* ret tracks the running write offset into buf */
+       ret = scnprintf(buf, RD_BUF, "Register  Value\n");
+
+       /* DP0 non-banked registers */
+       ret += scnprintf(buf + ret, RD_BUF - ret, "\nDP0\n");
+       for (i = SDW_DP0_INT; i <= SDW_DP0_PREPARECTRL; i++)
+               ret += sdw_sprintf(slave, buf, ret, i);
+
+       /* DP0 Bank 0 registers */
+       ret += scnprintf(buf + ret, RD_BUF - ret, "Bank0\n");
+       ret += sdw_sprintf(slave, buf, ret, SDW_DP0_CHANNELEN);
+       for (i = SDW_DP0_SAMPLECTRL1; i <= SDW_DP0_LANECTRL; i++)
+               ret += sdw_sprintf(slave, buf, ret, i);
+
+       /* DP0 Bank 1 registers */
+       ret += scnprintf(buf + ret, RD_BUF - ret, "Bank1\n");
+       ret += sdw_sprintf(slave, buf, ret,
+                       SDW_DP0_CHANNELEN + SDW_BANK1_OFFSET);
+       for (i = SDW_DP0_SAMPLECTRL1 + SDW_BANK1_OFFSET;
+                       i <= SDW_DP0_LANECTRL + SDW_BANK1_OFFSET; i++)
+               ret += sdw_sprintf(slave, buf, ret, i);
+
+       /* SCP registers */
+       ret += scnprintf(buf + ret, RD_BUF - ret, "\nSCP\n");
+       for (i = SDW_SCP_INT1; i <= SDW_SCP_BANKDELAY; i++)
+               ret += sdw_sprintf(slave, buf, ret, i);
+       for (i = SDW_SCP_DEVID_0; i <= SDW_SCP_DEVID_5; i++)
+               ret += sdw_sprintf(slave, buf, ret, i);
+
+       /*
+        * SCP Bank 0/1 registers are read-only and cannot be
+        * retrieved from the Slave. The Master typically keeps track
+        * of the current frame size so the information can be found
+        * in other places
+        */
+
+       /* DP1..14 registers */
+       for (i = 1; SDW_VALID_PORT_RANGE(i); i++) {
+
+               /* DPi registers */
+               ret += scnprintf(buf + ret, RD_BUF - ret, "\nDP%d\n", i);
+               for (j = SDW_DPN_INT(i); j <= SDW_DPN_PREPARECTRL(i); j++)
+                       ret += sdw_sprintf(slave, buf, ret, j);
+
+               /* DPi Bank0 registers */
+               ret += scnprintf(buf + ret, RD_BUF - ret, "Bank0\n");
+               for (j = SDW_DPN_CHANNELEN_B0(i);
+                    j <= SDW_DPN_LANECTRL_B0(i); j++)
+                       ret += sdw_sprintf(slave, buf, ret, j);
+
+               /* DPi Bank1 registers */
+               ret += scnprintf(buf + ret, RD_BUF - ret, "Bank1\n");
+               for (j = SDW_DPN_CHANNELEN_B1(i);
+                    j <= SDW_DPN_LANECTRL_B1(i); j++)
+                       ret += sdw_sprintf(slave, buf, ret, j);
+       }
+
+       /* hand the complete text to the seq_file, then drop the scratch buffer */
+       seq_printf(s_file, "%s", buf);
+       kfree(buf);
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(sdw_slave_reg);
+
+/*
+ * Create a directory named after the Slave device under its bus debugfs
+ * directory, add a 0400 "registers" file to it, and store the directory
+ * in slave->debugfs.
+ */
+void sdw_slave_debugfs_init(struct sdw_slave *slave)
+{
+       struct dentry *parent = slave->bus->debugfs;
+       char dir_name[32];
+
+       /* create the debugfs slave-name */
+       snprintf(dir_name, sizeof(dir_name), "%s", dev_name(&slave->dev));
+       slave->debugfs = debugfs_create_dir(dir_name, parent);
+
+       debugfs_create_file("registers", 0400, slave->debugfs, slave,
+                           &sdw_slave_reg_fops);
+}
+
+/*
+ * Remove the directory stored in slave->debugfs together with everything
+ * below it.
+ */
+void sdw_slave_debugfs_exit(struct sdw_slave *slave)
+{
+       debugfs_remove_recursive(slave->debugfs);
+}
+
+/*
+ * Create the "soundwire" debugfs directory (NULL parent) and keep it in
+ * sdw_debugfs_root for the per-bus directories to hang off.
+ */
+void sdw_debugfs_init(void)
+{
+       sdw_debugfs_root = debugfs_create_dir("soundwire", NULL);
+}
+
+/* Remove the "soundwire" debugfs directory and everything below it. */
+void sdw_debugfs_exit(void)
+{
+       debugfs_remove_recursive(sdw_debugfs_root);
+}
index 317873b..f1e38a2 100644 (file)
@@ -6,6 +6,7 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
@@ -16,6 +17,7 @@
 #include <linux/soundwire/sdw.h>
 #include <linux/soundwire/sdw_intel.h>
 #include "cadence_master.h"
+#include "bus.h"
 #include "intel.h"
 
 /* Intel SHIM Registers Definition */
 
 /* Intel ALH Register definitions */
 #define SDW_ALH_STRMZCFG(x)            (0x000 + (0x4 * (x)))
+#define SDW_ALH_NUM_STREAMS            64
 
 #define SDW_ALH_STRMZCFG_DMAT_VAL      0x3
 #define SDW_ALH_STRMZCFG_DMAT          GENMASK(7, 0)
 #define SDW_ALH_STRMZCFG_CHN           GENMASK(19, 16)
 
+#define SDW_INTEL_QUIRK_MASK_BUS_DISABLE       BIT(1)
+
 enum intel_pdi_type {
        INTEL_PDI_IN = 0,
        INTEL_PDI_OUT = 1,
@@ -98,6 +103,9 @@ struct sdw_intel {
        struct sdw_cdns cdns;
        int instance;
        struct sdw_intel_link_res *res;
+#ifdef CONFIG_DEBUG_FS
+       struct dentry *debugfs;
+#endif
 };
 
 #define cdns_to_intel(_cdns) container_of(_cdns, struct sdw_intel, cdns)
@@ -162,6 +170,118 @@ static int intel_set_bit(void __iomem *base, int offset, u32 value, u32 mask)
 }
 
 /*
+ * debugfs
+ */
+#ifdef CONFIG_DEBUG_FS
+
+#define RD_BUF (2 * PAGE_SIZE)
+
+/*
+ * Append one "offset<TAB>value" line for register @reg to @buf at @pos.
+ * @l selects the access helper: intel_readl() when true, intel_readw()
+ * otherwise. Returns the number of characters added.
+ */
+static ssize_t intel_sprintf(void __iomem *mem, bool l,
+                            char *buf, size_t pos, unsigned int reg)
+{
+       int value = l ? intel_readl(mem, reg) : intel_readw(mem, reg);
+
+       return scnprintf(buf + pos, RD_BUF - pos, "%4x\t%4x\n", reg, value);
+}
+
+/*
+ * intel_reg_show - seq_file show callback dumping Intel shim/ALH registers
+ * @s_file: seq_file whose ->private is the struct sdw_intel to dump
+ * @data: unused
+ *
+ * Dumps the shim registers, the per-link registers, the wake registers and
+ * the ALH STRMzCFG registers into a temporary RD_BUF-sized buffer, then
+ * emits the text with a single seq_printf().
+ *
+ * Returns 0, or -ENOMEM if the temporary buffer cannot be allocated.
+ */
+static int intel_reg_show(struct seq_file *s_file, void *data)
+{
+       struct sdw_intel *sdw = s_file->private;
+       void __iomem *s = sdw->res->shim;
+       void __iomem *a = sdw->res->alh;
+       char *buf;
+       ssize_t ret;
+       int i, j;
+       unsigned int links, reg;
+
+       buf = kzalloc(RD_BUF, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       /* link count is taken from the low three bits of LCAP */
+       links = intel_readl(s, SDW_SHIM_LCAP) & GENMASK(2, 0);
+
+       /* ret tracks the running write offset into buf */
+       ret = scnprintf(buf, RD_BUF, "Register  Value\n");
+       ret += scnprintf(buf + ret, RD_BUF - ret, "\nShim\n");
+
+       for (i = 0; i < links; i++) {
+               reg = SDW_SHIM_LCAP + i * 4;
+               ret += intel_sprintf(s, true, buf, ret, reg);
+       }
+
+       for (i = 0; i < links; i++) {
+               ret += scnprintf(buf + ret, RD_BUF - ret, "\nLink%d\n", i);
+               ret += intel_sprintf(s, false, buf, ret, SDW_SHIM_CTLSCAP(i));
+               ret += intel_sprintf(s, false, buf, ret, SDW_SHIM_CTLS0CM(i));
+               ret += intel_sprintf(s, false, buf, ret, SDW_SHIM_CTLS1CM(i));
+               ret += intel_sprintf(s, false, buf, ret, SDW_SHIM_CTLS2CM(i));
+               ret += intel_sprintf(s, false, buf, ret, SDW_SHIM_CTLS3CM(i));
+               ret += intel_sprintf(s, false, buf, ret, SDW_SHIM_PCMSCAP(i));
+
+               ret += scnprintf(buf + ret, RD_BUF - ret, "\n PCMSyCH registers\n");
+
+               /*
+                * the value 10 is the number of PDIs. We will need a
+                * cleanup to remove hard-coded Intel configurations
+                * from cadence_master.c
+                */
+               for (j = 0; j < 10; j++) {
+                       ret += intel_sprintf(s, false, buf, ret,
+                                       SDW_SHIM_PCMSYCHM(i, j));
+                       ret += intel_sprintf(s, false, buf, ret,
+                                       SDW_SHIM_PCMSYCHC(i, j));
+               }
+               ret += scnprintf(buf + ret, RD_BUF - ret, "\n PDMSCAP, IOCTL, CTMCTL\n");
+
+               ret += intel_sprintf(s, false, buf, ret, SDW_SHIM_PDMSCAP(i));
+               ret += intel_sprintf(s, false, buf, ret, SDW_SHIM_IOCTL(i));
+               ret += intel_sprintf(s, false, buf, ret, SDW_SHIM_CTMCTL(i));
+       }
+
+       ret += scnprintf(buf + ret, RD_BUF - ret, "\nWake registers\n");
+       ret += intel_sprintf(s, false, buf, ret, SDW_SHIM_WAKEEN);
+       ret += intel_sprintf(s, false, buf, ret, SDW_SHIM_WAKESTS);
+
+       ret += scnprintf(buf + ret, RD_BUF - ret, "\nALH STRMzCFG\n");
+       for (i = 0; i < SDW_ALH_NUM_STREAMS; i++)
+               ret += intel_sprintf(a, true, buf, ret, SDW_ALH_STRMZCFG(i));
+
+       /* hand the complete text to the seq_file, then drop the scratch buffer */
+       seq_printf(s_file, "%s", buf);
+       kfree(buf);
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(intel_reg);
+
+/*
+ * Create the "intel-sdw" directory below the bus debugfs directory, add
+ * the 0400 "intel-registers" file and let the Cadence layer add its own
+ * entries next to it. Does nothing when the bus has no debugfs directory.
+ */
+static void intel_debugfs_init(struct sdw_intel *sdw)
+{
+       struct dentry *bus_dir = sdw->cdns.bus.debugfs;
+       struct dentry *dir;
+
+       if (!bus_dir)
+               return;
+
+       dir = debugfs_create_dir("intel-sdw", bus_dir);
+       sdw->debugfs = dir;
+
+       debugfs_create_file("intel-registers", 0400, dir, sdw,
+                           &intel_reg_fops);
+       sdw_cdns_debugfs_init(&sdw->cdns, dir);
+}
+
+/*
+ * Remove the directory stored in sdw->debugfs together with everything
+ * below it.
+ */
+static void intel_debugfs_exit(struct sdw_intel *sdw)
+{
+       debugfs_remove_recursive(sdw->debugfs);
+}
+#else
+static void intel_debugfs_init(struct sdw_intel *sdw) {}
+static void intel_debugfs_exit(struct sdw_intel *sdw) {}
+#endif /* CONFIG_DEBUG_FS */
+
+/*
  * shim ops
  */
 
@@ -289,6 +409,16 @@ intel_pdi_get_ch_cap(struct sdw_intel *sdw, unsigned int pdi_num, bool pcm)
 
        if (pcm) {
                count = intel_readw(shim, SDW_SHIM_PCMSYCHC(link_id, pdi_num));
+
+               /*
+                * WORKAROUND: on all existing Intel controllers, pdi
+                * number 2 reports channel count as 1 even though it
+                * supports 8 channels. Performing hardcoding for pdi
+                * number 2.
+                */
+               if (pdi_num == 2)
+                       count = 7;
+
        } else {
                count = intel_readw(shim, SDW_SHIM_PDMSCAP(link_id));
                count = ((count & SDW_SHIM_PDMSCAP_CPSS) >>
@@ -397,8 +527,10 @@ static int intel_config_stream(struct sdw_intel *sdw,
                               struct snd_soc_dai *dai,
                               struct snd_pcm_hw_params *hw_params, int link_id)
 {
-       if (sdw->res->ops && sdw->res->ops->config_stream)
-               return sdw->res->ops->config_stream(sdw->res->arg,
+       struct sdw_intel_link_res *res = sdw->res;
+
+       if (res->ops && res->ops->config_stream && res->arg)
+               return res->ops->config_stream(res->arg,
                                substream, dai, hw_params, link_id);
 
        return -EIO;
@@ -649,6 +781,19 @@ intel_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
        return ret;
 }
 
+/*
+ * DAI shutdown callback: if DMA data is attached to this DAI/substream
+ * pair, detach it (set it to NULL) and free it.
+ */
+static void intel_shutdown(struct snd_pcm_substream *substream,
+                          struct snd_soc_dai *dai)
+{
+       struct sdw_cdns_dma_data *dma_data =
+               snd_soc_dai_get_dma_data(dai, substream);
+
+       if (dma_data) {
+               /* clear the pointer first so nothing can reach freed memory */
+               snd_soc_dai_set_dma_data(dai, substream, NULL);
+               kfree(dma_data);
+       }
+}
+
 static int intel_pcm_set_sdw_stream(struct snd_soc_dai *dai,
                                    void *stream, int direction)
 {
@@ -664,14 +809,14 @@ static int intel_pdm_set_sdw_stream(struct snd_soc_dai *dai,
 static const struct snd_soc_dai_ops intel_pcm_dai_ops = {
        .hw_params = intel_hw_params,
        .hw_free = intel_hw_free,
-       .shutdown = sdw_cdns_shutdown,
+       .shutdown = intel_shutdown,
        .set_sdw_stream = intel_pcm_set_sdw_stream,
 };
 
 static const struct snd_soc_dai_ops intel_pdm_dai_ops = {
        .hw_params = intel_hw_params,
        .hw_free = intel_hw_free,
-       .shutdown = sdw_cdns_shutdown,
+       .shutdown = intel_shutdown,
        .set_sdw_stream = intel_pdm_set_sdw_stream,
 };
 
@@ -796,21 +941,44 @@ static int intel_register_dai(struct sdw_intel *sdw)
                                          dais, num_dai);
 }
 
+/*
+ * Read the Intel-specific link properties from the firmware node named
+ * "mipi-sdw-link-<link_id>-subproperties".
+ *
+ * "intel-sdw-ip-clock" is stored in prop->mclk_freq, and the bus is marked
+ * hw_disabled when "intel-quirk-mask" has SDW_INTEL_QUIRK_MASK_BUS_DISABLE
+ * set. The return values of both property reads are not checked, i.e. the
+ * properties are treated as optional.
+ *
+ * Returns 0 on success, -EIO when the link node cannot be found.
+ */
+static int sdw_master_read_intel_prop(struct sdw_bus *bus)
+{
+       struct sdw_master_prop *prop = &bus->prop;
+       struct fwnode_handle *link;
+       char name[32];
+       /*
+        * Must start at 0: when "intel-quirk-mask" is absent the read below
+        * fails and the test on quirk_mask would otherwise use stack garbage.
+        */
+       u32 quirk_mask = 0;
+
+       /* Find master handle */
+       snprintf(name, sizeof(name),
+                "mipi-sdw-link-%d-subproperties", bus->link_id);
+
+       link = device_get_named_child_node(bus->dev, name);
+       if (!link) {
+               dev_err(bus->dev, "Master node %s not found\n", name);
+               return -EIO;
+       }
+
+       fwnode_property_read_u32(link, "intel-sdw-ip-clock", &prop->mclk_freq);
+       fwnode_property_read_u32(link, "intel-quirk-mask", &quirk_mask);
+
+       /* drop the reference taken by device_get_named_child_node() */
+       fwnode_handle_put(link);
+
+       if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE)
+               prop->hw_disabled = true;
+
+       return 0;
+}
+
 static int intel_prop_read(struct sdw_bus *bus)
 {
        /* Initialize with default handler to read all DisCo properties */
        sdw_master_read_prop(bus);
 
-       /* BIOS is not giving some values correctly. So, lets override them */
-       bus->prop.num_clk_freq = 1;
-       bus->prop.clk_freq = devm_kcalloc(bus->dev, bus->prop.num_clk_freq,
-                                         sizeof(*bus->prop.clk_freq),
-                                         GFP_KERNEL);
-       if (!bus->prop.clk_freq)
-               return -ENOMEM;
-
-       bus->prop.clk_freq[0] = bus->prop.max_clk_freq;
-       bus->prop.err_threshold = 5;
+       /* read Intel-specific properties */
+       sdw_master_read_intel_prop(bus);
 
        return 0;
 }
@@ -861,6 +1029,12 @@ static int intel_probe(struct platform_device *pdev)
                goto err_master_reg;
        }
 
+       if (sdw->cdns.bus.prop.hw_disabled) {
+               dev_info(&pdev->dev, "SoundWire master %d is disabled, ignoring\n",
+                        sdw->cdns.bus.link_id);
+               return 0;
+       }
+
        /* Initialize shim and controller */
        intel_link_power_up(sdw);
        intel_shim_init(sdw);
@@ -896,6 +1070,8 @@ static int intel_probe(struct platform_device *pdev)
                goto err_dai;
        }
 
+       intel_debugfs_init(sdw);
+
        return 0;
 
 err_dai:
@@ -912,8 +1088,11 @@ static int intel_remove(struct platform_device *pdev)
 
        sdw = platform_get_drvdata(pdev);
 
-       free_irq(sdw->res->irq, sdw);
-       snd_soc_unregister_component(sdw->cdns.dev);
+       if (!sdw->cdns.bus.prop.hw_disabled) {
+               intel_debugfs_exit(sdw);
+               free_irq(sdw->res->irq, sdw);
+               snd_soc_unregister_component(sdw->cdns.dev);
+       }
        sdw_delete_bus_master(&sdw->cdns.bus);
 
        return 0;
index 70637a0..b74c2f1 100644 (file)
 #define SDW_LINK_BASE          0x30000
 #define SDW_LINK_SIZE          0x10000
 
+static int link_mask;
+module_param_named(sdw_link_mask, link_mask, int, 0444);
+MODULE_PARM_DESC(sdw_link_mask, "Intel link mask (one bit per link)");
+
 struct sdw_link_data {
        struct sdw_intel_link_res res;
        struct platform_device *pdev;
@@ -111,6 +115,13 @@ static struct sdw_intel_ctx
 
        /* Create SDW Master devices */
        for (i = 0; i < count; i++) {
+               if (link_mask && !(link_mask & BIT(i))) {
+                       dev_dbg(&adev->dev,
+                               "Link %d masked, will not be enabled\n", i);
+                       link++;
+                       continue;
+               }
+
                link->res.irq = res->irq;
                link->res.registers = res->mmio_base + SDW_LINK_BASE
                                        + (SDW_LINK_SIZE * i);
index 79fee1b..844e6b2 100644 (file)
@@ -60,8 +60,7 @@ int sdw_master_read_prop(struct sdw_bus *bus)
                                 "mipi-sdw-max-clock-frequency",
                                 &prop->max_clk_freq);
 
-       nval = fwnode_property_read_u32_array(link,
-                       "mipi-sdw-clock-frequencies-supported", NULL, 0);
+       nval = fwnode_property_count_u32(link, "mipi-sdw-clock-frequencies-supported");
        if (nval > 0) {
                prop->num_clk_freq = nval;
                prop->clk_freq = devm_kcalloc(bus->dev, prop->num_clk_freq,
@@ -87,8 +86,7 @@ int sdw_master_read_prop(struct sdw_bus *bus)
                }
        }
 
-       nval = fwnode_property_read_u32_array(link,
-                       "mipi-sdw-supported-clock-gears", NULL, 0);
+       nval = fwnode_property_count_u32(link, "mipi-sdw-supported-clock-gears");
        if (nval > 0) {
                prop->num_clk_gears = nval;
                prop->clk_gears = devm_kcalloc(bus->dev, prop->num_clk_gears,
@@ -134,8 +132,7 @@ static int sdw_slave_read_dp0(struct sdw_slave *slave,
        fwnode_property_read_u32(port, "mipi-sdw-port-min-wordlength",
                                 &dp0->min_word);
 
-       nval = fwnode_property_read_u32_array(port,
-                       "mipi-sdw-port-wordlength-configs", NULL, 0);
+       nval = fwnode_property_count_u32(port, "mipi-sdw-port-wordlength-configs");
        if (nval > 0) {
 
                dp0->num_words = nval;
@@ -193,8 +190,7 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave,
                fwnode_property_read_u32(node, "mipi-sdw-port-min-wordlength",
                                         &dpn[i].min_word);
 
-               nval = fwnode_property_read_u32_array(node,
-                               "mipi-sdw-port-wordlength-configs", NULL, 0);
+               nval = fwnode_property_count_u32(node, "mipi-sdw-port-wordlength-configs");
                if (nval > 0) {
                        dpn[i].num_words = nval;
                        dpn[i].words = devm_kcalloc(&slave->dev,
@@ -233,8 +229,7 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave,
                fwnode_property_read_u32(node, "mipi-sdw-max-channel-number",
                                         &dpn[i].max_ch);
 
-               nval = fwnode_property_read_u32_array(node,
-                               "mipi-sdw-channel-number-list", NULL, 0);
+               nval = fwnode_property_count_u32(node, "mipi-sdw-channel-number-list");
                if (nval > 0) {
                        dpn[i].num_ch = nval;
                        dpn[i].ch = devm_kcalloc(&slave->dev, dpn[i].num_ch,
@@ -248,8 +243,7 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave,
                                        dpn[i].ch, dpn[i].num_ch);
                }
 
-               nval = fwnode_property_read_u32_array(node,
-                               "mipi-sdw-channel-combination-list", NULL, 0);
+               nval = fwnode_property_count_u32(node, "mipi-sdw-channel-combination-list");
                if (nval > 0) {
                        dpn[i].num_ch_combinations = nval;
                        dpn[i].ch_combinations = devm_kcalloc(&slave->dev,
index f39a581..48a63ca 100644 (file)
@@ -2,6 +2,7 @@
 // Copyright(c) 2015-17 Intel Corporation.
 
 #include <linux/acpi.h>
+#include <linux/of.h>
 #include <linux/soundwire/sdw.h>
 #include <linux/soundwire/sdw_type.h>
 #include "bus.h"
@@ -35,6 +36,7 @@ static int sdw_slave_add(struct sdw_bus *bus,
 
        slave->dev.release = sdw_slave_release;
        slave->dev.bus = &sdw_bus_type;
+       slave->dev.of_node = of_node_get(to_of_node(fwnode));
        slave->bus = bus;
        slave->status = SDW_SLAVE_UNATTACHED;
        slave->dev_num = 0;
@@ -56,6 +58,7 @@ static int sdw_slave_add(struct sdw_bus *bus,
                mutex_unlock(&bus->bus_lock);
                put_device(&slave->dev);
        }
+       sdw_slave_debugfs_init(slave);
 
        return ret;
 }
@@ -112,3 +115,53 @@ int sdw_acpi_find_slaves(struct sdw_bus *bus)
 }
 
 #endif
+
+/*
+ * sdw_of_find_slaves() - Find Slave devices in master device tree node
+ * @bus: SDW bus instance
+ *
+ * Scans the children of the Master DT node. A child is registered through
+ * sdw_slave_add() when its "compatible" string parses as
+ * "sdw<version><mfg_id><part_id><class_id>" (hex fields), its "reg"
+ * property holds at least two cells (link ID, unique ID) and the link ID
+ * matches @bus->link_id. Children that fail any of these checks are
+ * skipped.
+ *
+ * Return: always 0; the result of sdw_slave_add() is not propagated.
+ */
+int sdw_of_find_slaves(struct sdw_bus *bus)
+{
+       struct device *dev = bus->dev;
+       struct device_node *node;
+
+       for_each_child_of_node(bus->dev->of_node, node) {
+               int link_id, ret, len;
+               /* written by the sscanf() "%x" below, which needs unsigned int */
+               unsigned int sdw_version;
+               const char *compat = NULL;
+               struct sdw_slave_id id;
+               const __be32 *addr;
+
+               compat = of_get_property(node, "compatible", NULL);
+               if (!compat)
+                       continue;
+
+               ret = sscanf(compat, "sdw%01x%04hx%04hx%02hhx", &sdw_version,
+                            &id.mfg_id, &id.part_id, &id.class_id);
+
+               if (ret != 4) {
+                       dev_err(dev, "Invalid compatible string found %s\n",
+                               compat);
+                       continue;
+               }
+
+               addr = of_get_property(node, "reg", &len);
+               if (!addr || (len < 2 * sizeof(u32))) {
+                       dev_err(dev, "Invalid Link and Instance ID\n");
+                       continue;
+               }
+
+               /* "reg" cells are big-endian: <link_id unique_id> */
+               link_id = be32_to_cpup(addr++);
+               id.unique_id = be32_to_cpup(addr);
+               id.sdw_version = sdw_version;
+
+               /* Check for link_id match */
+               if (link_id != bus->link_id)
+                       continue;
+
+               sdw_slave_add(bus, &id, of_fwnode_handle(node));
+       }
+
+       return 0;
+}
index a047675..e69f94a 100644 (file)
  * The rows are arranged as per the array index value programmed
  * in register. The index 15 has dummy value 0 in order to fill hole.
  */
-int rows[SDW_FRAME_ROWS] = {48, 50, 60, 64, 75, 80, 125, 147,
+int sdw_rows[SDW_FRAME_ROWS] = {48, 50, 60, 64, 75, 80, 125, 147,
                        96, 100, 120, 128, 150, 160, 250, 0,
                        192, 200, 240, 256, 72, 144, 90, 180};
 
-int cols[SDW_FRAME_COLS] = {2, 4, 6, 8, 10, 12, 14, 16};
+int sdw_cols[SDW_FRAME_COLS] = {2, 4, 6, 8, 10, 12, 14, 16};
 
-static int sdw_find_col_index(int col)
+int sdw_find_col_index(int col)
 {
        int i;
 
        for (i = 0; i < SDW_FRAME_COLS; i++) {
-               if (cols[i] == col)
+               if (sdw_cols[i] == col)
                        return i;
        }
 
        pr_warn("Requested column not found, selecting lowest column no: 2\n");
        return 0;
 }
+EXPORT_SYMBOL(sdw_find_col_index);
 
-static int sdw_find_row_index(int row)
+int sdw_find_row_index(int row)
 {
        int i;
 
        for (i = 0; i < SDW_FRAME_ROWS; i++) {
-               if (rows[i] == row)
+               if (sdw_rows[i] == row)
                        return i;
        }
 
        pr_warn("Requested row not found, selecting lowest row no: 48\n");
        return 0;
 }
+EXPORT_SYMBOL(sdw_find_row_index);
 
 static int _sdw_program_slave_port_params(struct sdw_bus *bus,
                                          struct sdw_slave *slave,
@@ -367,7 +369,7 @@ static int sdw_enable_disable_master_ports(struct sdw_master_runtime *m_rt,
 static int sdw_enable_disable_ports(struct sdw_master_runtime *m_rt, bool en)
 {
        struct sdw_port_runtime *s_port, *m_port;
-       struct sdw_slave_runtime *s_rt = NULL;
+       struct sdw_slave_runtime *s_rt;
        int ret = 0;
 
        /* Enable/Disable Slave port(s) */
@@ -415,7 +417,7 @@ static int sdw_prep_deprep_slave_ports(struct sdw_bus *bus,
                                       struct sdw_port_runtime *p_rt,
                                       bool prep)
 {
-       struct completion *port_ready = NULL;
+       struct completion *port_ready;
        struct sdw_dpn_prop *dpn_prop;
        struct sdw_prepare_ch prep_ch;
        unsigned int time_left;
@@ -535,7 +537,7 @@ static int sdw_prep_deprep_master_ports(struct sdw_master_runtime *m_rt,
  */
 static int sdw_prep_deprep_ports(struct sdw_master_runtime *m_rt, bool prep)
 {
-       struct sdw_slave_runtime *s_rt = NULL;
+       struct sdw_slave_runtime *s_rt;
        struct sdw_port_runtime *p_rt;
        int ret = 0;
 
@@ -603,7 +605,7 @@ static int sdw_notify_config(struct sdw_master_runtime *m_rt)
  */
 static int sdw_program_params(struct sdw_bus *bus)
 {
-       struct sdw_master_runtime *m_rt = NULL;
+       struct sdw_master_runtime *m_rt;
        int ret = 0;
 
        list_for_each_entry(m_rt, &bus->m_rt_list, bus_node) {
@@ -640,8 +642,8 @@ static int sdw_bank_switch(struct sdw_bus *bus, int m_rt_count)
        int col_index, row_index;
        bool multi_link;
        struct sdw_msg *wr_msg;
-       u8 *wbuf = NULL;
-       int ret = 0;
+       u8 *wbuf;
+       int ret;
        u16 addr;
 
        wr_msg = kzalloc(sizeof(*wr_msg), GFP_KERNEL);
@@ -739,9 +741,9 @@ static int sdw_ml_sync_bank_switch(struct sdw_bus *bus)
 
 static int do_bank_switch(struct sdw_stream_runtime *stream)
 {
-       struct sdw_master_runtime *m_rt = NULL;
+       struct sdw_master_runtime *m_rt;
        const struct sdw_master_ops *ops;
-       struct sdw_bus *bus = NULL;
+       struct sdw_bus *bus;
        bool multi_link = false;
        int ret = 0;
 
@@ -863,7 +865,7 @@ EXPORT_SYMBOL(sdw_release_stream);
  * sdw_alloc_stream should be called only once per stream. Typically
  * invoked from ALSA/ASoC machine/platform driver.
  */
-struct sdw_stream_runtime *sdw_alloc_stream(char *stream_name)
+struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name)
 {
        struct sdw_stream_runtime *stream;
 
@@ -884,7 +886,7 @@ static struct sdw_master_runtime
 *sdw_find_master_rt(struct sdw_bus *bus,
                    struct sdw_stream_runtime *stream)
 {
-       struct sdw_master_runtime *m_rt = NULL;
+       struct sdw_master_runtime *m_rt;
 
        /* Retrieve Bus handle if already available */
        list_for_each_entry(m_rt, &stream->master_list, stream_node) {
@@ -953,7 +955,7 @@ static struct sdw_slave_runtime
                    struct sdw_stream_config *stream_config,
                    struct sdw_stream_runtime *stream)
 {
-       struct sdw_slave_runtime *s_rt = NULL;
+       struct sdw_slave_runtime *s_rt;
 
        s_rt = kzalloc(sizeof(*s_rt), GFP_KERNEL);
        if (!s_rt)
@@ -1259,7 +1261,7 @@ int sdw_stream_add_master(struct sdw_bus *bus,
                          unsigned int num_ports,
                          struct sdw_stream_runtime *stream)
 {
-       struct sdw_master_runtime *m_rt = NULL;
+       struct sdw_master_runtime *m_rt;
        int ret;
 
        mutex_lock(&bus->bus_lock);
@@ -1426,7 +1428,7 @@ struct sdw_dpn_prop *sdw_get_slave_dpn_prop(struct sdw_slave *slave,
  */
 static void sdw_acquire_bus_lock(struct sdw_stream_runtime *stream)
 {
-       struct sdw_master_runtime *m_rt = NULL;
+       struct sdw_master_runtime *m_rt;
        struct sdw_bus *bus = NULL;
 
        /* Iterate for all Master(s) in Master list */
@@ -1460,9 +1462,9 @@ static void sdw_release_bus_lock(struct sdw_stream_runtime *stream)
 
 static int _sdw_prepare_stream(struct sdw_stream_runtime *stream)
 {
-       struct sdw_master_runtime *m_rt = NULL;
+       struct sdw_master_runtime *m_rt;
        struct sdw_bus *bus = NULL;
-       struct sdw_master_prop *prop = NULL;
+       struct sdw_master_prop *prop;
        struct sdw_bus_params params;
        int ret;
 
@@ -1483,6 +1485,16 @@ static int _sdw_prepare_stream(struct sdw_stream_runtime *stream)
                bus->params.bandwidth += m_rt->stream->params.rate *
                        m_rt->ch_count * m_rt->stream->params.bps;
 
+               /* Compute params */
+               if (bus->compute_params) {
+                       ret = bus->compute_params(bus);
+                       if (ret < 0) {
+                               dev_err(bus->dev, "Compute params failed: %d",
+                                       ret);
+                               return ret;
+                       }
+               }
+
                /* Program params */
                ret = sdw_program_params(bus);
                if (ret < 0) {
@@ -1491,6 +1503,11 @@ static int _sdw_prepare_stream(struct sdw_stream_runtime *stream)
                }
        }
 
+       if (!bus) {
+               pr_err("Configuration error in %s\n", __func__);
+               return -EINVAL;
+       }
+
        ret = do_bank_switch(stream);
        if (ret < 0) {
                dev_err(bus->dev, "Bank switch failed: %d\n", ret);
@@ -1547,7 +1564,7 @@ EXPORT_SYMBOL(sdw_prepare_stream);
 
 static int _sdw_enable_stream(struct sdw_stream_runtime *stream)
 {
-       struct sdw_master_runtime *m_rt = NULL;
+       struct sdw_master_runtime *m_rt;
        struct sdw_bus *bus = NULL;
        int ret;
 
@@ -1571,6 +1588,11 @@ static int _sdw_enable_stream(struct sdw_stream_runtime *stream)
                }
        }
 
+       if (!bus) {
+               pr_err("Configuration error in %s\n", __func__);
+               return -EINVAL;
+       }
+
        ret = do_bank_switch(stream);
        if (ret < 0) {
                dev_err(bus->dev, "Bank switch failed: %d\n", ret);
@@ -1590,7 +1612,7 @@ static int _sdw_enable_stream(struct sdw_stream_runtime *stream)
  */
 int sdw_enable_stream(struct sdw_stream_runtime *stream)
 {
-       int ret = 0;
+       int ret;
 
        if (!stream) {
                pr_err("SoundWire: Handle not found for stream\n");
@@ -1610,12 +1632,12 @@ EXPORT_SYMBOL(sdw_enable_stream);
 
 static int _sdw_disable_stream(struct sdw_stream_runtime *stream)
 {
-       struct sdw_master_runtime *m_rt = NULL;
-       struct sdw_bus *bus = NULL;
+       struct sdw_master_runtime *m_rt;
        int ret;
 
        list_for_each_entry(m_rt, &stream->master_list, stream_node) {
-               bus = m_rt->bus;
+               struct sdw_bus *bus = m_rt->bus;
+
                /* Disable port(s) */
                ret = sdw_enable_disable_ports(m_rt, false);
                if (ret < 0) {
@@ -1626,7 +1648,8 @@ static int _sdw_disable_stream(struct sdw_stream_runtime *stream)
        stream->state = SDW_STREAM_DISABLED;
 
        list_for_each_entry(m_rt, &stream->master_list, stream_node) {
-               bus = m_rt->bus;
+               struct sdw_bus *bus = m_rt->bus;
+
                /* Program params */
                ret = sdw_program_params(bus);
                if (ret < 0) {
@@ -1635,7 +1658,25 @@ static int _sdw_disable_stream(struct sdw_stream_runtime *stream)
                }
        }
 
-       return do_bank_switch(stream);
+       ret = do_bank_switch(stream);
+       if (ret < 0) {
+               pr_err("Bank switch failed: %d\n", ret);
+               return ret;
+       }
+
+       /* make sure alternate bank (previous current) is also disabled */
+       list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+               struct sdw_bus *bus = m_rt->bus;
+
+               /* Disable port(s) */
+               ret = sdw_enable_disable_ports(m_rt, false);
+               if (ret < 0) {
+                       dev_err(bus->dev, "Disable port(s) failed: %d\n", ret);
+                       return ret;
+               }
+       }
+
+       return 0;
 }
 
 /**
@@ -1647,7 +1688,7 @@ static int _sdw_disable_stream(struct sdw_stream_runtime *stream)
  */
 int sdw_disable_stream(struct sdw_stream_runtime *stream)
 {
-       int ret = 0;
+       int ret;
 
        if (!stream) {
                pr_err("SoundWire: Handle not found for stream\n");
@@ -1667,8 +1708,8 @@ EXPORT_SYMBOL(sdw_disable_stream);
 
 static int _sdw_deprepare_stream(struct sdw_stream_runtime *stream)
 {
-       struct sdw_master_runtime *m_rt = NULL;
-       struct sdw_bus *bus = NULL;
+       struct sdw_master_runtime *m_rt;
+       struct sdw_bus *bus;
        int ret = 0;
 
        list_for_each_entry(m_rt, &stream->master_list, stream_node) {
@@ -1706,7 +1747,7 @@ static int _sdw_deprepare_stream(struct sdw_stream_runtime *stream)
  */
 int sdw_deprepare_stream(struct sdw_stream_runtime *stream)
 {
-       int ret = 0;
+       int ret;
 
        if (!stream) {
                pr_err("SoundWire: Handle not found for stream\n");
index aa8d842..b83a1d1 100644 (file)
@@ -120,7 +120,7 @@ static int ion_system_heap_allocate(struct ion_heap *heap,
                if (!page)
                        goto free_pages;
                list_add_tail(&page->lru, &pages);
-               size_remaining -= PAGE_SIZE << compound_order(page);
+               size_remaining -= page_size(page);
                max_order = compound_order(page);
                i++;
        }
@@ -133,7 +133,7 @@ static int ion_system_heap_allocate(struct ion_heap *heap,
 
        sg = table->sgl;
        list_for_each_entry_safe(page, tmp_page, &pages, lru) {
-               sg_set_page(sg, page, PAGE_SIZE << compound_order(page), 0);
+               sg_set_page(sg, page, page_size(page), 0);
                sg = sg_next(sg);
                list_del(&page->lru);
        }
index 661bb93..35be1be 100644 (file)
@@ -1712,6 +1712,24 @@ static int tcmu_init_genl_cmd_reply(struct tcmu_dev *udev, int cmd)
        return 0;
 }
 
+/*
+ * Unlink @udev's current netlink command from its list and zero it, under
+ * tcmu_nl_cmd_mutex. Does nothing when kernel command replies are not
+ * supported globally or udev->nl_reply_supported is not positive.
+ */
+static void tcmu_destroy_genl_cmd_reply(struct tcmu_dev *udev)
+{
+       struct tcmu_nl_cmd *cmd = &udev->curr_nl_cmd;
+
+       if (!tcmu_kern_cmd_reply_supported || udev->nl_reply_supported <= 0)
+               return;
+
+       mutex_lock(&tcmu_nl_cmd_mutex);
+       list_del(&cmd->nl_list);
+       memset(cmd, 0, sizeof(*cmd));
+       mutex_unlock(&tcmu_nl_cmd_mutex);
+}
+
 static int tcmu_wait_genl_cmd_reply(struct tcmu_dev *udev)
 {
        struct tcmu_nl_cmd *nl_cmd = &udev->curr_nl_cmd;
@@ -1792,6 +1810,8 @@ static int tcmu_netlink_event_send(struct tcmu_dev *udev,
        if (ret == 0 ||
           (ret == -ESRCH && cmd == TCMU_CMD_ADDED_DEVICE))
                return tcmu_wait_genl_cmd_reply(udev);
+       else
+               tcmu_destroy_genl_cmd_reply(udev);
 
        return ret;
 }
index a254792..1354a15 100644 (file)
@@ -136,8 +136,7 @@ int ft_queue_data_in(struct se_cmd *se_cmd)
                                           page, off_in_page, tlen);
                        fr_len(fp) += tlen;
                        fp_skb(fp)->data_len += tlen;
-                       fp_skb(fp)->truesize +=
-                                       PAGE_SIZE << compound_order(page);
+                       fp_skb(fp)->truesize += page_size(page);
                } else {
                        BUG_ON(!page);
                        from = kmap_atomic(page + (mem_off >> PAGE_SHIFT));
index 2da026f..09ddcd0 100644 (file)
@@ -254,6 +254,7 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr,
        shm->teedev = teedev;
        shm->ctx = ctx;
        shm->id = -1;
+       addr = untagged_addr(addr);
        start = rounddown(addr, PAGE_SIZE);
        shm->offset = addr - start;
        shm->size = length;
index 8c07a39..709a22f 100644 (file)
@@ -53,7 +53,6 @@
 #define CONTROL0_TSEN_MODE_EXTERNAL    0x2
 #define CONTROL0_TSEN_MODE_MASK                0x3
 
-#define CONTROL1_TSEN_AVG_SHIFT                0
 #define CONTROL1_TSEN_AVG_MASK         0x7
 #define CONTROL1_EXT_TSEN_SW_RESET     BIT(7)
 #define CONTROL1_EXT_TSEN_HW_RESETn    BIT(8)
@@ -267,8 +266,8 @@ static void armada_cp110_init(struct platform_device *pdev,
 
        /* Average the output value over 2^1 = 2 samples */
        regmap_read(priv->syscon, data->syscon_control1_off, &reg);
-       reg &= ~CONTROL1_TSEN_AVG_MASK << CONTROL1_TSEN_AVG_SHIFT;
-       reg |= 1 << CONTROL1_TSEN_AVG_SHIFT;
+       reg &= ~CONTROL1_TSEN_AVG_MASK;
+       reg |= 1;
        regmap_write(priv->syscon, data->syscon_control1_off, reg);
 }
 
index 9716bc3..7130e90 100644 (file)
@@ -77,9 +77,6 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp,
        struct acpi_buffer element = { 0, NULL };
        struct acpi_buffer trt_format = { sizeof("RRNNNNNN"), "RRNNNNNN" };
 
-       if (!acpi_has_method(handle, "_TRT"))
-               return -ENODEV;
-
        status = acpi_evaluate_object(handle, "_TRT", NULL, &buffer);
        if (ACPI_FAILURE(status))
                return -ENODEV;
@@ -158,9 +155,6 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp,
        struct acpi_buffer art_format = {
                sizeof("RRNNNNNNNNNNN"), "RRNNNNNNNNNNN" };
 
-       if (!acpi_has_method(handle, "_ART"))
-               return -ENODEV;
-
        status = acpi_evaluate_object(handle, "_ART", NULL, &buffer);
        if (ACPI_FAILURE(status))
                return -ENODEV;
index f5749d4..a7bbd85 100644 (file)
@@ -181,7 +181,7 @@ static int int3403_cdev_add(struct int3403_priv *priv)
 
        p = buf.pointer;
        if (!p || (p->type != ACPI_TYPE_PACKAGE)) {
-               printk(KERN_WARNING "Invalid PPSS data\n");
+               pr_warn("Invalid PPSS data\n");
                kfree(buf.pointer);
                return -EFAULT;
        }
index d3446ac..89a0153 100644 (file)
@@ -39,6 +39,9 @@
 /* GeminiLake thermal reporting device */
 #define PCI_DEVICE_ID_PROC_GLK_THERMAL 0x318C
 
+/* IceLake thermal reporting device */
+#define PCI_DEVICE_ID_PROC_ICL_THERMAL 0x8a03
+
 #define DRV_NAME "proc_thermal"
 
 struct power_config {
@@ -137,6 +140,72 @@ static const struct attribute_group power_limit_attribute_group = {
        .name = "power_limits"
 };
 
+/*
+ * sysfs show handler: print bits 31:24 of MSR_IA32_TEMPERATURE_TARGET as a
+ * decimal number followed by a newline. Returns the rdmsrl_safe() error
+ * when the MSR cannot be read.
+ */
+static ssize_t tcc_offset_degree_celsius_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       u64 msr;
+       int ret = rdmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, &msr);
+
+       if (ret)
+               return ret;
+
+       return sprintf(buf, "%d\n", (int)((msr >> 24) & 0xff));
+}
+
+/*
+ * Write the low 8 bits of @tcc into bits 31:24 of
+ * MSR_IA32_TEMPERATURE_TARGET, leaving every other bit as read.
+ *
+ * Returns -EINVAL when @tcc is 0, the rdmsrl_safe()/wrmsrl_safe() error on
+ * MSR access failure, 0 otherwise.
+ */
+static int tcc_offset_update(int tcc)
+{
+       u64 val;
+       int err;
+
+       if (!tcc)
+               return -EINVAL;
+
+       err = rdmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, &val);
+       if (err)
+               return err;
+
+       val &= ~GENMASK_ULL(31, 24);
+       /*
+        * Widen before shifting: as an int, (tcc & 0xff) << 24 reaches the
+        * sign bit for values >= 0x80 and the sign-extension to u64 would
+        * set bits 63:32 of the value written back.
+        */
+       val |= (u64)(tcc & 0xff) << 24;
+
+       return wrmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, val);
+}
+
+static int tcc_offset_save;
+
+/*
+ * sysfs store handler: parse @buf as an integer, apply it through
+ * tcc_offset_update() and remember it in tcc_offset_save.
+ *
+ * Returns @count on success, -EACCES when bit 30 of MSR_PLATFORM_INFO is
+ * clear, -EINVAL when @buf does not parse, or the error reported by the
+ * MSR read or by tcc_offset_update().
+ */
+static ssize_t tcc_offset_degree_celsius_store(struct device *dev,
+                               struct device_attribute *attr, const char *buf,
+                               size_t count)
+{
+       u64 platform_info;
+       int offset, ret;
+
+       ret = rdmsrl_safe(MSR_PLATFORM_INFO, &platform_info);
+       if (ret)
+               return ret;
+
+       /* refuse the write unless bit 30 of MSR_PLATFORM_INFO is set */
+       if ((platform_info & BIT(30)) == 0)
+               return -EACCES;
+
+       if (kstrtoint(buf, 0, &offset) != 0)
+               return -EINVAL;
+
+       ret = tcc_offset_update(offset);
+       if (ret)
+               return ret;
+
+       /* saved only after a successful update */
+       tcc_offset_save = offset;
+
+       return count;
+}
+
+static DEVICE_ATTR_RW(tcc_offset_degree_celsius);
+
 static int stored_tjmax; /* since it is fixed, we can have local storage */
 
 static int get_tjmax(void)
@@ -332,6 +401,7 @@ static void proc_thermal_remove(struct proc_thermal_device *proc_priv)
        acpi_remove_notify_handler(proc_priv->adev->handle,
                                   ACPI_DEVICE_NOTIFY, proc_thermal_notify);
        int340x_thermal_zone_remove(proc_priv->int340x_zone);
+       sysfs_remove_file(&proc_priv->dev->kobj, &dev_attr_tcc_offset_degree_celsius.attr);
        sysfs_remove_group(&proc_priv->dev->kobj,
                           &power_limit_attribute_group);
 }
@@ -355,8 +425,15 @@ static int int3401_add(struct platform_device *pdev)
 
        dev_info(&pdev->dev, "Creating sysfs group for PROC_THERMAL_PLATFORM_DEV\n");
 
-       return sysfs_create_group(&pdev->dev.kobj,
-                                        &power_limit_attribute_group);
+       ret = sysfs_create_file(&pdev->dev.kobj, &dev_attr_tcc_offset_degree_celsius.attr);
+       if (ret)
+               return ret;
+
+       ret = sysfs_create_group(&pdev->dev.kobj, &power_limit_attribute_group);
+       if (ret)
+               sysfs_remove_file(&pdev->dev.kobj, &dev_attr_tcc_offset_degree_celsius.attr);
+
+       return ret;
 }
 
 static int int3401_remove(struct platform_device *pdev)
@@ -588,8 +665,15 @@ static int  proc_thermal_pci_probe(struct pci_dev *pdev,
 
        dev_info(&pdev->dev, "Creating sysfs group for PROC_THERMAL_PCI\n");
 
-       return sysfs_create_group(&pdev->dev.kobj,
-                                        &power_limit_attribute_group);
+       ret = sysfs_create_file(&pdev->dev.kobj, &dev_attr_tcc_offset_degree_celsius.attr);
+       if (ret)
+               return ret;
+
+       ret = sysfs_create_group(&pdev->dev.kobj, &power_limit_attribute_group);
+       if (ret)
+               sysfs_remove_file(&pdev->dev.kobj, &dev_attr_tcc_offset_degree_celsius.attr);
+
+       return ret;
 }
 
 static void  proc_thermal_pci_remove(struct pci_dev *pdev)
@@ -615,6 +699,8 @@ static int proc_thermal_resume(struct device *dev)
        proc_dev = dev_get_drvdata(dev);
        proc_thermal_read_ppcc(proc_dev);
 
+       tcc_offset_update(tcc_offset_save);
+
        return 0;
 }
 #else
@@ -636,6 +722,8 @@ static const struct pci_device_id proc_thermal_pci_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_CNL_THERMAL)},
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_CFL_THERMAL)},
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_GLK_THERMAL)},
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_ICL_THERMAL),
+               .driver_data = (kernel_ulong_t)&rapl_mmio_hsw, },
        { 0, },
 };
 
index 99f8b25..4f0bb8f 100644 (file)
@@ -371,16 +371,14 @@ static void intel_pch_thermal_remove(struct pci_dev *pdev)
 
 static int intel_pch_thermal_suspend(struct device *device)
 {
-       struct pci_dev *pdev = to_pci_dev(device);
-       struct pch_thermal_device *ptd = pci_get_drvdata(pdev);
+       struct pch_thermal_device *ptd = dev_get_drvdata(device);
 
        return ptd->ops->suspend(ptd);
 }
 
 static int intel_pch_thermal_resume(struct device *device)
 {
-       struct pci_dev *pdev = to_pci_dev(device);
-       struct pch_thermal_device *ptd = pci_get_drvdata(pdev);
+       struct pch_thermal_device *ptd = dev_get_drvdata(device);
 
        return ptd->ops->resume(ptd);
 }
index 8d9b721..e46a4e3 100644 (file)
@@ -229,6 +229,8 @@ static int calibrate_8960(struct tsens_priv *priv)
        for (i = 0; i < num_read; i++, s++)
                s->offset = data[i];
 
+       kfree(data);
+
        return 0;
 }
 
index 6f26fad..055647b 100644 (file)
@@ -145,8 +145,10 @@ static int calibrate_8916(struct tsens_priv *priv)
                return PTR_ERR(qfprom_cdata);
 
        qfprom_csel = (u32 *)qfprom_read(priv->dev, "calib_sel");
-       if (IS_ERR(qfprom_csel))
+       if (IS_ERR(qfprom_csel)) {
+               kfree(qfprom_cdata);
                return PTR_ERR(qfprom_csel);
+       }
 
        mode = (qfprom_csel[0] & MSM8916_CAL_SEL_MASK) >> MSM8916_CAL_SEL_SHIFT;
        dev_dbg(priv->dev, "calibration mode is %d\n", mode);
@@ -181,6 +183,8 @@ static int calibrate_8916(struct tsens_priv *priv)
        }
 
        compute_intercept_slope(priv, p1, p2, mode);
+       kfree(qfprom_cdata);
+       kfree(qfprom_csel);
 
        return 0;
 }
@@ -198,8 +202,10 @@ static int calibrate_8974(struct tsens_priv *priv)
                return PTR_ERR(calib);
 
        bkp = (u32 *)qfprom_read(priv->dev, "calib_backup");
-       if (IS_ERR(bkp))
+       if (IS_ERR(bkp)) {
+               kfree(calib);
                return PTR_ERR(bkp);
+       }
 
        calib_redun_sel =  bkp[1] & BKP_REDUN_SEL;
        calib_redun_sel >>= BKP_REDUN_SHIFT;
@@ -313,6 +319,8 @@ static int calibrate_8974(struct tsens_priv *priv)
        }
 
        compute_intercept_slope(priv, p1, p2, mode);
+       kfree(calib);
+       kfree(bkp);
 
        return 0;
 }
index 10b595d..870f502 100644 (file)
@@ -138,6 +138,7 @@ static int calibrate_v1(struct tsens_priv *priv)
        }
 
        compute_intercept_slope(priv, p1, p2, mode);
+       kfree(qfprom_cdata);
 
        return 0;
 }
index 2fd9499..b89083b 100644 (file)
@@ -17,6 +17,7 @@
 
 #include <linux/thermal.h>
 #include <linux/regmap.h>
+#include <linux/slab.h>
 
 struct tsens_priv;
 
index 7b36493..39542c6 100644 (file)
@@ -2,6 +2,7 @@
 //
 // Copyright 2016 Freescale Semiconductor, Inc.
 
+#include <linux/clk.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/err.h>
@@ -72,6 +73,7 @@ struct qoriq_sensor {
 
 struct qoriq_tmu_data {
        struct qoriq_tmu_regs __iomem *regs;
+       struct clk *clk;
        bool little_endian;
        struct qoriq_sensor     *sensor[SITES_MAX];
 };
@@ -202,32 +204,39 @@ static int qoriq_tmu_probe(struct platform_device *pdev)
 
        data->little_endian = of_property_read_bool(np, "little-endian");
 
-       data->regs = of_iomap(np, 0);
-       if (!data->regs) {
+       data->regs = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(data->regs)) {
                dev_err(&pdev->dev, "Failed to get memory region\n");
-               ret = -ENODEV;
-               goto err_iomap;
+               return PTR_ERR(data->regs);
+       }
+
+       data->clk = devm_clk_get_optional(&pdev->dev, NULL);
+       if (IS_ERR(data->clk))
+               return PTR_ERR(data->clk);
+
+       ret = clk_prepare_enable(data->clk);
+       if (ret) {
+               dev_err(&pdev->dev, "Failed to enable clock\n");
+               return ret;
        }
 
        qoriq_tmu_init_device(data);    /* TMU initialization */
 
        ret = qoriq_tmu_calibration(pdev);      /* TMU calibration */
        if (ret < 0)
-               goto err_tmu;
+               goto err;
 
        ret = qoriq_tmu_register_tmu_zone(pdev);
        if (ret < 0) {
                dev_err(&pdev->dev, "Failed to register sensors\n");
                ret = -ENODEV;
-               goto err_iomap;
+               goto err;
        }
 
        return 0;
 
-err_tmu:
-       iounmap(data->regs);
-
-err_iomap:
+err:
+       clk_disable_unprepare(data->clk);
        platform_set_drvdata(pdev, NULL);
 
        return ret;
@@ -240,14 +249,14 @@ static int qoriq_tmu_remove(struct platform_device *pdev)
        /* Disable monitoring */
        tmu_write(data, TMR_DISABLE, &data->regs->tmr);
 
-       iounmap(data->regs);
+       clk_disable_unprepare(data->clk);
+
        platform_set_drvdata(pdev, NULL);
 
        return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int qoriq_tmu_suspend(struct device *dev)
+static int __maybe_unused qoriq_tmu_suspend(struct device *dev)
 {
        u32 tmr;
        struct qoriq_tmu_data *data = dev_get_drvdata(dev);
@@ -257,14 +266,21 @@ static int qoriq_tmu_suspend(struct device *dev)
        tmr &= ~TMR_ME;
        tmu_write(data, tmr, &data->regs->tmr);
 
+       clk_disable_unprepare(data->clk);
+
        return 0;
 }
 
-static int qoriq_tmu_resume(struct device *dev)
+static int __maybe_unused qoriq_tmu_resume(struct device *dev)
 {
        u32 tmr;
+       int ret;
        struct qoriq_tmu_data *data = dev_get_drvdata(dev);
 
+       ret = clk_prepare_enable(data->clk);
+       if (ret)
+               return ret;
+
        /* Enable monitoring */
        tmr = tmu_read(data, &data->regs->tmr);
        tmr |= TMR_ME;
@@ -272,7 +288,6 @@ static int qoriq_tmu_resume(struct device *dev)
 
        return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(qoriq_tmu_pm_ops,
                         qoriq_tmu_suspend, qoriq_tmu_resume);
index a564633..755d2b5 100644 (file)
@@ -443,9 +443,8 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
                if (ret)
                        goto error_unregister;
 
-               ret = devm_add_action(dev, rcar_gen3_hwmon_action, zone);
+               ret = devm_add_action_or_reset(dev, rcar_gen3_hwmon_action, zone);
                if (ret) {
-                       rcar_gen3_hwmon_action(zone);
                        goto error_unregister;
                }
 
index 43941eb..5acaad3 100644 (file)
 /* get dividend from the depth */
 #define THROT_DEPTH_DIVIDEND(depth)    ((256 * (100 - (depth)) / 100) - 1)
 
-/* gk20a nv_therm interface N:3 Mapping. Levels defined in tegra124-sochterm.h
+/* gk20a nv_therm interface N:3 Mapping. Levels defined in tegra124-soctherm.h
  * level       vector
  * NONE                3'b000
  * LOW         3'b001
index 6bab66e..d4481cc 100644 (file)
@@ -304,7 +304,7 @@ static void thermal_zone_device_set_polling(struct thermal_zone_device *tz,
                                 &tz->poll_queue,
                                 msecs_to_jiffies(delay));
        else
-               cancel_delayed_work(&tz->poll_queue);
+               cancel_delayed_work_sync(&tz->poll_queue);
 }
 
 static void monitor_thermal_zone(struct thermal_zone_device *tz)
@@ -985,7 +985,7 @@ __thermal_cooling_device_register(struct device_node *np,
        result = device_register(&cdev->device);
        if (result) {
                ida_simple_remove(&thermal_cdev_ida, cdev->id);
-               kfree(cdev);
+               put_device(&cdev->device);
                return ERR_PTR(result);
        }
 
@@ -1240,21 +1240,31 @@ thermal_zone_device_register(const char *type, int trips, int mask,
        struct thermal_zone_device *tz;
        enum thermal_trip_type trip_type;
        int trip_temp;
+       int id;
        int result;
        int count;
        struct thermal_governor *governor;
 
-       if (!type || strlen(type) == 0)
+       if (!type || strlen(type) == 0) {
+               pr_err("Error: No thermal zone type defined\n");
                return ERR_PTR(-EINVAL);
+       }
 
-       if (type && strlen(type) >= THERMAL_NAME_LENGTH)
+       if (type && strlen(type) >= THERMAL_NAME_LENGTH) {
+               pr_err("Error: Thermal zone name (%s) too long, should be under %d chars\n",
+                      type, THERMAL_NAME_LENGTH);
                return ERR_PTR(-EINVAL);
+       }
 
-       if (trips > THERMAL_MAX_TRIPS || trips < 0 || mask >> trips)
+       if (trips > THERMAL_MAX_TRIPS || trips < 0 || mask >> trips) {
+               pr_err("Error: Incorrect number of thermal trips\n");
                return ERR_PTR(-EINVAL);
+       }
 
-       if (!ops)
+       if (!ops) {
+               pr_err("Error: Thermal zone device ops not defined\n");
                return ERR_PTR(-EINVAL);
+       }
 
        if (trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp))
                return ERR_PTR(-EINVAL);
@@ -1266,11 +1276,13 @@ thermal_zone_device_register(const char *type, int trips, int mask,
        INIT_LIST_HEAD(&tz->thermal_instances);
        ida_init(&tz->ida);
        mutex_init(&tz->lock);
-       result = ida_simple_get(&thermal_tz_ida, 0, 0, GFP_KERNEL);
-       if (result < 0)
+       id = ida_simple_get(&thermal_tz_ida, 0, 0, GFP_KERNEL);
+       if (id < 0) {
+               result = id;
                goto free_tz;
+       }
 
-       tz->id = result;
+       tz->id = id;
        strlcpy(tz->type, type, sizeof(tz->type));
        tz->ops = ops;
        tz->tzp = tzp;
@@ -1292,7 +1304,7 @@ thermal_zone_device_register(const char *type, int trips, int mask,
        dev_set_name(&tz->device, "thermal_zone%d", tz->id);
        result = device_register(&tz->device);
        if (result)
-               goto remove_device_groups;
+               goto release_device;
 
        for (count = 0; count < trips; count++) {
                if (tz->ops->get_trip_type(tz, count, &trip_type))
@@ -1343,14 +1355,12 @@ thermal_zone_device_register(const char *type, int trips, int mask,
        return tz;
 
 unregister:
-       ida_simple_remove(&thermal_tz_ida, tz->id);
-       device_unregister(&tz->device);
-       return ERR_PTR(result);
-
-remove_device_groups:
-       thermal_zone_destroy_device_groups(tz);
+       device_del(&tz->device);
+release_device:
+       put_device(&tz->device);
+       tz = NULL;
 remove_id:
-       ida_simple_remove(&thermal_tz_ida, tz->id);
+       ida_simple_remove(&thermal_tz_ida, id);
 free_tz:
        kfree(tz);
        return ERR_PTR(result);
index 40c69a5..dd5d8ee 100644 (file)
@@ -87,13 +87,17 @@ static struct thermal_hwmon_device *
 thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz)
 {
        struct thermal_hwmon_device *hwmon;
+       char type[THERMAL_NAME_LENGTH];
 
        mutex_lock(&thermal_hwmon_list_lock);
-       list_for_each_entry(hwmon, &thermal_hwmon_list, node)
-               if (!strcmp(hwmon->type, tz->type)) {
+       list_for_each_entry(hwmon, &thermal_hwmon_list, node) {
+               strcpy(type, tz->type);
+               strreplace(type, '-', '_');
+               if (!strcmp(hwmon->type, type)) {
                        mutex_unlock(&thermal_hwmon_list_lock);
                        return hwmon;
                }
+       }
        mutex_unlock(&thermal_hwmon_list_lock);
 
        return NULL;
index 4223cb4..6e713be 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/serial_core.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
+#include <linux/security.h>
 
 #include <linux/irq.h>
 #include <linux/uaccess.h>
@@ -862,6 +863,10 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port,
                goto check_and_exit;
        }
 
+       retval = security_locked_down(LOCKDOWN_TIOCSSERIAL);
+       if (retval && (change_irq || change_port))
+               goto exit;
+
        /*
         * Ask the low level driver to verify the settings.
         */
index 213ff03..59d9d51 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/blkdev.h>
 #include <linux/pagemap.h>
 #include <linux/export.h>
+#include <linux/fs_parser.h>
 #include <linux/hid.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -1451,9 +1452,9 @@ struct ffs_sb_fill_data {
        struct ffs_data *ffs_data;
 };
 
-static int ffs_sb_fill(struct super_block *sb, void *_data, int silent)
+static int ffs_sb_fill(struct super_block *sb, struct fs_context *fc)
 {
-       struct ffs_sb_fill_data *data = _data;
+       struct ffs_sb_fill_data *data = fc->fs_private;
        struct inode    *inode;
        struct ffs_data *ffs = data->ffs_data;
 
@@ -1486,147 +1487,152 @@ static int ffs_sb_fill(struct super_block *sb, void *_data, int silent)
        return 0;
 }
 
-static int ffs_fs_parse_opts(struct ffs_sb_fill_data *data, char *opts)
-{
-       ENTER();
+enum {
+       Opt_no_disconnect,
+       Opt_rmode,
+       Opt_fmode,
+       Opt_mode,
+       Opt_uid,
+       Opt_gid,
+};
 
-       if (!opts || !*opts)
-               return 0;
+static const struct fs_parameter_spec ffs_fs_param_specs[] = {
+       fsparam_bool    ("no_disconnect",       Opt_no_disconnect),
+       fsparam_u32     ("rmode",               Opt_rmode),
+       fsparam_u32     ("fmode",               Opt_fmode),
+       fsparam_u32     ("mode",                Opt_mode),
+       fsparam_u32     ("uid",                 Opt_uid),
+       fsparam_u32     ("gid",                 Opt_gid),
+       {}
+};
 
-       for (;;) {
-               unsigned long value;
-               char *eq, *comma;
-
-               /* Option limit */
-               comma = strchr(opts, ',');
-               if (comma)
-                       *comma = 0;
-
-               /* Value limit */
-               eq = strchr(opts, '=');
-               if (unlikely(!eq)) {
-                       pr_err("'=' missing in %s\n", opts);
-                       return -EINVAL;
-               }
-               *eq = 0;
+static const struct fs_parameter_description ffs_fs_fs_parameters = {
+       .name           = "kAFS",
+       .specs          = ffs_fs_param_specs,
+};
 
-               /* Parse value */
-               if (kstrtoul(eq + 1, 0, &value)) {
-                       pr_err("%s: invalid value: %s\n", opts, eq + 1);
-                       return -EINVAL;
-               }
+static int ffs_fs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+       struct ffs_sb_fill_data *data = fc->fs_private;
+       struct fs_parse_result result;
+       int opt;
 
-               /* Interpret option */
-               switch (eq - opts) {
-               case 13:
-                       if (!memcmp(opts, "no_disconnect", 13))
-                               data->no_disconnect = !!value;
-                       else
-                               goto invalid;
-                       break;
-               case 5:
-                       if (!memcmp(opts, "rmode", 5))
-                               data->root_mode  = (value & 0555) | S_IFDIR;
-                       else if (!memcmp(opts, "fmode", 5))
-                               data->perms.mode = (value & 0666) | S_IFREG;
-                       else
-                               goto invalid;
-                       break;
+       ENTER();
 
-               case 4:
-                       if (!memcmp(opts, "mode", 4)) {
-                               data->root_mode  = (value & 0555) | S_IFDIR;
-                               data->perms.mode = (value & 0666) | S_IFREG;
-                       } else {
-                               goto invalid;
-                       }
-                       break;
+       opt = fs_parse(fc, &ffs_fs_fs_parameters, param, &result);
+       if (opt < 0)
+               return opt;
 
-               case 3:
-                       if (!memcmp(opts, "uid", 3)) {
-                               data->perms.uid = make_kuid(current_user_ns(), value);
-                               if (!uid_valid(data->perms.uid)) {
-                                       pr_err("%s: unmapped value: %lu\n", opts, value);
-                                       return -EINVAL;
-                               }
-                       } else if (!memcmp(opts, "gid", 3)) {
-                               data->perms.gid = make_kgid(current_user_ns(), value);
-                               if (!gid_valid(data->perms.gid)) {
-                                       pr_err("%s: unmapped value: %lu\n", opts, value);
-                                       return -EINVAL;
-                               }
-                       } else {
-                               goto invalid;
-                       }
-                       break;
+       switch (opt) {
+       case Opt_no_disconnect:
+               data->no_disconnect = result.boolean;
+               break;
+       case Opt_rmode:
+               data->root_mode  = (result.uint_32 & 0555) | S_IFDIR;
+               break;
+       case Opt_fmode:
+               data->perms.mode = (result.uint_32 & 0666) | S_IFREG;
+               break;
+       case Opt_mode:
+               data->root_mode  = (result.uint_32 & 0555) | S_IFDIR;
+               data->perms.mode = (result.uint_32 & 0666) | S_IFREG;
+               break;
 
-               default:
-invalid:
-                       pr_err("%s: invalid option\n", opts);
-                       return -EINVAL;
-               }
+       case Opt_uid:
+               data->perms.uid = make_kuid(current_user_ns(), result.uint_32);
+               if (!uid_valid(data->perms.uid))
+                       goto unmapped_value;
+               break;
+       case Opt_gid:
+               data->perms.gid = make_kgid(current_user_ns(), result.uint_32);
+               if (!gid_valid(data->perms.gid))
+                       goto unmapped_value;
+               break;
 
-               /* Next iteration */
-               if (!comma)
-                       break;
-               opts = comma + 1;
+       default:
+               return -ENOPARAM;
        }
 
        return 0;
-}
 
-/* "mount -t functionfs dev_name /dev/function" ends up here */
+unmapped_value:
+       return invalf(fc, "%s: unmapped value: %u", param->key, result.uint_32);
+}
 
-static struct dentry *
-ffs_fs_mount(struct file_system_type *t, int flags,
-             const char *dev_name, void *opts)
-{
-       struct ffs_sb_fill_data data = {
-               .perms = {
-                       .mode = S_IFREG | 0600,
-                       .uid = GLOBAL_ROOT_UID,
-                       .gid = GLOBAL_ROOT_GID,
-               },
-               .root_mode = S_IFDIR | 0500,
-               .no_disconnect = false,
-       };
-       struct dentry *rv;
-       int ret;
+/*
+ * Set up the superblock for a mount.
+ */
+static int ffs_fs_get_tree(struct fs_context *fc)
+{
+       struct ffs_sb_fill_data *ctx = fc->fs_private;
        void *ffs_dev;
        struct ffs_data *ffs;
 
        ENTER();
 
-       ret = ffs_fs_parse_opts(&data, opts);
-       if (unlikely(ret < 0))
-               return ERR_PTR(ret);
+       if (!fc->source)
+               return invalf(fc, "No source specified");
 
-       ffs = ffs_data_new(dev_name);
+       ffs = ffs_data_new(fc->source);
        if (unlikely(!ffs))
-               return ERR_PTR(-ENOMEM);
-       ffs->file_perms = data.perms;
-       ffs->no_disconnect = data.no_disconnect;
+               return -ENOMEM;
+       ffs->file_perms = ctx->perms;
+       ffs->no_disconnect = ctx->no_disconnect;
 
-       ffs->dev_name = kstrdup(dev_name, GFP_KERNEL);
+       ffs->dev_name = kstrdup(fc->source, GFP_KERNEL);
        if (unlikely(!ffs->dev_name)) {
                ffs_data_put(ffs);
-               return ERR_PTR(-ENOMEM);
+               return -ENOMEM;
        }
 
-       ffs_dev = ffs_acquire_dev(dev_name);
+       ffs_dev = ffs_acquire_dev(ffs->dev_name);
        if (IS_ERR(ffs_dev)) {
                ffs_data_put(ffs);
-               return ERR_CAST(ffs_dev);
+               return PTR_ERR(ffs_dev);
        }
+
        ffs->private_data = ffs_dev;
-       data.ffs_data = ffs;
+       ctx->ffs_data = ffs;
+       return get_tree_nodev(fc, ffs_sb_fill);
+}
+
+static void ffs_fs_free_fc(struct fs_context *fc)
+{
+       struct ffs_sb_fill_data *ctx = fc->fs_private;
+
+       if (ctx) {
+               if (ctx->ffs_data) {
+                       ffs_release_dev(ctx->ffs_data);
+                       ffs_data_put(ctx->ffs_data);
+               }
 
-       rv = mount_nodev(t, flags, &data, ffs_sb_fill);
-       if (IS_ERR(rv) && data.ffs_data) {
-               ffs_release_dev(data.ffs_data);
-               ffs_data_put(data.ffs_data);
+               kfree(ctx);
        }
-       return rv;
+}
+
+static const struct fs_context_operations ffs_fs_context_ops = {
+       .free           = ffs_fs_free_fc,
+       .parse_param    = ffs_fs_parse_param,
+       .get_tree       = ffs_fs_get_tree,
+};
+
+static int ffs_fs_init_fs_context(struct fs_context *fc)
+{
+       struct ffs_sb_fill_data *ctx;
+
+       ctx = kzalloc(sizeof(struct ffs_sb_fill_data), GFP_KERNEL);
+       if (!ctx)
+               return -ENOMEM;
+
+       ctx->perms.mode = S_IFREG | 0600;
+       ctx->perms.uid = GLOBAL_ROOT_UID;
+       ctx->perms.gid = GLOBAL_ROOT_GID;
+       ctx->root_mode = S_IFDIR | 0500;
+       ctx->no_disconnect = false;
+
+       fc->fs_private = ctx;
+       fc->ops = &ffs_fs_context_ops;
+       return 0;
 }
 
 static void
@@ -1644,7 +1650,8 @@ ffs_fs_kill_sb(struct super_block *sb)
 static struct file_system_type ffs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "functionfs",
-       .mount          = ffs_fs_mount,
+       .init_fs_context = ffs_fs_init_fs_context,
+       .parameters     = &ffs_fs_fs_parameters,
        .kill_sb        = ffs_fs_kill_sb,
 };
 MODULE_ALIAS_FS("functionfs");
index a67ddcb..46635fa 100644 (file)
@@ -8,6 +8,8 @@
 
 ccflags-y := -I $(srctree)/drivers/scsi
 
+ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=USB_STORAGE
+
 obj-$(CONFIG_USB_UAS)          += uas.o
 obj-$(CONFIG_USB_STORAGE)      += usb-storage.o
 
index 6b8edf6..ddab2cd 100644 (file)
@@ -36,6 +36,7 @@
 MODULE_DESCRIPTION("Driver for Alauda-based card readers");
 MODULE_AUTHOR("Daniel Drake <dsd@gentoo.org>");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(USB_STORAGE);
 
 /*
  * Status bytes
index 4825902..a6f3267 100644 (file)
@@ -22,6 +22,7 @@
 MODULE_DESCRIPTION("SAT support for Cypress USB/ATA bridges with ATACB");
 MODULE_AUTHOR("Matthieu Castet <castet.matthieu@free.fr>");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(USB_STORAGE);
 
 /*
  * The table of devices
index 09353be..5888184 100644 (file)
@@ -54,6 +54,7 @@
 MODULE_DESCRIPTION("Driver for Datafab USB Compact Flash reader");
 MODULE_AUTHOR("Jimmie Mayfield <mayfield+datafab@sackheads.org>");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(USB_STORAGE);
 
 struct datafab_info {
        unsigned long   sectors;        /* total sector count */
index c26129d..8b1b730 100644 (file)
@@ -26,6 +26,7 @@
 
 MODULE_DESCRIPTION("Driver for ENE UB6250 reader");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(USB_STORAGE);
 MODULE_FIRMWARE(SD_INIT1_FIRMWARE);
 MODULE_FIRMWARE(SD_INIT2_FIRMWARE);
 MODULE_FIRMWARE(SD_RW_FIRMWARE);
index 4f542df..34e7eaf 100644 (file)
@@ -29,6 +29,7 @@
 MODULE_DESCRIPTION("Driver for Freecom USB/IDE adaptor");
 MODULE_AUTHOR("David Brown <usb-storage@davidb.org>");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(USB_STORAGE);
 
 #ifdef CONFIG_USB_STORAGE_DEBUG
 static void pdump(struct us_data *us, void *ibuffer, int length);
index 28e1128..89f5e33 100644 (file)
@@ -53,6 +53,7 @@
 MODULE_DESCRIPTION("Driver for In-System Design, Inc. ISD200 ASIC");
 MODULE_AUTHOR("Björn Stenberg <bjorn@haxx.se>");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(USB_STORAGE);
 
 static int isd200_Initialization(struct us_data *us);
 
index 917f170..229bf0c 100644 (file)
@@ -51,6 +51,7 @@
 MODULE_DESCRIPTION("Driver for Lexar \"Jumpshot\" Compact Flash reader");
 MODULE_AUTHOR("Jimmie Mayfield <mayfield+usb@sackheads.org>");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(USB_STORAGE);
 
 /*
  * The table of devices
index 395cf8f..05cec81 100644 (file)
@@ -23,6 +23,7 @@
 MODULE_DESCRIPTION("Driver for Rio Karma");
 MODULE_AUTHOR("Bob Copeland <me@bobcopeland.com>, Keith Bennett <keith@mcs.st-and.ac.uk>");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(USB_STORAGE);
 
 #define RIO_PREFIX "RIOP\x00"
 #define RIO_PREFIX_LEN 5
index 39a5009..a989fe9 100644 (file)
@@ -25,6 +25,7 @@
 MODULE_DESCRIPTION("Maxtor USB OneTouch hard drive button driver");
 MODULE_AUTHOR("Nick Sillik <n.sillik@temple.edu>");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(USB_STORAGE);
 
 #define ONETOUCH_PKT_LEN        0x02
 #define ONETOUCH_BUTTON         KEY_PROG1
index 1d9ce9c..3789698 100644 (file)
@@ -35,6 +35,7 @@
 MODULE_DESCRIPTION("Driver for Realtek USB Card Reader");
 MODULE_AUTHOR("wwang <wei_wang@realsil.com.cn>");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(USB_STORAGE);
 
 static int auto_delink_en = 1;
 module_param(auto_delink_en, int, S_IRUGO | S_IWUSR);
index bc9da73..51bcd4a 100644 (file)
@@ -47,6 +47,7 @@
 MODULE_DESCRIPTION("Driver for SanDisk SDDR-09 SmartMedia reader");
 MODULE_AUTHOR("Andries Brouwer <aeb@cwi.nl>, Robert Baruch <autophile@starband.net>");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(USB_STORAGE);
 
 static int usb_stor_sddr09_dpcm_init(struct us_data *us);
 static int sddr09_transport(struct scsi_cmnd *srb, struct us_data *us);
index b8527c5..ba955d6 100644 (file)
@@ -29,6 +29,7 @@
 MODULE_DESCRIPTION("Driver for SanDisk SDDR-55 SmartMedia reader");
 MODULE_AUTHOR("Simon Munton");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(USB_STORAGE);
 
 /*
  * The table of devices
index 854498e..54aa139 100644 (file)
@@ -48,6 +48,7 @@
 MODULE_DESCRIPTION("Driver for SCM Microsystems (a.k.a. Shuttle) USB-ATAPI cable");
 MODULE_AUTHOR("Daniel Drake <dsd@gentoo.org>, Robert Baruch <autophile@starband.net>");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(USB_STORAGE);
 
 /* Supported device types */
 #define USBAT_DEV_HP8200       0x01
index 047c592..bf80d6f 100644 (file)
@@ -1219,5 +1219,6 @@ static struct usb_driver uas_driver = {
 module_usb_driver(uas_driver);
 
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(USB_STORAGE);
 MODULE_AUTHOR(
        "Hans de Goede <hdegoede@redhat.com>, Matthew Wilcox and Sarah Sharp");
index 703948c..0220616 100644 (file)
@@ -438,11 +438,20 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
        pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
 
        /*
-        * Try to reset the device.  The success of this is dependent on
-        * being able to lock the device, which is not always possible.
+        * Try to get the locks ourselves to prevent a deadlock. The
+        * success of this is dependent on being able to lock the device,
+        * which is not always possible.
+        * We cannot use the "try" reset interface here, because it would
+        * overwrite the previously restored configuration information.
         */
-       if (vdev->reset_works && !pci_try_reset_function(pdev))
-               vdev->needs_reset = false;
+       if (vdev->reset_works && pci_cfg_access_trylock(pdev)) {
+               if (device_trylock(&pdev->dev)) {
+                       if (!__pci_reset_function_locked(pdev))
+                               vdev->needs_reset = false;
+                       device_unlock(&pdev->dev);
+               }
+               pci_cfg_access_unlock(pdev);
+       }
 
        pci_restore_state(pdev);
 out:
index 9809369..26cef65 100644 (file)
@@ -176,13 +176,13 @@ put_exit:
 }
 
 static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
-               unsigned int page_shift)
+               unsigned int it_page_shift)
 {
        struct page *page;
        unsigned long size = 0;
 
-       if (mm_iommu_is_devmem(mm, hpa, page_shift, &size))
-               return size == (1UL << page_shift);
+       if (mm_iommu_is_devmem(mm, hpa, it_page_shift, &size))
+               return size == (1UL << it_page_shift);
 
        page = pfn_to_page(hpa >> PAGE_SHIFT);
        /*
@@ -190,7 +190,7 @@ static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
         * a page we just found. Otherwise the hardware can get access to
         * a bigger memory chunk that it should.
         */
-       return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
+       return page_shift(compound_head(page)) >= it_page_shift;
 }
 
 static inline bool tce_groups_attached(struct tce_container *container)
@@ -1240,7 +1240,7 @@ release_exit:
 static int tce_iommu_attach_group(void *iommu_data,
                struct iommu_group *iommu_group)
 {
-       int ret;
+       int ret = 0;
        struct tce_container *container = iommu_data;
        struct iommu_table_group *table_group;
        struct tce_iommu_group *tcegrp = NULL;
@@ -1293,13 +1293,13 @@ static int tce_iommu_attach_group(void *iommu_data,
                        !table_group->ops->release_ownership) {
                if (container->v2) {
                        ret = -EPERM;
-                       goto unlock_exit;
+                       goto free_exit;
                }
                ret = tce_iommu_take_ownership(container, table_group);
        } else {
                if (!container->v2) {
                        ret = -EPERM;
-                       goto unlock_exit;
+                       goto free_exit;
                }
                ret = tce_iommu_take_ownership_ddw(container, table_group);
                if (!tce_groups_attached(container) && !container->tables[0])
@@ -1311,10 +1311,11 @@ static int tce_iommu_attach_group(void *iommu_data,
                list_add(&tcegrp->next, &container->group_list);
        }
 
-unlock_exit:
+free_exit:
        if (ret && tcegrp)
                kfree(tcegrp);
 
+unlock_exit:
        mutex_unlock(&container->lock);
 
        return ret;
index ad830ab..96fddc1 100644 (file)
@@ -62,6 +62,7 @@ MODULE_PARM_DESC(dma_entry_limit,
 
 struct vfio_iommu {
        struct list_head        domain_list;
+       struct list_head        iova_list;
        struct vfio_domain      *external_domain; /* domain for external user */
        struct mutex            lock;
        struct rb_root          dma_list;
@@ -97,6 +98,12 @@ struct vfio_group {
        bool                    mdev_group;     /* An mdev group */
 };
 
+struct vfio_iova {
+       struct list_head        list;
+       dma_addr_t              start;
+       dma_addr_t              end;
+};
+
 /*
  * Guest RAM pinning working set or DMA target
  */
@@ -368,6 +375,8 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
 
        down_read(&mm->mmap_sem);
 
+       vaddr = untagged_addr(vaddr);
+
        vma = find_vma_intersection(mm, vaddr, vaddr + 1);
 
        if (vma && vma->vm_flags & VM_PFNMAP) {
@@ -1038,6 +1047,27 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
        return ret;
 }
 
+/*
+ * Check that a DMA map request falls within a valid IOVA range
+ */
+static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
+                                     dma_addr_t start, dma_addr_t end)
+{
+       struct list_head *iova = &iommu->iova_list;
+       struct vfio_iova *node;
+
+       list_for_each_entry(node, iova, list) {
+               if (start >= node->start && end <= node->end)
+                       return true;
+       }
+
+       /*
+        * Check for list_empty() as well since a container with
+        * a single mdev device will have an empty list.
+        */
+       return list_empty(iova);
+}
+
 static int vfio_dma_do_map(struct vfio_iommu *iommu,
                           struct vfio_iommu_type1_dma_map *map)
 {
@@ -1081,6 +1111,11 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
                goto out_unlock;
        }
 
+       if (!vfio_iommu_iova_dma_valid(iommu, iova, iova + size - 1)) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+
        dma = kzalloc(sizeof(*dma), GFP_KERNEL);
        if (!dma) {
                ret = -ENOMEM;
@@ -1270,15 +1305,13 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain,
        return NULL;
 }
 
-static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
+static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
+                                 phys_addr_t *base)
 {
-       struct list_head group_resv_regions;
-       struct iommu_resv_region *region, *next;
+       struct iommu_resv_region *region;
        bool ret = false;
 
-       INIT_LIST_HEAD(&group_resv_regions);
-       iommu_get_group_resv_regions(group, &group_resv_regions);
-       list_for_each_entry(region, &group_resv_regions, list) {
+       list_for_each_entry(region, group_resv_regions, list) {
                /*
                 * The presence of any 'real' MSI regions should take
                 * precedence over the software-managed one if the
@@ -1294,8 +1327,7 @@ static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
                        ret = true;
                }
        }
-       list_for_each_entry_safe(region, next, &group_resv_regions, list)
-               kfree(region);
+
        return ret;
 }
 
@@ -1395,6 +1427,228 @@ static int vfio_mdev_iommu_device(struct device *dev, void *data)
        return 0;
 }
 
+/*
+ * This is a helper function to insert an address range to iova list.
+ * The list is initially created with a single entry corresponding to
+ * the IOMMU domain geometry to which the device group is attached.
+ * The list aperture gets modified when a new domain is added to the
+ * container if the new aperture doesn't conflict with the current one
+ * or with any existing dma mappings. The list is also modified to
+ * exclude any reserved regions associated with the device group.
+ */
+static int vfio_iommu_iova_insert(struct list_head *head,
+                                 dma_addr_t start, dma_addr_t end)
+{
+       struct vfio_iova *region;
+
+       region = kmalloc(sizeof(*region), GFP_KERNEL);
+       if (!region)
+               return -ENOMEM;
+
+       INIT_LIST_HEAD(&region->list);
+       region->start = start;
+       region->end = end;
+
+       list_add_tail(&region->list, head);
+       return 0;
+}
+
+/*
+ * Check whether the new iommu aperture conflicts with the existing aperture
+ * or with any existing dma mappings.
+ */
+static bool vfio_iommu_aper_conflict(struct vfio_iommu *iommu,
+                                    dma_addr_t start, dma_addr_t end)
+{
+       struct vfio_iova *first, *last;
+       struct list_head *iova = &iommu->iova_list;
+
+       if (list_empty(iova))
+               return false;
+
+       /* Disjoint sets, return conflict */
+       first = list_first_entry(iova, struct vfio_iova, list);
+       last = list_last_entry(iova, struct vfio_iova, list);
+       if (start > last->end || end < first->start)
+               return true;
+
+       /* Check for any existing dma mappings below the new start */
+       if (start > first->start) {
+               if (vfio_find_dma(iommu, first->start, start - first->start))
+                       return true;
+       }
+
+       /* Check for any existing dma mappings beyond the new end */
+       if (end < last->end) {
+               if (vfio_find_dma(iommu, end + 1, last->end - end))
+                       return true;
+       }
+
+       return false;
+}
+
+/*
+ * Resize iommu iova aperture window. This is called only if the new
+ * aperture has no conflict with existing aperture and dma mappings.
+ */
+static int vfio_iommu_aper_resize(struct list_head *iova,
+                                 dma_addr_t start, dma_addr_t end)
+{
+       struct vfio_iova *node, *next;
+
+       if (list_empty(iova))
+               return vfio_iommu_iova_insert(iova, start, end);
+
+       /* Adjust iova list start */
+       list_for_each_entry_safe(node, next, iova, list) {
+               if (start < node->start)
+                       break;
+               if (start >= node->start && start < node->end) {
+                       node->start = start;
+                       break;
+               }
+               /* Delete nodes before new start */
+               list_del(&node->list);
+               kfree(node);
+       }
+
+       /* Adjust iova list end */
+       list_for_each_entry_safe(node, next, iova, list) {
+               if (end > node->end)
+                       continue;
+               if (end > node->start && end <= node->end) {
+                       node->end = end;
+                       continue;
+               }
+               /* Delete nodes after new end */
+               list_del(&node->list);
+               kfree(node);
+       }
+
+       return 0;
+}
+
+/*
+ * Check whether any reserved region conflicts with existing dma mappings
+ */
+static bool vfio_iommu_resv_conflict(struct vfio_iommu *iommu,
+                                    struct list_head *resv_regions)
+{
+       struct iommu_resv_region *region;
+
+       /* Check for conflict with existing dma mappings */
+       list_for_each_entry(region, resv_regions, list) {
+               if (region->type == IOMMU_RESV_DIRECT_RELAXABLE)
+                       continue;
+
+               if (vfio_find_dma(iommu, region->start, region->length))
+                       return true;
+       }
+
+       return false;
+}
+
+/*
+ * Check iova region overlap with reserved regions and
+ * exclude them from the iommu iova range
+ */
+static int vfio_iommu_resv_exclude(struct list_head *iova,
+                                  struct list_head *resv_regions)
+{
+       struct iommu_resv_region *resv;
+       struct vfio_iova *n, *next;
+
+       list_for_each_entry(resv, resv_regions, list) {
+               phys_addr_t start, end;
+
+               if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
+                       continue;
+
+               start = resv->start;
+               end = resv->start + resv->length - 1;
+
+               list_for_each_entry_safe(n, next, iova, list) {
+                       int ret = 0;
+
+                       /* No overlap */
+                       if (start > n->end || end < n->start)
+                               continue;
+                       /*
+                        * Insert a new node if current node overlaps with the
+                        * reserve region to exclude that from valid iova range.
+                        * Note that, new node is inserted before the current
+                        * node and finally the current node is deleted keeping
+                        * the list updated and sorted.
+                        */
+                       if (start > n->start)
+                               ret = vfio_iommu_iova_insert(&n->list, n->start,
+                                                            start - 1);
+                       if (!ret && end < n->end)
+                               ret = vfio_iommu_iova_insert(&n->list, end + 1,
+                                                            n->end);
+                       if (ret)
+                               return ret;
+
+                       list_del(&n->list);
+                       kfree(n);
+               }
+       }
+
+       if (list_empty(iova))
+               return -EINVAL;
+
+       return 0;
+}
+
+static void vfio_iommu_resv_free(struct list_head *resv_regions)
+{
+       struct iommu_resv_region *n, *next;
+
+       list_for_each_entry_safe(n, next, resv_regions, list) {
+               list_del(&n->list);
+               kfree(n);
+       }
+}
+
+static void vfio_iommu_iova_free(struct list_head *iova)
+{
+       struct vfio_iova *n, *next;
+
+       list_for_each_entry_safe(n, next, iova, list) {
+               list_del(&n->list);
+               kfree(n);
+       }
+}
+
+static int vfio_iommu_iova_get_copy(struct vfio_iommu *iommu,
+                                   struct list_head *iova_copy)
+{
+       struct list_head *iova = &iommu->iova_list;
+       struct vfio_iova *n;
+       int ret;
+
+       list_for_each_entry(n, iova, list) {
+               ret = vfio_iommu_iova_insert(iova_copy, n->start, n->end);
+               if (ret)
+                       goto out_free;
+       }
+
+       return 0;
+
+out_free:
+       vfio_iommu_iova_free(iova_copy);
+       return ret;
+}
+
+static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
+                                       struct list_head *iova_copy)
+{
+       struct list_head *iova = &iommu->iova_list;
+
+       vfio_iommu_iova_free(iova);
+
+       list_splice_tail(iova_copy, iova);
+}
 static int vfio_iommu_type1_attach_group(void *iommu_data,
                                         struct iommu_group *iommu_group)
 {
@@ -1405,6 +1659,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
        int ret;
        bool resv_msi, msi_remap;
        phys_addr_t resv_msi_base;
+       struct iommu_domain_geometry geo;
+       LIST_HEAD(iova_copy);
+       LIST_HEAD(group_resv_regions);
 
        mutex_lock(&iommu->lock);
 
@@ -1481,7 +1738,43 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
        if (ret)
                goto out_domain;
 
-       resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base);
+       /* Get aperture info */
+       iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo);
+
+       if (vfio_iommu_aper_conflict(iommu, geo.aperture_start,
+                                    geo.aperture_end)) {
+               ret = -EINVAL;
+               goto out_detach;
+       }
+
+       ret = iommu_get_group_resv_regions(iommu_group, &group_resv_regions);
+       if (ret)
+               goto out_detach;
+
+       if (vfio_iommu_resv_conflict(iommu, &group_resv_regions)) {
+               ret = -EINVAL;
+               goto out_detach;
+       }
+
+       /*
+        * We don't want to work on the original iova list as the list
+        * gets modified and in case of failure we have to retain the
+        * original list. Get a copy here.
+        */
+       ret = vfio_iommu_iova_get_copy(iommu, &iova_copy);
+       if (ret)
+               goto out_detach;
+
+       ret = vfio_iommu_aper_resize(&iova_copy, geo.aperture_start,
+                                    geo.aperture_end);
+       if (ret)
+               goto out_detach;
+
+       ret = vfio_iommu_resv_exclude(&iova_copy, &group_resv_regions);
+       if (ret)
+               goto out_detach;
+
+       resv_msi = vfio_iommu_has_sw_msi(&group_resv_regions, &resv_msi_base);
 
        INIT_LIST_HEAD(&domain->group_list);
        list_add(&group->next, &domain->group_list);
@@ -1514,8 +1807,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
                                list_add(&group->next, &d->group_list);
                                iommu_domain_free(domain->domain);
                                kfree(domain);
-                               mutex_unlock(&iommu->lock);
-                               return 0;
+                               goto done;
                        }
 
                        ret = vfio_iommu_attach_group(domain, group);
@@ -1538,8 +1830,11 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
        }
 
        list_add(&domain->next, &iommu->domain_list);
-
+done:
+       /* Delete the old one and insert new iova list */
+       vfio_iommu_iova_insert_copy(iommu, &iova_copy);
        mutex_unlock(&iommu->lock);
+       vfio_iommu_resv_free(&group_resv_regions);
 
        return 0;
 
@@ -1547,6 +1842,8 @@ out_detach:
        vfio_iommu_detach_group(domain, group);
 out_domain:
        iommu_domain_free(domain->domain);
+       vfio_iommu_iova_free(&iova_copy);
+       vfio_iommu_resv_free(&group_resv_regions);
 out_free:
        kfree(domain);
        kfree(group);
@@ -1602,12 +1899,93 @@ static void vfio_sanity_check_pfn_list(struct vfio_iommu *iommu)
        WARN_ON(iommu->notifier.head);
 }
 
+/*
+ * Called when a domain is removed in detach. It is possible that
+ * the removed domain decided the iova aperture window. Modify the
+ * iova aperture with the smallest window among existing domains.
+ */
+static void vfio_iommu_aper_expand(struct vfio_iommu *iommu,
+                                  struct list_head *iova_copy)
+{
+       struct vfio_domain *domain;
+       struct iommu_domain_geometry geo;
+       struct vfio_iova *node;
+       dma_addr_t start = 0;
+       dma_addr_t end = (dma_addr_t)~0;
+
+       if (list_empty(iova_copy))
+               return;
+
+       list_for_each_entry(domain, &iommu->domain_list, next) {
+               iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY,
+                                     &geo);
+               if (geo.aperture_start > start)
+                       start = geo.aperture_start;
+               if (geo.aperture_end < end)
+                       end = geo.aperture_end;
+       }
+
+       /* Modify aperture limits. The new aper is either same or bigger */
+       node = list_first_entry(iova_copy, struct vfio_iova, list);
+       node->start = start;
+       node = list_last_entry(iova_copy, struct vfio_iova, list);
+       node->end = end;
+}
+
+/*
+ * Called when a group is detached. The reserved regions for that
+ * group can be part of valid iova now. But since reserved regions
+ * may be duplicated among groups, populate the iova valid regions
+ * list again.
+ */
+static int vfio_iommu_resv_refresh(struct vfio_iommu *iommu,
+                                  struct list_head *iova_copy)
+{
+       struct vfio_domain *d;
+       struct vfio_group *g;
+       struct vfio_iova *node;
+       dma_addr_t start, end;
+       LIST_HEAD(resv_regions);
+       int ret;
+
+       if (list_empty(iova_copy))
+               return -EINVAL;
+
+       list_for_each_entry(d, &iommu->domain_list, next) {
+               list_for_each_entry(g, &d->group_list, next) {
+                       ret = iommu_get_group_resv_regions(g->iommu_group,
+                                                          &resv_regions);
+                       if (ret)
+                               goto done;
+               }
+       }
+
+       node = list_first_entry(iova_copy, struct vfio_iova, list);
+       start = node->start;
+       node = list_last_entry(iova_copy, struct vfio_iova, list);
+       end = node->end;
+
+       /* purge the iova list and create new one */
+       vfio_iommu_iova_free(iova_copy);
+
+       ret = vfio_iommu_aper_resize(iova_copy, start, end);
+       if (ret)
+               goto done;
+
+       /* Exclude current reserved regions from iova ranges */
+       ret = vfio_iommu_resv_exclude(iova_copy, &resv_regions);
+done:
+       vfio_iommu_resv_free(&resv_regions);
+       return ret;
+}
+
 static void vfio_iommu_type1_detach_group(void *iommu_data,
                                          struct iommu_group *iommu_group)
 {
        struct vfio_iommu *iommu = iommu_data;
        struct vfio_domain *domain;
        struct vfio_group *group;
+       LIST_HEAD(iova_copy);
 
        mutex_lock(&iommu->lock);
 
@@ -1630,6 +2008,13 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
                }
        }
 
+       /*
+        * Get a copy of iova list. This will be used to update
+        * and to replace the current one later. Please note that
+        * we will leave the original list as it is if update fails.
+        */
+       vfio_iommu_iova_get_copy(iommu, &iova_copy);
+
        list_for_each_entry(domain, &iommu->domain_list, next) {
                group = find_iommu_group(domain, iommu_group);
                if (!group)
@@ -1655,10 +2040,16 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
                        iommu_domain_free(domain->domain);
                        list_del(&domain->next);
                        kfree(domain);
+                       vfio_iommu_aper_expand(iommu, &iova_copy);
                }
                break;
        }
 
+       if (!vfio_iommu_resv_refresh(iommu, &iova_copy))
+               vfio_iommu_iova_insert_copy(iommu, &iova_copy);
+       else
+               vfio_iommu_iova_free(&iova_copy);
+
 detach_group_done:
        mutex_unlock(&iommu->lock);
 }
@@ -1686,6 +2077,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
        }
 
        INIT_LIST_HEAD(&iommu->domain_list);
+       INIT_LIST_HEAD(&iommu->iova_list);
        iommu->dma_list = RB_ROOT;
        iommu->dma_avail = dma_entry_limit;
        mutex_init(&iommu->lock);
@@ -1729,6 +2121,9 @@ static void vfio_iommu_type1_release(void *iommu_data)
                list_del(&domain->next);
                kfree(domain);
        }
+
+       vfio_iommu_iova_free(&iommu->iova_list);
+
        kfree(iommu);
 }
 
@@ -1749,6 +2144,73 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
        return ret;
 }
 
+static int vfio_iommu_iova_add_cap(struct vfio_info_cap *caps,
+                struct vfio_iommu_type1_info_cap_iova_range *cap_iovas,
+                size_t size)
+{
+       struct vfio_info_cap_header *header;
+       struct vfio_iommu_type1_info_cap_iova_range *iova_cap;
+
+       header = vfio_info_cap_add(caps, size,
+                                  VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, 1);
+       if (IS_ERR(header))
+               return PTR_ERR(header);
+
+       iova_cap = container_of(header,
+                               struct vfio_iommu_type1_info_cap_iova_range,
+                               header);
+       iova_cap->nr_iovas = cap_iovas->nr_iovas;
+       memcpy(iova_cap->iova_ranges, cap_iovas->iova_ranges,
+              cap_iovas->nr_iovas * sizeof(*cap_iovas->iova_ranges));
+       return 0;
+}
+
+static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu,
+                                     struct vfio_info_cap *caps)
+{
+       struct vfio_iommu_type1_info_cap_iova_range *cap_iovas;
+       struct vfio_iova *iova;
+       size_t size;
+       int iovas = 0, i = 0, ret;
+
+       mutex_lock(&iommu->lock);
+
+       list_for_each_entry(iova, &iommu->iova_list, list)
+               iovas++;
+
+       if (!iovas) {
+               /*
+                * Return 0 as a container with a single mdev device
+                * will have an empty list
+                */
+               ret = 0;
+               goto out_unlock;
+       }
+
+       size = sizeof(*cap_iovas) + (iovas * sizeof(*cap_iovas->iova_ranges));
+
+       cap_iovas = kzalloc(size, GFP_KERNEL);
+       if (!cap_iovas) {
+               ret = -ENOMEM;
+               goto out_unlock;
+       }
+
+       cap_iovas->nr_iovas = iovas;
+
+       list_for_each_entry(iova, &iommu->iova_list, list) {
+               cap_iovas->iova_ranges[i].start = iova->start;
+               cap_iovas->iova_ranges[i].end = iova->end;
+               i++;
+       }
+
+       ret = vfio_iommu_iova_add_cap(caps, cap_iovas, size);
+
+       kfree(cap_iovas);
+out_unlock:
+       mutex_unlock(&iommu->lock);
+       return ret;
+}
+
 static long vfio_iommu_type1_ioctl(void *iommu_data,
                                   unsigned int cmd, unsigned long arg)
 {
@@ -1770,19 +2232,53 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
                }
        } else if (cmd == VFIO_IOMMU_GET_INFO) {
                struct vfio_iommu_type1_info info;
+               struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+               unsigned long capsz;
+               int ret;
 
                minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
 
+               /* For backward compatibility, cannot require this */
+               capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
+
                if (copy_from_user(&info, (void __user *)arg, minsz))
                        return -EFAULT;
 
                if (info.argsz < minsz)
                        return -EINVAL;
 
+               if (info.argsz >= capsz) {
+                       minsz = capsz;
+                       info.cap_offset = 0; /* output, no-recopy necessary */
+               }
+
                info.flags = VFIO_IOMMU_INFO_PGSIZES;
 
                info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
 
+               ret = vfio_iommu_iova_build_caps(iommu, &caps);
+               if (ret)
+                       return ret;
+
+               if (caps.size) {
+                       info.flags |= VFIO_IOMMU_INFO_CAPS;
+
+                       if (info.argsz < sizeof(info) + caps.size) {
+                               info.argsz = sizeof(info) + caps.size;
+                       } else {
+                               vfio_info_cap_shift(&caps, sizeof(info));
+                               if (copy_to_user((void __user *)arg +
+                                               sizeof(info), caps.buf,
+                                               caps.size)) {
+                                       kfree(caps.buf);
+                                       return -EFAULT;
+                               }
+                               info.cap_offset = sizeof(info);
+                       }
+
+                       kfree(caps.buf);
+               }
+
                return copy_to_user((void __user *)arg, &info, minsz) ?
                        -EFAULT : 0;
 
index 8b081d6..40676be 100644 (file)
@@ -10,7 +10,6 @@ menu "Backlight & LCD device support"
 #
 config LCD_CLASS_DEVICE
         tristate "Lowlevel LCD controls"
-       default m
        help
          This framework adds support for low-level control of LCD.
          Some framebuffer devices connect to platform-specific LCD modules
@@ -143,7 +142,6 @@ endif # LCD_CLASS_DEVICE
 #
 config BACKLIGHT_CLASS_DEVICE
         tristate "Lowlevel Backlight controls"
-       default m
        help
          This framework adds support for low-level control of the LCD
           backlight. This includes support for brightness and power.
index 5dc0710..cac3e35 100644 (file)
@@ -32,6 +32,12 @@ static const char *const backlight_types[] = {
        [BACKLIGHT_FIRMWARE] = "firmware",
 };
 
+static const char *const backlight_scale_types[] = {
+       [BACKLIGHT_SCALE_UNKNOWN]       = "unknown",
+       [BACKLIGHT_SCALE_LINEAR]        = "linear",
+       [BACKLIGHT_SCALE_NON_LINEAR]    = "non-linear",
+};
+
 #if defined(CONFIG_FB) || (defined(CONFIG_FB_MODULE) && \
                           defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE))
 /* This callback gets called when something important happens inside a
@@ -246,6 +252,18 @@ static ssize_t actual_brightness_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(actual_brightness);
 
+static ssize_t scale_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct backlight_device *bd = to_backlight_device(dev);
+
+       if (WARN_ON(bd->props.scale > BACKLIGHT_SCALE_NON_LINEAR))
+               return sprintf(buf, "unknown\n");
+
+       return sprintf(buf, "%s\n", backlight_scale_types[bd->props.scale]);
+}
+static DEVICE_ATTR_RO(scale);
+
 static struct class *backlight_class;
 
 #ifdef CONFIG_PM_SLEEP
@@ -292,6 +310,7 @@ static struct attribute *bl_device_attrs[] = {
        &dev_attr_brightness.attr,
        &dev_attr_actual_brightness.attr,
        &dev_attr_max_brightness.attr,
+       &dev_attr_scale.attr,
        &dev_attr_type.attr,
        NULL,
 };
index e84f308..18e053e 100644 (file)
@@ -59,13 +59,11 @@ static int gpio_backlight_probe_dt(struct platform_device *pdev,
                                   struct gpio_backlight *gbl)
 {
        struct device *dev = &pdev->dev;
-       enum gpiod_flags flags;
        int ret;
 
        gbl->def_value = device_property_read_bool(dev, "default-on");
-       flags = gbl->def_value ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW;
 
-       gbl->gpiod = devm_gpiod_get(dev, NULL, flags);
+       gbl->gpiod = devm_gpiod_get(dev, NULL, GPIOD_ASIS);
        if (IS_ERR(gbl->gpiod)) {
                ret = PTR_ERR(gbl->gpiod);
 
@@ -79,6 +77,22 @@ static int gpio_backlight_probe_dt(struct platform_device *pdev,
        return 0;
 }
 
+static int gpio_backlight_initial_power_state(struct gpio_backlight *gbl)
+{
+       struct device_node *node = gbl->dev->of_node;
+
+       /* Not booted with device tree or no phandle link to the node */
+       if (!node || !node->phandle)
+               return gbl->def_value ? FB_BLANK_UNBLANK : FB_BLANK_POWERDOWN;
+
+       /* if the enable GPIO is disabled, do not enable the backlight */
+       if (gpiod_get_value_cansleep(gbl->gpiod) == 0)
+               return FB_BLANK_POWERDOWN;
+
+       return FB_BLANK_UNBLANK;
+}
+
+
 static int gpio_backlight_probe(struct platform_device *pdev)
 {
        struct gpio_backlight_platform_data *pdata =
@@ -136,7 +150,9 @@ static int gpio_backlight_probe(struct platform_device *pdev)
                return PTR_ERR(bl);
        }
 
-       bl->props.brightness = gbl->def_value;
+       bl->props.power = gpio_backlight_initial_power_state(gbl);
+       bl->props.brightness = 1;
+
        backlight_update_status(bl);
 
        platform_set_drvdata(pdev, bl);
index b04b35d..2d8e819 100644 (file)
@@ -377,8 +377,7 @@ static int lm3630a_parse_led_sources(struct fwnode_handle *node,
        u32 sources[LM3630A_NUM_SINKS];
        int ret, num_sources, i;
 
-       num_sources = fwnode_property_read_u32_array(node, "led-sources", NULL,
-                                                    0);
+       num_sources = fwnode_property_count_u32(node, "led-sources");
        if (num_sources < 0)
                return default_led_sources;
        else if (num_sources > ARRAY_SIZE(sources))
index 35bc012..0e45685 100644 (file)
@@ -158,7 +158,7 @@ static int lms283gf05_probe(struct spi_device *spi)
                ret = devm_gpio_request_one(&spi->dev, pdata->reset_gpio,
                                GPIOF_DIR_OUT | (!pdata->reset_inverted ?
                                GPIOF_INIT_HIGH : GPIOF_INIT_LOW),
-                               "LMS285GF05 RESET");
+                               "LMS283GF05 RESET");
                if (ret)
                        return ret;
        }
index 2201b8c..746eebc 100644 (file)
@@ -387,6 +387,31 @@ int pwm_backlight_brightness_default(struct device *dev,
 }
 #endif
 
+static bool pwm_backlight_is_linear(struct platform_pwm_backlight_data *data)
+{
+       unsigned int nlevels = data->max_brightness + 1;
+       unsigned int min_val = data->levels[0];
+       unsigned int max_val = data->levels[nlevels - 1];
+       /*
+        * Multiplying by 128 means that even in pathological cases such
+        * as (max_val - min_val) == nlevels the error at max_val is less
+        * than 1%.
+        */
+       unsigned int slope = (128 * (max_val - min_val)) / nlevels;
+       unsigned int margin = (max_val - min_val) / 20; /* 5% */
+       int i;
+
+       for (i = 1; i < nlevels; i++) {
+               unsigned int linear_value = min_val + ((i * slope) / 128);
+               unsigned int delta = abs(linear_value - data->levels[i]);
+
+               if (delta > margin)
+                       return false;
+       }
+
+       return true;
+}
+
 static int pwm_backlight_initial_power_state(const struct pwm_bl_data *pb)
 {
        struct device_node *node = pb->dev->of_node;
@@ -536,6 +561,8 @@ static int pwm_backlight_probe(struct platform_device *pdev)
                goto err_alloc;
        }
 
+       memset(&props, 0, sizeof(struct backlight_properties));
+
        if (data->levels) {
                /*
                 * For the DT case, only when brightness levels is defined
@@ -548,6 +575,11 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 
                        pb->levels = data->levels;
                }
+
+               if (pwm_backlight_is_linear(data))
+                       props.scale = BACKLIGHT_SCALE_LINEAR;
+               else
+                       props.scale = BACKLIGHT_SCALE_NON_LINEAR;
        } else if (!data->max_brightness) {
                /*
                 * If no brightness levels are provided and max_brightness is
@@ -574,6 +606,8 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 
                        pb->levels = data->levels;
                }
+
+               props.scale = BACKLIGHT_SCALE_NON_LINEAR;
        } else {
                /*
                 * That only happens for the non-DT case, where platform data
@@ -584,7 +618,6 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 
        pb->lth_brightness = data->lth_brightness * (state.period / pb->scale);
 
-       memset(&props, 0, sizeof(struct backlight_properties));
        props.type = BACKLIGHT_RAW;
        props.max_brightness = data->max_brightness;
        bl = backlight_device_register(dev_name(&pdev->dev), &pdev->dev, pb,
index 462f14a..05b5f00 100644 (file)
@@ -48,14 +48,20 @@ static int rave_sp_backlight_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct backlight_device *bd;
 
-       bd = devm_backlight_device_register(dev, pdev->name, dev->parent,
+       bd = devm_backlight_device_register(dev, pdev->name, dev,
                                            dev_get_drvdata(dev->parent),
                                            &rave_sp_backlight_ops,
                                            &rave_sp_backlight_props);
        if (IS_ERR(bd))
                return PTR_ERR(bd);
 
-       backlight_update_status(bd);
+       /*
+        * If there is a phandle pointing to the device node we can
+        * assume that another device will manage the status changes.
+        * If not we make sure the backlight is in a consistent state.
+        */
+       if (!dev->of_node->phandle)
+               backlight_update_status(bd);
 
        return 0;
 }
index 65cb757..29af8e2 100644 (file)
@@ -222,8 +222,7 @@ static int tosa_lcd_remove(struct spi_device *spi)
 {
        struct tosa_lcd_data *data = spi_get_drvdata(spi);
 
-       if (data->i2c)
-               i2c_unregister_device(data->i2c);
+       i2c_unregister_device(data->i2c);
 
        tosa_lcd_tg_off(data);
 
index 5f83cd7..1e70e83 100644 (file)
@@ -2197,15 +2197,6 @@ config FB_BROADSHEET
          and could also have been called by other names when coupled with
          a bridge adapter.
 
-config FB_JZ4740
-       tristate "JZ4740 LCD framebuffer support"
-       depends on FB && MACH_JZ4740
-       select FB_SYS_FILLRECT
-       select FB_SYS_COPYAREA
-       select FB_SYS_IMAGEBLIT
-       help
-         Framebuffer support for the JZ4740 SoC.
-
 config FB_PUV3_UNIGFX
        tristate "PKUnity v3 Unigfx framebuffer support"
        depends on FB && UNICORE32 && ARCH_PUV3
index aab7155..aa63527 100644 (file)
@@ -116,7 +116,6 @@ obj-y                             += omap2/
 obj-$(CONFIG_XEN_FBDEV_FRONTEND)  += xen-fbfront.o
 obj-$(CONFIG_FB_CARMINE)          += carminefb.o
 obj-$(CONFIG_FB_MB862XX)         += mb862xx/
-obj-$(CONFIG_FB_JZ4740)                  += jz4740_fb.o
 obj-$(CONFIG_FB_PUV3_UNIGFX)      += fb-puv3.o
 obj-$(CONFIG_FB_HYPERV)                  += hyperv_fb.o
 obj-$(CONFIG_FB_OPENCORES)       += ocfb.o
diff --git a/drivers/video/fbdev/jz4740_fb.c b/drivers/video/fbdev/jz4740_fb.c
deleted file mode 100644 (file)
index 0b6fa25..0000000
+++ /dev/null
@@ -1,690 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *  Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
- *     JZ4740 SoC LCD framebuffer driver
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/platform_device.h>
-#include <linux/pinctrl/consumer.h>
-
-#include <linux/clk.h>
-#include <linux/delay.h>
-
-#include <linux/console.h>
-#include <linux/fb.h>
-
-#include <linux/dma-mapping.h>
-
-#include <asm/mach-jz4740/jz4740_fb.h>
-
-#define JZ_REG_LCD_CFG         0x00
-#define JZ_REG_LCD_VSYNC       0x04
-#define JZ_REG_LCD_HSYNC       0x08
-#define JZ_REG_LCD_VAT         0x0C
-#define JZ_REG_LCD_DAH         0x10
-#define JZ_REG_LCD_DAV         0x14
-#define JZ_REG_LCD_PS          0x18
-#define JZ_REG_LCD_CLS         0x1C
-#define JZ_REG_LCD_SPL         0x20
-#define JZ_REG_LCD_REV         0x24
-#define JZ_REG_LCD_CTRL                0x30
-#define JZ_REG_LCD_STATE       0x34
-#define JZ_REG_LCD_IID         0x38
-#define JZ_REG_LCD_DA0         0x40
-#define JZ_REG_LCD_SA0         0x44
-#define JZ_REG_LCD_FID0                0x48
-#define JZ_REG_LCD_CMD0                0x4C
-#define JZ_REG_LCD_DA1         0x50
-#define JZ_REG_LCD_SA1         0x54
-#define JZ_REG_LCD_FID1                0x58
-#define JZ_REG_LCD_CMD1                0x5C
-
-#define JZ_LCD_CFG_SLCD                        BIT(31)
-#define JZ_LCD_CFG_PS_DISABLE          BIT(23)
-#define JZ_LCD_CFG_CLS_DISABLE         BIT(22)
-#define JZ_LCD_CFG_SPL_DISABLE         BIT(21)
-#define JZ_LCD_CFG_REV_DISABLE         BIT(20)
-#define JZ_LCD_CFG_HSYNCM              BIT(19)
-#define JZ_LCD_CFG_PCLKM               BIT(18)
-#define JZ_LCD_CFG_INV                 BIT(17)
-#define JZ_LCD_CFG_SYNC_DIR            BIT(16)
-#define JZ_LCD_CFG_PS_POLARITY         BIT(15)
-#define JZ_LCD_CFG_CLS_POLARITY                BIT(14)
-#define JZ_LCD_CFG_SPL_POLARITY                BIT(13)
-#define JZ_LCD_CFG_REV_POLARITY                BIT(12)
-#define JZ_LCD_CFG_HSYNC_ACTIVE_LOW    BIT(11)
-#define JZ_LCD_CFG_PCLK_FALLING_EDGE   BIT(10)
-#define JZ_LCD_CFG_DE_ACTIVE_LOW       BIT(9)
-#define JZ_LCD_CFG_VSYNC_ACTIVE_LOW    BIT(8)
-#define JZ_LCD_CFG_18_BIT              BIT(7)
-#define JZ_LCD_CFG_PDW                 (BIT(5) | BIT(4))
-#define JZ_LCD_CFG_MODE_MASK 0xf
-
-#define JZ_LCD_CTRL_BURST_4            (0x0 << 28)
-#define JZ_LCD_CTRL_BURST_8            (0x1 << 28)
-#define JZ_LCD_CTRL_BURST_16           (0x2 << 28)
-#define JZ_LCD_CTRL_RGB555             BIT(27)
-#define JZ_LCD_CTRL_OFUP               BIT(26)
-#define JZ_LCD_CTRL_FRC_GRAYSCALE_16   (0x0 << 24)
-#define JZ_LCD_CTRL_FRC_GRAYSCALE_4    (0x1 << 24)
-#define JZ_LCD_CTRL_FRC_GRAYSCALE_2    (0x2 << 24)
-#define JZ_LCD_CTRL_PDD_MASK           (0xff << 16)
-#define JZ_LCD_CTRL_EOF_IRQ            BIT(13)
-#define JZ_LCD_CTRL_SOF_IRQ            BIT(12)
-#define JZ_LCD_CTRL_OFU_IRQ            BIT(11)
-#define JZ_LCD_CTRL_IFU0_IRQ           BIT(10)
-#define JZ_LCD_CTRL_IFU1_IRQ           BIT(9)
-#define JZ_LCD_CTRL_DD_IRQ             BIT(8)
-#define JZ_LCD_CTRL_QDD_IRQ            BIT(7)
-#define JZ_LCD_CTRL_REVERSE_ENDIAN     BIT(6)
-#define JZ_LCD_CTRL_LSB_FISRT          BIT(5)
-#define JZ_LCD_CTRL_DISABLE            BIT(4)
-#define JZ_LCD_CTRL_ENABLE             BIT(3)
-#define JZ_LCD_CTRL_BPP_1              0x0
-#define JZ_LCD_CTRL_BPP_2              0x1
-#define JZ_LCD_CTRL_BPP_4              0x2
-#define JZ_LCD_CTRL_BPP_8              0x3
-#define JZ_LCD_CTRL_BPP_15_16          0x4
-#define JZ_LCD_CTRL_BPP_18_24          0x5
-
-#define JZ_LCD_CMD_SOF_IRQ BIT(31)
-#define JZ_LCD_CMD_EOF_IRQ BIT(30)
-#define JZ_LCD_CMD_ENABLE_PAL BIT(28)
-
-#define JZ_LCD_SYNC_MASK 0x3ff
-
-#define JZ_LCD_STATE_DISABLED BIT(0)
-
-struct jzfb_framedesc {
-       uint32_t next;
-       uint32_t addr;
-       uint32_t id;
-       uint32_t cmd;
-} __packed;
-
-struct jzfb {
-       struct fb_info *fb;
-       struct platform_device *pdev;
-       void __iomem *base;
-       struct resource *mem;
-       struct jz4740_fb_platform_data *pdata;
-
-       size_t vidmem_size;
-       void *vidmem;
-       dma_addr_t vidmem_phys;
-       struct jzfb_framedesc *framedesc;
-       dma_addr_t framedesc_phys;
-
-       struct clk *ldclk;
-       struct clk *lpclk;
-
-       unsigned is_enabled:1;
-       struct mutex lock;
-
-       uint32_t pseudo_palette[16];
-};
-
-static const struct fb_fix_screeninfo jzfb_fix = {
-       .id             = "JZ4740 FB",
-       .type           = FB_TYPE_PACKED_PIXELS,
-       .visual         = FB_VISUAL_TRUECOLOR,
-       .xpanstep       = 0,
-       .ypanstep       = 0,
-       .ywrapstep      = 0,
-       .accel          = FB_ACCEL_NONE,
-};
-
-/* Based on CNVT_TOHW macro from skeletonfb.c */
-static inline uint32_t jzfb_convert_color_to_hw(unsigned val,
-       struct fb_bitfield *bf)
-{
-       return (((val << bf->length) + 0x7FFF - val) >> 16) << bf->offset;
-}
-
-static int jzfb_setcolreg(unsigned regno, unsigned red, unsigned green,
-                       unsigned blue, unsigned transp, struct fb_info *fb)
-{
-       uint32_t color;
-
-       if (regno >= 16)
-               return -EINVAL;
-
-       color = jzfb_convert_color_to_hw(red, &fb->var.red);
-       color |= jzfb_convert_color_to_hw(green, &fb->var.green);
-       color |= jzfb_convert_color_to_hw(blue, &fb->var.blue);
-       color |= jzfb_convert_color_to_hw(transp, &fb->var.transp);
-
-       ((uint32_t *)(fb->pseudo_palette))[regno] = color;
-
-       return 0;
-}
-
-static int jzfb_get_controller_bpp(struct jzfb *jzfb)
-{
-       switch (jzfb->pdata->bpp) {
-       case 18:
-       case 24:
-               return 32;
-       case 15:
-               return 16;
-       default:
-               return jzfb->pdata->bpp;
-       }
-}
-
-static struct fb_videomode *jzfb_get_mode(struct jzfb *jzfb,
-       struct fb_var_screeninfo *var)
-{
-       size_t i;
-       struct fb_videomode *mode = jzfb->pdata->modes;
-
-       for (i = 0; i < jzfb->pdata->num_modes; ++i, ++mode) {
-               if (mode->xres == var->xres && mode->yres == var->yres)
-                       return mode;
-       }
-
-       return NULL;
-}
-
-static int jzfb_check_var(struct fb_var_screeninfo *var, struct fb_info *fb)
-{
-       struct jzfb *jzfb = fb->par;
-       struct fb_videomode *mode;
-
-       if (var->bits_per_pixel != jzfb_get_controller_bpp(jzfb) &&
-               var->bits_per_pixel != jzfb->pdata->bpp)
-               return -EINVAL;
-
-       mode = jzfb_get_mode(jzfb, var);
-       if (mode == NULL)
-               return -EINVAL;
-
-       fb_videomode_to_var(var, mode);
-
-       switch (jzfb->pdata->bpp) {
-       case 8:
-               break;
-       case 15:
-               var->red.offset = 10;
-               var->red.length = 5;
-               var->green.offset = 6;
-               var->green.length = 5;
-               var->blue.offset = 0;
-               var->blue.length = 5;
-               break;
-       case 16:
-               var->red.offset = 11;
-               var->red.length = 5;
-               var->green.offset = 5;
-               var->green.length = 6;
-               var->blue.offset = 0;
-               var->blue.length = 5;
-               break;
-       case 18:
-               var->red.offset = 16;
-               var->red.length = 6;
-               var->green.offset = 8;
-               var->green.length = 6;
-               var->blue.offset = 0;
-               var->blue.length = 6;
-               var->bits_per_pixel = 32;
-               break;
-       case 32:
-       case 24:
-               var->transp.offset = 24;
-               var->transp.length = 8;
-               var->red.offset = 16;
-               var->red.length = 8;
-               var->green.offset = 8;
-               var->green.length = 8;
-               var->blue.offset = 0;
-               var->blue.length = 8;
-               var->bits_per_pixel = 32;
-               break;
-       default:
-               break;
-       }
-
-       return 0;
-}
-
-static int jzfb_set_par(struct fb_info *info)
-{
-       struct jzfb *jzfb = info->par;
-       struct jz4740_fb_platform_data *pdata = jzfb->pdata;
-       struct fb_var_screeninfo *var = &info->var;
-       struct fb_videomode *mode;
-       uint16_t hds, vds;
-       uint16_t hde, vde;
-       uint16_t ht, vt;
-       uint32_t ctrl;
-       uint32_t cfg;
-       unsigned long rate;
-
-       mode = jzfb_get_mode(jzfb, var);
-       if (mode == NULL)
-               return -EINVAL;
-
-       if (mode == info->mode)
-               return 0;
-
-       info->mode = mode;
-
-       hds = mode->hsync_len + mode->left_margin;
-       hde = hds + mode->xres;
-       ht = hde + mode->right_margin;
-
-       vds = mode->vsync_len + mode->upper_margin;
-       vde = vds + mode->yres;
-       vt = vde + mode->lower_margin;
-
-       ctrl = JZ_LCD_CTRL_OFUP | JZ_LCD_CTRL_BURST_16;
-
-       switch (pdata->bpp) {
-       case 1:
-               ctrl |= JZ_LCD_CTRL_BPP_1;
-               break;
-       case 2:
-               ctrl |= JZ_LCD_CTRL_BPP_2;
-               break;
-       case 4:
-               ctrl |= JZ_LCD_CTRL_BPP_4;
-               break;
-       case 8:
-               ctrl |= JZ_LCD_CTRL_BPP_8;
-       break;
-       case 15:
-               ctrl |= JZ_LCD_CTRL_RGB555; /* Falltrough */
-       case 16:
-               ctrl |= JZ_LCD_CTRL_BPP_15_16;
-               break;
-       case 18:
-       case 24:
-       case 32:
-               ctrl |= JZ_LCD_CTRL_BPP_18_24;
-               break;
-       default:
-               break;
-       }
-
-       cfg = pdata->lcd_type & 0xf;
-
-       if (!(mode->sync & FB_SYNC_HOR_HIGH_ACT))
-               cfg |= JZ_LCD_CFG_HSYNC_ACTIVE_LOW;
-
-       if (!(mode->sync & FB_SYNC_VERT_HIGH_ACT))
-               cfg |= JZ_LCD_CFG_VSYNC_ACTIVE_LOW;
-
-       if (pdata->pixclk_falling_edge)
-               cfg |= JZ_LCD_CFG_PCLK_FALLING_EDGE;
-
-       if (pdata->date_enable_active_low)
-               cfg |= JZ_LCD_CFG_DE_ACTIVE_LOW;
-
-       if (pdata->lcd_type == JZ_LCD_TYPE_GENERIC_18_BIT)
-               cfg |= JZ_LCD_CFG_18_BIT;
-
-       if (mode->pixclock) {
-               rate = PICOS2KHZ(mode->pixclock) * 1000;
-               mode->refresh = rate / vt / ht;
-       } else {
-               if (pdata->lcd_type == JZ_LCD_TYPE_8BIT_SERIAL)
-                       rate = mode->refresh * (vt + 2 * mode->xres) * ht;
-               else
-                       rate = mode->refresh * vt * ht;
-
-               mode->pixclock = KHZ2PICOS(rate / 1000);
-       }
-
-       mutex_lock(&jzfb->lock);
-       if (!jzfb->is_enabled)
-               clk_enable(jzfb->ldclk);
-       else
-               ctrl |= JZ_LCD_CTRL_ENABLE;
-
-       switch (pdata->lcd_type) {
-       case JZ_LCD_TYPE_SPECIAL_TFT_1:
-       case JZ_LCD_TYPE_SPECIAL_TFT_2:
-       case JZ_LCD_TYPE_SPECIAL_TFT_3:
-               writel(pdata->special_tft_config.spl, jzfb->base + JZ_REG_LCD_SPL);
-               writel(pdata->special_tft_config.cls, jzfb->base + JZ_REG_LCD_CLS);
-               writel(pdata->special_tft_config.ps, jzfb->base + JZ_REG_LCD_PS);
-               writel(pdata->special_tft_config.ps, jzfb->base + JZ_REG_LCD_REV);
-               break;
-       default:
-               cfg |= JZ_LCD_CFG_PS_DISABLE;
-               cfg |= JZ_LCD_CFG_CLS_DISABLE;
-               cfg |= JZ_LCD_CFG_SPL_DISABLE;
-               cfg |= JZ_LCD_CFG_REV_DISABLE;
-               break;
-       }
-
-       writel(mode->hsync_len, jzfb->base + JZ_REG_LCD_HSYNC);
-       writel(mode->vsync_len, jzfb->base + JZ_REG_LCD_VSYNC);
-
-       writel((ht << 16) | vt, jzfb->base + JZ_REG_LCD_VAT);
-
-       writel((hds << 16) | hde, jzfb->base + JZ_REG_LCD_DAH);
-       writel((vds << 16) | vde, jzfb->base + JZ_REG_LCD_DAV);
-
-       writel(cfg, jzfb->base + JZ_REG_LCD_CFG);
-
-       writel(ctrl, jzfb->base + JZ_REG_LCD_CTRL);
-
-       if (!jzfb->is_enabled)
-               clk_disable_unprepare(jzfb->ldclk);
-
-       mutex_unlock(&jzfb->lock);
-
-       clk_set_rate(jzfb->lpclk, rate);
-       clk_set_rate(jzfb->ldclk, rate * 3);
-
-       return 0;
-}
-
-static void jzfb_enable(struct jzfb *jzfb)
-{
-       uint32_t ctrl;
-
-       clk_prepare_enable(jzfb->ldclk);
-
-       pinctrl_pm_select_default_state(&jzfb->pdev->dev);
-
-       writel(0, jzfb->base + JZ_REG_LCD_STATE);
-
-       writel(jzfb->framedesc->next, jzfb->base + JZ_REG_LCD_DA0);
-
-       ctrl = readl(jzfb->base + JZ_REG_LCD_CTRL);
-       ctrl |= JZ_LCD_CTRL_ENABLE;
-       ctrl &= ~JZ_LCD_CTRL_DISABLE;
-       writel(ctrl, jzfb->base + JZ_REG_LCD_CTRL);
-}
-
-static void jzfb_disable(struct jzfb *jzfb)
-{
-       uint32_t ctrl;
-
-       ctrl = readl(jzfb->base + JZ_REG_LCD_CTRL);
-       ctrl |= JZ_LCD_CTRL_DISABLE;
-       writel(ctrl, jzfb->base + JZ_REG_LCD_CTRL);
-       do {
-               ctrl = readl(jzfb->base + JZ_REG_LCD_STATE);
-       } while (!(ctrl & JZ_LCD_STATE_DISABLED));
-
-       pinctrl_pm_select_sleep_state(&jzfb->pdev->dev);
-
-       clk_disable_unprepare(jzfb->ldclk);
-}
-
-static int jzfb_blank(int blank_mode, struct fb_info *info)
-{
-       struct jzfb *jzfb = info->par;
-
-       switch (blank_mode) {
-       case FB_BLANK_UNBLANK:
-               mutex_lock(&jzfb->lock);
-               if (jzfb->is_enabled) {
-                       mutex_unlock(&jzfb->lock);
-                       return 0;
-               }
-
-               jzfb_enable(jzfb);
-               jzfb->is_enabled = 1;
-
-               mutex_unlock(&jzfb->lock);
-               break;
-       default:
-               mutex_lock(&jzfb->lock);
-               if (!jzfb->is_enabled) {
-                       mutex_unlock(&jzfb->lock);
-                       return 0;
-               }
-
-               jzfb_disable(jzfb);
-               jzfb->is_enabled = 0;
-
-               mutex_unlock(&jzfb->lock);
-               break;
-       }
-
-       return 0;
-}
-
-static int jzfb_alloc_devmem(struct jzfb *jzfb)
-{
-       int max_videosize = 0;
-       struct fb_videomode *mode = jzfb->pdata->modes;
-       int i;
-
-       for (i = 0; i < jzfb->pdata->num_modes; ++mode, ++i) {
-               if (max_videosize < mode->xres * mode->yres)
-                       max_videosize = mode->xres * mode->yres;
-       }
-
-       max_videosize *= jzfb_get_controller_bpp(jzfb) >> 3;
-
-       jzfb->framedesc = dma_alloc_coherent(&jzfb->pdev->dev,
-                                       sizeof(*jzfb->framedesc),
-                                       &jzfb->framedesc_phys, GFP_KERNEL);
-
-       if (!jzfb->framedesc)
-               return -ENOMEM;
-
-       jzfb->vidmem_size = PAGE_ALIGN(max_videosize);
-       jzfb->vidmem = dma_alloc_coherent(&jzfb->pdev->dev,
-                                       jzfb->vidmem_size,
-                                       &jzfb->vidmem_phys, GFP_KERNEL);
-
-       if (!jzfb->vidmem)
-               goto err_free_framedesc;
-
-       jzfb->framedesc->next = jzfb->framedesc_phys;
-       jzfb->framedesc->addr = jzfb->vidmem_phys;
-       jzfb->framedesc->id = 0xdeafbead;
-       jzfb->framedesc->cmd = 0;
-       jzfb->framedesc->cmd |= max_videosize / 4;
-
-       return 0;
-
-err_free_framedesc:
-       dma_free_coherent(&jzfb->pdev->dev, sizeof(*jzfb->framedesc),
-                               jzfb->framedesc, jzfb->framedesc_phys);
-       return -ENOMEM;
-}
-
-static void jzfb_free_devmem(struct jzfb *jzfb)
-{
-       dma_free_coherent(&jzfb->pdev->dev, jzfb->vidmem_size,
-                               jzfb->vidmem, jzfb->vidmem_phys);
-       dma_free_coherent(&jzfb->pdev->dev, sizeof(*jzfb->framedesc),
-                               jzfb->framedesc, jzfb->framedesc_phys);
-}
-
-static struct  fb_ops jzfb_ops = {
-       .owner = THIS_MODULE,
-       .fb_check_var = jzfb_check_var,
-       .fb_set_par = jzfb_set_par,
-       .fb_blank = jzfb_blank,
-       .fb_fillrect    = sys_fillrect,
-       .fb_copyarea    = sys_copyarea,
-       .fb_imageblit   = sys_imageblit,
-       .fb_setcolreg = jzfb_setcolreg,
-};
-
-static int jzfb_probe(struct platform_device *pdev)
-{
-       int ret;
-       struct jzfb *jzfb;
-       struct fb_info *fb;
-       struct jz4740_fb_platform_data *pdata = pdev->dev.platform_data;
-       struct resource *mem;
-
-       if (!pdata) {
-               dev_err(&pdev->dev, "Missing platform data\n");
-               return -ENXIO;
-       }
-
-       fb = framebuffer_alloc(sizeof(struct jzfb), &pdev->dev);
-       if (!fb)
-               return -ENOMEM;
-
-       fb->fbops = &jzfb_ops;
-       fb->flags = FBINFO_DEFAULT;
-
-       jzfb = fb->par;
-       jzfb->pdev = pdev;
-       jzfb->pdata = pdata;
-
-       jzfb->ldclk = devm_clk_get(&pdev->dev, "lcd");
-       if (IS_ERR(jzfb->ldclk)) {
-               ret = PTR_ERR(jzfb->ldclk);
-               dev_err(&pdev->dev, "Failed to get lcd clock: %d\n", ret);
-               goto err_framebuffer_release;
-       }
-
-       jzfb->lpclk = devm_clk_get(&pdev->dev, "lcd_pclk");
-       if (IS_ERR(jzfb->lpclk)) {
-               ret = PTR_ERR(jzfb->lpclk);
-               dev_err(&pdev->dev, "Failed to get lcd pixel clock: %d\n", ret);
-               goto err_framebuffer_release;
-       }
-
-       mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       jzfb->base = devm_ioremap_resource(&pdev->dev, mem);
-       if (IS_ERR(jzfb->base)) {
-               ret = PTR_ERR(jzfb->base);
-               goto err_framebuffer_release;
-       }
-
-       platform_set_drvdata(pdev, jzfb);
-
-       mutex_init(&jzfb->lock);
-
-       fb_videomode_to_modelist(pdata->modes, pdata->num_modes,
-                                &fb->modelist);
-       fb_videomode_to_var(&fb->var, pdata->modes);
-       fb->var.bits_per_pixel = pdata->bpp;
-       jzfb_check_var(&fb->var, fb);
-
-       ret = jzfb_alloc_devmem(jzfb);
-       if (ret) {
-               dev_err(&pdev->dev, "Failed to allocate video memory\n");
-               goto err_framebuffer_release;
-       }
-
-       fb->fix = jzfb_fix;
-       fb->fix.line_length = fb->var.bits_per_pixel * fb->var.xres / 8;
-       fb->fix.mmio_start = mem->start;
-       fb->fix.mmio_len = resource_size(mem);
-       fb->fix.smem_start = jzfb->vidmem_phys;
-       fb->fix.smem_len =  fb->fix.line_length * fb->var.yres;
-       fb->screen_base = jzfb->vidmem;
-       fb->pseudo_palette = jzfb->pseudo_palette;
-
-       fb_alloc_cmap(&fb->cmap, 256, 0);
-
-       clk_prepare_enable(jzfb->ldclk);
-       jzfb->is_enabled = 1;
-
-       writel(jzfb->framedesc->next, jzfb->base + JZ_REG_LCD_DA0);
-
-       fb->mode = NULL;
-       jzfb_set_par(fb);
-
-       ret = register_framebuffer(fb);
-       if (ret) {
-               dev_err(&pdev->dev, "Failed to register framebuffer: %d\n", ret);
-               goto err_free_devmem;
-       }
-
-       jzfb->fb = fb;
-
-       return 0;
-
-err_free_devmem:
-       fb_dealloc_cmap(&fb->cmap);
-       jzfb_free_devmem(jzfb);
-err_framebuffer_release:
-       framebuffer_release(fb);
-       return ret;
-}
-
-static int jzfb_remove(struct platform_device *pdev)
-{
-       struct jzfb *jzfb = platform_get_drvdata(pdev);
-
-       jzfb_blank(FB_BLANK_POWERDOWN, jzfb->fb);
-
-       fb_dealloc_cmap(&jzfb->fb->cmap);
-       jzfb_free_devmem(jzfb);
-
-       framebuffer_release(jzfb->fb);
-
-       return 0;
-}
-
-#ifdef CONFIG_PM
-
-static int jzfb_suspend(struct device *dev)
-{
-       struct jzfb *jzfb = dev_get_drvdata(dev);
-
-       console_lock();
-       fb_set_suspend(jzfb->fb, 1);
-       console_unlock();
-
-       mutex_lock(&jzfb->lock);
-       if (jzfb->is_enabled)
-               jzfb_disable(jzfb);
-       mutex_unlock(&jzfb->lock);
-
-       return 0;
-}
-
-static int jzfb_resume(struct device *dev)
-{
-       struct jzfb *jzfb = dev_get_drvdata(dev);
-       clk_prepare_enable(jzfb->ldclk);
-
-       mutex_lock(&jzfb->lock);
-       if (jzfb->is_enabled)
-               jzfb_enable(jzfb);
-       mutex_unlock(&jzfb->lock);
-
-       console_lock();
-       fb_set_suspend(jzfb->fb, 0);
-       console_unlock();
-
-       return 0;
-}
-
-static const struct dev_pm_ops jzfb_pm_ops = {
-       .suspend        = jzfb_suspend,
-       .resume         = jzfb_resume,
-       .poweroff       = jzfb_suspend,
-       .restore        = jzfb_resume,
-};
-
-#define JZFB_PM_OPS (&jzfb_pm_ops)
-
-#else
-#define JZFB_PM_OPS NULL
-#endif
-
-static struct platform_driver jzfb_driver = {
-       .probe = jzfb_probe,
-       .remove = jzfb_remove,
-       .driver = {
-               .name = "jz4740-fb",
-               .pm = JZFB_PM_OPS,
-       },
-};
-module_platform_driver(jzfb_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
-MODULE_DESCRIPTION("JZ4740 SoC LCD framebuffer driver");
-MODULE_ALIAS("platform:jz4740-fb");
index a45f9e3..58e7c10 100644 (file)
@@ -477,13 +477,6 @@ config IXP4XX_WATCHDOG
 
          Say N if you are unsure.
 
-config KS8695_WATCHDOG
-       tristate "KS8695 watchdog"
-       depends on ARCH_KS8695
-       help
-         Watchdog timer embedded into KS8695 processor. This will reboot your
-         system when the timeout is reached.
-
 config HAVE_S3C2410_WATCHDOG
        bool
        help
@@ -662,15 +655,6 @@ config STMP3XXX_RTC_WATCHDOG
          To compile this driver as a module, choose M here: the
          module will be called stmp3xxx_rtc_wdt.
 
-config NUC900_WATCHDOG
-       tristate "Nuvoton NUC900 watchdog"
-       depends on ARCH_W90X900 || COMPILE_TEST
-       help
-         Say Y here if to include support for the watchdog timer
-         for the Nuvoton NUC900 series SoCs.
-         To compile this driver as a module, choose M here: the
-         module will be called nuc900_wdt.
-
 config TS4800_WATCHDOG
        tristate "TS-4800 Watchdog"
        depends on HAS_IOMEM && OF
@@ -740,6 +724,19 @@ config IMX_SC_WDT
          To compile this driver as a module, choose M here: the
          module will be called imx_sc_wdt.
 
+config IMX7ULP_WDT
+       tristate "IMX7ULP Watchdog"
+       depends on ARCH_MXC || COMPILE_TEST
+       select WATCHDOG_CORE
+       help
+         This is the driver for the hardware watchdog on the Freescale
+         IMX7ULP and later processors. If you have one of these
+         processors and wish to have watchdog support enabled,
+         say Y, otherwise say N.
+
+         To compile this driver as a module, choose M here: the
+         module will be called imx7ulp_wdt.
+
 config UX500_WATCHDOG
        tristate "ST-Ericsson Ux500 watchdog"
        depends on MFD_DB8500_PRCMU
@@ -1046,8 +1043,8 @@ config F71808E_WDT
        depends on X86
        help
          This is the driver for the hardware watchdog on the Fintek F71808E,
-         F71862FG, F71868, F71869, F71882FG, F71889FG, F81865 and F81866
-         Super I/O controllers.
+         F71862FG, F71868, F71869, F71882FG, F71889FG, F81803, F81865, and
+         F81866 Super I/O controllers.
 
          You can compile this driver directly into the kernel, or use
          it as a module.  The module will be called f71808e_wdt.
index 7caa920..2ee352b 100644 (file)
@@ -49,7 +49,6 @@ obj-$(CONFIG_21285_WATCHDOG) += wdt285.o
 obj-$(CONFIG_977_WATCHDOG) += wdt977.o
 obj-$(CONFIG_FTWDT010_WATCHDOG) += ftwdt010_wdt.o
 obj-$(CONFIG_IXP4XX_WATCHDOG) += ixp4xx_wdt.o
-obj-$(CONFIG_KS8695_WATCHDOG) += ks8695_wdt.o
 obj-$(CONFIG_S3C2410_WATCHDOG) += s3c2410_wdt.o
 obj-$(CONFIG_SA1100_WATCHDOG) += sa1100_wdt.o
 obj-$(CONFIG_SAMA5D4_WATCHDOG) += sama5d4_wdt.o
@@ -64,11 +63,11 @@ obj-$(CONFIG_RN5T618_WATCHDOG) += rn5t618_wdt.o
 obj-$(CONFIG_COH901327_WATCHDOG) += coh901327_wdt.o
 obj-$(CONFIG_NPCM7XX_WATCHDOG) += npcm_wdt.o
 obj-$(CONFIG_STMP3XXX_RTC_WATCHDOG) += stmp3xxx_rtc_wdt.o
-obj-$(CONFIG_NUC900_WATCHDOG) += nuc900_wdt.o
 obj-$(CONFIG_TS4800_WATCHDOG) += ts4800_wdt.o
 obj-$(CONFIG_TS72XX_WATCHDOG) += ts72xx_wdt.o
 obj-$(CONFIG_IMX2_WDT) += imx2_wdt.o
 obj-$(CONFIG_IMX_SC_WDT) += imx_sc_wdt.o
+obj-$(CONFIG_IMX7ULP_WDT) += imx7ulp_wdt.o
 obj-$(CONFIG_UX500_WATCHDOG) += ux500_wdt.o
 obj-$(CONFIG_RETU_WATCHDOG) += retu_wdt.o
 obj-$(CONFIG_BCM2835_WDT) += bcm2835_wdt.o
index cc71861..4ec0906 100644 (file)
@@ -34,6 +34,7 @@ static const struct aspeed_wdt_config ast2500_config = {
 static const struct of_device_id aspeed_wdt_of_table[] = {
        { .compatible = "aspeed,ast2400-wdt", .data = &ast2400_config },
        { .compatible = "aspeed,ast2500-wdt", .data = &ast2500_config },
+       { .compatible = "aspeed,ast2600-wdt", .data = &ast2500_config },
        { },
 };
 MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table);
@@ -53,6 +54,8 @@ MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table);
 #define   WDT_CTRL_ENABLE              BIT(0)
 #define WDT_TIMEOUT_STATUS     0x10
 #define   WDT_TIMEOUT_STATUS_BOOT_SECONDARY    BIT(1)
+#define WDT_CLEAR_TIMEOUT_STATUS       0x14
+#define   WDT_CLEAR_TIMEOUT_AND_BOOT_CODE_SELECTION    BIT(0)
 
 /*
  * WDT_RESET_WIDTH controls the characteristics of the external pulse (if
@@ -165,6 +168,60 @@ static int aspeed_wdt_restart(struct watchdog_device *wdd,
        return 0;
 }
 
+/* access_cs0 shows if cs0 is accessible, hence the inverted bit */
+static ssize_t access_cs0_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       struct aspeed_wdt *wdt = dev_get_drvdata(dev);
+       u32 status = readl(wdt->base + WDT_TIMEOUT_STATUS);
+
+       return sprintf(buf, "%u\n",
+                     !(status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY));
+}
+
+static ssize_t access_cs0_store(struct device *dev,
+                               struct device_attribute *attr, const char *buf,
+                               size_t size)
+{
+       struct aspeed_wdt *wdt = dev_get_drvdata(dev);
+       unsigned long val;
+
+       if (kstrtoul(buf, 10, &val))
+               return -EINVAL;
+
+       if (val)
+               writel(WDT_CLEAR_TIMEOUT_AND_BOOT_CODE_SELECTION,
+                      wdt->base + WDT_CLEAR_TIMEOUT_STATUS);
+
+       return size;
+}
+
+/*
+ * This attribute exists only if the system has booted from the alternate
+ * flash with the 'alt-boot' option.
+ *
+ * When booted from the alternate flash, the 'access_cs0' sysfs node provides:
+ *   ast2400: a way to get access to the primary SPI flash chip at CS0
+ *            after booting from the alternate chip at CS1.
+ *   ast2500: a way to restore the normal address mapping from
+ *            (CS0->CS1, CS1->CS0) to (CS0->CS0, CS1->CS1).
+ *
+ * Clearing the boot code selection and timeout counter also resets the
+ * chip select line mapping to its initial state. When the SoC is in the
+ * normal mapping state (i.e. booted from CS0), clearing those bits does
+ * nothing on either version of the SoC. In alternate boot mode (booted from
+ * CS1 due to wdt2 expiration) the behavior differs as described above.
+ *
+ * This option can be used with wdt2 (watchdog1) only.
+ */
+static DEVICE_ATTR_RW(access_cs0);
+
+static struct attribute *bswitch_attrs[] = {
+       &dev_attr_access_cs0.attr,
+       NULL
+};
+ATTRIBUTE_GROUPS(bswitch);
+
 static const struct watchdog_ops aspeed_wdt_ops = {
        .start          = aspeed_wdt_start,
        .stop           = aspeed_wdt_stop,
@@ -259,7 +316,8 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
                set_bit(WDOG_HW_RUNNING, &wdt->wdd.status);
        }
 
-       if (of_device_is_compatible(np, "aspeed,ast2500-wdt")) {
+       if ((of_device_is_compatible(np, "aspeed,ast2500-wdt")) ||
+               (of_device_is_compatible(np, "aspeed,ast2600-wdt"))) {
                u32 reg = readl(wdt->base + WDT_RESET_WIDTH);
 
                reg &= config->ext_pulse_width_mask;
@@ -306,9 +364,16 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
        }
 
        status = readl(wdt->base + WDT_TIMEOUT_STATUS);
-       if (status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY)
+       if (status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY) {
                wdt->wdd.bootstatus = WDIOF_CARDRESET;
 
+               if (of_device_is_compatible(np, "aspeed,ast2400-wdt") ||
+                   of_device_is_compatible(np, "aspeed,ast2500-wdt"))
+                       wdt->wdd.groups = bswitch_groups;
+       }
+
+       dev_set_drvdata(dev, wdt);
+
        return devm_watchdog_register_device(dev, &wdt->wdd);
 }
 
index 2e09981..75de664 100644 (file)
@@ -302,7 +302,7 @@ static int ath79_wdt_remove(struct platform_device *pdev)
        return 0;
 }
 
-static void ath97_wdt_shutdown(struct platform_device *pdev)
+static void ath79_wdt_shutdown(struct platform_device *pdev)
 {
        ath79_wdt_disable();
 }
@@ -318,7 +318,7 @@ MODULE_DEVICE_TABLE(of, ath79_wdt_match);
 static struct platform_driver ath79_wdt_driver = {
        .probe          = ath79_wdt_probe,
        .remove         = ath79_wdt_remove,
-       .shutdown       = ath97_wdt_shutdown,
+       .shutdown       = ath79_wdt_shutdown,
        .driver         = {
                .name   = DRIVER_NAME,
                .of_match_table = of_match_ptr(ath79_wdt_match),
index b973b31..9393be5 100644 (file)
@@ -473,29 +473,6 @@ static long cpwd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        return 0;
 }
 
-static long cpwd_compat_ioctl(struct file *file, unsigned int cmd,
-                             unsigned long arg)
-{
-       int rval = -ENOIOCTLCMD;
-
-       switch (cmd) {
-       /* solaris ioctls are specific to this driver */
-       case WIOCSTART:
-       case WIOCSTOP:
-       case WIOCGSTAT:
-               mutex_lock(&cpwd_mutex);
-               rval = cpwd_ioctl(file, cmd, arg);
-               mutex_unlock(&cpwd_mutex);
-               break;
-
-       /* everything else is handled by the generic compat layer */
-       default:
-               break;
-       }
-
-       return rval;
-}
-
 static ssize_t cpwd_write(struct file *file, const char __user *buf,
                          size_t count, loff_t *ppos)
 {
@@ -520,7 +497,7 @@ static ssize_t cpwd_read(struct file *file, char __user *buffer,
 static const struct file_operations cpwd_fops = {
        .owner =                THIS_MODULE,
        .unlocked_ioctl =       cpwd_ioctl,
-       .compat_ioctl =         cpwd_compat_ioctl,
+       .compat_ioctl =         compat_ptr_ioctl,
        .open =                 cpwd_open,
        .write =                cpwd_write,
        .read =                 cpwd_read,
index 181440b..aafc8d9 100644 (file)
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/slab.h>
-#include <linux/miscdevice.h>
 #include <linux/watchdog.h>
 #include <linux/suspend.h>
 #include <asm/ebcdic.h>
 #include <asm/diag.h>
 #include <linux/io.h>
-#include <linux/uaccess.h>
 
 #define MAX_CMDLEN 240
 #define DEFAULT_CMD "SYSTEM RESTART"
@@ -70,7 +68,6 @@ MODULE_PARM_DESC(conceal, "Enable the CONCEAL CP option while the watchdog is ac
 module_param_named(nowayout, nowayout_info, bool, 0444);
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default = CONFIG_WATCHDOG_NOWAYOUT)");
 
-MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
 MODULE_ALIAS("vmwatchdog");
 
 static int __diag288(unsigned int func, unsigned int timeout,
index ff5cf1b..e46104c 100644 (file)
 #define SIO_REG_DEVID          0x20    /* Device ID (2 bytes) */
 #define SIO_REG_DEVREV         0x22    /* Device revision */
 #define SIO_REG_MANID          0x23    /* Fintek ID (2 bytes) */
+#define SIO_REG_CLOCK_SEL      0x26    /* Clock select */
 #define SIO_REG_ROM_ADDR_SEL   0x27    /* ROM address select */
 #define SIO_F81866_REG_PORT_SEL        0x27    /* F81866 Multi-Function Register */
+#define SIO_REG_TSI_LEVEL_SEL  0x28    /* TSI Level select */
 #define SIO_REG_MFUNCT1                0x29    /* Multi function select 1 */
 #define SIO_REG_MFUNCT2                0x2a    /* Multi function select 2 */
 #define SIO_REG_MFUNCT3                0x2b    /* Multi function select 3 */
@@ -49,6 +51,7 @@
 #define SIO_F71869A_ID         0x1007  /* Chipset ID */
 #define SIO_F71882_ID          0x0541  /* Chipset ID */
 #define SIO_F71889_ID          0x0723  /* Chipset ID */
+#define SIO_F81803_ID          0x1210  /* Chipset ID */
 #define SIO_F81865_ID          0x0704  /* Chipset ID */
 #define SIO_F81866_ID          0x1010  /* Chipset ID */
 
@@ -108,7 +111,7 @@ MODULE_PARM_DESC(start_withtimeout, "Start watchdog timer on module load with"
        " given initial timeout. Zero (default) disables this feature.");
 
 enum chips { f71808fg, f71858fg, f71862fg, f71868, f71869, f71882fg, f71889fg,
-            f81865, f81866};
+            f81803, f81865, f81866};
 
 static const char *f71808e_names[] = {
        "f71808fg",
@@ -118,6 +121,7 @@ static const char *f71808e_names[] = {
        "f71869",
        "f71882fg",
        "f71889fg",
+       "f81803",
        "f81865",
        "f81866",
 };
@@ -370,6 +374,14 @@ static int watchdog_start(void)
                        superio_inb(watchdog.sioaddr, SIO_REG_MFUNCT3) & 0xcf);
                break;
 
+       case f81803:
+               /* Enable TSI Level register bank */
+               superio_clear_bit(watchdog.sioaddr, SIO_REG_CLOCK_SEL, 3);
+               /* Set pin 27 to WDTRST# */
+               superio_outb(watchdog.sioaddr, SIO_REG_TSI_LEVEL_SEL, 0x5f &
+                       superio_inb(watchdog.sioaddr, SIO_REG_TSI_LEVEL_SEL));
+               break;
+
        case f81865:
                /* Set pin 70 to WDTRST# */
                superio_clear_bit(watchdog.sioaddr, SIO_REG_MFUNCT3, 5);
@@ -809,6 +821,9 @@ static int __init f71808e_find(int sioaddr)
                /* Confirmed (by datasheet) not to have a watchdog. */
                err = -ENODEV;
                goto exit;
+       case SIO_F81803_ID:
+               watchdog.type = f81803;
+               break;
        case SIO_F81865_ID:
                watchdog.type = f81865;
                break;
index c559f70..156360e 100644 (file)
@@ -48,6 +48,7 @@
 
 /* Includes */
 #include <linux/acpi.h>                        /* For ACPI support */
+#include <linux/bits.h>                        /* For BIT() */
 #include <linux/module.h>              /* For module specific items */
 #include <linux/moduleparam.h>         /* For new moduleparam's */
 #include <linux/types.h>               /* For standard types (like size_t) */
@@ -215,6 +216,23 @@ static int update_no_reboot_bit_mem(void *priv, bool set)
        return 0;
 }
 
+/*
+ * Set or clear bit 0 of the TCO1_CNT I/O register, then read the register
+ * back to check that the written value stuck.
+ *
+ * @priv: the struct iTCO_wdt_private of the device, passed as void *
+ * @set:  true to set bit 0, false to clear it
+ *
+ * Returns 0 if the read-back equals the value written, -EIO otherwise.
+ */
+static int update_no_reboot_bit_cnt(void *priv, bool set)
+{
+       struct iTCO_wdt_private *p = priv;
+       u16 wanted, readback;
+
+       wanted = inw(TCO1_CNT(p));
+       wanted = set ? (wanted | BIT(0)) : (wanted & ~BIT(0));
+       outw(wanted, TCO1_CNT(p));
+
+       /* verify that the register really holds what was written */
+       readback = inw(TCO1_CNT(p));
+       if (readback != wanted)
+               return -EIO;
+
+       return 0;
+}
+
 static void iTCO_wdt_no_reboot_bit_setup(struct iTCO_wdt_private *p,
                struct itco_wdt_platform_data *pdata)
 {
@@ -224,7 +242,9 @@ static void iTCO_wdt_no_reboot_bit_setup(struct iTCO_wdt_private *p,
                return;
        }
 
-       if (p->iTCO_version >= 2)
+       if (p->iTCO_version >= 6)
+               p->update_no_reboot_bit = update_no_reboot_bit_cnt;
+       else if (p->iTCO_version >= 2)
                p->update_no_reboot_bit = update_no_reboot_bit_mem;
        else if (p->iTCO_version == 1)
                p->update_no_reboot_bit = update_no_reboot_bit_pci;
@@ -452,7 +472,8 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
         * Get the Memory-Mapped GCS or PMC register, we need it for the
         * NO_REBOOT flag (TCO v2 and v3).
         */
-       if (p->iTCO_version >= 2 && !pdata->update_no_reboot_bit) {
+       if (p->iTCO_version >= 2 && p->iTCO_version < 6 &&
+           !pdata->update_no_reboot_bit) {
                p->gcs_pmc_res = platform_get_resource(pdev,
                                                       IORESOURCE_MEM,
                                                       ICH_RES_MEM_GCS_PMC);
@@ -502,6 +523,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
 
        /* Clear out the (probably old) status */
        switch (p->iTCO_version) {
+       case 6:
        case 5:
        case 4:
                outw(0x0008, TCO1_STS(p)); /* Clear the Time Out Status bit */
index 32af397..8d019a9 100644 (file)
@@ -55,7 +55,7 @@
 
 #define IMX2_WDT_WMCR          0x08            /* Misc Register */
 
-#define IMX2_WDT_MAX_TIME      128
+#define IMX2_WDT_MAX_TIME      128U
 #define IMX2_WDT_DEFAULT_TIME  60              /* in seconds */
 
 #define WDOG_SEC_TO_COUNT(s)   ((s * 2 - 1) << 8)
@@ -180,7 +180,7 @@ static int imx2_wdt_set_timeout(struct watchdog_device *wdog,
 {
        unsigned int actual;
 
-       actual = min(new_timeout, wdog->max_hw_heartbeat_ms * 1000);
+       actual = min(new_timeout, IMX2_WDT_MAX_TIME);
        __imx2_wdt_set_timeout(wdog, actual);
        wdog->timeout = new_timeout;
        return 0;
diff --git a/drivers/watchdog/imx7ulp_wdt.c b/drivers/watchdog/imx7ulp_wdt.c
new file mode 100644 (file)
index 0000000..5ce5102
--- /dev/null
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 NXP.
+ */
+
+#include <linux/clk.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/reboot.h>
+#include <linux/watchdog.h>
+
+/*
+ * Register offsets from the mapped base, and bit fields of the CS register.
+ * WDOG_CS_EN is the bit toggled by imx7ulp_wdt_enable(); WDOG_CS_ULK and
+ * WDOG_CS_RCS are defined but not referenced elsewhere in this file.
+ */
+#define WDOG_CS                        0x0
+#define WDOG_CS_CMD32EN                BIT(13)
+#define WDOG_CS_ULK            BIT(11)
+#define WDOG_CS_RCS            BIT(10)
+#define WDOG_CS_EN             BIT(7)
+#define WDOG_CS_UPDATE         BIT(5)
+
+#define WDOG_CNT       0x4
+#define WDOG_TOVAL     0x8
+
+/*
+ * Key written to WDOG_CNT to refresh (ping) the watchdog.
+ * The halves are unsigned so that the "<< 16" below cannot overflow a
+ * signed int.
+ */
+#define REFRESH_SEQ0   0xA602U
+#define REFRESH_SEQ1   0xB480U
+#define REFRESH                ((REFRESH_SEQ1 << 16) | REFRESH_SEQ0)
+
+/*
+ * Key written to WDOG_CNT before CS or TOVAL is modified; either as one
+ * 32-bit write (UNLOCK) or as two writes (UNLOCK_SEQ0 then UNLOCK_SEQ1).
+ */
+#define UNLOCK_SEQ0    0xC520U
+#define UNLOCK_SEQ1    0xD928U
+#define UNLOCK         ((UNLOCK_SEQ1 << 16) | UNLOCK_SEQ0)
+
+/* Initial and maximum values for the watchdog core's timeout field */
+#define DEFAULT_TIMEOUT        60
+#define MAX_TIMEOUT    128
+/* Factor applied to a timeout to obtain the value written to TOVAL */
+#define WDOG_CLOCK_RATE        1000
+
+/*
+ * "nowayout" module parameter, defaulting to WATCHDOG_NOWAYOUT.
+ * It is registered with permission 0000, i.e. without a sysfs entry.
+ */
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, 0000);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
+                __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+/*
+ * Per-device driver state. Allocated in probe and stored both as platform
+ * drvdata and as watchdog drvdata.
+ */
+struct imx7ulp_wdt_device {
+       /* NOTE(review): not referenced anywhere else in this file - confirm it is needed */
+       struct notifier_block restart_handler;
+       /* watchdog core device registered in probe */
+       struct watchdog_device wdd;
+       /* mapped register base (resource 0 of the platform device) */
+       void __iomem *base;
+       /* clock enabled in probe/resume and disabled in suspend */
+       struct clk *clk;
+};
+
+/*
+ * Set or clear the EN bit of the CS register.
+ *
+ * CS is sampled first, then the unlock key is written to CNT and the
+ * updated CS value is written back.
+ */
+static inline void imx7ulp_wdt_enable(void __iomem *base, bool enable)
+{
+       u32 cs = readl(base + WDOG_CS);
+
+       if (enable)
+               cs |= WDOG_CS_EN;
+       else
+               cs &= ~WDOG_CS_EN;
+
+       writel(UNLOCK, base + WDOG_CNT);
+       writel(cs, base + WDOG_CS);
+}
+
+/* Return true when the EN bit is currently set in the CS register. */
+static inline bool imx7ulp_wdt_is_enabled(void __iomem *base)
+{
+       return (readl(base + WDOG_CS) & WDOG_CS_EN) != 0;
+}
+
+/* watchdog_ops.ping: write the refresh key to the CNT register. Returns 0. */
+static int imx7ulp_wdt_ping(struct watchdog_device *wdog)
+{
+       struct imx7ulp_wdt_device *priv = watchdog_get_drvdata(wdog);
+       void __iomem *cnt = priv->base + WDOG_CNT;
+
+       writel(REFRESH, cnt);
+
+       return 0;
+}
+
+/* watchdog_ops.start: set the EN bit through imx7ulp_wdt_enable(). Returns 0. */
+static int imx7ulp_wdt_start(struct watchdog_device *wdog)
+{
+       struct imx7ulp_wdt_device *priv = watchdog_get_drvdata(wdog);
+       void __iomem *regs = priv->base;
+
+       imx7ulp_wdt_enable(regs, true);
+
+       return 0;
+}
+
+/* watchdog_ops.stop: clear the EN bit through imx7ulp_wdt_enable(). Returns 0. */
+static int imx7ulp_wdt_stop(struct watchdog_device *wdog)
+{
+       struct imx7ulp_wdt_device *priv = watchdog_get_drvdata(wdog);
+       void __iomem *regs = priv->base;
+
+       imx7ulp_wdt_enable(regs, false);
+
+       return 0;
+}
+
+/*
+ * watchdog_ops.set_timeout: program a new timeout.
+ *
+ * Writes the unlock key to CNT, then writes @timeout scaled by
+ * WDOG_CLOCK_RATE to TOVAL, and records @timeout in @wdog.
+ * Always returns 0.
+ */
+static int imx7ulp_wdt_set_timeout(struct watchdog_device *wdog,
+                                  unsigned int timeout)
+{
+       struct imx7ulp_wdt_device *priv = watchdog_get_drvdata(wdog);
+       void __iomem *regs = priv->base;
+       u32 toval = WDOG_CLOCK_RATE * timeout;
+
+       writel(UNLOCK, regs + WDOG_CNT);
+       writel(toval, regs + WDOG_TOVAL);
+
+       wdog->timeout = timeout;
+
+       return 0;
+}
+
+/* Callbacks handed to the watchdog core via wdog->ops in probe. */
+static const struct watchdog_ops imx7ulp_wdt_ops = {
+       .owner = THIS_MODULE,
+       .start = imx7ulp_wdt_start,
+       .stop  = imx7ulp_wdt_stop,
+       .ping  = imx7ulp_wdt_ping,
+       .set_timeout = imx7ulp_wdt_set_timeout,
+};
+
+/* Identity string and option flags handed to the watchdog core via wdog->info. */
+static const struct watchdog_info imx7ulp_wdt_info = {
+       .identity = "i.MX7ULP watchdog timer",
+       .options  = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING |
+                   WDIOF_MAGICCLOSE,
+};
+
+/*
+ * Unlock the watchdog and (re)configure it.
+ *
+ * @base:    mapped register base
+ * @timeout: raw value written to TOVAL (callers in this file pass
+ *           wdd.timeout * WDOG_CLOCK_RATE)
+ *
+ * The unlock key is written as two separate writes (SEQ0 then SEQ1).
+ * The CS value written here does not include WDOG_CS_EN.
+ */
+static inline void imx7ulp_wdt_init(void __iomem *base, unsigned int timeout)
+{
+       u32 val;
+
+       /* unlock the wdog for reconfiguration */
+       writel_relaxed(UNLOCK_SEQ0, base + WDOG_CNT);
+       writel_relaxed(UNLOCK_SEQ1, base + WDOG_CNT);
+
+       /* set an initial timeout value in TOVAL */
+       writel(timeout, base + WDOG_TOVAL);
+       /*
+        * enable 32bit command sequence and reconfigure.
+        * NOTE(review): BIT(8) has no named macro in this file; presumably a
+        * clock-select bit - confirm against the reference manual.
+        */
+       val = WDOG_CS_CMD32EN | BIT(8) | WDOG_CS_UPDATE;
+       writel(val, base + WDOG_CS);
+}
+
+/* devm action registered in probe: disable and unprepare the clock in @data. */
+static void imx7ulp_wdt_action(void *data)
+{
+       struct clk *clk = data;
+
+       clk_disable_unprepare(clk);
+}
+
+/*
+ * Platform probe: allocate driver state, map the registers, enable the
+ * clock, configure the hardware and register the watchdog device.
+ *
+ * All resources are device-managed; the clock is released through the
+ * imx7ulp_wdt_action() devm action.
+ *
+ * Returns 0 on success or a negative errno from the failing step.
+ */
+static int imx7ulp_wdt_probe(struct platform_device *pdev)
+{
+       struct imx7ulp_wdt_device *imx7ulp_wdt;
+       struct device *dev = &pdev->dev;
+       struct watchdog_device *wdog;
+       int ret;
+
+       imx7ulp_wdt = devm_kzalloc(dev, sizeof(*imx7ulp_wdt), GFP_KERNEL);
+       if (!imx7ulp_wdt)
+               return -ENOMEM;
+
+       /* the suspend/resume callbacks fetch the state via dev_get_drvdata() */
+       platform_set_drvdata(pdev, imx7ulp_wdt);
+
+       imx7ulp_wdt->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(imx7ulp_wdt->base))
+               return PTR_ERR(imx7ulp_wdt->base);
+
+       imx7ulp_wdt->clk = devm_clk_get(dev, NULL);
+       if (IS_ERR(imx7ulp_wdt->clk)) {
+               dev_err(dev, "Failed to get watchdog clock\n");
+               return PTR_ERR(imx7ulp_wdt->clk);
+       }
+
+       ret = clk_prepare_enable(imx7ulp_wdt->clk);
+       if (ret)
+               return ret;
+
+       /* on failure the action runs immediately, so the clock is not leaked */
+       ret = devm_add_action_or_reset(dev, imx7ulp_wdt_action, imx7ulp_wdt->clk);
+       if (ret)
+               return ret;
+
+       wdog = &imx7ulp_wdt->wdd;
+       wdog->info = &imx7ulp_wdt_info;
+       wdog->ops = &imx7ulp_wdt_ops;
+       wdog->min_timeout = 1;
+       wdog->max_timeout = MAX_TIMEOUT;
+       wdog->parent = dev;
+       wdog->timeout = DEFAULT_TIMEOUT;
+
+       watchdog_init_timeout(wdog, 0, dev);
+       /* apply the "nowayout" module parameter, which was otherwise ignored */
+       watchdog_set_nowayout(wdog, nowayout);
+       watchdog_stop_on_reboot(wdog);
+       watchdog_stop_on_unregister(wdog);
+       watchdog_set_drvdata(wdog, imx7ulp_wdt);
+       imx7ulp_wdt_init(imx7ulp_wdt->base, wdog->timeout * WDOG_CLOCK_RATE);
+
+       return devm_watchdog_register_device(dev, wdog);
+}
+
+/*
+ * PM suspend callback: stop the watchdog if the core reports it active,
+ * then disable and unprepare the clock. Always returns 0.
+ */
+static int __maybe_unused imx7ulp_wdt_suspend(struct device *dev)
+{
+       struct imx7ulp_wdt_device *priv = dev_get_drvdata(dev);
+       struct watchdog_device *wdd = &priv->wdd;
+
+       if (watchdog_active(wdd))
+               imx7ulp_wdt_stop(wdd);
+
+       clk_disable_unprepare(priv->clk);
+
+       return 0;
+}
+
+/*
+ * PM resume callback: re-enable the clock, re-run imx7ulp_wdt_init() if
+ * the hardware reports the EN bit set, and restart the watchdog if the
+ * core considers it active.
+ *
+ * Returns 0 on success or the error from clk_prepare_enable().
+ */
+static int __maybe_unused imx7ulp_wdt_resume(struct device *dev)
+{
+       struct imx7ulp_wdt_device *priv = dev_get_drvdata(dev);
+       struct watchdog_device *wdd = &priv->wdd;
+       u32 toval = wdd->timeout * WDOG_CLOCK_RATE;
+       int err;
+
+       err = clk_prepare_enable(priv->clk);
+       if (err)
+               return err;
+
+       if (imx7ulp_wdt_is_enabled(priv->base))
+               imx7ulp_wdt_init(priv->base, toval);
+
+       if (watchdog_active(wdd))
+               imx7ulp_wdt_start(wdd);
+
+       return 0;
+}
+
+/* PM operations table built from the suspend/resume callbacks above. */
+static SIMPLE_DEV_PM_OPS(imx7ulp_wdt_pm_ops, imx7ulp_wdt_suspend,
+                        imx7ulp_wdt_resume);
+
+/* Device-tree match table: this driver binds to "fsl,imx7ulp-wdt" nodes. */
+static const struct of_device_id imx7ulp_wdt_dt_ids[] = {
+       { .compatible = "fsl,imx7ulp-wdt", },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, imx7ulp_wdt_dt_ids);
+
+/*
+ * Platform driver definition. No .remove callback is set; probe uses
+ * device-managed (devm_*) resources only.
+ */
+static struct platform_driver imx7ulp_wdt_driver = {
+       .probe          = imx7ulp_wdt_probe,
+       .driver         = {
+               .name   = "imx7ulp-wdt",
+               .pm     = &imx7ulp_wdt_pm_ops,
+               .of_match_table = imx7ulp_wdt_dt_ids,
+       },
+};
+module_platform_driver(imx7ulp_wdt_driver);
+
+MODULE_AUTHOR("Anson Huang <Anson.Huang@nxp.com>");
+MODULE_DESCRIPTION("Freescale i.MX7ULP watchdog driver");
+MODULE_LICENSE("GPL v2");
index 78eaaf7..7ea5cf5 100644 (file)
@@ -175,12 +175,9 @@ static int imx_sc_wdt_probe(struct platform_device *pdev)
        watchdog_stop_on_unregister(wdog);
 
        ret = devm_watchdog_register_device(dev, wdog);
-       if (ret) {
-               dev_err(dev, "Failed to register watchdog device\n");
-               return ret;
-       }
+       if (ret)
+               return ret;
+
        ret = imx_scu_irq_group_enable(SC_IRQ_GROUP_WDOG,
                                       SC_IRQ_WDOG,
                                       true);
index d4a9091..c6052ae 100644 (file)
@@ -162,7 +162,6 @@ static int jz4740_wdt_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct jz4740_wdt_drvdata *drvdata;
        struct watchdog_device *jz4740_wdt;
-       int ret;
 
        drvdata = devm_kzalloc(dev, sizeof(struct jz4740_wdt_drvdata),
                               GFP_KERNEL);
diff --git a/drivers/watchdog/ks8695_wdt.c b/drivers/watchdog/ks8695_wdt.c
deleted file mode 100644 (file)
index 1550ce3..0000000
+++ /dev/null
@@ -1,319 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Watchdog driver for Kendin/Micrel KS8695.
- *
- * (C) 2007 Andrew Victor
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/bitops.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/miscdevice.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/platform_device.h>
-#include <linux/types.h>
-#include <linux/watchdog.h>
-#include <linux/io.h>
-#include <linux/uaccess.h>
-#include <mach/hardware.h>
-
-#define KS8695_TMR_OFFSET      (0xF0000 + 0xE400)
-#define KS8695_TMR_VA          (KS8695_IO_VA + KS8695_TMR_OFFSET)
-
-/*
- * Timer registers
- */
-#define KS8695_TMCON           (0x00)          /* Timer Control Register */
-#define KS8695_T0TC            (0x08)          /* Timer 0 Timeout Count Register */
-#define TMCON_T0EN             (1 << 0)        /* Timer 0 Enable */
-
-/* Timer0 Timeout Counter Register */
-#define T0TC_WATCHDOG          (0xff)          /* Enable watchdog mode */
-
-#define WDT_DEFAULT_TIME       5       /* seconds */
-#define WDT_MAX_TIME           171     /* seconds */
-
-static int wdt_time = WDT_DEFAULT_TIME;
-static bool nowayout = WATCHDOG_NOWAYOUT;
-
-module_param(wdt_time, int, 0);
-MODULE_PARM_DESC(wdt_time, "Watchdog time in seconds. (default="
-                                       __MODULE_STRING(WDT_DEFAULT_TIME) ")");
-
-#ifdef CONFIG_WATCHDOG_NOWAYOUT
-module_param(nowayout, bool, 0);
-MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
-                               __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-#endif
-
-
-static unsigned long ks8695wdt_busy;
-static DEFINE_SPINLOCK(ks8695_lock);
-
-/* ......................................................................... */
-
-/*
- * Disable the watchdog.
- */
-static inline void ks8695_wdt_stop(void)
-{
-       unsigned long tmcon;
-
-       spin_lock(&ks8695_lock);
-       /* disable timer0 */
-       tmcon = __raw_readl(KS8695_TMR_VA + KS8695_TMCON);
-       __raw_writel(tmcon & ~TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON);
-       spin_unlock(&ks8695_lock);
-}
-
-/*
- * Enable and reset the watchdog.
- */
-static inline void ks8695_wdt_start(void)
-{
-       unsigned long tmcon;
-       unsigned long tval = wdt_time * KS8695_CLOCK_RATE;
-
-       spin_lock(&ks8695_lock);
-       /* disable timer0 */
-       tmcon = __raw_readl(KS8695_TMR_VA + KS8695_TMCON);
-       __raw_writel(tmcon & ~TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON);
-
-       /* program timer0 */
-       __raw_writel(tval | T0TC_WATCHDOG, KS8695_TMR_VA + KS8695_T0TC);
-
-       /* re-enable timer0 */
-       tmcon = __raw_readl(KS8695_TMR_VA + KS8695_TMCON);
-       __raw_writel(tmcon | TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON);
-       spin_unlock(&ks8695_lock);
-}
-
-/*
- * Reload the watchdog timer.  (ie, pat the watchdog)
- */
-static inline void ks8695_wdt_reload(void)
-{
-       unsigned long tmcon;
-
-       spin_lock(&ks8695_lock);
-       /* disable, then re-enable timer0 */
-       tmcon = __raw_readl(KS8695_TMR_VA + KS8695_TMCON);
-       __raw_writel(tmcon & ~TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON);
-       __raw_writel(tmcon | TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON);
-       spin_unlock(&ks8695_lock);
-}
-
-/*
- * Change the watchdog time interval.
- */
-static int ks8695_wdt_settimeout(int new_time)
-{
-       /*
-        * All counting occurs at KS8695_CLOCK_RATE / 128 = 0.256 Hz
-        *
-        * Since WDV is a 16-bit counter, the maximum period is
-        * 65536 / 0.256 = 256 seconds.
-        */
-       if ((new_time <= 0) || (new_time > WDT_MAX_TIME))
-               return -EINVAL;
-
-       /* Set new watchdog time. It will be used when
-          ks8695_wdt_start() is called. */
-       wdt_time = new_time;
-       return 0;
-}
-
-/* ......................................................................... */
-
-/*
- * Watchdog device is opened, and watchdog starts running.
- */
-static int ks8695_wdt_open(struct inode *inode, struct file *file)
-{
-       if (test_and_set_bit(0, &ks8695wdt_busy))
-               return -EBUSY;
-
-       ks8695_wdt_start();
-       return stream_open(inode, file);
-}
-
-/*
- * Close the watchdog device.
- * If CONFIG_WATCHDOG_NOWAYOUT is NOT defined then the watchdog is also
- *  disabled.
- */
-static int ks8695_wdt_close(struct inode *inode, struct file *file)
-{
-       /* Disable the watchdog when file is closed */
-       if (!nowayout)
-               ks8695_wdt_stop();
-       clear_bit(0, &ks8695wdt_busy);
-       return 0;
-}
-
-static const struct watchdog_info ks8695_wdt_info = {
-       .identity       = "ks8695 watchdog",
-       .options        = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
-};
-
-/*
- * Handle commands from user-space.
- */
-static long ks8695_wdt_ioctl(struct file *file, unsigned int cmd,
-                                                       unsigned long arg)
-{
-       void __user *argp = (void __user *)arg;
-       int __user *p = argp;
-       int new_value;
-
-       switch (cmd) {
-       case WDIOC_GETSUPPORT:
-               return copy_to_user(argp, &ks8695_wdt_info,
-                                       sizeof(ks8695_wdt_info)) ? -EFAULT : 0;
-       case WDIOC_GETSTATUS:
-       case WDIOC_GETBOOTSTATUS:
-               return put_user(0, p);
-       case WDIOC_SETOPTIONS:
-               if (get_user(new_value, p))
-                       return -EFAULT;
-               if (new_value & WDIOS_DISABLECARD)
-                       ks8695_wdt_stop();
-               if (new_value & WDIOS_ENABLECARD)
-                       ks8695_wdt_start();
-               return 0;
-       case WDIOC_KEEPALIVE:
-               ks8695_wdt_reload();    /* pat the watchdog */
-               return 0;
-       case WDIOC_SETTIMEOUT:
-               if (get_user(new_value, p))
-                       return -EFAULT;
-               if (ks8695_wdt_settimeout(new_value))
-                       return -EINVAL;
-               /* Enable new time value */
-               ks8695_wdt_start();
-               /* Return current value */
-               return put_user(wdt_time, p);
-       case WDIOC_GETTIMEOUT:
-               return put_user(wdt_time, p);
-       default:
-               return -ENOTTY;
-       }
-}
-
-/*
- * Pat the watchdog whenever device is written to.
- */
-static ssize_t ks8695_wdt_write(struct file *file, const char *data,
-                                               size_t len, loff_t *ppos)
-{
-       ks8695_wdt_reload();            /* pat the watchdog */
-       return len;
-}
-
-/* ......................................................................... */
-
-static const struct file_operations ks8695wdt_fops = {
-       .owner          = THIS_MODULE,
-       .llseek         = no_llseek,
-       .unlocked_ioctl = ks8695_wdt_ioctl,
-       .open           = ks8695_wdt_open,
-       .release        = ks8695_wdt_close,
-       .write          = ks8695_wdt_write,
-};
-
-static struct miscdevice ks8695wdt_miscdev = {
-       .minor          = WATCHDOG_MINOR,
-       .name           = "watchdog",
-       .fops           = &ks8695wdt_fops,
-};
-
-static int ks8695wdt_probe(struct platform_device *pdev)
-{
-       int res;
-
-       if (ks8695wdt_miscdev.parent)
-               return -EBUSY;
-       ks8695wdt_miscdev.parent = &pdev->dev;
-
-       res = misc_register(&ks8695wdt_miscdev);
-       if (res)
-               return res;
-
-       pr_info("KS8695 Watchdog Timer enabled (%d seconds%s)\n",
-               wdt_time, nowayout ? ", nowayout" : "");
-       return 0;
-}
-
-static int ks8695wdt_remove(struct platform_device *pdev)
-{
-       misc_deregister(&ks8695wdt_miscdev);
-       ks8695wdt_miscdev.parent = NULL;
-
-       return 0;
-}
-
-static void ks8695wdt_shutdown(struct platform_device *pdev)
-{
-       ks8695_wdt_stop();
-}
-
-#ifdef CONFIG_PM
-
-static int ks8695wdt_suspend(struct platform_device *pdev, pm_message_t message)
-{
-       ks8695_wdt_stop();
-       return 0;
-}
-
-static int ks8695wdt_resume(struct platform_device *pdev)
-{
-       if (ks8695wdt_busy)
-               ks8695_wdt_start();
-       return 0;
-}
-
-#else
-#define ks8695wdt_suspend NULL
-#define ks8695wdt_resume       NULL
-#endif
-
-static struct platform_driver ks8695wdt_driver = {
-       .probe          = ks8695wdt_probe,
-       .remove         = ks8695wdt_remove,
-       .shutdown       = ks8695wdt_shutdown,
-       .suspend        = ks8695wdt_suspend,
-       .resume         = ks8695wdt_resume,
-       .driver         = {
-               .name   = "ks8695_wdt",
-       },
-};
-
-static int __init ks8695_wdt_init(void)
-{
-       /* Check that the heartbeat value is within range;
-          if not reset to the default */
-       if (ks8695_wdt_settimeout(wdt_time)) {
-               ks8695_wdt_settimeout(WDT_DEFAULT_TIME);
-               pr_info("ks8695_wdt: wdt_time value must be 1 <= wdt_time <= %i"
-                                       ", using %d\n", wdt_time, WDT_MAX_TIME);
-       }
-       return platform_driver_register(&ks8695wdt_driver);
-}
-
-static void __exit ks8695_wdt_exit(void)
-{
-       platform_driver_unregister(&ks8695wdt_driver);
-}
-
-module_init(ks8695_wdt_init);
-module_exit(ks8695_wdt_exit);
-
-MODULE_AUTHOR("Andrew Victor");
-MODULE_DESCRIPTION("Watchdog driver for KS8695");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:ks8695_wdt");
diff --git a/drivers/watchdog/nuc900_wdt.c b/drivers/watchdog/nuc900_wdt.c
deleted file mode 100644 (file)
index db124ce..0000000
+++ /dev/null
@@ -1,302 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (c) 2009 Nuvoton technology corporation.
- *
- * Wan ZongShun <mcuos.com@gmail.com>
- */
-
-#include <linux/bitops.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/io.h>
-#include <linux/clk.h>
-#include <linux/kernel.h>
-#include <linux/miscdevice.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/types.h>
-#include <linux/watchdog.h>
-#include <linux/uaccess.h>
-
-#define REG_WTCR               0x1c
-#define WTCLK                  (0x01 << 10)
-#define WTE                    (0x01 << 7)     /*wdt enable*/
-#define WTIS                   (0x03 << 4)
-#define WTIF                   (0x01 << 3)
-#define WTRF                   (0x01 << 2)
-#define WTRE                   (0x01 << 1)
-#define WTR                    (0x01 << 0)
-/*
- * The watchdog time interval can be calculated via following formula:
- * WTIS                real time interval (formula)
- * 0x00                ((2^ 14 ) * ((external crystal freq) / 256))seconds
- * 0x01                ((2^ 16 ) * ((external crystal freq) / 256))seconds
- * 0x02                ((2^ 18 ) * ((external crystal freq) / 256))seconds
- * 0x03                ((2^ 20 ) * ((external crystal freq) / 256))seconds
- *
- * The external crystal freq is 15Mhz in the nuc900 evaluation board.
- * So 0x00 = +-0.28 seconds, 0x01 = +-1.12 seconds, 0x02 = +-4.48 seconds,
- * 0x03 = +- 16.92 seconds..
- */
-#define WDT_HW_TIMEOUT         0x02
-#define WDT_TIMEOUT            (HZ/2)
-#define WDT_HEARTBEAT          15
-
-static int heartbeat = WDT_HEARTBEAT;
-module_param(heartbeat, int, 0);
-MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds. "
-       "(default = " __MODULE_STRING(WDT_HEARTBEAT) ")");
-
-static bool nowayout = WATCHDOG_NOWAYOUT;
-module_param(nowayout, bool, 0);
-MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
-       "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-
-struct nuc900_wdt {
-       struct clk       *wdt_clock;
-       struct platform_device *pdev;
-       void __iomem     *wdt_base;
-       char             expect_close;
-       struct timer_list timer;
-       spinlock_t       wdt_lock;
-       unsigned long next_heartbeat;
-};
-
-static unsigned long nuc900wdt_busy;
-static struct nuc900_wdt *nuc900_wdt;
-
-static inline void nuc900_wdt_keepalive(void)
-{
-       unsigned int val;
-
-       spin_lock(&nuc900_wdt->wdt_lock);
-
-       val = __raw_readl(nuc900_wdt->wdt_base + REG_WTCR);
-       val |= (WTR | WTIF);
-       __raw_writel(val, nuc900_wdt->wdt_base + REG_WTCR);
-
-       spin_unlock(&nuc900_wdt->wdt_lock);
-}
-
-static inline void nuc900_wdt_start(void)
-{
-       unsigned int val;
-
-       spin_lock(&nuc900_wdt->wdt_lock);
-
-       val = __raw_readl(nuc900_wdt->wdt_base + REG_WTCR);
-       val |= (WTRE | WTE | WTR | WTCLK | WTIF);
-       val &= ~WTIS;
-       val |= (WDT_HW_TIMEOUT << 0x04);
-       __raw_writel(val, nuc900_wdt->wdt_base + REG_WTCR);
-
-       spin_unlock(&nuc900_wdt->wdt_lock);
-
-       nuc900_wdt->next_heartbeat = jiffies + heartbeat * HZ;
-       mod_timer(&nuc900_wdt->timer, jiffies + WDT_TIMEOUT);
-}
-
-static inline void nuc900_wdt_stop(void)
-{
-       unsigned int val;
-
-       del_timer(&nuc900_wdt->timer);
-
-       spin_lock(&nuc900_wdt->wdt_lock);
-
-       val = __raw_readl(nuc900_wdt->wdt_base + REG_WTCR);
-       val &= ~WTE;
-       __raw_writel(val, nuc900_wdt->wdt_base + REG_WTCR);
-
-       spin_unlock(&nuc900_wdt->wdt_lock);
-}
-
-static inline void nuc900_wdt_ping(void)
-{
-       nuc900_wdt->next_heartbeat = jiffies + heartbeat * HZ;
-}
-
-static int nuc900_wdt_open(struct inode *inode, struct file *file)
-{
-
-       if (test_and_set_bit(0, &nuc900wdt_busy))
-               return -EBUSY;
-
-       nuc900_wdt_start();
-
-       return stream_open(inode, file);
-}
-
-static int nuc900_wdt_close(struct inode *inode, struct file *file)
-{
-       if (nuc900_wdt->expect_close == 42)
-               nuc900_wdt_stop();
-       else {
-               dev_crit(&nuc900_wdt->pdev->dev,
-                       "Unexpected close, not stopping watchdog!\n");
-               nuc900_wdt_ping();
-       }
-
-       nuc900_wdt->expect_close = 0;
-       clear_bit(0, &nuc900wdt_busy);
-       return 0;
-}
-
-static const struct watchdog_info nuc900_wdt_info = {
-       .identity       = "nuc900 watchdog",
-       .options        = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING |
-                                               WDIOF_MAGICCLOSE,
-};
-
-static long nuc900_wdt_ioctl(struct file *file,
-                                       unsigned int cmd, unsigned long arg)
-{
-       void __user *argp = (void __user *)arg;
-       int __user *p = argp;
-       int new_value;
-
-       switch (cmd) {
-       case WDIOC_GETSUPPORT:
-               return copy_to_user(argp, &nuc900_wdt_info,
-                               sizeof(nuc900_wdt_info)) ? -EFAULT : 0;
-       case WDIOC_GETSTATUS:
-       case WDIOC_GETBOOTSTATUS:
-               return put_user(0, p);
-
-       case WDIOC_KEEPALIVE:
-               nuc900_wdt_ping();
-               return 0;
-
-       case WDIOC_SETTIMEOUT:
-               if (get_user(new_value, p))
-                       return -EFAULT;
-
-               heartbeat = new_value;
-               nuc900_wdt_ping();
-
-               return put_user(new_value, p);
-       case WDIOC_GETTIMEOUT:
-               return put_user(heartbeat, p);
-       default:
-               return -ENOTTY;
-       }
-}
-
-static ssize_t nuc900_wdt_write(struct file *file, const char __user *data,
-                                               size_t len, loff_t *ppos)
-{
-       if (!len)
-               return 0;
-
-       /* Scan for magic character */
-       if (!nowayout) {
-               size_t i;
-
-               nuc900_wdt->expect_close = 0;
-
-               for (i = 0; i < len; i++) {
-                       char c;
-                       if (get_user(c, data + i))
-                               return -EFAULT;
-                       if (c == 'V') {
-                               nuc900_wdt->expect_close = 42;
-                               break;
-                       }
-               }
-       }
-
-       nuc900_wdt_ping();
-       return len;
-}
-
-static void nuc900_wdt_timer_ping(struct timer_list *unused)
-{
-       if (time_before(jiffies, nuc900_wdt->next_heartbeat)) {
-               nuc900_wdt_keepalive();
-               mod_timer(&nuc900_wdt->timer, jiffies + WDT_TIMEOUT);
-       } else
-               dev_warn(&nuc900_wdt->pdev->dev, "Will reset the machine !\n");
-}
-
-static const struct file_operations nuc900wdt_fops = {
-       .owner          = THIS_MODULE,
-       .llseek         = no_llseek,
-       .unlocked_ioctl = nuc900_wdt_ioctl,
-       .open           = nuc900_wdt_open,
-       .release        = nuc900_wdt_close,
-       .write          = nuc900_wdt_write,
-};
-
-static struct miscdevice nuc900wdt_miscdev = {
-       .minor          = WATCHDOG_MINOR,
-       .name           = "watchdog",
-       .fops           = &nuc900wdt_fops,
-};
-
-static int nuc900wdt_probe(struct platform_device *pdev)
-{
-       int ret = 0;
-
-       nuc900_wdt = devm_kzalloc(&pdev->dev, sizeof(*nuc900_wdt),
-                               GFP_KERNEL);
-       if (!nuc900_wdt)
-               return -ENOMEM;
-
-       nuc900_wdt->pdev = pdev;
-
-       spin_lock_init(&nuc900_wdt->wdt_lock);
-
-       nuc900_wdt->wdt_base = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(nuc900_wdt->wdt_base))
-               return PTR_ERR(nuc900_wdt->wdt_base);
-
-       nuc900_wdt->wdt_clock = devm_clk_get(&pdev->dev, NULL);
-       if (IS_ERR(nuc900_wdt->wdt_clock)) {
-               dev_err(&pdev->dev, "failed to find watchdog clock source\n");
-               return PTR_ERR(nuc900_wdt->wdt_clock);
-       }
-
-       clk_enable(nuc900_wdt->wdt_clock);
-
-       timer_setup(&nuc900_wdt->timer, nuc900_wdt_timer_ping, 0);
-
-       ret = misc_register(&nuc900wdt_miscdev);
-       if (ret) {
-               dev_err(&pdev->dev, "err register miscdev on minor=%d (%d)\n",
-                       WATCHDOG_MINOR, ret);
-               goto err_clk;
-       }
-
-       return 0;
-
-err_clk:
-       clk_disable(nuc900_wdt->wdt_clock);
-       return ret;
-}
-
-static int nuc900wdt_remove(struct platform_device *pdev)
-{
-       misc_deregister(&nuc900wdt_miscdev);
-
-       clk_disable(nuc900_wdt->wdt_clock);
-
-       return 0;
-}
-
-static struct platform_driver nuc900wdt_driver = {
-       .probe          = nuc900wdt_probe,
-       .remove         = nuc900wdt_remove,
-       .driver         = {
-               .name   = "nuc900-wdt",
-       },
-};
-
-module_platform_driver(nuc900wdt_driver);
-
-MODULE_AUTHOR("Wan ZongShun <mcuos.com@gmail.com>");
-MODULE_DESCRIPTION("Watchdog driver for NUC900");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:nuc900-wdt");
index cdb0d17..1cccf8e 100644 (file)
  * Watchdog timer block registers.
  */
 #define TIMER_CTRL             0x0000
-#define TIMER_A370_STATUS      0x04
+#define TIMER1_FIXED_ENABLE_BIT        BIT(12)
+#define WDT_AXP_FIXED_ENABLE_BIT BIT(10)
+#define TIMER1_ENABLE_BIT      BIT(2)
+
+#define TIMER_A370_STATUS      0x0004
+#define WDT_A370_EXPIRED       BIT(31)
+#define TIMER1_STATUS_BIT      BIT(8)
+
+#define TIMER1_VAL_OFF         0x001c
 
 #define WDT_MAX_CYCLE_COUNT    0xffffffff
 
@@ -43,9 +51,6 @@
 #define WDT_A370_RATIO_SHIFT   5
 #define WDT_A370_RATIO         (1 << WDT_A370_RATIO_SHIFT)
 
-#define WDT_AXP_FIXED_ENABLE_BIT BIT(10)
-#define WDT_A370_EXPIRED       BIT(31)
-
 static bool nowayout = WATCHDOG_NOWAYOUT;
 static int heartbeat = -1;             /* module parameter (seconds) */
 
@@ -158,6 +163,7 @@ static int armadaxp_wdt_clock_init(struct platform_device *pdev,
                                   struct orion_watchdog *dev)
 {
        int ret;
+       u32 val;
 
        dev->clk = of_clk_get_by_name(pdev->dev.of_node, "fixed");
        if (IS_ERR(dev->clk))
@@ -168,10 +174,9 @@ static int armadaxp_wdt_clock_init(struct platform_device *pdev,
                return ret;
        }
 
-       /* Enable the fixed watchdog clock input */
-       atomic_io_modify(dev->reg + TIMER_CTRL,
-                        WDT_AXP_FIXED_ENABLE_BIT,
-                        WDT_AXP_FIXED_ENABLE_BIT);
+       /* Fix the wdt and timer1 clock frequency to 25MHz */
+       val = WDT_AXP_FIXED_ENABLE_BIT | TIMER1_FIXED_ENABLE_BIT;
+       atomic_io_modify(dev->reg + TIMER_CTRL, val, val);
 
        dev->clk_rate = clk_get_rate(dev->clk);
        return 0;
@@ -183,6 +188,10 @@ static int orion_wdt_ping(struct watchdog_device *wdt_dev)
        /* Reload watchdog duration */
        writel(dev->clk_rate * wdt_dev->timeout,
               dev->reg + dev->data->wdt_counter_offset);
+       if (dev->wdt.info->options & WDIOF_PRETIMEOUT)
+               writel(dev->clk_rate * (wdt_dev->timeout - wdt_dev->pretimeout),
+                      dev->reg + TIMER1_VAL_OFF);
+
        return 0;
 }
 
@@ -194,13 +203,18 @@ static int armada375_start(struct watchdog_device *wdt_dev)
        /* Set watchdog duration */
        writel(dev->clk_rate * wdt_dev->timeout,
               dev->reg + dev->data->wdt_counter_offset);
+       if (dev->wdt.info->options & WDIOF_PRETIMEOUT)
+               writel(dev->clk_rate * (wdt_dev->timeout - wdt_dev->pretimeout),
+                      dev->reg + TIMER1_VAL_OFF);
 
        /* Clear the watchdog expiration bit */
        atomic_io_modify(dev->reg + TIMER_A370_STATUS, WDT_A370_EXPIRED, 0);
 
        /* Enable watchdog timer */
-       atomic_io_modify(dev->reg + TIMER_CTRL, dev->data->wdt_enable_bit,
-                                               dev->data->wdt_enable_bit);
+       reg = dev->data->wdt_enable_bit;
+       if (dev->wdt.info->options & WDIOF_PRETIMEOUT)
+               reg |= TIMER1_ENABLE_BIT;
+       atomic_io_modify(dev->reg + TIMER_CTRL, reg, reg);
 
        /* Enable reset on watchdog */
        reg = readl(dev->rstout);
@@ -277,7 +291,7 @@ static int orion_stop(struct watchdog_device *wdt_dev)
 static int armada375_stop(struct watchdog_device *wdt_dev)
 {
        struct orion_watchdog *dev = watchdog_get_drvdata(wdt_dev);
-       u32 reg;
+       u32 reg, mask;
 
        /* Disable reset on watchdog */
        atomic_io_modify(dev->rstout_mask, dev->data->rstout_mask_bit,
@@ -287,7 +301,10 @@ static int armada375_stop(struct watchdog_device *wdt_dev)
        writel(reg, dev->rstout);
 
        /* Disable watchdog timer */
-       atomic_io_modify(dev->reg + TIMER_CTRL, dev->data->wdt_enable_bit, 0);
+       mask = dev->data->wdt_enable_bit;
+       if (wdt_dev->info->options & WDIOF_PRETIMEOUT)
+               mask |= TIMER1_ENABLE_BIT;
+       atomic_io_modify(dev->reg + TIMER_CTRL, mask, 0);
 
        return 0;
 }
@@ -349,7 +366,7 @@ static unsigned int orion_wdt_get_timeleft(struct watchdog_device *wdt_dev)
        return readl(dev->reg + dev->data->wdt_counter_offset) / dev->clk_rate;
 }
 
-static const struct watchdog_info orion_wdt_info = {
+static struct watchdog_info orion_wdt_info = {
        .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
        .identity = "Orion Watchdog",
 };
@@ -368,6 +385,16 @@ static irqreturn_t orion_wdt_irq(int irq, void *devid)
        return IRQ_HANDLED;
 }
 
+static irqreturn_t orion_wdt_pre_irq(int irq, void *devid)
+{
+       struct orion_watchdog *dev = devid;
+
+       atomic_io_modify(dev->reg + TIMER_A370_STATUS,
+                        TIMER1_STATUS_BIT, 0);
+       watchdog_notify_pretimeout(&dev->wdt);
+       return IRQ_HANDLED;
+}
+
 /*
  * The original devicetree binding for this driver specified only
  * one memory resource, so in order to keep DT backwards compatibility
@@ -589,6 +616,19 @@ static int orion_wdt_probe(struct platform_device *pdev)
                }
        }
 
+       /* Optional 2nd interrupt for pretimeout */
+       irq = platform_get_irq(pdev, 1);
+       if (irq > 0) {
+               orion_wdt_info.options |= WDIOF_PRETIMEOUT;
+               ret = devm_request_irq(&pdev->dev, irq, orion_wdt_pre_irq,
+                                      0, pdev->name, dev);
+               if (ret < 0) {
+                       dev_err(&pdev->dev, "failed to request IRQ\n");
+                       goto disable_clk;
+               }
+       }
+
+
        watchdog_set_nowayout(&dev->wdt, nowayout);
        ret = watchdog_register_device(&dev->wdt);
        if (ret)
index 7be7f87..a494543 100644 (file)
@@ -1,8 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2014, The Linux Foundation. All rights reserved.
  */
+#include <linux/bits.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -19,6 +21,9 @@ enum wdt_reg {
        WDT_BITE_TIME,
 };
 
+#define QCOM_WDT_ENABLE                BIT(0)
+#define QCOM_WDT_ENABLE_IRQ    BIT(1)
+
 static const u32 reg_offset_data_apcs_tmr[] = {
        [WDT_RST] = 0x38,
        [WDT_EN] = 0x40,
@@ -37,7 +42,6 @@ static const u32 reg_offset_data_kpss[] = {
 
 struct qcom_wdt {
        struct watchdog_device  wdd;
-       struct clk              *clk;
        unsigned long           rate;
        void __iomem            *base;
        const u32               *layout;
@@ -54,15 +58,35 @@ struct qcom_wdt *to_qcom_wdt(struct watchdog_device *wdd)
        return container_of(wdd, struct qcom_wdt, wdd);
 }
 
+static inline int qcom_get_enable(struct watchdog_device *wdd)
+{
+       int enable = QCOM_WDT_ENABLE;
+
+       if (wdd->pretimeout)
+               enable |= QCOM_WDT_ENABLE_IRQ;
+
+       return enable;
+}
+
+static irqreturn_t qcom_wdt_isr(int irq, void *arg)
+{
+       struct watchdog_device *wdd = arg;
+
+       watchdog_notify_pretimeout(wdd);
+
+       return IRQ_HANDLED;
+}
+
 static int qcom_wdt_start(struct watchdog_device *wdd)
 {
        struct qcom_wdt *wdt = to_qcom_wdt(wdd);
+       unsigned int bark = wdd->timeout - wdd->pretimeout;
 
        writel(0, wdt_addr(wdt, WDT_EN));
        writel(1, wdt_addr(wdt, WDT_RST));
-       writel(wdd->timeout * wdt->rate, wdt_addr(wdt, WDT_BARK_TIME));
+       writel(bark * wdt->rate, wdt_addr(wdt, WDT_BARK_TIME));
        writel(wdd->timeout * wdt->rate, wdt_addr(wdt, WDT_BITE_TIME));
-       writel(1, wdt_addr(wdt, WDT_EN));
+       writel(qcom_get_enable(wdd), wdt_addr(wdt, WDT_EN));
        return 0;
 }
 
@@ -89,6 +113,13 @@ static int qcom_wdt_set_timeout(struct watchdog_device *wdd,
        return qcom_wdt_start(wdd);
 }
 
+static int qcom_wdt_set_pretimeout(struct watchdog_device *wdd,
+                                  unsigned int timeout)
+{
+       wdd->pretimeout = timeout;
+       return qcom_wdt_start(wdd);
+}
+
 static int qcom_wdt_restart(struct watchdog_device *wdd, unsigned long action,
                            void *data)
 {
@@ -105,7 +136,7 @@ static int qcom_wdt_restart(struct watchdog_device *wdd, unsigned long action,
        writel(1, wdt_addr(wdt, WDT_RST));
        writel(timeout, wdt_addr(wdt, WDT_BARK_TIME));
        writel(timeout, wdt_addr(wdt, WDT_BITE_TIME));
-       writel(1, wdt_addr(wdt, WDT_EN));
+       writel(QCOM_WDT_ENABLE, wdt_addr(wdt, WDT_EN));
 
        /*
         * Actually make sure the above sequence hits hardware before sleeping.
@@ -121,6 +152,7 @@ static const struct watchdog_ops qcom_wdt_ops = {
        .stop           = qcom_wdt_stop,
        .ping           = qcom_wdt_ping,
        .set_timeout    = qcom_wdt_set_timeout,
+       .set_pretimeout = qcom_wdt_set_pretimeout,
        .restart        = qcom_wdt_restart,
        .owner          = THIS_MODULE,
 };
@@ -133,6 +165,15 @@ static const struct watchdog_info qcom_wdt_info = {
        .identity       = KBUILD_MODNAME,
 };
 
+static const struct watchdog_info qcom_wdt_pt_info = {
+       .options        = WDIOF_KEEPALIVEPING
+                       | WDIOF_MAGICCLOSE
+                       | WDIOF_SETTIMEOUT
+                       | WDIOF_PRETIMEOUT
+                       | WDIOF_CARDRESET,
+       .identity       = KBUILD_MODNAME,
+};
+
 static void qcom_clk_disable_unprepare(void *data)
 {
        clk_disable_unprepare(data);
@@ -146,7 +187,8 @@ static int qcom_wdt_probe(struct platform_device *pdev)
        struct device_node *np = dev->of_node;
        const u32 *regs;
        u32 percpu_offset;
-       int ret;
+       int irq, ret;
+       struct clk *clk;
 
        regs = of_device_get_match_data(dev);
        if (!regs) {
@@ -173,19 +215,18 @@ static int qcom_wdt_probe(struct platform_device *pdev)
        if (IS_ERR(wdt->base))
                return PTR_ERR(wdt->base);
 
-       wdt->clk = devm_clk_get(dev, NULL);
-       if (IS_ERR(wdt->clk)) {
+       clk = devm_clk_get(dev, NULL);
+       if (IS_ERR(clk)) {
                dev_err(dev, "failed to get input clock\n");
-               return PTR_ERR(wdt->clk);
+               return PTR_ERR(clk);
        }
 
-       ret = clk_prepare_enable(wdt->clk);
+       ret = clk_prepare_enable(clk);
        if (ret) {
                dev_err(dev, "failed to setup clock\n");
                return ret;
        }
-       ret = devm_add_action_or_reset(dev, qcom_clk_disable_unprepare,
-                                      wdt->clk);
+       ret = devm_add_action_or_reset(dev, qcom_clk_disable_unprepare, clk);
        if (ret)
                return ret;
 
@@ -197,14 +238,31 @@ static int qcom_wdt_probe(struct platform_device *pdev)
         * that it would bite before a second elapses it's usefulness is
         * limited.  Bail if this is the case.
         */
-       wdt->rate = clk_get_rate(wdt->clk);
+       wdt->rate = clk_get_rate(clk);
        if (wdt->rate == 0 ||
            wdt->rate > 0x10000000U) {
                dev_err(dev, "invalid clock rate\n");
                return -EINVAL;
        }
 
-       wdt->wdd.info = &qcom_wdt_info;
+       /* check if there is pretimeout support */
+       irq = platform_get_irq(pdev, 0);
+       if (irq > 0) {
+               ret = devm_request_irq(dev, irq, qcom_wdt_isr,
+                                      IRQF_TRIGGER_RISING,
+                                      "wdt_bark", &wdt->wdd);
+               if (ret)
+                       return ret;
+
+               wdt->wdd.info = &qcom_wdt_pt_info;
+               wdt->wdd.pretimeout = 1;
+       } else {
+               if (irq == -EPROBE_DEFER)
+                       return -EPROBE_DEFER;
+
+               wdt->wdd.info = &qcom_wdt_info;
+       }
+
        wdt->wdd.ops = &qcom_wdt_ops;
        wdt->wdd.min_timeout = 1;
        wdt->wdd.max_timeout = 0x10000000U / wdt->rate;
index edba4e2..0bb17b0 100644 (file)
@@ -284,10 +284,8 @@ static int sprd_wdt_probe(struct platform_device *pdev)
        }
 
        wdt->irq = platform_get_irq(pdev, 0);
-       if (wdt->irq < 0) {
-               dev_err(dev, "failed to get IRQ resource\n");
+       if (wdt->irq < 0)
                return wdt->irq;
-       }
 
        ret = devm_request_irq(dev, wdt->irq, sprd_wdt_isr, IRQF_NO_SUSPEND,
                               "sprd-wdt", (void *)wdt);
index dec660c..4a363a8 100644 (file)
 #include <linux/version.h>
 #include <linux/watchdog.h>
 
+#include <asm/unaligned.h>
+
 #define ZIIRAVE_TIMEOUT_MIN    3
 #define ZIIRAVE_TIMEOUT_MAX    255
+#define ZIIRAVE_TIMEOUT_DEFAULT        30
 
 #define ZIIRAVE_PING_VALUE     0x0
 
@@ -48,16 +51,12 @@ static char *ziirave_reasons[] = {"power cycle", "hw watchdog", NULL, NULL,
 
 #define ZIIRAVE_FIRM_PKT_TOTAL_SIZE    20
 #define ZIIRAVE_FIRM_PKT_DATA_SIZE     16
-#define ZIIRAVE_FIRM_FLASH_MEMORY_START        0x1600
-#define ZIIRAVE_FIRM_FLASH_MEMORY_END  0x2bbf
+#define ZIIRAVE_FIRM_FLASH_MEMORY_START        (2 * 0x1600)
+#define ZIIRAVE_FIRM_FLASH_MEMORY_END  (2 * 0x2bbf)
+#define ZIIRAVE_FIRM_PAGE_SIZE         128
 
 /* Received and ready for next Download packet. */
 #define ZIIRAVE_FIRM_DOWNLOAD_ACK      1
-/* Currently writing to flash. Retry Download status in a moment! */
-#define ZIIRAVE_FIRM_DOWNLOAD_BUSY     2
-
-/* Wait for ACK timeout in ms */
-#define ZIIRAVE_FIRM_WAIT_FOR_ACK_TIMEOUT      50
 
 /* Firmware commands */
 #define ZIIRAVE_CMD_DOWNLOAD_START             0x10
@@ -68,6 +67,12 @@ static char *ziirave_reasons[] = {"power cycle", "hw watchdog", NULL, NULL,
 #define ZIIRAVE_CMD_JUMP_TO_BOOTLOADER         0x0c
 #define ZIIRAVE_CMD_DOWNLOAD_PACKET            0x0e
 
+#define ZIIRAVE_CMD_JUMP_TO_BOOTLOADER_MAGIC   1
+#define ZIIRAVE_CMD_RESET_PROCESSOR_MAGIC      1
+
+#define ZIIRAVE_FW_VERSION_FMT "02.%02u.%02u"
+#define ZIIRAVE_BL_VERSION_FMT "01.%02u.%02u"
+
 struct ziirave_wdt_rev {
        unsigned char major;
        unsigned char minor;
@@ -165,67 +170,37 @@ static unsigned int ziirave_wdt_get_timeleft(struct watchdog_device *wdd)
        return ret;
 }
 
-static int ziirave_firm_wait_for_ack(struct watchdog_device *wdd)
+static int ziirave_firm_read_ack(struct watchdog_device *wdd)
 {
        struct i2c_client *client = to_i2c_client(wdd->parent);
        int ret;
-       unsigned long timeout;
 
-       timeout = jiffies + msecs_to_jiffies(ZIIRAVE_FIRM_WAIT_FOR_ACK_TIMEOUT);
-       do {
-               if (time_after(jiffies, timeout))
-                       return -ETIMEDOUT;
-
-               usleep_range(5000, 10000);
-
-               ret = i2c_smbus_read_byte(client);
-               if (ret < 0) {
-                       dev_err(&client->dev, "Failed to read byte\n");
-                       return ret;
-               }
-       } while (ret == ZIIRAVE_FIRM_DOWNLOAD_BUSY);
+       ret = i2c_smbus_read_byte(client);
+       if (ret < 0) {
+               dev_err(&client->dev, "Failed to read status byte\n");
+               return ret;
+       }
 
        return ret == ZIIRAVE_FIRM_DOWNLOAD_ACK ? 0 : -EIO;
 }
 
-static int ziirave_firm_set_read_addr(struct watchdog_device *wdd, u16 addr)
+static int ziirave_firm_set_read_addr(struct watchdog_device *wdd, u32 addr)
 {
        struct i2c_client *client = to_i2c_client(wdd->parent);
+       const u16 addr16 = (u16)addr / 2;
        u8 address[2];
 
-       address[0] = addr & 0xff;
-       address[1] = (addr >> 8) & 0xff;
+       put_unaligned_le16(addr16, address);
 
        return i2c_smbus_write_block_data(client,
                                          ZIIRAVE_CMD_DOWNLOAD_SET_READ_ADDR,
-                                         ARRAY_SIZE(address), address);
-}
-
-static int ziirave_firm_write_block_data(struct watchdog_device *wdd,
-                                        u8 command, u8 length, const u8 *data,
-                                        bool wait_for_ack)
-{
-       struct i2c_client *client = to_i2c_client(wdd->parent);
-       int ret;
-
-       ret = i2c_smbus_write_block_data(client, command, length, data);
-       if (ret) {
-               dev_err(&client->dev,
-                       "Failed to send command 0x%02x: %d\n", command, ret);
-               return ret;
-       }
-
-       if (wait_for_ack)
-               ret = ziirave_firm_wait_for_ack(wdd);
-
-       return ret;
+                                         sizeof(address), address);
 }
 
-static int ziirave_firm_write_byte(struct watchdog_device *wdd, u8 command,
-                                  u8 byte, bool wait_for_ack)
+static bool ziirave_firm_addr_readonly(u32 addr)
 {
-       return ziirave_firm_write_block_data(wdd, command, 1, &byte,
-                                            wait_for_ack);
+       return addr < ZIIRAVE_FIRM_FLASH_MEMORY_START ||
+              addr > ZIIRAVE_FIRM_FLASH_MEMORY_END;
 }
 
 /*
@@ -240,35 +215,53 @@ static int ziirave_firm_write_byte(struct watchdog_device *wdd, u8 command,
  *     Data0 .. Data15: Array of 16 bytes of data.
  *     Checksum: Checksum byte to verify data integrity.
  */
-static int ziirave_firm_write_pkt(struct watchdog_device *wdd,
-                                 const struct ihex_binrec *rec)
+static int __ziirave_firm_write_pkt(struct watchdog_device *wdd,
+                                   u32 addr, const u8 *data, u8 len)
 {
+       const u16 addr16 = (u16)addr / 2;
        struct i2c_client *client = to_i2c_client(wdd->parent);
        u8 i, checksum = 0, packet[ZIIRAVE_FIRM_PKT_TOTAL_SIZE];
        int ret;
-       u16 addr;
 
-       memset(packet, 0, ARRAY_SIZE(packet));
+       /* Check max data size */
+       if (len > ZIIRAVE_FIRM_PKT_DATA_SIZE) {
+               dev_err(&client->dev, "Firmware packet too long (%d)\n",
+                       len);
+               return -EMSGSIZE;
+       }
+
+       /*
+        * Ignore packets that are targeting program memory outside of
+        * app partition, since they will be ignored by the
+        * bootloader. At the same time, we need to make sure we'll
+        * allow zero length packet that will be sent as the last step
+        * of firmware update
+        */
+       if (len && ziirave_firm_addr_readonly(addr))
+               return 0;
 
        /* Packet length */
-       packet[0] = (u8)be16_to_cpu(rec->len);
+       packet[0] = len;
        /* Packet address */
-       addr = (be32_to_cpu(rec->addr) & 0xffff) >> 1;
-       packet[1] = addr & 0xff;
-       packet[2] = (addr & 0xff00) >> 8;
+       put_unaligned_le16(addr16, packet + 1);
 
-       /* Packet data */
-       if (be16_to_cpu(rec->len) > ZIIRAVE_FIRM_PKT_DATA_SIZE)
-               return -EMSGSIZE;
-       memcpy(packet + 3, rec->data, be16_to_cpu(rec->len));
+       memcpy(packet + 3, data, len);
+       memset(packet + 3 + len, 0, ZIIRAVE_FIRM_PKT_DATA_SIZE - len);
 
        /* Packet checksum */
-       for (i = 0; i < ZIIRAVE_FIRM_PKT_TOTAL_SIZE - 1; i++)
+       for (i = 0; i < len + 3; i++)
                checksum += packet[i];
        packet[ZIIRAVE_FIRM_PKT_TOTAL_SIZE - 1] = checksum;
 
-       ret = ziirave_firm_write_block_data(wdd, ZIIRAVE_CMD_DOWNLOAD_PACKET,
-                                           ARRAY_SIZE(packet), packet, true);
+       ret = i2c_smbus_write_block_data(client, ZIIRAVE_CMD_DOWNLOAD_PACKET,
+                                        sizeof(packet), packet);
+       if (ret) {
+               dev_err(&client->dev,
+                       "Failed to send DOWNLOAD_PACKET: %d\n", ret);
+               return ret;
+       }
+
+       ret = ziirave_firm_read_ack(wdd);
        if (ret)
                dev_err(&client->dev,
                      "Failed to write firmware packet at address 0x%04x: %d\n",
@@ -277,6 +270,30 @@ static int ziirave_firm_write_pkt(struct watchdog_device *wdd,
        return ret;
 }
 
+static int ziirave_firm_write_pkt(struct watchdog_device *wdd,
+                                 u32 addr, const u8 *data, u8 len)
+{
+       const u8 max_write_len = ZIIRAVE_FIRM_PAGE_SIZE -
+               (addr - ALIGN_DOWN(addr, ZIIRAVE_FIRM_PAGE_SIZE));
+       int ret;
+
+       if (len > max_write_len) {
+               /*
+                * If data crossed page boundary we need to split this
+                * write in two
+                */
+               ret = __ziirave_firm_write_pkt(wdd, addr, data, max_write_len);
+               if (ret)
+                       return ret;
+
+               addr += max_write_len;
+               data += max_write_len;
+               len  -= max_write_len;
+       }
+
+       return __ziirave_firm_write_pkt(wdd, addr, data, len);
+}
+
 static int ziirave_firm_verify(struct watchdog_device *wdd,
                               const struct firmware *fw)
 {
@@ -284,16 +301,12 @@ static int ziirave_firm_verify(struct watchdog_device *wdd,
        const struct ihex_binrec *rec;
        int i, ret;
        u8 data[ZIIRAVE_FIRM_PKT_DATA_SIZE];
-       u16 addr;
 
        for (rec = (void *)fw->data; rec; rec = ihex_next_binrec(rec)) {
-               /* Zero length marks end of records */
-               if (!be16_to_cpu(rec->len))
-                       break;
+               const u16 len = be16_to_cpu(rec->len);
+               const u32 addr = be32_to_cpu(rec->addr);
 
-               addr = (be32_to_cpu(rec->addr) & 0xffff) >> 1;
-               if (addr < ZIIRAVE_FIRM_FLASH_MEMORY_START ||
-                   addr > ZIIRAVE_FIRM_FLASH_MEMORY_END)
+               if (ziirave_firm_addr_readonly(addr))
                        continue;
 
                ret = ziirave_firm_set_read_addr(wdd, addr);
@@ -304,7 +317,7 @@ static int ziirave_firm_verify(struct watchdog_device *wdd,
                        return ret;
                }
 
-               for (i = 0; i < ARRAY_SIZE(data); i++) {
+               for (i = 0; i < len; i++) {
                        ret = i2c_smbus_read_byte_data(client,
                                                ZIIRAVE_CMD_DOWNLOAD_READ_BYTE);
                        if (ret < 0) {
@@ -315,7 +328,7 @@ static int ziirave_firm_verify(struct watchdog_device *wdd,
                        data[i] = ret;
                }
 
-               if (memcmp(data, rec->data, be16_to_cpu(rec->len))) {
+               if (memcmp(data, rec->data, len)) {
                        dev_err(&client->dev,
                                "Firmware mismatch at address 0x%04x\n", addr);
                        return -EINVAL;
@@ -329,97 +342,45 @@ static int ziirave_firm_upload(struct watchdog_device *wdd,
                               const struct firmware *fw)
 {
        struct i2c_client *client = to_i2c_client(wdd->parent);
-       int ret, words_till_page_break;
        const struct ihex_binrec *rec;
-       struct ihex_binrec *rec_new;
+       int ret;
 
-       ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_JUMP_TO_BOOTLOADER, 1,
-                                     false);
-       if (ret)
+       ret = i2c_smbus_write_byte_data(client,
+                                       ZIIRAVE_CMD_JUMP_TO_BOOTLOADER,
+                                       ZIIRAVE_CMD_JUMP_TO_BOOTLOADER_MAGIC);
+       if (ret) {
+               dev_err(&client->dev, "Failed to jump to bootloader\n");
                return ret;
+       }
 
        msleep(500);
 
-       ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_DOWNLOAD_START, 1, true);
-       if (ret)
+       ret = i2c_smbus_write_byte(client, ZIIRAVE_CMD_DOWNLOAD_START);
+       if (ret) {
+               dev_err(&client->dev, "Failed to start download\n");
                return ret;
+       }
+
+       ret = ziirave_firm_read_ack(wdd);
+       if (ret) {
+               dev_err(&client->dev, "No ACK for start download\n");
+               return ret;
+       }
 
        msleep(500);
 
        for (rec = (void *)fw->data; rec; rec = ihex_next_binrec(rec)) {
-               /* Zero length marks end of records */
-               if (!be16_to_cpu(rec->len))
-                       break;
-
-               /* Check max data size */
-               if (be16_to_cpu(rec->len) > ZIIRAVE_FIRM_PKT_DATA_SIZE) {
-                       dev_err(&client->dev, "Firmware packet too long (%d)\n",
-                               be16_to_cpu(rec->len));
-                       return -EMSGSIZE;
-               }
-
-               /* Calculate words till page break */
-               words_till_page_break = (64 - ((be32_to_cpu(rec->addr) >> 1) &
-                                        0x3f));
-               if ((be16_to_cpu(rec->len) >> 1) > words_till_page_break) {
-                       /*
-                        * Data in passes page boundary, so we need to split in
-                        * two blocks of data. Create a packet with the first
-                        * block of data.
-                        */
-                       rec_new = kzalloc(sizeof(struct ihex_binrec) +
-                                         (words_till_page_break << 1),
-                                         GFP_KERNEL);
-                       if (!rec_new)
-                               return -ENOMEM;
-
-                       rec_new->len = cpu_to_be16(words_till_page_break << 1);
-                       rec_new->addr = rec->addr;
-                       memcpy(rec_new->data, rec->data,
-                              be16_to_cpu(rec_new->len));
-
-                       ret = ziirave_firm_write_pkt(wdd, rec_new);
-                       kfree(rec_new);
-                       if (ret)
-                               return ret;
-
-                       /* Create a packet with the second block of data */
-                       rec_new = kzalloc(sizeof(struct ihex_binrec) +
-                                         be16_to_cpu(rec->len) -
-                                         (words_till_page_break << 1),
-                                         GFP_KERNEL);
-                       if (!rec_new)
-                               return -ENOMEM;
-
-                       /* Remaining bytes */
-                       rec_new->len = rec->len -
-                                      cpu_to_be16(words_till_page_break << 1);
-
-                       rec_new->addr = cpu_to_be32(be32_to_cpu(rec->addr) +
-                                       (words_till_page_break << 1));
-
-                       memcpy(rec_new->data,
-                              rec->data + (words_till_page_break << 1),
-                              be16_to_cpu(rec_new->len));
-
-                       ret = ziirave_firm_write_pkt(wdd, rec_new);
-                       kfree(rec_new);
-                       if (ret)
-                               return ret;
-               } else {
-                       ret = ziirave_firm_write_pkt(wdd, rec);
-                       if (ret)
-                               return ret;
-               }
+               ret = ziirave_firm_write_pkt(wdd, be32_to_cpu(rec->addr),
+                                            rec->data, be16_to_cpu(rec->len));
+               if (ret)
+                       return ret;
        }
 
-       /* For end of download, the length field will be set to 0 */
-       rec_new = kzalloc(sizeof(struct ihex_binrec) + 1, GFP_KERNEL);
-       if (!rec_new)
-               return -ENOMEM;
-
-       ret = ziirave_firm_write_pkt(wdd, rec_new);
-       kfree(rec_new);
+       /*
+        * Finish firmware download process by sending a zero length
+        * payload
+        */
+       ret = ziirave_firm_write_pkt(wdd, 0, NULL, 0);
        if (ret) {
                dev_err(&client->dev, "Failed to send EMPTY packet: %d\n", ret);
                return ret;
@@ -437,15 +398,22 @@ static int ziirave_firm_upload(struct watchdog_device *wdd,
        }
 
        /* End download operation */
-       ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_DOWNLOAD_END, 1, false);
-       if (ret)
+       ret = i2c_smbus_write_byte(client, ZIIRAVE_CMD_DOWNLOAD_END);
+       if (ret) {
+               dev_err(&client->dev,
+                       "Failed to end firmware download: %d\n", ret);
                return ret;
+       }
 
        /* Reset the processor */
-       ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_RESET_PROCESSOR, 1,
-                                     false);
-       if (ret)
+       ret = i2c_smbus_write_byte_data(client,
+                                       ZIIRAVE_CMD_RESET_PROCESSOR,
+                                       ZIIRAVE_CMD_RESET_PROCESSOR_MAGIC);
+       if (ret) {
+               dev_err(&client->dev,
+                       "Failed to reset the watchdog: %d\n", ret);
                return ret;
+       }
 
        msleep(500);
 
@@ -478,7 +446,7 @@ static ssize_t ziirave_wdt_sysfs_show_firm(struct device *dev,
        if (ret)
                return ret;
 
-       ret = sprintf(buf, "02.%02u.%02u", w_priv->firmware_rev.major,
+       ret = sprintf(buf, ZIIRAVE_FW_VERSION_FMT, w_priv->firmware_rev.major,
                      w_priv->firmware_rev.minor);
 
        mutex_unlock(&w_priv->sysfs_mutex);
@@ -501,7 +469,7 @@ static ssize_t ziirave_wdt_sysfs_show_boot(struct device *dev,
        if (ret)
                return ret;
 
-       ret = sprintf(buf, "01.%02u.%02u", w_priv->bootloader_rev.major,
+       ret = sprintf(buf, ZIIRAVE_BL_VERSION_FMT, w_priv->bootloader_rev.major,
                      w_priv->bootloader_rev.minor);
 
        mutex_unlock(&w_priv->sysfs_mutex);
@@ -568,7 +536,8 @@ static ssize_t ziirave_wdt_sysfs_store_firm(struct device *dev,
                goto unlock_mutex;
        }
 
-       dev_info(&client->dev, "Firmware updated to version 02.%02u.%02u\n",
+       dev_info(&client->dev,
+                "Firmware updated to version " ZIIRAVE_FW_VERSION_FMT "\n",
                 w_priv->firmware_rev.major, w_priv->firmware_rev.minor);
 
        /* Restore the watchdog timeout */
@@ -611,7 +580,7 @@ static int ziirave_wdt_init_duration(struct i2c_client *client)
                                                   &reset_duration);
                if (ret) {
                        dev_info(&client->dev,
-                                "Unable to set reset pulse duration, using default\n");
+                        "No reset pulse duration specified, using default\n");
                        return 0;
                }
        }
@@ -633,7 +602,10 @@ static int ziirave_wdt_probe(struct i2c_client *client,
        struct ziirave_wdt_data *w_priv;
        int val;
 
-       if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+       if (!i2c_check_functionality(client->adapter,
+                                    I2C_FUNC_SMBUS_BYTE |
+                                    I2C_FUNC_SMBUS_BYTE_DATA |
+                                    I2C_FUNC_SMBUS_WRITE_BLOCK_DATA))
                return -ENODEV;
 
        w_priv = devm_kzalloc(&client->dev, sizeof(*w_priv), GFP_KERNEL);
@@ -658,57 +630,80 @@ static int ziirave_wdt_probe(struct i2c_client *client,
         */
        if (w_priv->wdd.timeout == 0) {
                val = i2c_smbus_read_byte_data(client, ZIIRAVE_WDT_TIMEOUT);
-               if (val < 0)
+               if (val < 0) {
+                       dev_err(&client->dev, "Failed to read timeout\n");
                        return val;
+               }
 
-               if (val < ZIIRAVE_TIMEOUT_MIN)
-                       return -ENODEV;
+               if (val > ZIIRAVE_TIMEOUT_MAX ||
+                   val < ZIIRAVE_TIMEOUT_MIN)
+                       val = ZIIRAVE_TIMEOUT_DEFAULT;
 
                w_priv->wdd.timeout = val;
-       } else {
-               ret = ziirave_wdt_set_timeout(&w_priv->wdd,
-                                             w_priv->wdd.timeout);
-               if (ret)
-                       return ret;
+       }
 
-               dev_info(&client->dev, "Timeout set to %ds.",
-                        w_priv->wdd.timeout);
+       ret = ziirave_wdt_set_timeout(&w_priv->wdd, w_priv->wdd.timeout);
+       if (ret) {
+               dev_err(&client->dev, "Failed to set timeout\n");
+               return ret;
        }
 
+       dev_info(&client->dev, "Timeout set to %ds\n", w_priv->wdd.timeout);
+
        watchdog_set_nowayout(&w_priv->wdd, nowayout);
 
        i2c_set_clientdata(client, w_priv);
 
        /* If in unconfigured state, set to stopped */
        val = i2c_smbus_read_byte_data(client, ZIIRAVE_WDT_STATE);
-       if (val < 0)
+       if (val < 0) {
+               dev_err(&client->dev, "Failed to read state\n");
                return val;
+       }
 
        if (val == ZIIRAVE_STATE_INITIAL)
                ziirave_wdt_stop(&w_priv->wdd);
 
        ret = ziirave_wdt_init_duration(client);
-       if (ret)
+       if (ret) {
+               dev_err(&client->dev, "Failed to init duration\n");
                return ret;
+       }
 
        ret = ziirave_wdt_revision(client, &w_priv->firmware_rev,
                                   ZIIRAVE_WDT_FIRM_VER_MAJOR);
-       if (ret)
+       if (ret) {
+               dev_err(&client->dev, "Failed to read firmware version\n");
                return ret;
+       }
+
+       dev_info(&client->dev,
+                "Firmware version: " ZIIRAVE_FW_VERSION_FMT "\n",
+                w_priv->firmware_rev.major, w_priv->firmware_rev.minor);
 
        ret = ziirave_wdt_revision(client, &w_priv->bootloader_rev,
                                   ZIIRAVE_WDT_BOOT_VER_MAJOR);
-       if (ret)
+       if (ret) {
+               dev_err(&client->dev, "Failed to read bootloader version\n");
                return ret;
+       }
+
+       dev_info(&client->dev,
+                "Bootloader version: " ZIIRAVE_BL_VERSION_FMT "\n",
+                w_priv->bootloader_rev.major, w_priv->bootloader_rev.minor);
 
        w_priv->reset_reason = i2c_smbus_read_byte_data(client,
                                                ZIIRAVE_WDT_RESET_REASON);
-       if (w_priv->reset_reason < 0)
+       if (w_priv->reset_reason < 0) {
+               dev_err(&client->dev, "Failed to read reset reason\n");
                return w_priv->reset_reason;
+       }
 
        if (w_priv->reset_reason >= ARRAY_SIZE(ziirave_reasons) ||
-           !ziirave_reasons[w_priv->reset_reason])
+           !ziirave_reasons[w_priv->reset_reason]) {
+               dev_err(&client->dev, "Invalid reset reason\n");
                return -ENODEV;
+       }
 
        ret = watchdog_register_device(&w_priv->wdd);
 
index 2e8570c..6c88439 100644 (file)
@@ -247,7 +247,7 @@ static void xen_irq_info_cleanup(struct irq_info *info)
  */
 unsigned int evtchn_from_irq(unsigned irq)
 {
-       if (unlikely(WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq)))
+       if (WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq))
                return 0;
 
        return info_for_irq(irq)->evtchn;
index 3eeb9be..224df03 100644 (file)
@@ -17,6 +17,8 @@
 #include "../pci/pci.h"
 #ifdef CONFIG_PCI_MMCONFIG
 #include <asm/pci_x86.h>
+
+static int xen_mcfg_late(void);
 #endif
 
 static bool __read_mostly pci_seg_supported = true;
@@ -28,7 +30,18 @@ static int xen_add_device(struct device *dev)
 #ifdef CONFIG_PCI_IOV
        struct pci_dev *physfn = pci_dev->physfn;
 #endif
-
+#ifdef CONFIG_PCI_MMCONFIG
+       static bool pci_mcfg_reserved = false;
+       /*
+        * Reserve MCFG areas in Xen on first invocation due to this being
+        * potentially called from inside of acpi_init immediately after
+        * MCFG table has been finally parsed.
+        */
+       if (!pci_mcfg_reserved) {
+               xen_mcfg_late();
+               pci_mcfg_reserved = true;
+       }
+#endif
        if (pci_seg_supported) {
                struct {
                        struct physdev_pci_device_add add;
@@ -201,7 +214,7 @@ static int __init register_xen_pci_notifier(void)
 arch_initcall(register_xen_pci_notifier);
 
 #ifdef CONFIG_PCI_MMCONFIG
-static int __init xen_mcfg_late(void)
+static int xen_mcfg_late(void)
 {
        struct pci_mmcfg_region *cfg;
        int rc;
@@ -240,8 +253,4 @@ static int __init xen_mcfg_late(void)
        }
        return 0;
 }
-/*
- * Needs to be done after acpi_init which are subsys_initcall.
- */
-subsys_initcall_sync(xen_mcfg_late);
 #endif
index 58c9365..bd3a10d 100644 (file)
@@ -39,6 +39,7 @@
 #include <asm/xen/page-coherent.h>
 
 #include <trace/events/swiotlb.h>
+#define MAX_DMA_BITS 32
 /*
  * Used to do a quick range check in swiotlb_tbl_unmap_single and
  * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
@@ -115,8 +116,6 @@ static int is_xen_swiotlb_buffer(dma_addr_t dma_addr)
        return 0;
 }
 
-static int max_dma_bits = 32;
-
 static int
 xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
 {
@@ -136,7 +135,7 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
                                p + (i << IO_TLB_SHIFT),
                                get_order(slabs << IO_TLB_SHIFT),
                                dma_bits, &dma_handle);
-               } while (rc && dma_bits++ < max_dma_bits);
+               } while (rc && dma_bits++ < MAX_DMA_BITS);
                if (rc)
                        return rc;
 
index 995e332..eb2151f 100644 (file)
@@ -51,6 +51,8 @@ void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
        if (!v9ses->cachetag) {
                if (v9fs_random_cachetag(v9ses) < 0) {
                        v9ses->fscache = NULL;
+                       kfree(v9ses->cachetag);
+                       v9ses->cachetag = NULL;
                        return;
                }
        }
index 4cc966a..fe7f0bd 100644 (file)
@@ -513,6 +513,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma)
        v9inode = V9FS_I(inode);
        mutex_lock(&v9inode->v_mutex);
        if (!v9inode->writeback_fid &&
+           (vma->vm_flags & VM_SHARED) &&
            (vma->vm_flags & VM_WRITE)) {
                /*
                 * clone a fid and add it to writeback_fid
@@ -614,6 +615,8 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
                        (vma->vm_end - vma->vm_start - 1),
        };
 
+       if (!(vma->vm_flags & VM_SHARED))
+               return;
 
        p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma);
 
index ca243e6..74df32b 100644 (file)
@@ -58,7 +58,7 @@ static int v9fs_set_super(struct super_block *s, void *data)
 
 static int
 v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
-               int flags, void *data)
+               int flags)
 {
        int ret;
 
@@ -132,7 +132,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
                retval = PTR_ERR(sb);
                goto clunk_fid;
        }
-       retval = v9fs_fill_super(sb, v9ses, flags, data);
+       retval = v9fs_fill_super(sb, v9ses, flags);
        if (retval)
                goto release_sb;
 
index d4e11b2..ad4c6b1 100644 (file)
@@ -670,26 +670,6 @@ out:
  * libraries.  There is no binary dependent code anywhere else.
  */
 
-#ifndef STACK_RND_MASK
-#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))    /* 8MB of VA */
-#endif
-
-static unsigned long randomize_stack_top(unsigned long stack_top)
-{
-       unsigned long random_variable = 0;
-
-       if (current->flags & PF_RANDOMIZE) {
-               random_variable = get_random_long();
-               random_variable &= STACK_RND_MASK;
-               random_variable <<= PAGE_SHIFT;
-       }
-#ifdef CONFIG_STACK_GROWSUP
-       return PAGE_ALIGN(stack_top) + random_variable;
-#else
-       return PAGE_ALIGN(stack_top) - random_variable;
-#endif
-}
-
 static int load_elf_binary(struct linux_binprm *bprm)
 {
        struct file *interpreter = NULL; /* to shut gcc up */
@@ -1141,7 +1121,8 @@ out_free_interp:
                 * (since it grows up, and may collide early with the stack
                 * growing down), and into the unused ELF_ET_DYN_BASE region.
                 */
-               if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) && !interpreter)
+               if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
+                   loc->elf_ex.e_type == ET_DYN && !interpreter)
                        current->mm->brk = current->mm->start_brk =
                                ELF_ET_DYN_BASE;
 
index a699e32..c1da294 100644 (file)
@@ -6,7 +6,7 @@
 obj-$(CONFIG_CEPH_FS) += ceph.o
 
 ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
-       export.o caps.o snap.o xattr.o quota.o \
+       export.o caps.o snap.o xattr.o quota.o io.o \
        mds_client.o mdsmap.o strings.o ceph_frag.o \
        debugfs.o
 
index b3c8b88..7ab6166 100644 (file)
@@ -189,8 +189,7 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
 {
        struct inode *inode = file_inode(filp);
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_osd_client *osdc =
-               &ceph_inode_to_client(inode)->client->osdc;
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        int err = 0;
        u64 off = page_offset(page);
        u64 len = PAGE_SIZE;
@@ -219,8 +218,8 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
 
        dout("readpage inode %p file %p page %p index %lu\n",
             inode, filp, page, page->index);
-       err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
-                                 off, &len,
+       err = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
+                                 &ci->i_layout, off, &len,
                                  ci->i_truncate_seq, ci->i_truncate_size,
                                  &page, 1, 0);
        if (err == -ENOENT)
@@ -228,6 +227,8 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
        if (err < 0) {
                SetPageError(page);
                ceph_fscache_readpage_cancel(inode, page);
+               if (err == -EBLACKLISTED)
+                       fsc->blacklisted = true;
                goto out;
        }
        if (err < PAGE_SIZE)
@@ -266,6 +267,8 @@ static void finish_read(struct ceph_osd_request *req)
        int i;
 
        dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
+       if (rc == -EBLACKLISTED)
+               ceph_inode_to_client(inode)->blacklisted = true;
 
        /* unlock all pages, zeroing any data we didn't read */
        osd_data = osd_req_op_extent_osd_data(req, 0);
@@ -323,7 +326,8 @@ static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
                /* caller of readpages does not hold buffer and read caps
                 * (fadvise, madvise and readahead cases) */
                int want = CEPH_CAP_FILE_CACHE;
-               ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, true, &got);
+               ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want,
+                                       true, &got);
                if (ret < 0) {
                        dout("start_read %p, error getting cap\n", inode);
                } else if (!(got & want)) {
@@ -569,7 +573,7 @@ static u64 get_writepages_data_length(struct inode *inode,
 /*
  * Write a single page, but leave the page locked.
  *
- * If we get a write error, set the page error bit, but still adjust the
+ * If we get a write error, mark the mapping for error, but still adjust the
  * dirty page accounting (i.e., page is no longer dirty).
  */
 static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
@@ -640,9 +644,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
                        end_page_writeback(page);
                        return err;
                }
+               if (err == -EBLACKLISTED)
+                       fsc->blacklisted = true;
                dout("writepage setting page/mapping error %d %p\n",
                     err, page);
-               SetPageError(page);
                mapping_set_error(&inode->i_data, err);
                wbc->pages_skipped++;
        } else {
@@ -680,23 +685,6 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
 }
 
 /*
- * lame release_pages helper.  release_pages() isn't exported to
- * modules.
- */
-static void ceph_release_pages(struct page **pages, int num)
-{
-       struct pagevec pvec;
-       int i;
-
-       pagevec_init(&pvec);
-       for (i = 0; i < num; i++) {
-               if (pagevec_add(&pvec, pages[i]) == 0)
-                       pagevec_release(&pvec);
-       }
-       pagevec_release(&pvec);
-}
-
-/*
  * async writeback completion handler.
  *
  * If we get an error, set the mapping error bit, but not the individual
@@ -720,6 +708,8 @@ static void writepages_finish(struct ceph_osd_request *req)
        if (rc < 0) {
                mapping_set_error(mapping, rc);
                ceph_set_error_write(ci);
+               if (rc == -EBLACKLISTED)
+                       fsc->blacklisted = true;
        } else {
                ceph_clear_error_write(ci);
        }
@@ -769,7 +759,7 @@ static void writepages_finish(struct ceph_osd_request *req)
                dout("writepages_finish %p wrote %llu bytes cleaned %d pages\n",
                     inode, osd_data->length, rc >= 0 ? num_pages : 0);
 
-               ceph_release_pages(osd_data->pages, num_pages);
+               release_pages(osd_data->pages, num_pages);
        }
 
        ceph_put_wrbuffer_cap_refs(ci, total_pages, snapc);
@@ -1452,7 +1442,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
                want = CEPH_CAP_FILE_CACHE;
 
        got = 0;
-       err = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
+       err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_RD, want, -1,
+                           &got, &pinned_page);
        if (err < 0)
                goto out_restore;
 
@@ -1540,6 +1531,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
        if (!prealloc_cf)
                return VM_FAULT_OOM;
 
+       sb_start_pagefault(inode->i_sb);
        ceph_block_sigs(&oldset);
 
        if (ci->i_inline_version != CEPH_INLINE_NONE) {
@@ -1568,7 +1560,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
                want = CEPH_CAP_FILE_BUFFER;
 
        got = 0;
-       err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
+       err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_WR, want, off + len,
                            &got, NULL);
        if (err < 0)
                goto out_free;
@@ -1614,6 +1606,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
        ceph_put_cap_refs(ci, got);
 out_free:
        ceph_restore_sigs(&oldset);
+       sb_end_pagefault(inode->i_sb);
        ceph_free_cap_flush(prealloc_cf);
        if (err < 0)
                ret = vmf_error(err);
@@ -1946,12 +1939,17 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
 
        if (err >= 0 || err == -ENOENT)
                have |= POOL_READ;
-       else if (err != -EPERM)
+       else if (err != -EPERM) {
+               if (err == -EBLACKLISTED)
+                       fsc->blacklisted = true;
                goto out_unlock;
+       }
 
        if (err2 == 0 || err2 == -EEXIST)
                have |= POOL_WRITE;
        else if (err2 != -EPERM) {
+               if (err2 == -EBLACKLISTED)
+                       fsc->blacklisted = true;
                err = err2;
                goto out_unlock;
        }
@@ -1989,10 +1987,11 @@ out:
        return err;
 }
 
-int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
+int ceph_pool_perm_check(struct inode *inode, int need)
 {
-       s64 pool;
+       struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_string *pool_ns;
+       s64 pool;
        int ret, flags;
 
        if (ci->i_vino.snap != CEPH_NOSNAP) {
@@ -2004,7 +2003,7 @@ int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
                return 0;
        }
 
-       if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode),
+       if (ceph_test_mount_opt(ceph_inode_to_client(inode),
                                NOPOOLPERM))
                return 0;
 
index bc90cf6..b2ec29e 100644 (file)
@@ -6,6 +6,8 @@
  *  Written by Milosz Tanski (milosz@adfin.com)
  */
 
+#include <linux/ceph/ceph_debug.h>
+
 #include "super.h"
 #include "cache.h"
 
index ce0f565..d3b9c9d 100644 (file)
@@ -458,37 +458,6 @@ struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
 }
 
 /*
- * Return id of any MDS with a cap, preferably FILE_WR|BUFFER|EXCL, else -1.
- */
-static int __ceph_get_cap_mds(struct ceph_inode_info *ci)
-{
-       struct ceph_cap *cap;
-       int mds = -1;
-       struct rb_node *p;
-
-       /* prefer mds with WR|BUFFER|EXCL caps */
-       for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
-               cap = rb_entry(p, struct ceph_cap, ci_node);
-               mds = cap->mds;
-               if (cap->issued & (CEPH_CAP_FILE_WR |
-                                  CEPH_CAP_FILE_BUFFER |
-                                  CEPH_CAP_FILE_EXCL))
-                       break;
-       }
-       return mds;
-}
-
-int ceph_get_cap_mds(struct inode *inode)
-{
-       struct ceph_inode_info *ci = ceph_inode(inode);
-       int mds;
-       spin_lock(&ci->i_ceph_lock);
-       mds = __ceph_get_cap_mds(ceph_inode(inode));
-       spin_unlock(&ci->i_ceph_lock);
-       return mds;
-}
-
-/*
  * Called under i_ceph_lock.
  */
 static void __insert_cap_node(struct ceph_inode_info *ci,
@@ -628,7 +597,7 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
 /*
  * Add a capability under the given MDS session.
  *
- * Caller should hold session snap_rwsem (read) and s_mutex.
+ * Caller should hold session snap_rwsem (read) and ci->i_ceph_lock
  *
  * @fmode is the open file mode, if we are opening a file, otherwise
  * it is < 0.  (This is so we can atomically add the cap and add an
@@ -645,6 +614,9 @@ void ceph_add_cap(struct inode *inode,
        struct ceph_cap *cap;
        int mds = session->s_mds;
        int actual_wanted;
+       u32 gen;
+
+       lockdep_assert_held(&ci->i_ceph_lock);
 
        dout("add_cap %p mds%d cap %llx %s seq %d\n", inode,
             session->s_mds, cap_id, ceph_cap_string(issued), seq);
@@ -656,6 +628,10 @@ void ceph_add_cap(struct inode *inode,
        if (fmode >= 0)
                wanted |= ceph_caps_for_mode(fmode);
 
+       spin_lock(&session->s_gen_ttl_lock);
+       gen = session->s_cap_gen;
+       spin_unlock(&session->s_gen_ttl_lock);
+
        cap = __get_cap_for_mds(ci, mds);
        if (!cap) {
                cap = *new_cap;
@@ -681,7 +657,7 @@ void ceph_add_cap(struct inode *inode,
                list_move_tail(&cap->session_caps, &session->s_caps);
                spin_unlock(&session->s_cap_lock);
 
-               if (cap->cap_gen < session->s_cap_gen)
+               if (cap->cap_gen < gen)
                        cap->issued = cap->implemented = CEPH_CAP_PIN;
 
                /*
@@ -775,7 +751,7 @@ void ceph_add_cap(struct inode *inode,
        cap->seq = seq;
        cap->issue_seq = seq;
        cap->mseq = mseq;
-       cap->cap_gen = session->s_cap_gen;
+       cap->cap_gen = gen;
 
        if (fmode >= 0)
                __ceph_get_fmode(ci, fmode);
@@ -1284,10 +1260,6 @@ void __ceph_remove_caps(struct ceph_inode_info *ci)
  * Make note of max_size reported/requested from mds, revoked caps
  * that have now been implemented.
  *
- * Make half-hearted attempt ot to invalidate page cache if we are
- * dropping RDCACHE.  Note that this will leave behind locked pages
- * that we'll then need to deal with elsewhere.
- *
  * Return non-zero if delayed release, or we experienced an error
  * such that the caller should requeue + retry later.
  *
@@ -1746,11 +1718,11 @@ static bool __finish_cap_flush(struct ceph_mds_client *mdsc,
  * Add dirty inode to the flushing list.  Assigned a seq number so we
  * can wait for caps to flush without starving.
  *
- * Called under i_ceph_lock.
+ * Called under i_ceph_lock. Returns the flush tid.
  */
-static int __mark_caps_flushing(struct inode *inode,
+static u64 __mark_caps_flushing(struct inode *inode,
                                struct ceph_mds_session *session, bool wake,
-                               u64 *flush_tid, u64 *oldest_flush_tid)
+                               u64 *oldest_flush_tid)
 {
        struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
        struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1789,8 +1761,7 @@ static int __mark_caps_flushing(struct inode *inode,
 
        list_add_tail(&cf->i_list, &ci->i_cap_flush_list);
 
-       *flush_tid = cf->tid;
-       return flushing;
+       return cf->tid;
 }
 
 /*
@@ -2028,11 +1999,6 @@ retry_locked:
                }
 
 ack:
-               if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
-                       dout(" skipping %p I_NOFLUSH set\n", inode);
-                       continue;
-               }
-
                if (session && session != cap->session) {
                        dout("oops, wrong session %p mutex\n", session);
                        mutex_unlock(&session->s_mutex);
@@ -2080,9 +2046,9 @@ ack:
                }
 
                if (cap == ci->i_auth_cap && ci->i_dirty_caps) {
-                       flushing = __mark_caps_flushing(inode, session, false,
-                                                       &flush_tid,
-                                                       &oldest_flush_tid);
+                       flushing = ci->i_dirty_caps;
+                       flush_tid = __mark_caps_flushing(inode, session, false,
+                                                        &oldest_flush_tid);
                } else {
                        flushing = 0;
                        flush_tid = 0;
@@ -2130,16 +2096,11 @@ static int try_flush_caps(struct inode *inode, u64 *ptid)
 retry:
        spin_lock(&ci->i_ceph_lock);
 retry_locked:
-       if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
-               spin_unlock(&ci->i_ceph_lock);
-               dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
-               goto out;
-       }
        if (ci->i_dirty_caps && ci->i_auth_cap) {
                struct ceph_cap *cap = ci->i_auth_cap;
                int delayed;
 
-               if (!session || session != cap->session) {
+               if (session != cap->session) {
                        spin_unlock(&ci->i_ceph_lock);
                        if (session)
                                mutex_unlock(&session->s_mutex);
@@ -2161,8 +2122,9 @@ retry_locked:
                        goto retry_locked;
                }
 
-               flushing = __mark_caps_flushing(inode, session, true,
-                                               &flush_tid, &oldest_flush_tid);
+               flushing = ci->i_dirty_caps;
+               flush_tid = __mark_caps_flushing(inode, session, true,
+                                                &oldest_flush_tid);
 
                /* __send_cap drops i_ceph_lock */
                delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
@@ -2261,35 +2223,45 @@ static int unsafe_request_wait(struct inode *inode)
 
 int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
+       struct ceph_file_info *fi = file->private_data;
        struct inode *inode = file->f_mapping->host;
        struct ceph_inode_info *ci = ceph_inode(inode);
        u64 flush_tid;
-       int ret;
+       int ret, err;
        int dirty;
 
        dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
 
        ret = file_write_and_wait_range(file, start, end);
-       if (ret < 0)
-               goto out;
-
        if (datasync)
                goto out;
 
        dirty = try_flush_caps(inode, &flush_tid);
        dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
 
-       ret = unsafe_request_wait(inode);
+       err = unsafe_request_wait(inode);
 
        /*
         * only wait on non-file metadata writeback (the mds
         * can recover size and mtime, so we don't need to
         * wait for that)
         */
-       if (!ret && (dirty & ~CEPH_CAP_ANY_FILE_WR)) {
-               ret = wait_event_interruptible(ci->i_cap_wq,
+       if (!err && (dirty & ~CEPH_CAP_ANY_FILE_WR)) {
+               err = wait_event_interruptible(ci->i_cap_wq,
                                        caps_are_flushed(inode, flush_tid));
        }
+
+       if (err < 0)
+               ret = err;
+
+       if (errseq_check(&ci->i_meta_err, READ_ONCE(fi->meta_err))) {
+               spin_lock(&file->f_lock);
+               err = errseq_check_and_advance(&ci->i_meta_err,
+                                              &fi->meta_err);
+               spin_unlock(&file->f_lock);
+               if (err < 0)
+                       ret = err;
+       }
 out:
        dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
        return ret;
@@ -2560,10 +2532,15 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got,
  *
  * FIXME: how does a 0 return differ from -EAGAIN?
  */
-static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
-                           loff_t endoff, bool nonblock, int *got)
+enum {
+       NON_BLOCKING    = 1,
+       CHECK_FILELOCK  = 2,
+};
+
+static int try_get_cap_refs(struct inode *inode, int need, int want,
+                           loff_t endoff, int flags, int *got)
 {
-       struct inode *inode = &ci->vfs_inode;
+       struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
        int ret = 0;
        int have, implemented;
@@ -2576,6 +2553,13 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
 again:
        spin_lock(&ci->i_ceph_lock);
 
+       if ((flags & CHECK_FILELOCK) &&
+           (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK)) {
+               dout("try_get_cap_refs %p error filelock\n", inode);
+               ret = -EIO;
+               goto out_unlock;
+       }
+
        /* make sure file is actually open */
        file_wanted = __ceph_caps_file_wanted(ci);
        if ((file_wanted & need) != need) {
@@ -2637,7 +2621,7 @@ again:
                                         * we can not call down_read() when
                                         * task isn't in TASK_RUNNING state
                                         */
-                                       if (nonblock) {
+                                       if (flags & NON_BLOCKING) {
                                                ret = -EAGAIN;
                                                goto out_unlock;
                                        }
@@ -2731,18 +2715,19 @@ static void check_max_size(struct inode *inode, loff_t endoff)
                ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
 }
 
-int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
+int ceph_try_get_caps(struct inode *inode, int need, int want,
                      bool nonblock, int *got)
 {
        int ret;
 
        BUG_ON(need & ~CEPH_CAP_FILE_RD);
        BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED));
-       ret = ceph_pool_perm_check(ci, need);
+       ret = ceph_pool_perm_check(inode, need);
        if (ret < 0)
                return ret;
 
-       ret = try_get_cap_refs(ci, need, want, 0, nonblock, got);
+       ret = try_get_cap_refs(inode, need, want, 0,
+                              (nonblock ? NON_BLOCKING : 0), got);
        return ret == -EAGAIN ? 0 : ret;
 }
 
@@ -2751,30 +2736,40 @@ int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
  * due to a small max_size, make sure we check_max_size (and possibly
  * ask the mds) so we don't get hung up indefinitely.
  */
-int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
+int ceph_get_caps(struct file *filp, int need, int want,
                  loff_t endoff, int *got, struct page **pinned_page)
 {
-       int _got, ret;
+       struct ceph_file_info *fi = filp->private_data;
+       struct inode *inode = file_inode(filp);
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+       int ret, _got, flags;
 
-       ret = ceph_pool_perm_check(ci, need);
+       ret = ceph_pool_perm_check(inode, need);
        if (ret < 0)
                return ret;
 
+       if ((fi->fmode & CEPH_FILE_MODE_WR) &&
+           fi->filp_gen != READ_ONCE(fsc->filp_gen))
+               return -EBADF;
+
        while (true) {
                if (endoff > 0)
-                       check_max_size(&ci->vfs_inode, endoff);
+                       check_max_size(inode, endoff);
 
+               flags = atomic_read(&fi->num_locks) ? CHECK_FILELOCK : 0;
                _got = 0;
-               ret = try_get_cap_refs(ci, need, want, endoff,
-                                      false, &_got);
+               ret = try_get_cap_refs(inode, need, want, endoff,
+                                      flags, &_got);
                if (ret == -EAGAIN)
                        continue;
                if (!ret) {
                        DEFINE_WAIT_FUNC(wait, woken_wake_function);
                        add_wait_queue(&ci->i_cap_wq, &wait);
 
-                       while (!(ret = try_get_cap_refs(ci, need, want, endoff,
-                                                       true, &_got))) {
+                       flags |= NON_BLOCKING;
+                       while (!(ret = try_get_cap_refs(inode, need, want,
+                                                       endoff, flags, &_got))) {
                                if (signal_pending(current)) {
                                        ret = -ERESTARTSYS;
                                        break;
@@ -2786,10 +2781,18 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
                        if (ret == -EAGAIN)
                                continue;
                }
+
+               if ((fi->fmode & CEPH_FILE_MODE_WR) &&
+                   fi->filp_gen != READ_ONCE(fsc->filp_gen)) {
+                       if (ret >= 0 && _got)
+                               ceph_put_cap_refs(ci, _got);
+                       return -EBADF;
+               }
+
                if (ret < 0) {
                        if (ret == -ESTALE) {
                                /* session was killed, try renew caps */
-                               ret = ceph_renew_caps(&ci->vfs_inode);
+                               ret = ceph_renew_caps(inode);
                                if (ret == 0)
                                        continue;
                        }
@@ -2798,9 +2801,9 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
 
                if (ci->i_inline_version != CEPH_INLINE_NONE &&
                    (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
-                   i_size_read(&ci->vfs_inode) > 0) {
+                   i_size_read(inode) > 0) {
                        struct page *page =
-                               find_get_page(ci->vfs_inode.i_mapping, 0);
+                               find_get_page(inode->i_mapping, 0);
                        if (page) {
                                if (PageUptodate(page)) {
                                        *pinned_page = page;
@@ -2819,7 +2822,7 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
                         * getattr request will bring inline data into
                         * page cache
                         */
-                       ret = __ceph_do_getattr(&ci->vfs_inode, NULL,
+                       ret = __ceph_do_getattr(inode, NULL,
                                                CEPH_STAT_CAP_INLINE_DATA,
                                                true);
                        if (ret < 0)
index 2eb88ed..facb387 100644 (file)
@@ -294,7 +294,6 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 
 void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 {
-       return 0;
 }
 
 void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
index 15ff1b0..b6bfa94 100644 (file)
@@ -35,7 +35,7 @@ struct ceph_nfs_snapfh {
 static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len,
                              struct inode *parent_inode)
 {
-       const static int snap_handle_length =
+       static const int snap_handle_length =
                sizeof(struct ceph_nfs_snapfh) >> 2;
        struct ceph_nfs_snapfh *sfh = (void *)rawfh;
        u64 snapid = ceph_snap(inode);
@@ -85,9 +85,9 @@ out:
 static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
                          struct inode *parent_inode)
 {
-       const static int handle_length =
+       static const int handle_length =
                sizeof(struct ceph_nfs_fh) >> 2;
-       const static int connected_handle_length =
+       static const int connected_handle_length =
                sizeof(struct ceph_nfs_confh) >> 2;
        int type;
 
@@ -458,33 +458,33 @@ static int __get_snap_name(struct dentry *parent, char *name,
                if (err < 0)
                        goto out;
 
-                rinfo = &req->r_reply_info;
-                for (i = 0; i < rinfo->dir_nr; i++) {
-                        rde = rinfo->dir_entries + i;
-                        BUG_ON(!rde->inode.in);
-                        if (ceph_snap(inode) ==
-                            le64_to_cpu(rde->inode.in->snapid)) {
-                                memcpy(name, rde->name, rde->name_len);
-                                name[rde->name_len] = '\0';
-                                err = 0;
-                                goto out;
-                        }
-                }
-
-                if (rinfo->dir_end)
-                        break;
-
-                BUG_ON(rinfo->dir_nr <= 0);
-                rde = rinfo->dir_entries + (rinfo->dir_nr - 1);
-                next_offset += rinfo->dir_nr;
-                last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL);
-                if (!last_name) {
-                        err = -ENOMEM;
-                        goto out;
-                }
-
-                ceph_mdsc_put_request(req);
-                req = NULL;
+               rinfo = &req->r_reply_info;
+               for (i = 0; i < rinfo->dir_nr; i++) {
+                       rde = rinfo->dir_entries + i;
+                       BUG_ON(!rde->inode.in);
+                       if (ceph_snap(inode) ==
+                           le64_to_cpu(rde->inode.in->snapid)) {
+                               memcpy(name, rde->name, rde->name_len);
+                               name[rde->name_len] = '\0';
+                               err = 0;
+                               goto out;
+                       }
+               }
+
+               if (rinfo->dir_end)
+                       break;
+
+               BUG_ON(rinfo->dir_nr <= 0);
+               rde = rinfo->dir_entries + (rinfo->dir_nr - 1);
+               next_offset += rinfo->dir_nr;
+               last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL);
+               if (!last_name) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+
+               ceph_mdsc_put_request(req);
+               req = NULL;
        }
        err = -ENOENT;
 out:
index 685a03c..d277f71 100644 (file)
@@ -15,6 +15,7 @@
 #include "super.h"
 #include "mds_client.h"
 #include "cache.h"
+#include "io.h"
 
 static __le32 ceph_flags_sys2wire(u32 flags)
 {
@@ -201,6 +202,7 @@ out:
 static int ceph_init_file_info(struct inode *inode, struct file *file,
                                        int fmode, bool isdir)
 {
+       struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_file_info *fi;
 
        dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
@@ -211,7 +213,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
                struct ceph_dir_file_info *dfi =
                        kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
                if (!dfi) {
-                       ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+                       ceph_put_fmode(ci, fmode); /* clean up */
                        return -ENOMEM;
                }
 
@@ -222,7 +224,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
        } else {
                fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
                if (!fi) {
-                       ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+                       ceph_put_fmode(ci, fmode); /* clean up */
                        return -ENOMEM;
                }
 
@@ -232,6 +234,8 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
        fi->fmode = fmode;
        spin_lock_init(&fi->rw_contexts_lock);
        INIT_LIST_HEAD(&fi->rw_contexts);
+       fi->meta_err = errseq_sample(&ci->i_meta_err);
+       fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen);
 
        return 0;
 }
@@ -695,7 +699,13 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
                        ceph_release_page_vector(pages, num_pages);
                }
 
-               if (ret <= 0 || off >= i_size || !more)
+               if (ret < 0) {
+                       if (ret == -EBLACKLISTED)
+                               fsc->blacklisted = true;
+                       break;
+               }
+
+               if (off >= i_size || !more)
                        break;
        }
 
@@ -921,7 +931,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
        struct ceph_aio_request *aio_req = NULL;
        int num_pages = 0;
        int flags;
-       int ret;
+       int ret = 0;
        struct timespec64 mtime = current_time(inode);
        size_t count = iov_iter_count(iter);
        loff_t pos = iocb->ki_pos;
@@ -935,11 +945,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
             (write ? "write" : "read"), file, pos, (unsigned)count,
             snapc, snapc ? snapc->seq : 0);
 
-       ret = filemap_write_and_wait_range(inode->i_mapping,
-                                          pos, pos + count - 1);
-       if (ret < 0)
-               return ret;
-
        if (write) {
                int ret2 = invalidate_inode_pages2_range(inode->i_mapping,
                                        pos >> PAGE_SHIFT,
@@ -1260,7 +1265,8 @@ again:
                want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
        else
                want = CEPH_CAP_FILE_CACHE;
-       ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
+       ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1,
+                           &got, &pinned_page);
        if (ret < 0)
                return ret;
 
@@ -1274,12 +1280,16 @@ again:
 
                if (ci->i_inline_version == CEPH_INLINE_NONE) {
                        if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
+                               ceph_start_io_direct(inode);
                                ret = ceph_direct_read_write(iocb, to,
                                                             NULL, NULL);
+                               ceph_end_io_direct(inode);
                                if (ret >= 0 && ret < len)
                                        retry_op = CHECK_EOF;
                        } else {
+                               ceph_start_io_read(inode);
                                ret = ceph_sync_read(iocb, to, &retry_op);
+                               ceph_end_io_read(inode);
                        }
                } else {
                        retry_op = READ_INLINE;
@@ -1290,7 +1300,9 @@ again:
                     inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
                     ceph_cap_string(got));
                ceph_add_rw_context(fi, &rw_ctx);
+               ceph_start_io_read(inode);
                ret = generic_file_read_iter(iocb, to);
+               ceph_end_io_read(inode);
                ceph_del_rw_context(fi, &rw_ctx);
        }
        dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
@@ -1399,7 +1411,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
                return -ENOMEM;
 
 retry_snap:
-       inode_lock(inode);
+       if (iocb->ki_flags & IOCB_DIRECT)
+               ceph_start_io_direct(inode);
+       else
+               ceph_start_io_write(inode);
 
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = inode_to_bdi(inode);
@@ -1457,7 +1472,7 @@ retry_snap:
        else
                want = CEPH_CAP_FILE_BUFFER;
        got = 0;
-       err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, pos + count,
+       err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count,
                            &got, NULL);
        if (err < 0)
                goto out;
@@ -1470,7 +1485,6 @@ retry_snap:
            (ci->i_ceph_flags & CEPH_I_ERROR_WRITE)) {
                struct ceph_snap_context *snapc;
                struct iov_iter data;
-               inode_unlock(inode);
 
                spin_lock(&ci->i_ceph_lock);
                if (__ceph_have_pending_cap_snap(ci)) {
@@ -1487,11 +1501,14 @@ retry_snap:
 
                /* we might need to revert back to that point */
                data = *from;
-               if (iocb->ki_flags & IOCB_DIRECT)
+               if (iocb->ki_flags & IOCB_DIRECT) {
                        written = ceph_direct_read_write(iocb, &data, snapc,
                                                         &prealloc_cf);
-               else
+                       ceph_end_io_direct(inode);
+               } else {
                        written = ceph_sync_write(iocb, &data, pos, snapc);
+                       ceph_end_io_write(inode);
+               }
                if (written > 0)
                        iov_iter_advance(from, written);
                ceph_put_snap_context(snapc);
@@ -1506,7 +1523,7 @@ retry_snap:
                written = generic_perform_write(file, from, pos);
                if (likely(written >= 0))
                        iocb->ki_pos = pos + written;
-               inode_unlock(inode);
+               ceph_end_io_write(inode);
        }
 
        if (written >= 0) {
@@ -1541,9 +1558,11 @@ retry_snap:
        }
 
        goto out_unlocked;
-
 out:
-       inode_unlock(inode);
+       if (iocb->ki_flags & IOCB_DIRECT)
+               ceph_end_io_direct(inode);
+       else
+               ceph_end_io_write(inode);
 out_unlocked:
        ceph_free_cap_flush(prealloc_cf);
        current->backing_dev_info = NULL;
@@ -1781,7 +1800,7 @@ static long ceph_fallocate(struct file *file, int mode,
        else
                want = CEPH_CAP_FILE_BUFFER;
 
-       ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, endoff, &got, NULL);
+       ret = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, endoff, &got, NULL);
        if (ret < 0)
                goto unlock;
 
@@ -1810,16 +1829,15 @@ unlock:
  * src_ci.  Two attempts are made to obtain both caps, and an error is return if
  * this fails; zero is returned on success.
  */
-static int get_rd_wr_caps(struct ceph_inode_info *src_ci,
-                         loff_t src_endoff, int *src_got,
-                         struct ceph_inode_info *dst_ci,
+static int get_rd_wr_caps(struct file *src_filp, int *src_got,
+                         struct file *dst_filp,
                          loff_t dst_endoff, int *dst_got)
 {
        int ret = 0;
        bool retrying = false;
 
 retry_caps:
-       ret = ceph_get_caps(dst_ci, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER,
+       ret = ceph_get_caps(dst_filp, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER,
                            dst_endoff, dst_got, NULL);
        if (ret < 0)
                return ret;
@@ -1829,24 +1847,24 @@ retry_caps:
         * we would risk a deadlock by using ceph_get_caps.  Thus, we'll do some
         * retry dance instead to try to get both capabilities.
         */
-       ret = ceph_try_get_caps(src_ci, CEPH_CAP_FILE_RD, CEPH_CAP_FILE_SHARED,
+       ret = ceph_try_get_caps(file_inode(src_filp),
+                               CEPH_CAP_FILE_RD, CEPH_CAP_FILE_SHARED,
                                false, src_got);
        if (ret <= 0) {
                /* Start by dropping dst_ci caps and getting src_ci caps */
-               ceph_put_cap_refs(dst_ci, *dst_got);
+               ceph_put_cap_refs(ceph_inode(file_inode(dst_filp)), *dst_got);
                if (retrying) {
                        if (!ret)
                                /* ceph_try_get_caps masks EAGAIN */
                                ret = -EAGAIN;
                        return ret;
                }
-               ret = ceph_get_caps(src_ci, CEPH_CAP_FILE_RD,
-                                   CEPH_CAP_FILE_SHARED, src_endoff,
-                                   src_got, NULL);
+               ret = ceph_get_caps(src_filp, CEPH_CAP_FILE_RD,
+                                   CEPH_CAP_FILE_SHARED, -1, src_got, NULL);
                if (ret < 0)
                        return ret;
                /*... drop src_ci caps too, and retry */
-               ceph_put_cap_refs(src_ci, *src_got);
+               ceph_put_cap_refs(ceph_inode(file_inode(src_filp)), *src_got);
                retrying = true;
                goto retry_caps;
        }
@@ -1904,6 +1922,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
        struct ceph_inode_info *src_ci = ceph_inode(src_inode);
        struct ceph_inode_info *dst_ci = ceph_inode(dst_inode);
        struct ceph_cap_flush *prealloc_cf;
+       struct ceph_fs_client *src_fsc = ceph_inode_to_client(src_inode);
        struct ceph_object_locator src_oloc, dst_oloc;
        struct ceph_object_id src_oid, dst_oid;
        loff_t endoff = 0, size;
@@ -1913,10 +1932,16 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
        int src_got = 0, dst_got = 0, err, dirty;
        bool do_final_copy = false;
 
-       if (src_inode == dst_inode)
-               return -EINVAL;
-       if (src_inode->i_sb != dst_inode->i_sb)
-               return -EXDEV;
+       if (src_inode->i_sb != dst_inode->i_sb) {
+               struct ceph_fs_client *dst_fsc = ceph_inode_to_client(dst_inode);
+
+               if (ceph_fsid_compare(&src_fsc->client->fsid,
+                                     &dst_fsc->client->fsid)) {
+                       dout("Copying files across clusters: src: %pU dst: %pU\n",
+                            &src_fsc->client->fsid, &dst_fsc->client->fsid);
+                       return -EXDEV;
+               }
+       }
        if (ceph_snap(dst_inode) != CEPH_NOSNAP)
                return -EROFS;
 
@@ -1928,7 +1953,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
         * efficient).
         */
 
-       if (ceph_test_mount_opt(ceph_inode_to_client(src_inode), NOCOPYFROM))
+       if (ceph_test_mount_opt(src_fsc, NOCOPYFROM))
                return -EOPNOTSUPP;
 
        if ((src_ci->i_layout.stripe_unit != dst_ci->i_layout.stripe_unit) ||
@@ -1960,8 +1985,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
         * clients may have dirty data in their caches.  And OSDs know nothing
         * about caps, so they can't safely do the remote object copies.
         */
-       err = get_rd_wr_caps(src_ci, (src_off + len), &src_got,
-                            dst_ci, (dst_off + len), &dst_got);
+       err = get_rd_wr_caps(src_file, &src_got,
+                            dst_file, (dst_off + len), &dst_got);
        if (err < 0) {
                dout("get_rd_wr_caps returned %d\n", err);
                ret = -EOPNOTSUPP;
@@ -2018,9 +2043,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
                        goto out;
                }
                len -= ret;
-               err = get_rd_wr_caps(src_ci, (src_off + len),
-                                    &src_got, dst_ci,
-                                    (dst_off + len), &dst_got);
+               err = get_rd_wr_caps(src_file, &src_got,
+                                    dst_file, (dst_off + len), &dst_got);
                if (err < 0)
                        goto out;
                err = is_file_size_ok(src_inode, dst_inode,
@@ -2044,7 +2068,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
                                dst_ci->i_vino.ino, dst_objnum);
                /* Do an object remote copy */
                err = ceph_osdc_copy_from(
-                       &ceph_inode_to_client(src_inode)->client->osdc,
+                       &src_fsc->client->osdc,
                        src_ci->i_vino.snap, 0,
                        &src_oid, &src_oloc,
                        CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
index 18500ed..9f13562 100644 (file)
@@ -515,6 +515,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 
        ceph_fscache_inode_init(ci);
 
+       ci->i_meta_err = 0;
+
        return &ci->vfs_inode;
 }
 
@@ -801,7 +803,12 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
 
        /* update inode */
        inode->i_rdev = le32_to_cpu(info->rdev);
-       inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
+       /* directories have fl_stripe_unit set to zero */
+       if (le32_to_cpu(info->layout.fl_stripe_unit))
+               inode->i_blkbits =
+                       fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
+       else
+               inode->i_blkbits = CEPH_BLOCK_SHIFT;
 
        __ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files);
 
@@ -1982,7 +1989,7 @@ static const struct inode_operations ceph_symlink_iops = {
 int __ceph_setattr(struct inode *inode, struct iattr *attr)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
-       const unsigned int ia_valid = attr->ia_valid;
+       unsigned int ia_valid = attr->ia_valid;
        struct ceph_mds_request *req;
        struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
        struct ceph_cap_flush *prealloc_cf;
@@ -2087,6 +2094,26 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
                                   CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
                }
        }
+       if (ia_valid & ATTR_SIZE) {
+               dout("setattr %p size %lld -> %lld\n", inode,
+                    inode->i_size, attr->ia_size);
+               if ((issued & CEPH_CAP_FILE_EXCL) &&
+                   attr->ia_size > inode->i_size) {
+                       i_size_write(inode, attr->ia_size);
+                       inode->i_blocks = calc_inode_blocks(attr->ia_size);
+                       ci->i_reported_size = attr->ia_size;
+                       dirtied |= CEPH_CAP_FILE_EXCL;
+                       ia_valid |= ATTR_MTIME;
+               } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
+                          attr->ia_size != inode->i_size) {
+                       req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
+                       req->r_args.setattr.old_size =
+                               cpu_to_le64(inode->i_size);
+                       mask |= CEPH_SETATTR_SIZE;
+                       release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
+                                  CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
+               }
+       }
        if (ia_valid & ATTR_MTIME) {
                dout("setattr %p mtime %lld.%ld -> %lld.%ld\n", inode,
                     inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
@@ -2109,25 +2136,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
                                   CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
                }
        }
-       if (ia_valid & ATTR_SIZE) {
-               dout("setattr %p size %lld -> %lld\n", inode,
-                    inode->i_size, attr->ia_size);
-               if ((issued & CEPH_CAP_FILE_EXCL) &&
-                   attr->ia_size > inode->i_size) {
-                       i_size_write(inode, attr->ia_size);
-                       inode->i_blocks = calc_inode_blocks(attr->ia_size);
-                       ci->i_reported_size = attr->ia_size;
-                       dirtied |= CEPH_CAP_FILE_EXCL;
-               } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
-                          attr->ia_size != inode->i_size) {
-                       req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
-                       req->r_args.setattr.old_size =
-                               cpu_to_le64(inode->i_size);
-                       mask |= CEPH_SETATTR_SIZE;
-                       release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
-                                  CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
-               }
-       }
 
        /* these do nothing */
        if (ia_valid & ATTR_CTIME) {
diff --git a/fs/ceph/io.c b/fs/ceph/io.c
new file mode 100644 (file)
index 0000000..97602ea
--- /dev/null
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2016 Trond Myklebust
+ * Copyright (c) 2019 Jeff Layton
+ *
+ * I/O and data path helper functionality.
+ *
+ * Heavily borrowed from equivalent code in fs/nfs/io.c
+ */
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/rwsem.h>
+#include <linux/fs.h>
+
+#include "super.h"
+#include "io.h"
+
+/* Call with exclusively locked inode->i_rwsem */
+static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
+{
+       lockdep_assert_held_write(&inode->i_rwsem);
+
+       if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) {
+               spin_lock(&ci->i_ceph_lock);
+               ci->i_ceph_flags &= ~CEPH_I_ODIRECT;
+               spin_unlock(&ci->i_ceph_lock);
+               inode_dio_wait(inode);
+       }
+}
+
+/**
+ * ceph_start_io_read - declare the file is being used for buffered reads
+ * @inode: file inode
+ *
+ * Declare that a buffered read operation is about to start, and ensure
+ * that we block all direct I/O.
+ * On exit, the function ensures that the CEPH_I_ODIRECT flag is unset,
+ * and holds a shared lock on inode->i_rwsem to ensure that the flag
+ * cannot be changed.
+ * In practice, this means that buffered read operations are allowed to
+ * execute in parallel, thanks to the shared lock, whereas direct I/O
+ * operations need to wait to grab an exclusive lock in order to set
+ * CEPH_I_ODIRECT.
+ * Note that buffered writes and truncates both take a write lock on
+ * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
+ */
+void
+ceph_start_io_read(struct inode *inode)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+
+       /* Be an optimist! */
+       down_read(&inode->i_rwsem);
+       if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT))
+               return;
+       up_read(&inode->i_rwsem);
+       /* Slow path.... */
+       down_write(&inode->i_rwsem);
+       ceph_block_o_direct(ci, inode);
+       downgrade_write(&inode->i_rwsem);
+}
+
+/**
+ * ceph_end_io_read - declare that the buffered read operation is done
+ * @inode: file inode
+ *
+ * Declare that a buffered read operation is done, and release the shared
+ * lock on inode->i_rwsem.
+ */
+void
+ceph_end_io_read(struct inode *inode)
+{
+       up_read(&inode->i_rwsem);
+}
+
+/**
+ * ceph_start_io_write - declare the file is being used for buffered writes
+ * @inode: file inode
+ *
+ * Declare that a buffered write operation is about to start, and ensure
+ * that we block all direct I/O.
+ */
+void
+ceph_start_io_write(struct inode *inode)
+{
+       down_write(&inode->i_rwsem);
+       ceph_block_o_direct(ceph_inode(inode), inode);
+}
+
+/**
+ * ceph_end_io_write - declare that the buffered write operation is done
+ * @inode: file inode
+ *
+ * Declare that a buffered write operation is done, and release the
+ * lock on inode->i_rwsem.
+ */
+void
+ceph_end_io_write(struct inode *inode)
+{
+       up_write(&inode->i_rwsem);
+}
+
+/* Call with exclusively locked inode->i_rwsem */
+static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
+{
+       lockdep_assert_held_write(&inode->i_rwsem);
+
+       if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) {
+               spin_lock(&ci->i_ceph_lock);
+               ci->i_ceph_flags |= CEPH_I_ODIRECT;
+               spin_unlock(&ci->i_ceph_lock);
+               /* FIXME: unmap_mapping_range? */
+               filemap_write_and_wait(inode->i_mapping);
+       }
+}
+
+/**
+ * ceph_start_io_direct - declare the file is being used for direct i/o
+ * @inode: file inode
+ *
+ * Declare that a direct I/O operation is about to start, and ensure
+ * that we block all buffered I/O.
+ * On exit, the function ensures that the CEPH_I_ODIRECT flag is set,
+ * and holds a shared lock on inode->i_rwsem to ensure that the flag
+ * cannot be changed.
+ * In practice, this means that direct I/O operations are allowed to
+ * execute in parallel, thanks to the shared lock, whereas buffered I/O
+ * operations need to wait to grab an exclusive lock in order to clear
+ * CEPH_I_ODIRECT.
+ * Note that buffered writes and truncates both take a write lock on
+ * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
+ */
+void
+ceph_start_io_direct(struct inode *inode)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+
+       /* Be an optimist! */
+       down_read(&inode->i_rwsem);
+       if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)
+               return;
+       up_read(&inode->i_rwsem);
+       /* Slow path.... */
+       down_write(&inode->i_rwsem);
+       ceph_block_buffered(ci, inode);
+       downgrade_write(&inode->i_rwsem);
+}
+
+/**
+ * ceph_end_io_direct - declare that the direct i/o operation is done
+ * @inode: file inode
+ *
+ * Declare that a direct I/O operation is done, and release the shared
+ * lock on inode->i_rwsem.
+ */
+void
+ceph_end_io_direct(struct inode *inode)
+{
+       up_read(&inode->i_rwsem);
+}
diff --git a/fs/ceph/io.h b/fs/ceph/io.h
new file mode 100644 (file)
index 0000000..fa594cd
--- /dev/null
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _FS_CEPH_IO_H
+#define _FS_CEPH_IO_H
+
+void ceph_start_io_read(struct inode *inode);
+void ceph_end_io_read(struct inode *inode);
+void ceph_start_io_write(struct inode *inode);
+void ceph_end_io_write(struct inode *inode);
+void ceph_start_io_direct(struct inode *inode);
+void ceph_end_io_direct(struct inode *inode);
+
+#endif /* _FS_CEPH_IO_H */
index 5083e23..544e9e8 100644 (file)
@@ -32,14 +32,18 @@ void __init ceph_flock_init(void)
 
 static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
 {
-       struct inode *inode = file_inode(src->fl_file);
+       struct ceph_file_info *fi = dst->fl_file->private_data;
+       struct inode *inode = file_inode(dst->fl_file);
        atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+       atomic_inc(&fi->num_locks);
 }
 
 static void ceph_fl_release_lock(struct file_lock *fl)
 {
+       struct ceph_file_info *fi = fl->fl_file->private_data;
        struct inode *inode = file_inode(fl->fl_file);
        struct ceph_inode_info *ci = ceph_inode(inode);
+       atomic_dec(&fi->num_locks);
        if (atomic_dec_and_test(&ci->i_filelock_ref)) {
                /* clear error when all locks are released */
                spin_lock(&ci->i_ceph_lock);
@@ -73,7 +77,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
                 * window. Caller function will decrease the counter.
                 */
                fl->fl_ops = &ceph_fl_lock_ops;
-               atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+               fl->fl_ops->fl_copy_lock(fl, NULL);
        }
 
        if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
index 920e9f0..a8a8f84 100644 (file)
@@ -639,7 +639,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
        s->s_renew_seq = 0;
        INIT_LIST_HEAD(&s->s_caps);
        s->s_nr_caps = 0;
-       s->s_trim_caps = 0;
        refcount_set(&s->s_ref, 1);
        INIT_LIST_HEAD(&s->s_waiting);
        INIT_LIST_HEAD(&s->s_unsafe);
@@ -1270,6 +1269,7 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
 {
        struct ceph_mds_request *req;
        struct rb_node *p;
+       struct ceph_inode_info *ci;
 
        dout("cleanup_session_requests mds%d\n", session->s_mds);
        mutex_lock(&mdsc->mutex);
@@ -1278,6 +1278,16 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
                                       struct ceph_mds_request, r_unsafe_item);
                pr_warn_ratelimited(" dropping unsafe request %llu\n",
                                    req->r_tid);
+               if (req->r_target_inode) {
+                       /* dropping unsafe change of inode's attributes */
+                       ci = ceph_inode(req->r_target_inode);
+                       errseq_set(&ci->i_meta_err, -EIO);
+               }
+               if (req->r_unsafe_dir) {
+                       /* dropping unsafe directory operation */
+                       ci = ceph_inode(req->r_unsafe_dir);
+                       errseq_set(&ci->i_meta_err, -EIO);
+               }
                __unregister_request(mdsc, req);
        }
        /* zero r_attempts, so kick_requests() will re-send requests */
@@ -1370,7 +1380,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
        struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
        struct ceph_inode_info *ci = ceph_inode(inode);
        LIST_HEAD(to_remove);
-       bool drop = false;
+       bool dirty_dropped = false;
        bool invalidate = false;
 
        dout("removing cap %p, ci is %p, inode is %p\n",
@@ -1383,9 +1393,12 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                struct ceph_cap_flush *cf;
                struct ceph_mds_client *mdsc = fsc->mdsc;
 
-               if (ci->i_wrbuffer_ref > 0 &&
-                   READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
-                       invalidate = true;
+               if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+                       if (inode->i_data.nrpages > 0)
+                               invalidate = true;
+                       if (ci->i_wrbuffer_ref > 0)
+                               mapping_set_error(&inode->i_data, -EIO);
+               }
 
                while (!list_empty(&ci->i_cap_flush_list)) {
                        cf = list_first_entry(&ci->i_cap_flush_list,
@@ -1405,7 +1418,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                                inode, ceph_ino(inode));
                        ci->i_dirty_caps = 0;
                        list_del_init(&ci->i_dirty_item);
-                       drop = true;
+                       dirty_dropped = true;
                }
                if (!list_empty(&ci->i_flushing_item)) {
                        pr_warn_ratelimited(
@@ -1415,10 +1428,22 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                        ci->i_flushing_caps = 0;
                        list_del_init(&ci->i_flushing_item);
                        mdsc->num_cap_flushing--;
-                       drop = true;
+                       dirty_dropped = true;
                }
                spin_unlock(&mdsc->cap_dirty_lock);
 
+               if (dirty_dropped) {
+                       errseq_set(&ci->i_meta_err, -EIO);
+
+                       if (ci->i_wrbuffer_ref_head == 0 &&
+                           ci->i_wr_ref == 0 &&
+                           ci->i_dirty_caps == 0 &&
+                           ci->i_flushing_caps == 0) {
+                               ceph_put_snap_context(ci->i_head_snapc);
+                               ci->i_head_snapc = NULL;
+                       }
+               }
+
                if (atomic_read(&ci->i_filelock_ref) > 0) {
                        /* make further file lock syscall return -EIO */
                        ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
@@ -1430,15 +1455,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                        list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
                        ci->i_prealloc_cap_flush = NULL;
                }
-
-               if (drop &&
-                  ci->i_wrbuffer_ref_head == 0 &&
-                  ci->i_wr_ref == 0 &&
-                  ci->i_dirty_caps == 0 &&
-                  ci->i_flushing_caps == 0) {
-                      ceph_put_snap_context(ci->i_head_snapc);
-                      ci->i_head_snapc = NULL;
-               }
        }
        spin_unlock(&ci->i_ceph_lock);
        while (!list_empty(&to_remove)) {
@@ -1452,7 +1468,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
        wake_up_all(&ci->i_cap_wq);
        if (invalidate)
                ceph_queue_invalidate(inode);
-       if (drop)
+       if (dirty_dropped)
                iput(inode);
        return 0;
 }
@@ -1705,11 +1721,11 @@ out:
  */
 static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
 {
-       struct ceph_mds_session *session = arg;
+       int *remaining = arg;
        struct ceph_inode_info *ci = ceph_inode(inode);
        int used, wanted, oissued, mine;
 
-       if (session->s_trim_caps <= 0)
+       if (*remaining <= 0)
                return -1;
 
        spin_lock(&ci->i_ceph_lock);
@@ -1746,7 +1762,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
        if (oissued) {
                /* we aren't the only cap.. just remove us */
                __ceph_remove_cap(cap, true);
-               session->s_trim_caps--;
+               (*remaining)--;
        } else {
                struct dentry *dentry;
                /* try dropping referring dentries */
@@ -1758,7 +1774,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
                        d_prune_aliases(inode);
                        count = atomic_read(&inode->i_count);
                        if (count == 1)
-                               session->s_trim_caps--;
+                               (*remaining)--;
                        dout("trim_caps_cb %p cap %p pruned, count now %d\n",
                             inode, cap, count);
                } else {
@@ -1784,12 +1800,12 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc,
        dout("trim_caps mds%d start: %d / %d, trim %d\n",
             session->s_mds, session->s_nr_caps, max_caps, trim_caps);
        if (trim_caps > 0) {
-               session->s_trim_caps = trim_caps;
-               ceph_iterate_session_caps(session, trim_caps_cb, session);
+               int remaining = trim_caps;
+
+               ceph_iterate_session_caps(session, trim_caps_cb, &remaining);
                dout("trim_caps mds%d done: %d / %d, trimmed %d\n",
                     session->s_mds, session->s_nr_caps, max_caps,
-                       trim_caps - session->s_trim_caps);
-               session->s_trim_caps = 0;
+                       trim_caps - remaining);
        }
 
        ceph_flush_cap_releases(mdsc, session);
@@ -3015,18 +3031,23 @@ bad:
        pr_err("mdsc_handle_forward decode error err=%d\n", err);
 }
 
-static int __decode_and_drop_session_metadata(void **p, void *end)
+static int __decode_session_metadata(void **p, void *end,
+                                    bool *blacklisted)
 {
        /* map<string,string> */
        u32 n;
+       bool err_str;
        ceph_decode_32_safe(p, end, n, bad);
        while (n-- > 0) {
                u32 len;
                ceph_decode_32_safe(p, end, len, bad);
                ceph_decode_need(p, end, len, bad);
+               err_str = !strncmp(*p, "error_string", len);
                *p += len;
                ceph_decode_32_safe(p, end, len, bad);
                ceph_decode_need(p, end, len, bad);
+               if (err_str && strnstr(*p, "blacklisted", len))
+                       *blacklisted = true;
                *p += len;
        }
        return 0;
@@ -3050,6 +3071,7 @@ static void handle_session(struct ceph_mds_session *session,
        u64 seq;
        unsigned long features = 0;
        int wake = 0;
+       bool blacklisted = false;
 
        /* decode */
        ceph_decode_need(&p, end, sizeof(*h), bad);
@@ -3062,7 +3084,7 @@ static void handle_session(struct ceph_mds_session *session,
        if (msg_version >= 3) {
                u32 len;
                /* version >= 2, metadata */
-               if (__decode_and_drop_session_metadata(&p, end) < 0)
+               if (__decode_session_metadata(&p, end, &blacklisted) < 0)
                        goto bad;
                /* version >= 3, feature bits */
                ceph_decode_32_safe(&p, end, len, bad);
@@ -3149,6 +3171,8 @@ static void handle_session(struct ceph_mds_session *session,
                session->s_state = CEPH_MDS_SESSION_REJECTED;
                cleanup_session_requests(mdsc, session);
                remove_session_caps(session);
+               if (blacklisted)
+                       mdsc->fsc->blacklisted = true;
                wake = 2; /* for good measure */
                break;
 
@@ -3998,7 +4022,27 @@ static void lock_unlock_sessions(struct ceph_mds_client *mdsc)
        mutex_unlock(&mdsc->mutex);
 }
 
+static void maybe_recover_session(struct ceph_mds_client *mdsc)
+{
+       struct ceph_fs_client *fsc = mdsc->fsc;
+
+       if (!ceph_test_mount_opt(fsc, CLEANRECOVER))
+               return;
+
+       if (READ_ONCE(fsc->mount_state) != CEPH_MOUNT_MOUNTED)
+               return;
+
+       if (!READ_ONCE(fsc->blacklisted))
+               return;
+
+       if (fsc->last_auto_reconnect &&
+           time_before(jiffies, fsc->last_auto_reconnect + HZ * 60 * 30))
+               return;
 
+       pr_info("auto reconnect after blacklisted\n");
+       fsc->last_auto_reconnect = jiffies;
+       ceph_force_reconnect(fsc->sb);
+}
 
 /*
  * delayed work -- periodically trim expired leases, renew caps with mds
@@ -4044,7 +4088,9 @@ static void delayed_work(struct work_struct *work)
                                pr_info("mds%d hung\n", s->s_mds);
                        }
                }
-               if (s->s_state < CEPH_MDS_SESSION_OPEN) {
+               if (s->s_state == CEPH_MDS_SESSION_NEW ||
+                   s->s_state == CEPH_MDS_SESSION_RESTARTING ||
+                   s->s_state == CEPH_MDS_SESSION_REJECTED) {
                        /* this mds is failed or recovering, just wait */
                        ceph_put_mds_session(s);
                        continue;
@@ -4072,6 +4118,8 @@ static void delayed_work(struct work_struct *work)
 
        ceph_trim_snapid_map(mdsc);
 
+       maybe_recover_session(mdsc);
+
        schedule_delayed(mdsc);
 }
 
@@ -4355,7 +4403,12 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc)
                session = __ceph_lookup_mds_session(mdsc, mds);
                if (!session)
                        continue;
+
+               if (session->s_state == CEPH_MDS_SESSION_REJECTED)
+                       __unregister_session(mdsc, session);
+               __wake_requests(mdsc, &session->s_waiting);
                mutex_unlock(&mdsc->mutex);
+
                mutex_lock(&session->s_mutex);
                __close_session(mdsc, session);
                if (session->s_state == CEPH_MDS_SESSION_CLOSING) {
@@ -4364,6 +4417,7 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc)
                }
                mutex_unlock(&session->s_mutex);
                ceph_put_mds_session(session);
+
                mutex_lock(&mdsc->mutex);
                kick_requests(mdsc, mds);
        }
index f7c8603..5cd131b 100644 (file)
@@ -148,9 +148,9 @@ enum {
        CEPH_MDS_SESSION_OPENING = 2,
        CEPH_MDS_SESSION_OPEN = 3,
        CEPH_MDS_SESSION_HUNG = 4,
-       CEPH_MDS_SESSION_CLOSING = 5,
-       CEPH_MDS_SESSION_RESTARTING = 6,
-       CEPH_MDS_SESSION_RECONNECTING = 7,
+       CEPH_MDS_SESSION_RESTARTING = 5,
+       CEPH_MDS_SESSION_RECONNECTING = 6,
+       CEPH_MDS_SESSION_CLOSING = 7,
        CEPH_MDS_SESSION_REJECTED = 8,
 };
 
@@ -176,7 +176,7 @@ struct ceph_mds_session {
        spinlock_t        s_cap_lock;
        struct list_head  s_caps;     /* all caps issued by this session */
        struct ceph_cap  *s_cap_iterator;
-       int               s_nr_caps, s_trim_caps;
+       int               s_nr_caps;
        int               s_num_cap_releases;
        int               s_cap_reconnect;
        int               s_readonly;
index 377fafc..edfd643 100644 (file)
@@ -143,6 +143,7 @@ enum {
        Opt_snapdirname,
        Opt_mds_namespace,
        Opt_fscache_uniq,
+       Opt_recover_session,
        Opt_last_string,
        /* string args above */
        Opt_dirstat,
@@ -184,6 +185,7 @@ static match_table_t fsopt_tokens = {
        /* int args above */
        {Opt_snapdirname, "snapdirname=%s"},
        {Opt_mds_namespace, "mds_namespace=%s"},
+       {Opt_recover_session, "recover_session=%s"},
        {Opt_fscache_uniq, "fsc=%s"},
        /* string args above */
        {Opt_dirstat, "dirstat"},
@@ -254,6 +256,17 @@ static int parse_fsopt_token(char *c, void *private)
                if (!fsopt->mds_namespace)
                        return -ENOMEM;
                break;
+       case Opt_recover_session:
+               if (!strncmp(argstr[0].from, "no",
+                            argstr[0].to - argstr[0].from)) {
+                       fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER;
+               } else if (!strncmp(argstr[0].from, "clean",
+                                   argstr[0].to - argstr[0].from)) {
+                       fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER;
+               } else {
+                       return -EINVAL;
+               }
+               break;
        case Opt_fscache_uniq:
                kfree(fsopt->fscache_uniq);
                fsopt->fscache_uniq = kstrndup(argstr[0].from,
@@ -576,6 +589,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 
        if (fsopt->mds_namespace)
                seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
+
+       if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
+               seq_show_option(m, "recover_session", "clean");
+
        if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
                seq_printf(m, ",wsize=%d", fsopt->wsize);
        if (fsopt->rsize != CEPH_MAX_READ_SIZE)
@@ -664,6 +681,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 
        fsc->sb = NULL;
        fsc->mount_state = CEPH_MOUNT_MOUNTING;
+       fsc->filp_gen = 1;
 
        atomic_long_set(&fsc->writeback_count, 0);
 
@@ -713,6 +731,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
 {
        dout("destroy_fs_client %p\n", fsc);
 
+       ceph_mdsc_destroy(fsc);
        destroy_workqueue(fsc->inode_wq);
        destroy_workqueue(fsc->cap_wq);
 
@@ -829,7 +848,7 @@ static void ceph_umount_begin(struct super_block *sb)
        fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
        ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
        ceph_mdsc_force_umount(fsc->mdsc);
-       return;
+       fsc->filp_gen++; // invalidate open files
 }
 
 static int ceph_remount(struct super_block *sb, int *flags, char *data)
@@ -1089,7 +1108,6 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
        }
 
        if (ceph_sb_to_client(sb) != fsc) {
-               ceph_mdsc_destroy(fsc);
                destroy_fs_client(fsc);
                fsc = ceph_sb_to_client(sb);
                dout("get_sb got existing client %p\n", fsc);
@@ -1115,7 +1133,6 @@ out_splat:
        goto out_final;
 
 out:
-       ceph_mdsc_destroy(fsc);
        destroy_fs_client(fsc);
 out_final:
        dout("ceph_mount fail %ld\n", PTR_ERR(res));
@@ -1139,8 +1156,6 @@ static void ceph_kill_sb(struct super_block *s)
 
        ceph_fscache_unregister_fs(fsc);
 
-       ceph_mdsc_destroy(fsc);
-
        destroy_fs_client(fsc);
        free_anon_bdev(dev);
 }
@@ -1154,6 +1169,33 @@ static struct file_system_type ceph_fs_type = {
 };
 MODULE_ALIAS_FS("ceph");
 
+int ceph_force_reconnect(struct super_block *sb)
+{
+       struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
+       int err = 0;
+
+       ceph_umount_begin(sb); /* sets CEPH_MOUNT_SHUTDOWN, aborts osd requests */
+
+       /* Make sure all page caches get invalidated;
+        * see remove_session_caps_cb(). */
+       flush_workqueue(fsc->inode_wq);
+
+       /* In case we were blacklisted. This also resets
+        * all mon/osd connections. */
+       ceph_reset_client_addr(fsc->client);
+
+       ceph_osdc_clear_abort_err(&fsc->client->osdc);
+
+       fsc->blacklisted = false;
+       fsc->mount_state = CEPH_MOUNT_MOUNTED; /* leave the SHUTDOWN state set above */
+
+       if (sb->s_root) {
+               err = __ceph_do_getattr(d_inode(sb->s_root), NULL,
+                                       CEPH_STAT_CAP_INODE, true);
+       }
+       return err;
+}
+
 static int __init init_ceph(void)
 {
        int ret = init_caches();
index 6b9f1ee..f98d924 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/posix_acl.h>
 #include <linux/refcount.h>
+#include <linux/security.h>
 
 #include <linux/ceph/libceph.h>
 
@@ -31,6 +32,7 @@
 #define CEPH_BLOCK_SHIFT   22  /* 4 MB */
 #define CEPH_BLOCK         (1 << CEPH_BLOCK_SHIFT)
 
+#define CEPH_MOUNT_OPT_CLEANRECOVER    (1<<1) /* auto reconnect (clean mode) after blacklisted */
 #define CEPH_MOUNT_OPT_DIRSTAT         (1<<4) /* `cat dirname` for stats */
 #define CEPH_MOUNT_OPT_RBYTES          (1<<5) /* dir st_bytes = rbytes */
 #define CEPH_MOUNT_OPT_NOASYNCREADDIR  (1<<7) /* no dcache readdir */
@@ -101,6 +103,11 @@ struct ceph_fs_client {
        struct ceph_client *client;
 
        unsigned long mount_state;
+
+       unsigned long last_auto_reconnect;
+       bool blacklisted;
+
+       u32 filp_gen;
        loff_t max_file_size;
 
        struct ceph_mds_client *mdsc;
@@ -395,6 +402,8 @@ struct ceph_inode_info {
        struct fscache_cookie *fscache;
        u32 i_fscache_gen;
 #endif
+       errseq_t i_meta_err;
+
        struct inode vfs_inode; /* at end */
 };
 
@@ -499,17 +508,16 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
 #define CEPH_I_DIR_ORDERED     (1 << 0)  /* dentries in dir are ordered */
 #define CEPH_I_NODELAY         (1 << 1)  /* do not delay cap release */
 #define CEPH_I_FLUSH           (1 << 2)  /* do not delay flush of dirty metadata */
-#define CEPH_I_NOFLUSH         (1 << 3)  /* do not flush dirty caps */
-#define CEPH_I_POOL_PERM       (1 << 4)  /* pool rd/wr bits are valid */
-#define CEPH_I_POOL_RD         (1 << 5)  /* can read from pool */
-#define CEPH_I_POOL_WR         (1 << 6)  /* can write to pool */
-#define CEPH_I_SEC_INITED      (1 << 7)  /* security initialized */
-#define CEPH_I_CAP_DROPPED     (1 << 8)  /* caps were forcibly dropped */
-#define CEPH_I_KICK_FLUSH      (1 << 9)  /* kick flushing caps */
-#define CEPH_I_FLUSH_SNAPS     (1 << 10) /* need flush snapss */
-#define CEPH_I_ERROR_WRITE     (1 << 11) /* have seen write errors */
-#define CEPH_I_ERROR_FILELOCK  (1 << 12) /* have seen file lock errors */
-
+#define CEPH_I_POOL_PERM       (1 << 3)  /* pool rd/wr bits are valid */
+#define CEPH_I_POOL_RD         (1 << 4)  /* can read from pool */
+#define CEPH_I_POOL_WR         (1 << 5)  /* can write to pool */
+#define CEPH_I_SEC_INITED      (1 << 6)  /* security initialized */
+#define CEPH_I_CAP_DROPPED     (1 << 7)  /* caps were forcibly dropped */
+#define CEPH_I_KICK_FLUSH      (1 << 8)  /* kick flushing caps */
+#define CEPH_I_FLUSH_SNAPS     (1 << 9)  /* need flush snaps */
+#define CEPH_I_ERROR_WRITE     (1 << 10) /* have seen write errors */
+#define CEPH_I_ERROR_FILELOCK  (1 << 11) /* have seen file lock errors */
+#define CEPH_I_ODIRECT         (1 << 12) /* inode in direct I/O mode */
 
 /*
  * Masks of ceph inode work.
@@ -703,6 +711,10 @@ struct ceph_file_info {
 
        spinlock_t rw_contexts_lock;
        struct list_head rw_contexts;
+
+       errseq_t meta_err;
+       u32 filp_gen;
+       atomic_t num_locks;
 };
 
 struct ceph_dir_file_info {
@@ -842,7 +854,8 @@ static inline int default_congestion_kb(void)
 }
 
 
-
+/* super.c */
+extern int ceph_force_reconnect(struct super_block *sb);
 /* snap.c */
 struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc,
                                               u64 ino);
@@ -959,7 +972,10 @@ static inline bool ceph_security_xattr_wanted(struct inode *in)
 #ifdef CONFIG_CEPH_FS_SECURITY_LABEL
 extern int ceph_security_init_secctx(struct dentry *dentry, umode_t mode,
                                     struct ceph_acl_sec_ctx *ctx);
-extern void ceph_security_invalidate_secctx(struct inode *inode);
+static inline void ceph_security_invalidate_secctx(struct inode *inode)
+{
+       security_inode_invalidate_secctx(inode);
+}
 #else
 static inline int ceph_security_init_secctx(struct dentry *dentry, umode_t mode,
                                            struct ceph_acl_sec_ctx *ctx)
@@ -1039,7 +1055,6 @@ extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
                                    struct ceph_mds_session *session);
 extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
                                             int mds);
-extern int ceph_get_cap_mds(struct inode *inode);
 extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
 extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
 extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
@@ -1058,9 +1073,9 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
                                      struct inode *dir,
                                      int mds, int drop, int unless);
 
-extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
+extern int ceph_get_caps(struct file *filp, int need, int want,
                         loff_t endoff, int *got, struct page **pinned_page);
-extern int ceph_try_get_caps(struct ceph_inode_info *ci,
+extern int ceph_try_get_caps(struct inode *inode,
                             int need, int want, bool nonblock, int *got);
 
 /* for counting open files by mode */
@@ -1071,7 +1086,7 @@ extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode);
 extern const struct address_space_operations ceph_aops;
 extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
 extern int ceph_uninline_data(struct file *filp, struct page *locked_page);
-extern int ceph_pool_perm_check(struct ceph_inode_info *ci, int need);
+extern int ceph_pool_perm_check(struct inode *inode, int need);
 extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc);
 
 /* file.c */
index 939eab7..cb18ee6 100644 (file)
@@ -20,7 +20,8 @@ static int __remove_xattr(struct ceph_inode_info *ci,
 
 static bool ceph_is_valid_xattr(const char *name)
 {
-       return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
+       return !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) ||
+              !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
               !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
               !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
 }
@@ -892,7 +893,8 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
        memcpy(value, xattr->val, xattr->val_len);
 
        if (current->journal_info &&
-           !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
+           !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
+           security_ismaclabel(name + XATTR_SECURITY_PREFIX_LEN))
                ci->i_ceph_flags |= CEPH_I_SEC_INITED;
 out:
        spin_unlock(&ci->i_ceph_lock);
@@ -903,11 +905,9 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
 {
        struct inode *inode = d_inode(dentry);
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
        bool len_only = (size == 0);
        u32 namelen;
        int err;
-       int i;
 
        spin_lock(&ci->i_ceph_lock);
        dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
@@ -936,33 +936,6 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
                names = __copy_xattr_names(ci, names);
                size -= namelen;
        }
-
-
-       /* virtual xattr names, too */
-       if (vxattrs) {
-               for (i = 0; vxattrs[i].name; i++) {
-                       size_t this_len;
-
-                       if (vxattrs[i].flags & VXATTR_FLAG_HIDDEN)
-                               continue;
-                       if (vxattrs[i].exists_cb && !vxattrs[i].exists_cb(ci))
-                               continue;
-
-                       this_len = strlen(vxattrs[i].name) + 1;
-                       namelen += this_len;
-                       if (len_only)
-                               continue;
-
-                       if (this_len > size) {
-                               err = -ERANGE;
-                               goto out;
-                       }
-
-                       memcpy(names, vxattrs[i].name, this_len);
-                       names += this_len;
-                       size -= this_len;
-               }
-       }
        err = namelen;
 out:
        spin_unlock(&ci->i_ceph_lock);
@@ -1293,42 +1266,8 @@ out:
                ceph_pagelist_release(pagelist);
        return err;
 }
-
-void ceph_security_invalidate_secctx(struct inode *inode)
-{
-       security_inode_invalidate_secctx(inode);
-}
-
-static int ceph_xattr_set_security_label(const struct xattr_handler *handler,
-                                   struct dentry *unused, struct inode *inode,
-                                   const char *key, const void *buf,
-                                   size_t buflen, int flags)
-{
-       if (security_ismaclabel(key)) {
-               const char *name = xattr_full_name(handler, key);
-               return __ceph_setxattr(inode, name, buf, buflen, flags);
-       }
-       return  -EOPNOTSUPP;
-}
-
-static int ceph_xattr_get_security_label(const struct xattr_handler *handler,
-                                   struct dentry *unused, struct inode *inode,
-                                   const char *key, void *buf, size_t buflen)
-{
-       if (security_ismaclabel(key)) {
-               const char *name = xattr_full_name(handler, key);
-               return __ceph_getxattr(inode, name, buf, buflen);
-       }
-       return  -EOPNOTSUPP;
-}
-
-static const struct xattr_handler ceph_security_label_handler = {
-       .prefix = XATTR_SECURITY_PREFIX,
-       .get    = ceph_xattr_get_security_label,
-       .set    = ceph_xattr_set_security_label,
-};
-#endif
-#endif
+#endif /* CONFIG_CEPH_FS_SECURITY_LABEL */
+#endif /* CONFIG_SECURITY */
 
 void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx)
 {
@@ -1352,9 +1291,6 @@ const struct xattr_handler *ceph_xattr_handlers[] = {
        &posix_acl_access_xattr_handler,
        &posix_acl_default_xattr_handler,
 #endif
-#ifdef CONFIG_CEPH_FS_SECURITY_LABEL
-       &ceph_security_label_handler,
-#endif
        &ceph_other_xattr_handler,
        NULL,
 };
index cd07e53..3c91fa9 100644 (file)
@@ -1654,15 +1654,17 @@ static struct smbd_connection *_smbd_get_connection(
 
        info->send_cq = NULL;
        info->recv_cq = NULL;
-       info->send_cq = ib_alloc_cq(info->id->device, info,
-                       info->send_credit_target, 0, IB_POLL_SOFTIRQ);
+       info->send_cq =
+               ib_alloc_cq_any(info->id->device, info,
+                               info->send_credit_target, IB_POLL_SOFTIRQ);
        if (IS_ERR(info->send_cq)) {
                info->send_cq = NULL;
                goto alloc_cq_failed;
        }
 
-       info->recv_cq = ib_alloc_cq(info->id->device, info,
-                       info->receive_credit_max, 0, IB_POLL_SOFTIRQ);
+       info->recv_cq =
+               ib_alloc_cq_any(info->id->device, info,
+                               info->receive_credit_max, IB_POLL_SOFTIRQ);
        if (IS_ERR(info->recv_cq)) {
                info->recv_cq = NULL;
                goto alloc_cq_failed;
index 93e4ca6..87846aa 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/atomic.h>
 #include <linux/device.h>
 #include <linux/poll.h>
+#include <linux/security.h>
 
 #include "internal.h"
 
@@ -136,6 +137,25 @@ void debugfs_file_put(struct dentry *dentry)
 }
 EXPORT_SYMBOL_GPL(debugfs_file_put);
 
+/*
+ * Only permit access to world-readable files when the kernel is locked down;
+ * files with ways to write or alter them are excluded, as root can bypass the
+ * permissions check. Returns 0, or security_locked_down()'s error code as is.
+ */
+static int debugfs_is_locked_down(struct inode *inode,
+                                 struct file *filp,
+                                 const struct file_operations *real_fops)
+{
+       if ((inode->i_mode & 07777) == 0444 &&
+           !(filp->f_mode & FMODE_WRITE) &&
+           !real_fops->unlocked_ioctl &&
+           !real_fops->compat_ioctl &&
+           !real_fops->mmap)
+               return 0; /* read-only, unalterable file: always allowed */
+
+       return security_locked_down(LOCKDOWN_DEBUGFS); /* int, not bool: keep the errno */
+}
+
 static int open_proxy_open(struct inode *inode, struct file *filp)
 {
        struct dentry *dentry = F_DENTRY(filp);
@@ -147,6 +167,11 @@ static int open_proxy_open(struct inode *inode, struct file *filp)
                return r == -EIO ? -ENOENT : r;
 
        real_fops = debugfs_real_fops(filp);
+
+       r = debugfs_is_locked_down(inode, filp, real_fops);
+       if (r)
+               goto out;
+
        real_fops = fops_get(real_fops);
        if (!real_fops) {
                /* Huh? Module did not clean up after itself at exit? */
@@ -272,6 +297,11 @@ static int full_proxy_open(struct inode *inode, struct file *filp)
                return r == -EIO ? -ENOENT : r;
 
        real_fops = debugfs_real_fops(filp);
+
+       r = debugfs_is_locked_down(inode, filp, real_fops);
+       if (r)
+               goto out;
+
        real_fops = fops_get(real_fops);
        if (!real_fops) {
                /* Huh? Module did not cleanup after itself at exit? */
index 042b688..7b975db 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/parser.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
+#include <linux/security.h>
 
 #include "internal.h"
 
@@ -35,6 +36,32 @@ static struct vfsmount *debugfs_mount;
 static int debugfs_mount_count;
 static bool debugfs_registered;
 
+/*
+ * Don't allow access attributes to be changed whilst the kernel is locked down
+ * so that we can use the file mode as part of a heuristic to determine whether
+ * to lock down individual files.
+ */
+static int debugfs_setattr(struct dentry *dentry, struct iattr *ia)
+{
+       int ret = security_locked_down(LOCKDOWN_DEBUGFS);
+
+       if (ret && (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)))
+               return ret;
+       return simple_setattr(dentry, ia);
+}
+
+static const struct inode_operations debugfs_file_inode_operations = {
+       .setattr        = debugfs_setattr,
+};
+static const struct inode_operations debugfs_dir_inode_operations = {
+       .lookup         = simple_lookup,
+       .setattr        = debugfs_setattr,
+};
+static const struct inode_operations debugfs_symlink_inode_operations = {
+       .get_link       = simple_get_link,
+       .setattr        = debugfs_setattr,
+};
+
 static struct inode *debugfs_get_inode(struct super_block *sb)
 {
        struct inode *inode = new_inode(sb);
@@ -369,6 +396,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode,
        inode->i_mode = mode;
        inode->i_private = data;
 
+       inode->i_op = &debugfs_file_inode_operations;
        inode->i_fop = proxy_fops;
        dentry->d_fsdata = (void *)((unsigned long)real_fops |
                                DEBUGFS_FSDATA_IS_REAL_FOPS_BIT);
@@ -532,7 +560,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
        }
 
        inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
-       inode->i_op = &simple_dir_inode_operations;
+       inode->i_op = &debugfs_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
 
        /* directory inodes start off with i_nlink == 2 (for "." entry) */
@@ -632,7 +660,7 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
                return failed_creating(dentry);
        }
        inode->i_mode = S_IFLNK | S_IRWXUGO;
-       inode->i_op = &simple_symlink_inode_operations;
+       inode->i_op = &debugfs_symlink_inode_operations;
        inode->i_link = link;
        d_instantiate(dentry, inode);
        return end_creating(dentry);
index f7f6a14..555e93c 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1033,6 +1033,7 @@ static int exec_mmap(struct mm_struct *mm)
        }
        task_lock(tsk);
        active_mm = tsk->active_mm;
+       membarrier_exec_mmap(mm);
        tsk->mm = mm;
        tsk->active_mm = mm;
        activate_mm(active_mm, mm);
@@ -1825,7 +1826,6 @@ static int __do_execve_file(int fd, struct filename *filename,
        /* execve succeeded */
        current->fs->in_exec = 0;
        current->in_execve = 0;
-       membarrier_execve(current);
        rseq_execve(current);
        acct_update_integrals(current);
        task_numa_free(current, false);
index 547c165..e0cc551 100644 (file)
@@ -490,9 +490,7 @@ void ext2_free_blocks (struct inode * inode, unsigned long block,
        struct ext2_super_block * es = sbi->s_es;
        unsigned freed = 0, group_freed;
 
-       if (block < le32_to_cpu(es->s_first_data_block) ||
-           block + count < block ||
-           block + count > le32_to_cpu(es->s_blocks_count)) {
+       if (!ext2_data_block_valid(sbi, block, count)) {
                ext2_error (sb, "ext2_free_blocks",
                            "Freeing blocks not in datazone - "
                            "block = %lu, count = %lu", block, count);
@@ -1203,13 +1201,13 @@ int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk,
                          unsigned int count)
 {
        if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
-           (start_blk + count < start_blk) ||
-           (start_blk > le32_to_cpu(sbi->s_es->s_blocks_count)))
+           (start_blk + count - 1 < start_blk) ||
+           (start_blk + count - 1 >= le32_to_cpu(sbi->s_es->s_blocks_count)))
                return 0;
 
        /* Ensure we do not step over superblock */
        if ((start_blk <= sbi->s_sb_block) &&
-           (start_blk + count >= sbi->s_sb_block))
+           (start_blk + count - 1 >= sbi->s_sb_block))
                return 0;
 
        return 1;
index baa36c6..30c630d 100644 (file)
@@ -162,8 +162,7 @@ static void ext2_put_super (struct super_block * sb)
        }
        db_count = sbi->s_gdb_count;
        for (i = 0; i < db_count; i++)
-               if (sbi->s_group_desc[i])
-                       brelse (sbi->s_group_desc[i]);
+               brelse(sbi->s_group_desc[i]);
        kfree(sbi->s_group_desc);
        kfree(sbi->s_debts);
        percpu_counter_destroy(&sbi->s_freeblocks_counter);
index 79369c1..0456bc9 100644 (file)
@@ -794,7 +794,7 @@ ext2_xattr_delete_inode(struct inode *inode)
        if (!EXT2_I(inode)->i_file_acl)
                goto cleanup;
 
-       if (!ext2_data_block_valid(sbi, EXT2_I(inode)->i_file_acl, 0)) {
+       if (!ext2_data_block_valid(sbi, EXT2_I(inode)->i_file_acl, 1)) {
                ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
                        "inode %ld: xattr block %d is out of data blocks range",
                        inode->i_ino, EXT2_I(inode)->i_file_acl);
index 8e83741..d4d4fdf 100644 (file)
@@ -38,6 +38,7 @@ int __init ext4_init_system_zone(void)
 
 void ext4_exit_system_zone(void)
 {
+       rcu_barrier();
        kmem_cache_destroy(ext4_system_zone_cachep);
 }
 
@@ -49,17 +50,26 @@ static inline int can_merge(struct ext4_system_zone *entry1,
        return 0;
 }
 
+static void release_system_zone(struct ext4_system_blocks *system_blks)
+{
+       struct ext4_system_zone *entry, *n;
+
+       rbtree_postorder_for_each_entry_safe(entry, n,
+                               &system_blks->root, node)
+               kmem_cache_free(ext4_system_zone_cachep, entry);
+}
+
 /*
  * Mark a range of blocks as belonging to the "system zone" --- that
  * is, filesystem metadata blocks which should never be used by
  * inodes.
  */
-static int add_system_zone(struct ext4_sb_info *sbi,
+static int add_system_zone(struct ext4_system_blocks *system_blks,
                           ext4_fsblk_t start_blk,
                           unsigned int count)
 {
        struct ext4_system_zone *new_entry = NULL, *entry;
-       struct rb_node **n = &sbi->system_blks.rb_node, *node;
+       struct rb_node **n = &system_blks->root.rb_node, *node;
        struct rb_node *parent = NULL, *new_node = NULL;
 
        while (*n) {
@@ -91,7 +101,7 @@ static int add_system_zone(struct ext4_sb_info *sbi,
                new_node = &new_entry->node;
 
                rb_link_node(new_node, parent, n);
-               rb_insert_color(new_node, &sbi->system_blks);
+               rb_insert_color(new_node, &system_blks->root);
        }
 
        /* Can we merge to the left? */
@@ -101,7 +111,7 @@ static int add_system_zone(struct ext4_sb_info *sbi,
                if (can_merge(entry, new_entry)) {
                        new_entry->start_blk = entry->start_blk;
                        new_entry->count += entry->count;
-                       rb_erase(node, &sbi->system_blks);
+                       rb_erase(node, &system_blks->root);
                        kmem_cache_free(ext4_system_zone_cachep, entry);
                }
        }
@@ -112,7 +122,7 @@ static int add_system_zone(struct ext4_sb_info *sbi,
                entry = rb_entry(node, struct ext4_system_zone, node);
                if (can_merge(new_entry, entry)) {
                        new_entry->count += entry->count;
-                       rb_erase(node, &sbi->system_blks);
+                       rb_erase(node, &system_blks->root);
                        kmem_cache_free(ext4_system_zone_cachep, entry);
                }
        }
@@ -126,7 +136,7 @@ static void debug_print_tree(struct ext4_sb_info *sbi)
        int first = 1;
 
        printk(KERN_INFO "System zones: ");
-       node = rb_first(&sbi->system_blks);
+       node = rb_first(&sbi->system_blks->root);
        while (node) {
                entry = rb_entry(node, struct ext4_system_zone, node);
                printk(KERN_CONT "%s%llu-%llu", first ? "" : ", ",
@@ -137,7 +147,47 @@ static void debug_print_tree(struct ext4_sb_info *sbi)
        printk(KERN_CONT "\n");
 }
 
-static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino)
+/*
+ * Returns 1 if the passed-in block region (start_blk,
+ * start_blk+count) is valid; 0 if some part of the block region
+ * overlaps with filesystem metadata blocks.
+ */
+static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi,
+                                    struct ext4_system_blocks *system_blks,
+                                    ext4_fsblk_t start_blk,
+                                    unsigned int count)
+{
+       struct ext4_system_zone *entry;
+       struct rb_node *n;
+
+       if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
+           (start_blk + count < start_blk) ||
+           (start_blk + count > ext4_blocks_count(sbi->s_es))) {
+               sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
+               return 0;
+       }
+
+       if (system_blks == NULL)
+               return 1;
+
+       n = system_blks->root.rb_node;
+       while (n) {
+               entry = rb_entry(n, struct ext4_system_zone, node);
+               if (start_blk + count - 1 < entry->start_blk)
+                       n = n->rb_left;
+               else if (start_blk >= (entry->start_blk + entry->count))
+                       n = n->rb_right;
+               else {
+                       sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
+                       return 0;
+               }
+       }
+       return 1;
+}
+
+static int ext4_protect_reserved_inode(struct super_block *sb,
+                                      struct ext4_system_blocks *system_blks,
+                                      u32 ino)
 {
        struct inode *inode;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -163,14 +213,15 @@ static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino)
                if (n == 0) {
                        i++;
                } else {
-                       if (!ext4_data_block_valid(sbi, map.m_pblk, n)) {
+                       if (!ext4_data_block_valid_rcu(sbi, system_blks,
+                                               map.m_pblk, n)) {
                                ext4_error(sb, "blocks %llu-%llu from inode %u "
                                           "overlap system zone", map.m_pblk,
                                           map.m_pblk + map.m_len - 1, ino);
                                err = -EFSCORRUPTED;
                                break;
                        }
-                       err = add_system_zone(sbi, map.m_pblk, n);
+                       err = add_system_zone(system_blks, map.m_pblk, n);
                        if (err < 0)
                                break;
                        i += n;
@@ -180,94 +231,130 @@ static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino)
        return err;
 }
 
+static void ext4_destroy_system_zone(struct rcu_head *rcu)
+{
+       struct ext4_system_blocks *system_blks;
+
+       system_blks = container_of(rcu, struct ext4_system_blocks, rcu);
+       release_system_zone(system_blks);
+       kfree(system_blks);
+}
+
+/*
+ * Build system zone rbtree which is used for block validity checking.
+ *
+ * The update of system_blks pointer in this function is protected by
+ * sb->s_umount semaphore. However we have to be careful as we can be
+ * racing with ext4_data_block_valid() calls reading system_blks rbtree
+ * protected only by RCU. That's why we first build the rbtree and then
+ * swap it in place.
+ */
 int ext4_setup_system_zone(struct super_block *sb)
 {
        ext4_group_t ngroups = ext4_get_groups_count(sb);
        struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_system_blocks *system_blks;
        struct ext4_group_desc *gdp;
        ext4_group_t i;
        int flex_size = ext4_flex_bg_size(sbi);
        int ret;
 
        if (!test_opt(sb, BLOCK_VALIDITY)) {
-               if (sbi->system_blks.rb_node)
+               if (sbi->system_blks)
                        ext4_release_system_zone(sb);
                return 0;
        }
-       if (sbi->system_blks.rb_node)
+       if (sbi->system_blks)
                return 0;
 
+       system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL);
+       if (!system_blks)
+               return -ENOMEM;
+
        for (i=0; i < ngroups; i++) {
                cond_resched();
                if (ext4_bg_has_super(sb, i) &&
                    ((i < 5) || ((i % flex_size) == 0)))
-                       add_system_zone(sbi, ext4_group_first_block_no(sb, i),
+                       add_system_zone(system_blks,
+                                       ext4_group_first_block_no(sb, i),
                                        ext4_bg_num_gdb(sb, i) + 1);
                gdp = ext4_get_group_desc(sb, i, NULL);
-               ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
+               ret = add_system_zone(system_blks,
+                               ext4_block_bitmap(sb, gdp), 1);
                if (ret)
-                       return ret;
-               ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
+                       goto err;
+               ret = add_system_zone(system_blks,
+                               ext4_inode_bitmap(sb, gdp), 1);
                if (ret)
-                       return ret;
-               ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
+                       goto err;
+               ret = add_system_zone(system_blks,
+                               ext4_inode_table(sb, gdp),
                                sbi->s_itb_per_group);
                if (ret)
-                       return ret;
+                       goto err;
        }
        if (ext4_has_feature_journal(sb) && sbi->s_es->s_journal_inum) {
-               ret = ext4_protect_reserved_inode(sb,
+               ret = ext4_protect_reserved_inode(sb, system_blks,
                                le32_to_cpu(sbi->s_es->s_journal_inum));
                if (ret)
-                       return ret;
+                       goto err;
        }
 
+       /*
+        * System blks rbtree complete, announce it once to prevent racing
+        * with ext4_data_block_valid() accessing the rbtree at the same
+        * time.
+        */
+       rcu_assign_pointer(sbi->system_blks, system_blks);
+
        if (test_opt(sb, DEBUG))
                debug_print_tree(sbi);
        return 0;
+err:
+       release_system_zone(system_blks);
+       kfree(system_blks);
+       return ret;
 }
 
-/* Called when the filesystem is unmounted */
+/*
+ * Called when the filesystem is unmounted or when remounting it with
+ * noblock_validity specified.
+ *
+ * The update of system_blks pointer in this function is protected by
+ * sb->s_umount semaphore. However we have to be careful as we can be
+ * racing with ext4_data_block_valid() calls reading system_blks rbtree
+ * protected only by RCU. So we first clear the system_blks pointer and
+ * then free the rbtree only after RCU grace period expires.
+ */
 void ext4_release_system_zone(struct super_block *sb)
 {
-       struct ext4_system_zone *entry, *n;
+       struct ext4_system_blocks *system_blks;
 
-       rbtree_postorder_for_each_entry_safe(entry, n,
-                       &EXT4_SB(sb)->system_blks, node)
-               kmem_cache_free(ext4_system_zone_cachep, entry);
+       system_blks = rcu_dereference_protected(EXT4_SB(sb)->system_blks,
+                                       lockdep_is_held(&sb->s_umount));
+       rcu_assign_pointer(EXT4_SB(sb)->system_blks, NULL);
 
-       EXT4_SB(sb)->system_blks = RB_ROOT;
+       if (system_blks)
+               call_rcu(&system_blks->rcu, ext4_destroy_system_zone);
 }
 
-/*
- * Returns 1 if the passed-in block region (start_blk,
- * start_blk+count) is valid; 0 if some part of the block region
- * overlaps with filesystem metadata blocks.
- */
 int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
                          unsigned int count)
 {
-       struct ext4_system_zone *entry;
-       struct rb_node *n = sbi->system_blks.rb_node;
+       struct ext4_system_blocks *system_blks;
+       int ret;
 
-       if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
-           (start_blk + count < start_blk) ||
-           (start_blk + count > ext4_blocks_count(sbi->s_es))) {
-               sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
-               return 0;
-       }
-       while (n) {
-               entry = rb_entry(n, struct ext4_system_zone, node);
-               if (start_blk + count - 1 < entry->start_blk)
-                       n = n->rb_left;
-               else if (start_blk >= (entry->start_blk + entry->count))
-                       n = n->rb_right;
-               else {
-                       sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
-                       return 0;
-               }
-       }
-       return 1;
+       /*
+        * Lock the system zone to prevent it being released concurrently
+        * when doing a remount which inverse current "[no]block_validity"
+        * mount option.
+        */
+       rcu_read_lock();
+       system_blks = rcu_dereference(sbi->system_blks);
+       ret = ext4_data_block_valid_rcu(sbi, system_blks, start_blk,
+                                       count);
+       rcu_read_unlock();
+       return ret;
 }
 
 int ext4_check_blockref(const char *function, unsigned int line,
index 86054f3..9fdd2b2 100644 (file)
@@ -668,14 +668,15 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
                          const char *str, const struct qstr *name)
 {
        struct qstr qstr = {.name = str, .len = len };
+       struct inode *inode = dentry->d_parent->d_inode;
 
-       if (!IS_CASEFOLDED(dentry->d_parent->d_inode)) {
+       if (!IS_CASEFOLDED(inode) || !EXT4_SB(inode->i_sb)->s_encoding) {
                if (len != name->len)
                        return -1;
                return memcmp(str, name->name, len);
        }
 
-       return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr, false);
+       return ext4_ci_compare(inode, name, &qstr, false);
 }
 
 static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
@@ -685,7 +686,7 @@ static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
        unsigned char *norm;
        int len, ret = 0;
 
-       if (!IS_CASEFOLDED(dentry->d_inode))
+       if (!IS_CASEFOLDED(dentry->d_inode) || !um)
                return 0;
 
        norm = kmalloc(PATH_MAX, GFP_ATOMIC);
index 42c6e4a..03db3e7 100644 (file)
@@ -186,6 +186,14 @@ struct ext4_map_blocks {
 };
 
 /*
+ * Block validity checking, system zone rbtree.
+ */
+struct ext4_system_blocks {
+       struct rb_root root;
+       struct rcu_head rcu;
+};
+
+/*
  * Flags for ext4_io_end->flags
  */
 #define        EXT4_IO_END_UNWRITTEN   0x0001
@@ -285,6 +293,9 @@ struct ext4_io_submit {
                                  ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
 #define EXT4_LBLK_CMASK(s, lblk) ((lblk) &                             \
                                  ~((ext4_lblk_t) (s)->s_cluster_ratio - 1))
+/* Fill in the low bits to get the last block of the cluster */
+#define EXT4_LBLK_CFILL(sbi, lblk) ((lblk) |                           \
+                                   ((ext4_lblk_t) (sbi)->s_cluster_ratio - 1))
 /* Get the cluster offset */
 #define EXT4_PBLK_COFF(s, pblk) ((pblk) &                              \
                                 ((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
@@ -653,6 +664,10 @@ enum {
 #define EXT4_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
 #define EXT4_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT
 #define EXT4_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
+/* ioctl codes 19--39 are reserved for fscrypt */
+#define EXT4_IOC_CLEAR_ES_CACHE                _IO('f', 40)
+#define EXT4_IOC_GETSTATE              _IOW('f', 41, __u32)
+#define EXT4_IOC_GET_ES_CACHE          _IOWR('f', 42, struct fiemap)
 
 #define EXT4_IOC_FSGETXATTR            FS_IOC_FSGETXATTR
 #define EXT4_IOC_FSSETXATTR            FS_IOC_FSSETXATTR
@@ -666,6 +681,16 @@ enum {
 #define EXT4_GOING_FLAGS_LOGFLUSH              0x1     /* flush log but not data */
 #define EXT4_GOING_FLAGS_NOLOGFLUSH            0x2     /* don't flush log nor data */
 
+/*
+ * Flags returned by EXT4_IOC_GETSTATE
+ *
+ * We only expose to userspace a subset of the state flags in
+ * i_state_flags
+ */
+#define EXT4_STATE_FLAG_EXT_PRECACHED  0x00000001
+#define EXT4_STATE_FLAG_NEW            0x00000002
+#define EXT4_STATE_FLAG_NEWENTRY       0x00000004
+#define EXT4_STATE_FLAG_DA_ALLOC_CLOSE 0x00000008
 
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
@@ -683,6 +708,12 @@ enum {
 #define EXT4_IOC32_SETVERSION_OLD      FS_IOC32_SETVERSION
 #endif
 
+/*
+ * Returned by EXT4_IOC_GET_ES_CACHE as an additional possible flag.
+ * It indicates that the entry in extent status cache is for a hole.
+ */
+#define EXT4_FIEMAP_EXTENT_HOLE                0x08000000
+
 /* Max physical block we can address w/o extents */
 #define EXT4_MAX_BLOCK_FILE_PHYS       0xFFFFFFFF
 
@@ -812,21 +843,8 @@ static inline __le32 ext4_encode_extra_time(struct timespec64 *time)
 static inline void ext4_decode_extra_time(struct timespec64 *time,
                                          __le32 extra)
 {
-       if (unlikely(extra & cpu_to_le32(EXT4_EPOCH_MASK))) {
-
-#if 1
-               /* Handle legacy encoding of pre-1970 dates with epoch
-                * bits 1,1. (This backwards compatibility may be removed
-                * at the discretion of the ext4 developers.)
-                */
-               u64 extra_bits = le32_to_cpu(extra) & EXT4_EPOCH_MASK;
-               if (extra_bits == 3 && ((time->tv_sec) & 0x80000000) != 0)
-                       extra_bits = 0;
-               time->tv_sec += extra_bits << 32;
-#else
+       if (unlikely(extra & cpu_to_le32(EXT4_EPOCH_MASK)))
                time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32;
-#endif
-       }
        time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
 }
 
@@ -1427,7 +1445,7 @@ struct ext4_sb_info {
        int s_jquota_fmt;                       /* Format of quota to use */
 #endif
        unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
-       struct rb_root system_blks;
+       struct ext4_system_blocks __rcu *system_blks;
 
 #ifdef EXTENTS_STATS
        /* ext4 extents stats */
@@ -3267,6 +3285,9 @@ extern int ext4_ext_check_inode(struct inode *inode);
 extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        __u64 start, __u64 len);
+extern int ext4_get_es_cache(struct inode *inode,
+                            struct fiemap_extent_info *fieinfo,
+                            __u64 start, __u64 len);
 extern int ext4_ext_precache(struct inode *inode);
 extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
 extern int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
@@ -3359,6 +3380,19 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
 
 extern const struct iomap_ops ext4_iomap_ops;
 
+static inline int ext4_buffer_uptodate(struct buffer_head *bh)
+{
+       /*
+        * If the buffer has the write error flag, we have failed
+        * to write out data in the block.  In this  case, we don't
+        * have to read the block because we may read the old data
+        * successfully.
+        */
+       if (!buffer_uptodate(bh) && buffer_write_io_error(bh))
+               set_buffer_uptodate(bh);
+       return buffer_uptodate(bh);
+}
+
 #endif /* __KERNEL__ */
 
 #define EFSBADCRC      EBADMSG         /* Bad CRC detected */
index 92266a2..fb0f99d 100644 (file)
@@ -2315,6 +2315,52 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
        return err;
 }
 
+static int ext4_fill_es_cache_info(struct inode *inode,
+                                  ext4_lblk_t block, ext4_lblk_t num,
+                                  struct fiemap_extent_info *fieinfo)
+{
+       ext4_lblk_t next, end = block + num - 1;
+       struct extent_status es;
+       unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
+       unsigned int flags;
+       int err;
+
+       while (block <= end) {
+               next = 0;
+               flags = 0;
+               if (!ext4_es_lookup_extent(inode, block, &next, &es))
+                       break;
+               if (ext4_es_is_unwritten(&es))
+                       flags |= FIEMAP_EXTENT_UNWRITTEN;
+               if (ext4_es_is_delayed(&es))
+                       flags |= (FIEMAP_EXTENT_DELALLOC |
+                                 FIEMAP_EXTENT_UNKNOWN);
+               if (ext4_es_is_hole(&es))
+                       flags |= EXT4_FIEMAP_EXTENT_HOLE;
+               if (next == 0)
+                       flags |= FIEMAP_EXTENT_LAST;
+               if (flags & (FIEMAP_EXTENT_DELALLOC|
+                            EXT4_FIEMAP_EXTENT_HOLE))
+                       es.es_pblk = 0;
+               else
+                       es.es_pblk = ext4_es_pblock(&es);
+               err = fiemap_fill_next_extent(fieinfo,
+                               (__u64)es.es_lblk << blksize_bits,
+                               (__u64)es.es_pblk << blksize_bits,
+                               (__u64)es.es_len << blksize_bits,
+                               flags);
+               if (next == 0)
+                       break;
+               block = next;
+               if (err < 0)
+                       return err;
+               if (err == 1)
+                       return 0;
+       }
+       return 0;
+}
+
+
 /*
  * ext4_ext_determine_hole - determine hole around given block
  * @inode:     inode we lookup in
@@ -3813,8 +3859,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
         * illegal.
         */
        if (ee_block != map->m_lblk || ee_len > map->m_len) {
-#ifdef EXT4_DEBUG
-               ext4_warning("Inode (%ld) finished: extent logical block %llu,"
+#ifdef CONFIG_EXT4_DEBUG
+               ext4_warning(inode->i_sb, "Inode (%ld) finished: extent logical block %llu,"
                             " len %u; IO logical block %llu, len %u",
                             inode->i_ino, (unsigned long long)ee_block, ee_len,
                             (unsigned long long)map->m_lblk, map->m_len);
@@ -5017,8 +5063,6 @@ static int ext4_find_delayed_extent(struct inode *inode,
 
        return next_del;
 }
-/* fiemap flags we can handle specified here */
-#define EXT4_FIEMAP_FLAGS      (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
 
 static int ext4_xattr_fiemap(struct inode *inode,
                                struct fiemap_extent_info *fieinfo)
@@ -5055,10 +5099,16 @@ static int ext4_xattr_fiemap(struct inode *inode,
        return (error < 0 ? error : 0);
 }
 
-int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
-               __u64 start, __u64 len)
+static int _ext4_fiemap(struct inode *inode,
+                       struct fiemap_extent_info *fieinfo,
+                       __u64 start, __u64 len,
+                       int (*fill)(struct inode *, ext4_lblk_t,
+                                   ext4_lblk_t,
+                                   struct fiemap_extent_info *))
 {
        ext4_lblk_t start_blk;
+       u32 ext4_fiemap_flags = FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR;
+
        int error = 0;
 
        if (ext4_has_inline_data(inode)) {
@@ -5075,14 +5125,18 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                error = ext4_ext_precache(inode);
                if (error)
                        return error;
+               fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
        }
 
        /* fallback to generic here if not in extents fmt */
-       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) &&
+           fill == ext4_fill_fiemap_extents)
                return generic_block_fiemap(inode, fieinfo, start, len,
                        ext4_get_block);
 
-       if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
+       if (fill == ext4_fill_es_cache_info)
+               ext4_fiemap_flags &= FIEMAP_FLAG_XATTR;
+       if (fiemap_check_flags(fieinfo, ext4_fiemap_flags))
                return -EBADR;
 
        if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
@@ -5101,12 +5155,36 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                 * Walk the extent tree gathering extent information
                 * and pushing extents back to the user.
                 */
-               error = ext4_fill_fiemap_extents(inode, start_blk,
-                                                len_blks, fieinfo);
+               error = fill(inode, start_blk, len_blks, fieinfo);
        }
        return error;
 }
 
+int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+               __u64 start, __u64 len)
+{
+       return _ext4_fiemap(inode, fieinfo, start, len,
+                           ext4_fill_fiemap_extents);
+}
+
+int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo,
+                     __u64 start, __u64 len)
+{
+       if (ext4_has_inline_data(inode)) {
+               int has_inline;
+
+               down_read(&EXT4_I(inode)->xattr_sem);
+               has_inline = ext4_has_inline_data(inode);
+               up_read(&EXT4_I(inode)->xattr_sem);
+               if (has_inline)
+                       return 0;
+       }
+
+       return _ext4_fiemap(inode, fieinfo, start, len,
+                           ext4_fill_es_cache_info);
+}
+
+
 /*
  * ext4_access_path:
  * Function to access the path buffer for marking it dirty.
index 7521de2..d996b44 100644 (file)
@@ -146,7 +146,7 @@ static struct kmem_cache *ext4_pending_cachep;
 
 static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
 static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
-                             ext4_lblk_t end);
+                             ext4_lblk_t end, int *reserved);
 static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
 static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
                       struct ext4_inode_info *locked_ei);
@@ -836,7 +836,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
        ext4_es_insert_extent_check(inode, &newes);
 
        write_lock(&EXT4_I(inode)->i_es_lock);
-       err = __es_remove_extent(inode, lblk, end);
+       err = __es_remove_extent(inode, lblk, end, NULL);
        if (err != 0)
                goto error;
 retry:
@@ -899,6 +899,7 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
  * Return: 1 on found, 0 on not
  */
 int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
+                         ext4_lblk_t *next_lblk,
                          struct extent_status *es)
 {
        struct ext4_es_tree *tree;
@@ -947,9 +948,18 @@ out:
                es->es_pblk = es1->es_pblk;
                if (!ext4_es_is_referenced(es1))
                        ext4_es_set_referenced(es1);
-               stats->es_stats_cache_hits++;
+               percpu_counter_inc(&stats->es_stats_cache_hits);
+               if (next_lblk) {
+                       node = rb_next(&es1->rb_node);
+                       if (node) {
+                               es1 = rb_entry(node, struct extent_status,
+                                              rb_node);
+                               *next_lblk = es1->es_lblk;
+                       } else
+                               *next_lblk = 0;
+               }
        } else {
-               stats->es_stats_cache_misses++;
+               percpu_counter_inc(&stats->es_stats_cache_misses);
        }
 
        read_unlock(&EXT4_I(inode)->i_es_lock);
@@ -958,8 +968,322 @@ out:
        return found;
 }
 
+struct rsvd_count {
+       int ndelonly;
+       bool first_do_lblk_found;
+       ext4_lblk_t first_do_lblk;
+       ext4_lblk_t last_do_lblk;
+       struct extent_status *left_es;
+       bool partial;
+       ext4_lblk_t lclu;
+};
+
+/*
+ * init_rsvd - initialize reserved count data before removing block range
+ *            in file from extent status tree
+ *
+ * @inode - file containing range
+ * @lblk - first block in range
+ * @es - pointer to first extent in range
+ * @rc - pointer to reserved count data
+ *
+ * Assumes es is not NULL
+ */
+static void init_rsvd(struct inode *inode, ext4_lblk_t lblk,
+                     struct extent_status *es, struct rsvd_count *rc)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       struct rb_node *node;
+
+       rc->ndelonly = 0;
+
+       /*
+        * for bigalloc, note the first delonly block in the range has not
+        * been found, record the extent containing the block to the left of
+        * the region to be removed, if any, and note that there's no partial
+        * cluster to track
+        */
+       if (sbi->s_cluster_ratio > 1) {
+               rc->first_do_lblk_found = false;
+               if (lblk > es->es_lblk) {
+                       rc->left_es = es;
+               } else {
+                       node = rb_prev(&es->rb_node);
+                       rc->left_es = node ? rb_entry(node,
+                                                     struct extent_status,
+                                                     rb_node) : NULL;
+               }
+               rc->partial = false;
+       }
+}
+
+/*
+ * count_rsvd - count the clusters containing delayed and not unwritten
+ *             (delonly) blocks in a range within an extent and add to
+ *             the running tally in rsvd_count
+ *
+ * @inode - file containing extent
+ * @lblk - first block in range
+ * @len - length of range in blocks
+ * @es - pointer to extent containing clusters to be counted
+ * @rc - pointer to reserved count data
+ *
+ * Tracks partial clusters found at the beginning and end of extents so
+ * they aren't overcounted when they span adjacent extents
+ */
+static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
+                      struct extent_status *es, struct rsvd_count *rc)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       ext4_lblk_t i, end, nclu;
+
+       if (!ext4_es_is_delonly(es))
+               return;
+
+       WARN_ON(len <= 0);
+
+       if (sbi->s_cluster_ratio == 1) {
+               rc->ndelonly += (int) len;
+               return;
+       }
+
+       /* bigalloc */
+
+       i = (lblk < es->es_lblk) ? es->es_lblk : lblk;
+       end = lblk + (ext4_lblk_t) len - 1;
+       end = (end > ext4_es_end(es)) ? ext4_es_end(es) : end;
+
+       /* record the first block of the first delonly extent seen */
+       if (rc->first_do_lblk_found == false) {
+               rc->first_do_lblk = i;
+               rc->first_do_lblk_found = true;
+       }
+
+       /* update the last lblk in the region seen so far */
+       rc->last_do_lblk = end;
+
+       /*
+        * if we're tracking a partial cluster and the current extent
+        * doesn't start with it, count it and stop tracking
+        */
+       if (rc->partial && (rc->lclu != EXT4_B2C(sbi, i))) {
+               rc->ndelonly++;
+               rc->partial = false;
+       }
+
+       /*
+        * if the first cluster doesn't start on a cluster boundary but
+        * ends on one, count it
+        */
+       if (EXT4_LBLK_COFF(sbi, i) != 0) {
+               if (end >= EXT4_LBLK_CFILL(sbi, i)) {
+                       rc->ndelonly++;
+                       rc->partial = false;
+                       i = EXT4_LBLK_CFILL(sbi, i) + 1;
+               }
+       }
+
+       /*
+        * if the current cluster starts on a cluster boundary, count the
+        * number of whole delonly clusters in the extent
+        */
+       if ((i + sbi->s_cluster_ratio - 1) <= end) {
+               nclu = (end - i + 1) >> sbi->s_cluster_bits;
+               rc->ndelonly += nclu;
+               i += nclu << sbi->s_cluster_bits;
+       }
+
+       /*
+        * start tracking a partial cluster if there's a partial at the end
+        * of the current extent and we're not already tracking one
+        */
+       if (!rc->partial && i <= end) {
+               rc->partial = true;
+               rc->lclu = EXT4_B2C(sbi, i);
+       }
+}
+
+/*
+ * __pr_tree_search - search for a pending cluster reservation
+ *
+ * @root - root of pending reservation tree
+ * @lclu - logical cluster to search for
+ *
+ * Returns the pending reservation for the cluster identified by @lclu
+ * if found.  If not, returns a reservation for the next cluster if any,
+ * and if not, returns NULL.
+ */
+static struct pending_reservation *__pr_tree_search(struct rb_root *root,
+                                                   ext4_lblk_t lclu)
+{
+       struct rb_node *node = root->rb_node;
+       struct pending_reservation *pr = NULL;
+
+       while (node) {
+               pr = rb_entry(node, struct pending_reservation, rb_node);
+               if (lclu < pr->lclu)
+                       node = node->rb_left;
+               else if (lclu > pr->lclu)
+                       node = node->rb_right;
+               else
+                       return pr;
+       }
+       if (pr && lclu < pr->lclu)
+               return pr;
+       if (pr && lclu > pr->lclu) {
+               node = rb_next(&pr->rb_node);
+               return node ? rb_entry(node, struct pending_reservation,
+                                      rb_node) : NULL;
+       }
+       return NULL;
+}
+
+/*
+ * get_rsvd - calculates and returns the number of cluster reservations to be
+ *           released when removing a block range from the extent status tree
+ *           and releases any pending reservations within the range
+ *
+ * @inode - file containing block range
+ * @end - last block in range
+ * @right_es - pointer to extent containing next block beyond end or NULL
+ * @rc - pointer to reserved count data
+ *
+ * The number of reservations to be released is equal to the number of
+ * clusters containing delayed and not unwritten (delonly) blocks within
+ * the range, minus the number of clusters still containing delonly blocks
+ * at the ends of the range, and minus the number of pending reservations
+ * within the range.
+ */
+static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
+                            struct extent_status *right_es,
+                            struct rsvd_count *rc)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       struct pending_reservation *pr;
+       struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
+       struct rb_node *node;
+       ext4_lblk_t first_lclu, last_lclu;
+       bool left_delonly, right_delonly, count_pending;
+       struct extent_status *es;
+
+       if (sbi->s_cluster_ratio > 1) {
+               /* count any remaining partial cluster */
+               if (rc->partial)
+                       rc->ndelonly++;
+
+               if (rc->ndelonly == 0)
+                       return 0;
+
+               first_lclu = EXT4_B2C(sbi, rc->first_do_lblk);
+               last_lclu = EXT4_B2C(sbi, rc->last_do_lblk);
+
+               /*
+                * decrease the delonly count by the number of clusters at the
+                * ends of the range that still contain delonly blocks -
+                * these clusters still need to be reserved
+                */
+               left_delonly = right_delonly = false;
+
+               es = rc->left_es;
+               while (es && ext4_es_end(es) >=
+                      EXT4_LBLK_CMASK(sbi, rc->first_do_lblk)) {
+                       if (ext4_es_is_delonly(es)) {
+                               rc->ndelonly--;
+                               left_delonly = true;
+                               break;
+                       }
+                       node = rb_prev(&es->rb_node);
+                       if (!node)
+                               break;
+                       es = rb_entry(node, struct extent_status, rb_node);
+               }
+               if (right_es && (!left_delonly || first_lclu != last_lclu)) {
+                       if (end < ext4_es_end(right_es)) {
+                               es = right_es;
+                       } else {
+                               node = rb_next(&right_es->rb_node);
+                               es = node ? rb_entry(node, struct extent_status,
+                                                    rb_node) : NULL;
+                       }
+                       while (es && es->es_lblk <=
+                              EXT4_LBLK_CFILL(sbi, rc->last_do_lblk)) {
+                               if (ext4_es_is_delonly(es)) {
+                                       rc->ndelonly--;
+                                       right_delonly = true;
+                                       break;
+                               }
+                               node = rb_next(&es->rb_node);
+                               if (!node)
+                                       break;
+                               es = rb_entry(node, struct extent_status,
+                                             rb_node);
+                       }
+               }
+
+               /*
+                * Determine the block range that should be searched for
+                * pending reservations, if any.  Clusters on the ends of the
+                * original removed range containing delonly blocks are
+                * excluded.  They've already been accounted for and it's not
+                * possible to determine if an associated pending reservation
+                * should be released with the information available in the
+                * extents status tree.
+                */
+               if (first_lclu == last_lclu) {
+                       if (left_delonly | right_delonly)
+                               count_pending = false;
+                       else
+                               count_pending = true;
+               } else {
+                       if (left_delonly)
+                               first_lclu++;
+                       if (right_delonly)
+                               last_lclu--;
+                       if (first_lclu <= last_lclu)
+                               count_pending = true;
+                       else
+                               count_pending = false;
+               }
+
+               /*
+                * a pending reservation found between first_lclu and last_lclu
+                * represents an allocated cluster that contained at least one
+                * delonly block, so the delonly total must be reduced by one
+                * for each pending reservation found and released
+                */
+               if (count_pending) {
+                       pr = __pr_tree_search(&tree->root, first_lclu);
+                       while (pr && pr->lclu <= last_lclu) {
+                               rc->ndelonly--;
+                               node = rb_next(&pr->rb_node);
+                               rb_erase(&pr->rb_node, &tree->root);
+                               kmem_cache_free(ext4_pending_cachep, pr);
+                               if (!node)
+                                       break;
+                               pr = rb_entry(node, struct pending_reservation,
+                                             rb_node);
+                       }
+               }
+       }
+       return rc->ndelonly;
+}
+
+
+/*
+ * __es_remove_extent - removes block range from extent status tree
+ *
+ * @inode - file containing range
+ * @lblk - first block in range
+ * @end - last block in range
+ * @reserved - number of cluster reservations released
+ *
+ * If @reserved is not NULL and delayed allocation is enabled, counts
+ * block/cluster reservations freed by removing range and if bigalloc
+ * enabled cancels pending reservations as needed. Returns 0 on success,
+ * error code on failure.
+ */
 static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
-                             ext4_lblk_t end)
+                             ext4_lblk_t end, int *reserved)
 {
        struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
        struct rb_node *node;
@@ -968,9 +1292,14 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
        ext4_lblk_t len1, len2;
        ext4_fsblk_t block;
        int err;
+       bool count_reserved = true;
+       struct rsvd_count rc;
 
+       if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC))
+               count_reserved = false;
 retry:
        err = 0;
+
        es = __es_tree_search(&tree->root, lblk);
        if (!es)
                goto out;
@@ -979,6 +1308,8 @@ retry:
 
        /* Simply invalidate cache_es. */
        tree->cache_es = NULL;
+       if (count_reserved)
+               init_rsvd(inode, lblk, es, &rc);
 
        orig_es.es_lblk = es->es_lblk;
        orig_es.es_len = es->es_len;
@@ -1020,10 +1351,16 @@ retry:
                                ext4_es_store_pblock(es, block);
                        }
                }
+               if (count_reserved)
+                       count_rsvd(inode, lblk, orig_es.es_len - len1 - len2,
+                                  &orig_es, &rc);
                goto out;
        }
 
        if (len1 > 0) {
+               if (count_reserved)
+                       count_rsvd(inode, lblk, orig_es.es_len - len1,
+                                  &orig_es, &rc);
                node = rb_next(&es->rb_node);
                if (node)
                        es = rb_entry(node, struct extent_status, rb_node);
@@ -1032,6 +1369,8 @@ retry:
        }
 
        while (es && ext4_es_end(es) <= end) {
+               if (count_reserved)
+                       count_rsvd(inode, es->es_lblk, es->es_len, es, &rc);
                node = rb_next(&es->rb_node);
                rb_erase(&es->rb_node, &tree->root);
                ext4_es_free_extent(inode, es);
@@ -1046,6 +1385,9 @@ retry:
                ext4_lblk_t orig_len = es->es_len;
 
                len1 = ext4_es_end(es) - end;
+               if (count_reserved)
+                       count_rsvd(inode, es->es_lblk, orig_len - len1,
+                                  es, &rc);
                es->es_lblk = end + 1;
                es->es_len = len1;
                if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) {
@@ -1054,20 +1396,28 @@ retry:
                }
        }
 
+       if (count_reserved)
+               *reserved = get_rsvd(inode, end, es, &rc);
 out:
        return err;
 }
 
 /*
- * ext4_es_remove_extent() removes a space from a extent status tree.
+ * ext4_es_remove_extent - removes block range from extent status tree
  *
- * Return 0 on success, error code on failure.
+ * @inode - file containing range
+ * @lblk - first block in range
+ * @len - number of blocks to remove
+ *
+ * Reduces block/cluster reservation count and for bigalloc cancels pending
+ * reservations as needed. Returns 0 on success, error code on failure.
  */
 int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
                          ext4_lblk_t len)
 {
        ext4_lblk_t end;
        int err = 0;
+       int reserved = 0;
 
        trace_ext4_es_remove_extent(inode, lblk, len);
        es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
@@ -1085,9 +1435,10 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
         * is reclaimed.
         */
        write_lock(&EXT4_I(inode)->i_es_lock);
-       err = __es_remove_extent(inode, lblk, end);
+       err = __es_remove_extent(inode, lblk, end, &reserved);
        write_unlock(&EXT4_I(inode)->i_es_lock);
        ext4_es_print_tree(inode);
+       ext4_da_release_space(inode, reserved);
        return err;
 }
 
@@ -1235,9 +1586,9 @@ int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v)
        seq_printf(seq, "stats:\n  %lld objects\n  %lld reclaimable objects\n",
                   percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
                   percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
-       seq_printf(seq, "  %lu/%lu cache hits/misses\n",
-                  es_stats->es_stats_cache_hits,
-                  es_stats->es_stats_cache_misses);
+       seq_printf(seq, "  %lld/%lld cache hits/misses\n",
+                  percpu_counter_sum_positive(&es_stats->es_stats_cache_hits),
+                  percpu_counter_sum_positive(&es_stats->es_stats_cache_misses));
        if (inode_cnt)
                seq_printf(seq, "  %d inodes on list\n", inode_cnt);
 
@@ -1264,35 +1615,46 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
        sbi->s_es_nr_inode = 0;
        spin_lock_init(&sbi->s_es_lock);
        sbi->s_es_stats.es_stats_shrunk = 0;
-       sbi->s_es_stats.es_stats_cache_hits = 0;
-       sbi->s_es_stats.es_stats_cache_misses = 0;
+       err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_hits, 0,
+                                 GFP_KERNEL);
+       if (err)
+               return err;
+       err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_misses, 0,
+                                 GFP_KERNEL);
+       if (err)
+               goto err1;
        sbi->s_es_stats.es_stats_scan_time = 0;
        sbi->s_es_stats.es_stats_max_scan_time = 0;
        err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
        if (err)
-               return err;
+               goto err2;
        err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL);
        if (err)
-               goto err1;
+               goto err3;
 
        sbi->s_es_shrinker.scan_objects = ext4_es_scan;
        sbi->s_es_shrinker.count_objects = ext4_es_count;
        sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
        err = register_shrinker(&sbi->s_es_shrinker);
        if (err)
-               goto err2;
+               goto err4;
 
        return 0;
-
-err2:
+err4:
        percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
-err1:
+err3:
        percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
+err2:
+       percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
+err1:
+       percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
        return err;
 }
 
 void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
 {
+       percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
+       percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
        percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
        percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
        unregister_shrinker(&sbi->s_es_shrinker);
@@ -1317,6 +1679,7 @@ static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
        es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
        if (!es)
                goto out_wrap;
+
        while (*nr_to_scan > 0) {
                if (es->es_lblk > end) {
                        ei->i_es_shrink_lblk = end + 1;
@@ -1374,6 +1737,34 @@ static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
        return nr_shrunk;
 }
 
+/*
+ * Called to support EXT4_IOC_CLEAR_ES_CACHE.  We can only remove
+ * discretionary entries from the extent status cache.  (Some entries
+ * must be present for proper operations.)
+ */
+void ext4_clear_inode_es(struct inode *inode)
+{
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       struct extent_status *es;
+       struct ext4_es_tree *tree;
+       struct rb_node *node;
+
+       write_lock(&ei->i_es_lock);
+       tree = &EXT4_I(inode)->i_es_tree;
+       tree->cache_es = NULL;
+       node = rb_first(&tree->root);
+       while (node) {
+               es = rb_entry(node, struct extent_status, rb_node);
+               node = rb_next(node);
+               if (!ext4_es_is_delayed(es)) {
+                       rb_erase(&es->rb_node, &tree->root);
+                       ext4_es_free_extent(inode, es);
+               }
+       }
+       ext4_clear_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
+       write_unlock(&ei->i_es_lock);
+}
+
 #ifdef ES_DEBUG__
 static void ext4_print_pending_tree(struct inode *inode)
 {
@@ -1590,7 +1981,7 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
 
        write_lock(&EXT4_I(inode)->i_es_lock);
 
-       err = __es_remove_extent(inode, lblk, lblk);
+       err = __es_remove_extent(inode, lblk, lblk, NULL);
        if (err != 0)
                goto error;
 retry:
@@ -1779,93 +2170,3 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
                        __remove_pending(inode, last);
        }
 }
-
-/*
- * ext4_es_remove_blks - remove block range from extents status tree and
- *                       reduce reservation count or cancel pending
- *                       reservation as needed
- *
- * @inode - file containing range
- * @lblk - first block in range
- * @len - number of blocks to remove
- *
- */
-void ext4_es_remove_blks(struct inode *inode, ext4_lblk_t lblk,
-                        ext4_lblk_t len)
-{
-       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-       unsigned int clu_size, reserved = 0;
-       ext4_lblk_t last_lclu, first, length, remainder, last;
-       bool delonly;
-       int err = 0;
-       struct pending_reservation *pr;
-       struct ext4_pending_tree *tree;
-
-       /*
-        * Process cluster by cluster for bigalloc - there may be up to
-        * two clusters in a 4k page with a 1k block size and two blocks
-        * per cluster.  Also necessary for systems with larger page sizes
-        * and potentially larger block sizes.
-        */
-       clu_size = sbi->s_cluster_ratio;
-       last_lclu = EXT4_B2C(sbi, lblk + len - 1);
-
-       write_lock(&EXT4_I(inode)->i_es_lock);
-
-       for (first = lblk, remainder = len;
-            remainder > 0;
-            first += length, remainder -= length) {
-
-               if (EXT4_B2C(sbi, first) == last_lclu)
-                       length = remainder;
-               else
-                       length = clu_size - EXT4_LBLK_COFF(sbi, first);
-
-               /*
-                * The BH_Delay flag, which triggers calls to this function,
-                * and the contents of the extents status tree can be
-                * inconsistent due to writepages activity. So, note whether
-                * the blocks to be removed actually belong to an extent with
-                * delayed only status.
-                */
-               delonly = __es_scan_clu(inode, &ext4_es_is_delonly, first);
-
-               /*
-                * because of the writepages effect, written and unwritten
-                * blocks could be removed here
-                */
-               last = first + length - 1;
-               err = __es_remove_extent(inode, first, last);
-               if (err)
-                       ext4_warning(inode->i_sb,
-                                    "%s: couldn't remove page (err = %d)",
-                                    __func__, err);
-
-               /* non-bigalloc case: simply count the cluster for release */
-               if (sbi->s_cluster_ratio == 1 && delonly) {
-                       reserved++;
-                       continue;
-               }
-
-               /*
-                * bigalloc case: if all delayed allocated only blocks have
-                * just been removed from a cluster, either cancel a pending
-                * reservation if it exists or count a cluster for release
-                */
-               if (delonly &&
-                   !__es_scan_clu(inode, &ext4_es_is_delonly, first)) {
-                       pr = __get_pending(inode, EXT4_B2C(sbi, first));
-                       if (pr != NULL) {
-                               tree = &EXT4_I(inode)->i_pending_tree;
-                               rb_erase(&pr->rb_node, &tree->root);
-                               kmem_cache_free(ext4_pending_cachep, pr);
-                       } else {
-                               reserved++;
-                       }
-               }
-       }
-
-       write_unlock(&EXT4_I(inode)->i_es_lock);
-
-       ext4_da_release_space(inode, reserved);
-}
index 131a8b7..825313c 100644 (file)
@@ -70,8 +70,8 @@ struct ext4_es_tree {
 
 struct ext4_es_stats {
        unsigned long es_stats_shrunk;
-       unsigned long es_stats_cache_hits;
-       unsigned long es_stats_cache_misses;
+       struct percpu_counter es_stats_cache_hits;
+       struct percpu_counter es_stats_cache_misses;
        u64 es_stats_scan_time;
        u64 es_stats_max_scan_time;
        struct percpu_counter es_stats_all_cnt;
@@ -140,6 +140,7 @@ extern void ext4_es_find_extent_range(struct inode *inode,
                                      ext4_lblk_t lblk, ext4_lblk_t end,
                                      struct extent_status *es);
 extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
+                                ext4_lblk_t *next_lblk,
                                 struct extent_status *es);
 extern bool ext4_es_scan_range(struct inode *inode,
                               int (*matching_fn)(struct extent_status *es),
@@ -246,7 +247,6 @@ extern int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
                                        bool allocated);
 extern unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
                                        ext4_lblk_t len);
-extern void ext4_es_remove_blks(struct inode *inode, ext4_lblk_t lblk,
-                               ext4_lblk_t len);
+extern void ext4_clear_inode_es(struct inode *inode);
 
 #endif /* _EXT4_EXTENTS_STATUS_H */
index b8a20bb..8d2bbcc 100644 (file)
@@ -230,8 +230,6 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        if (IS_DAX(inode))
                return ext4_dax_write_iter(iocb, from);
 #endif
-       if (!o_direct && (iocb->ki_flags & IOCB_NOWAIT))
-               return -EOPNOTSUPP;
 
        if (!inode_trylock(inode)) {
                if (iocb->ki_flags & IOCB_NOWAIT)
index d358bfc..3e13379 100644 (file)
@@ -280,7 +280,7 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
        unsigned char *buff;
        struct qstr qstr = {.name = name, .len = len };
 
-       if (len && IS_CASEFOLDED(dir)) {
+       if (len && IS_CASEFOLDED(dir) && um) {
                buff = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL);
                if (!buff)
                        return -ENOMEM;
index 88cdf3c..2fec62d 100644 (file)
@@ -1416,7 +1416,7 @@ int ext4_inlinedir_to_tree(struct file *dir_file,
                err = ext4_htree_store_dirent(dir_file, hinfo->hash,
                                              hinfo->minor_hash, de, &tmp_str);
                if (err) {
-                       count = err;
+                       ret = err;
                        goto out;
                }
                count++;
index d0dc0e3..123e3de 100644 (file)
@@ -527,7 +527,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
                return -EFSCORRUPTED;
 
        /* Lookup extent status tree firstly */
-       if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+       if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
                if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
                        map->m_pblk = ext4_es_pblock(&es) +
                                        map->m_lblk - es.es_lblk;
@@ -695,7 +695,7 @@ found:
                 * extent status tree.
                 */
                if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
-                   ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+                   ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
                        if (ext4_es_is_written(&es))
                                goto out_sem;
                }
@@ -1024,7 +1024,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
        bh = ext4_getblk(handle, inode, block, map_flags);
        if (IS_ERR(bh))
                return bh;
-       if (!bh || buffer_uptodate(bh))
+       if (!bh || ext4_buffer_uptodate(bh))
                return bh;
        ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh);
        wait_on_buffer(bh);
@@ -1051,7 +1051,7 @@ int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
 
        for (i = 0; i < bh_count; i++)
                /* Note that NULL bhs[i] is valid because of holes. */
-               if (bhs[i] && !buffer_uptodate(bhs[i]))
+               if (bhs[i] && !ext4_buffer_uptodate(bhs[i]))
                        ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1,
                                    &bhs[i]);
 
@@ -1656,49 +1656,6 @@ void ext4_da_release_space(struct inode *inode, int to_free)
        dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
 }
 
-static void ext4_da_page_release_reservation(struct page *page,
-                                            unsigned int offset,
-                                            unsigned int length)
-{
-       int contiguous_blks = 0;
-       struct buffer_head *head, *bh;
-       unsigned int curr_off = 0;
-       struct inode *inode = page->mapping->host;
-       unsigned int stop = offset + length;
-       ext4_fsblk_t lblk;
-
-       BUG_ON(stop > PAGE_SIZE || stop < length);
-
-       head = page_buffers(page);
-       bh = head;
-       do {
-               unsigned int next_off = curr_off + bh->b_size;
-
-               if (next_off > stop)
-                       break;
-
-               if ((offset <= curr_off) && (buffer_delay(bh))) {
-                       contiguous_blks++;
-                       clear_buffer_delay(bh);
-               } else if (contiguous_blks) {
-                       lblk = page->index <<
-                              (PAGE_SHIFT - inode->i_blkbits);
-                       lblk += (curr_off >> inode->i_blkbits) -
-                               contiguous_blks;
-                       ext4_es_remove_blks(inode, lblk, contiguous_blks);
-                       contiguous_blks = 0;
-               }
-               curr_off = next_off;
-       } while ((bh = bh->b_this_page) != head);
-
-       if (contiguous_blks) {
-               lblk = page->index << (PAGE_SHIFT - inode->i_blkbits);
-               lblk += (curr_off >> inode->i_blkbits) - contiguous_blks;
-               ext4_es_remove_blks(inode, lblk, contiguous_blks);
-       }
-
-}
-
 /*
  * Delayed allocation stuff
  */
@@ -1878,7 +1835,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
                  (unsigned long) map->m_lblk);
 
        /* Lookup extent status tree firstly */
-       if (ext4_es_lookup_extent(inode, iblock, &es)) {
+       if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
                if (ext4_es_is_hole(&es)) {
                        retval = 0;
                        down_read(&EXT4_I(inode)->i_data_sem);
@@ -2800,15 +2757,6 @@ static int ext4_writepages(struct address_space *mapping,
                goto out_writepages;
        }
 
-       if (ext4_should_dioread_nolock(inode)) {
-               /*
-                * We may need to convert up to one extent per block in
-                * the page and we may dirty the inode.
-                */
-               rsv_blocks = 1 + ext4_chunk_trans_blocks(inode,
-                                               PAGE_SIZE >> inode->i_blkbits);
-       }
-
        /*
         * If we have inline data and arrive here, it means that
         * we will soon create the block for the 1st page, so
@@ -2827,6 +2775,15 @@ static int ext4_writepages(struct address_space *mapping,
                ext4_journal_stop(handle);
        }
 
+       if (ext4_should_dioread_nolock(inode)) {
+               /*
+                * We may need to convert up to one extent per block in
+                * the page and we may dirty the inode.
+                */
+               rsv_blocks = 1 + ext4_chunk_trans_blocks(inode,
+                                               PAGE_SIZE >> inode->i_blkbits);
+       }
+
        if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
                range_whole = 1;
 
@@ -3242,24 +3199,6 @@ static int ext4_da_write_end(struct file *file,
        return ret ? ret : copied;
 }
 
-static void ext4_da_invalidatepage(struct page *page, unsigned int offset,
-                                  unsigned int length)
-{
-       /*
-        * Drop reserved blocks
-        */
-       BUG_ON(!PageLocked(page));
-       if (!page_has_buffers(page))
-               goto out;
-
-       ext4_da_page_release_reservation(page, offset, length);
-
-out:
-       ext4_invalidatepage(page, offset, length);
-
-       return;
-}
-
 /*
  * Force all delayed allocation blocks to be allocated for a given inode.
  */
@@ -4002,7 +3941,7 @@ static const struct address_space_operations ext4_da_aops = {
        .write_end              = ext4_da_write_end,
        .set_page_dirty         = ext4_set_page_dirty,
        .bmap                   = ext4_bmap,
-       .invalidatepage         = ext4_da_invalidatepage,
+       .invalidatepage         = ext4_invalidatepage,
        .releasepage            = ext4_releasepage,
        .direct_IO              = ext4_direct_IO,
        .migratepage            = buffer_migrate_page,
@@ -4314,6 +4253,15 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 
        trace_ext4_punch_hole(inode, offset, length, 0);
 
+       ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+       if (ext4_has_inline_data(inode)) {
+               down_write(&EXT4_I(inode)->i_mmap_sem);
+               ret = ext4_convert_inline_data(inode);
+               up_write(&EXT4_I(inode)->i_mmap_sem);
+               if (ret)
+                       return ret;
+       }
+
        /*
         * Write out all dirty pages to avoid race conditions
         * Then release them.
@@ -5137,6 +5085,9 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
                                 "iget: bogus i_mode (%o)", inode->i_mode);
                goto bad_inode;
        }
+       if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb))
+               ext4_error_inode(inode, function, line, 0,
+                                "casefold flag without casefold feature");
        brelse(iloc.bh);
 
        unlock_new_inode(inode);
index 5444d49..0b7f316 100644 (file)
@@ -745,6 +745,74 @@ static void ext4_fill_fsxattr(struct inode *inode, struct fsxattr *fa)
                fa->fsx_projid = from_kprojid(&init_user_ns, ei->i_projid);
 }
 
+/*
+ * Validate a fiemap byte range against the filesystem's size limit
+ * (copied from fs/ioctl.c).
+ *
+ * *new_len is always written: it receives len, or, when start + len would
+ * run past sb->s_maxbytes, the distance from start to s_maxbytes.
+ * Returns 0 on success, -EINVAL for a zero length, -EFBIG when start lies
+ * beyond s_maxbytes.
+ */
+static int fiemap_check_ranges(struct super_block *sb,
+                              u64 start, u64 len, u64 *new_len)
+{
+       const u64 limit = (u64) sb->s_maxbytes;
+
+       /* Report the caller's length unless it gets clamped below. */
+       *new_len = len;
+
+       if (!len)
+               return -EINVAL;
+       if (start > limit)
+               return -EFBIG;
+
+       /* Past here start <= limit, so limit - start cannot wrap. */
+       if (len > limit || start > limit - len)
+               *new_len = limit - start;
+
+       return 0;
+}
+
+/* So that the fiemap access checks can't overflow on 32 bit machines. */
+#define FIEMAP_MAX_EXTENTS     (UINT_MAX / sizeof(struct fiemap_extent))
+
+/*
+ * Handler for EXT4_IOC_GET_ES_CACHE.
+ *
+ * Reads a struct fiemap request from user space at @arg, validates it,
+ * passes it to ext4_get_es_cache(), and copies the updated header
+ * (fm_flags, fm_mapped_extents) back to user space.
+ *
+ * Returns the result of ext4_get_es_cache(), -EFAULT if user memory cannot
+ * be accessed, -EINVAL if fm_extent_count exceeds FIEMAP_MAX_EXTENTS, or
+ * the error from fiemap_check_ranges().
+ */
+static int ext4_ioctl_get_es_cache(struct file *filp, unsigned long arg)
+{
+       struct fiemap fiemap;
+       struct fiemap __user *ufiemap = (struct fiemap __user *) arg;
+       struct fiemap_extent_info fieinfo = { 0, };
+       struct inode *inode = file_inode(filp);
+       struct super_block *sb = inode->i_sb;
+       u64 len;
+       int error;
+
+       if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap)))
+               return -EFAULT;
+
+       /*
+        * Bound the count so the size multiplication in the access_ok()
+        * check below cannot overflow.
+        */
+       if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS)
+               return -EINVAL;
+
+       /* len receives the (possibly shortened) length to query. */
+       error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length,
+                                   &len);
+       if (error)
+               return error;
+
+       fieinfo.fi_flags = fiemap.fm_flags;
+       fieinfo.fi_extents_max = fiemap.fm_extent_count;
+       /* User-space address of the fm_extents member of the request. */
+       fieinfo.fi_extents_start = ufiemap->fm_extents;
+
+       /* The extent array is only checked when the caller asked for some. */
+       if (fiemap.fm_extent_count != 0 &&
+           !access_ok(fieinfo.fi_extents_start,
+                      fieinfo.fi_extents_max * sizeof(struct fiemap_extent)))
+               return -EFAULT;
+
+       /* Note: the result of the writeback is not checked here. */
+       if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC)
+               filemap_write_and_wait(inode->i_mapping);
+
+       error = ext4_get_es_cache(inode, &fieinfo, fiemap.fm_start, len);
+       fiemap.fm_flags = fieinfo.fi_flags;
+       fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped;
+       /* A failed copy-out replaces whatever ext4_get_es_cache() returned. */
+       if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap)))
+               error = -EFAULT;
+
+       return error;
+}
+
 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
        struct inode *inode = file_inode(filp);
@@ -1142,6 +1210,33 @@ resizefs_out:
                        return -EOPNOTSUPP;
                return fscrypt_ioctl_get_key_status(filp, (void __user *)arg);
 
+       case EXT4_IOC_CLEAR_ES_CACHE:
+       {
+               if (!inode_owner_or_capable(inode))
+                       return -EACCES;
+               ext4_clear_inode_es(inode);
+               return 0;
+       }
+
+       case EXT4_IOC_GETSTATE:
+       {
+               __u32   state = 0;
+
+               if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED))
+                       state |= EXT4_STATE_FLAG_EXT_PRECACHED;
+               if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
+                       state |= EXT4_STATE_FLAG_NEW;
+               if (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY))
+                       state |= EXT4_STATE_FLAG_NEWENTRY;
+               if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE))
+                       state |= EXT4_STATE_FLAG_DA_ALLOC_CLOSE;
+
+               return put_user(state, (__u32 __user *) arg);
+       }
+
+       case EXT4_IOC_GET_ES_CACHE:
+               return ext4_ioctl_get_es_cache(filp, arg);
+
        case EXT4_IOC_FSGETXATTR:
        {
                struct fsxattr fa;
@@ -1278,6 +1373,9 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case FS_IOC_GETFSMAP:
        case FS_IOC_ENABLE_VERITY:
        case FS_IOC_MEASURE_VERITY:
+       case EXT4_IOC_CLEAR_ES_CACHE:
+       case EXT4_IOC_GETSTATE:
+       case EXT4_IOC_GET_ES_CACHE:
                break;
        default:
                return -ENOIOCTLCMD;
index 1290295..a427d20 100644 (file)
@@ -1312,7 +1312,7 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
 {
        int len;
 
-       if (!IS_CASEFOLDED(dir)) {
+       if (!IS_CASEFOLDED(dir) || !EXT4_SB(dir->i_sb)->s_encoding) {
                cf_name->name = NULL;
                return;
        }
@@ -2183,7 +2183,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
 
 #ifdef CONFIG_UNICODE
        if (ext4_has_strict_mode(sbi) && IS_CASEFOLDED(dir) &&
-           utf8_validate(sbi->s_encoding, &dentry->d_name))
+           sbi->s_encoding && utf8_validate(sbi->s_encoding, &dentry->d_name))
                return -EINVAL;
 #endif
 
index 3db5f17..dd654e5 100644 (file)
@@ -1878,6 +1878,13 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
        } else if (token == Opt_commit) {
                if (arg == 0)
                        arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
+               else if (arg > INT_MAX / HZ) {
+                       ext4_msg(sb, KERN_ERR,
+                                "Invalid commit interval %d, "
+                                "must be smaller than %d",
+                                arg, INT_MAX / HZ);
+                       return -1;
+               }
                sbi->s_commit_interval = HZ * arg;
        } else if (token == Opt_debug_want_extra_isize) {
                sbi->s_want_extra_isize = arg;
index 110a38c..652fd2e 100644 (file)
@@ -2,6 +2,7 @@
 config F2FS_FS
        tristate "F2FS filesystem support"
        depends on BLOCK
+       select NLS
        select CRYPTO
        select CRYPTO_CRC32
        select F2FS_FS_XATTR if FS_ENCRYPTION
@@ -60,7 +61,9 @@ config F2FS_FS_SECURITY
          Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO
          Linux. This option enables an extended attribute handler for file
          security labels in the f2fs filesystem, so that it requires enabling
-         the extended attribute support in advance.
+         the extended attribute support in advance. In particular you need this
+         option if you use the setcap command to assign initial process
+         capabilities to executables (the security.* extended attributes).
 
          If you are not using a security module, say N.
 
index 54cad80..5755e89 100644 (file)
@@ -283,26 +283,25 @@ static bool __same_bdev(struct f2fs_sb_info *sbi,
 /*
  * Low-level block read/write IO operations.
  */
-static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
-                               struct writeback_control *wbc,
-                               int npages, bool is_read,
-                               enum page_type type, enum temp_type temp)
+static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
 {
+       struct f2fs_sb_info *sbi = fio->sbi;
        struct bio *bio;
 
        bio = f2fs_bio_alloc(sbi, npages, true);
 
-       f2fs_target_device(sbi, blk_addr, bio);
-       if (is_read) {
+       f2fs_target_device(sbi, fio->new_blkaddr, bio);
+       if (is_read_io(fio->op)) {
                bio->bi_end_io = f2fs_read_end_io;
                bio->bi_private = NULL;
        } else {
                bio->bi_end_io = f2fs_write_end_io;
                bio->bi_private = sbi;
-               bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, type, temp);
+               bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
+                                               fio->type, fio->temp);
        }
-       if (wbc)
-               wbc_init_bio(wbc, bio);
+       if (fio->io_wbc)
+               wbc_init_bio(fio->io_wbc, bio);
 
        return bio;
 }
@@ -319,6 +318,9 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
                if (test_opt(sbi, LFS) && current->plug)
                        blk_finish_plug(current->plug);
 
+               if (F2FS_IO_ALIGNED(sbi))
+                       goto submit_io;
+
                start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
                start %= F2FS_IO_SIZE(sbi);
 
@@ -485,8 +487,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
        f2fs_trace_ios(fio, 0);
 
        /* Allocate a new bio */
-       bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
-                               1, is_read_io(fio->op), fio->type, fio->temp);
+       bio = __bio_alloc(fio, 1);
 
        if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
                bio_put(bio);
@@ -505,6 +506,43 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
        return 0;
 }
 
+/*
+ * A page at cur_blkaddr can join @bio only when it directly follows the
+ * last block already queued and __same_bdev() accepts it for this bio.
+ */
+static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
+                               block_t last_blkaddr, block_t cur_blkaddr)
+{
+       return last_blkaddr + 1 == cur_blkaddr &&
+              __same_bdev(sbi, cur_blkaddr, bio);
+}
+
+/* The pending IO and the new request must agree on both op and op_flags. */
+static bool io_type_is_mergeable(struct f2fs_bio_info *io,
+                                               struct f2fs_io_info *fio)
+{
+       return io->fio.op == fio->op && io->fio.op_flags == fio->op_flags;
+}
+
+/*
+ * Decide whether the block at cur_blkaddr may be appended to @bio.
+ *
+ * With aligned IO enabled and DATA/NODE traffic, merging is refused when
+ * the bio currently holds a whole number of IO units and has fewer free
+ * vectors than one more unit would need.  Otherwise the block must be
+ * contiguous on the same device and match the pending IO's op/op_flags.
+ */
+static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
+                                       struct f2fs_bio_info *io,
+                                       struct f2fs_io_info *fio,
+                                       block_t last_blkaddr,
+                                       block_t cur_blkaddr)
+{
+       if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
+               unsigned int unit = F2FS_IO_SIZE(sbi);
+               unsigned int used = F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
+               unsigned int free_vecs = bio->bi_max_vecs - bio->bi_vcnt;
+
+               /* Bio is unit-aligned but cannot take another full unit. */
+               if (used % unit == 0 && free_vecs < unit)
+                       return false;
+       }
+
+       return page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr) &&
+              io_type_is_mergeable(io, fio);
+}
+
 int f2fs_merge_page_bio(struct f2fs_io_info *fio)
 {
        struct bio *bio = *fio->bio;
@@ -518,15 +556,14 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
        trace_f2fs_submit_page_bio(page, fio);
        f2fs_trace_ios(fio, 0);
 
-       if (bio && (*fio->last_block + 1 != fio->new_blkaddr ||
-                       !__same_bdev(fio->sbi, fio->new_blkaddr, bio))) {
+       if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
+                                               fio->new_blkaddr)) {
                __submit_bio(fio->sbi, bio, fio->type);
                bio = NULL;
        }
 alloc_new:
        if (!bio) {
-               bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
-                               BIO_MAX_PAGES, false, fio->type, fio->temp);
+               bio = __bio_alloc(fio, BIO_MAX_PAGES);
                bio_set_op_attrs(bio, fio->op, fio->op_flags);
        }
 
@@ -592,21 +629,19 @@ next:
 
        inc_page_count(sbi, WB_DATA_TYPE(bio_page));
 
-       if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
-           (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
-                       !__same_bdev(sbi, fio->new_blkaddr, io->bio)))
+       if (io->bio && !io_is_mergeable(sbi, io->bio, io, fio,
+                       io->last_block_in_bio, fio->new_blkaddr))
                __submit_merged_bio(io);
 alloc_new:
        if (io->bio == NULL) {
-               if ((fio->type == DATA || fio->type == NODE) &&
+               if (F2FS_IO_ALIGNED(sbi) &&
+                               (fio->type == DATA || fio->type == NODE) &&
                                fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
                        dec_page_count(sbi, WB_DATA_TYPE(bio_page));
                        fio->retry = true;
                        goto skip;
                }
-               io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc,
-                                               BIO_MAX_PAGES, false,
-                                               fio->type, fio->temp);
+               io->bio = __bio_alloc(fio, BIO_MAX_PAGES);
                io->fio = *fio;
        }
 
@@ -627,7 +662,7 @@ skip:
                goto next;
 out:
        if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
-                               f2fs_is_checkpoint_ready(sbi))
+                               !f2fs_is_checkpoint_ready(sbi))
                __submit_merged_bio(io);
        up_write(&io->io_rwsem);
 }
@@ -1022,7 +1057,7 @@ alloc:
        if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
                invalidate_mapping_pages(META_MAPPING(sbi),
                                        old_blkaddr, old_blkaddr);
-       f2fs_set_data_blkaddr(dn);
+       f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
 
        /*
         * i_size will be updated by direct_IO. Otherwise, we'll get stale
@@ -1199,10 +1234,10 @@ next_block:
                if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
                                                        map->m_may_create) {
                        err = __allocate_data_block(&dn, map->m_seg_type);
-                       if (!err) {
-                               blkaddr = dn.data_blkaddr;
-                               set_inode_flag(inode, FI_APPEND_WRITE);
-                       }
+                       if (err)
+                               goto sync_out;
+                       blkaddr = dn.data_blkaddr;
+                       set_inode_flag(inode, FI_APPEND_WRITE);
                }
        } else {
                if (create) {
@@ -1407,7 +1442,7 @@ static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
        return __get_data_block(inode, iblock, bh_result, create,
                                F2FS_GET_BLOCK_DIO, NULL,
                                f2fs_rw_hint_to_seg_type(inode->i_write_hint),
-                               true);
+                               IS_SWAPFILE(inode) ? false : true);
 }
 
 static int get_data_block_dio(struct inode *inode, sector_t iblock,
@@ -1538,7 +1573,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                goto out;
        }
 
-       if (f2fs_has_inline_data(inode)) {
+       if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
                ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
                if (ret != -EAGAIN)
                        goto out;
@@ -1691,8 +1726,8 @@ zero_out:
         * This page will go to BIO.  Do we need to send this
         * BIO off first?
         */
-       if (bio && (*last_block_in_bio != block_nr - 1 ||
-               !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
+       if (bio && !page_is_mergeable(F2FS_I_SB(inode), bio,
+                               *last_block_in_bio, block_nr)) {
 submit_and_realloc:
                __submit_bio(F2FS_I_SB(inode), bio, DATA);
                bio = NULL;
@@ -2590,9 +2625,10 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 
        trace_f2fs_write_begin(inode, pos, len, flags);
 
-       err = f2fs_is_checkpoint_ready(sbi);
-       if (err)
+       if (!f2fs_is_checkpoint_ready(sbi)) {
+               err = -ENOSPC;
                goto fail;
+       }
 
        if ((f2fs_is_atomic_file(inode) &&
                        !f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
index 7706049..9b0bedd 100644 (file)
@@ -67,7 +67,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
        si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA);
        si->nr_rd_node = get_pages(sbi, F2FS_RD_NODE);
        si->nr_rd_meta = get_pages(sbi, F2FS_RD_META);
-       if (SM_I(sbi) && SM_I(sbi)->fcc_info) {
+       if (SM_I(sbi)->fcc_info) {
                si->nr_flushed =
                        atomic_read(&SM_I(sbi)->fcc_info->issued_flush);
                si->nr_flushing =
@@ -75,7 +75,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
                si->flush_list_empty =
                        llist_empty(&SM_I(sbi)->fcc_info->issue_list);
        }
-       if (SM_I(sbi) && SM_I(sbi)->dcc_info) {
+       if (SM_I(sbi)->dcc_info) {
                si->nr_discarded =
                        atomic_read(&SM_I(sbi)->dcc_info->issued_discard);
                si->nr_discarding =
index 85a1528..4033778 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
 #include <linux/sched/signal.h>
+#include <linux/unicode.h>
 #include "f2fs.h"
 #include "node.h"
 #include "acl.h"
@@ -81,7 +82,8 @@ static unsigned long dir_block_index(unsigned int level,
        return bidx;
 }
 
-static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
+static struct f2fs_dir_entry *find_in_block(struct inode *dir,
+                               struct page *dentry_page,
                                struct fscrypt_name *fname,
                                f2fs_hash_t namehash,
                                int *max_slots,
@@ -93,7 +95,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
 
        dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
 
-       make_dentry_ptr_block(NULL, &d, dentry_blk);
+       make_dentry_ptr_block(dir, &d, dentry_blk);
        de = f2fs_find_target_dentry(fname, namehash, max_slots, &d);
        if (de)
                *res_page = dentry_page;
@@ -101,14 +103,116 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
        return de;
 }
 
+#ifdef CONFIG_UNICODE
+/*
+ * Case-insensitive comparison of a looked-up name with a directory entry.
+ *
+ * @quick selects utf8_strncasecmp_folded() instead of utf8_strncasecmp().
+ *
+ * Returns: 0 when the names match, a positive value when they do not, and
+ * a negative value on error.
+ */
+int f2fs_ci_compare(const struct inode *parent, const struct qstr *name,
+                               const struct qstr *entry, bool quick)
+{
+       const struct f2fs_sb_info *sbi = F2FS_SB(parent->i_sb);
+       const struct unicode_map *um = sbi->s_encoding;
+       int cmp = quick ? utf8_strncasecmp_folded(um, name, entry) :
+                         utf8_strncasecmp(um, name, entry);
+
+       if (cmp >= 0)
+               return cmp;
+
+       /*
+        * The comparison failed on an invalid character sequence: strict
+        * mode turns that into an error, otherwise both names are treated
+        * as opaque byte strings.
+        */
+       if (f2fs_has_strict_mode(sbi))
+               return -EINVAL;
+       if (name->len != entry->len)
+               return 1;
+
+       return memcmp(name->name, entry->name, name->len) != 0;
+}
+
+/*
+ * Pre-compute the casefolded form of @iname for lookups in @dir.
+ *
+ * On return cf_name->name is either NULL (directory not casefolded, no
+ * encoding loaded, allocation failure, or casefolding failed) or a buffer
+ * of F2FS_NAME_LEN bytes holding the folded name, with cf_name->len set to
+ * its length.  The caller is responsible for freeing the buffer.
+ */
+static void f2fs_fname_setup_ci_filename(struct inode *dir,
+                                       const struct qstr *iname,
+                                       struct fscrypt_str *cf_name)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+
+       /*
+        * Without a loaded encoding there is nothing to fold with, and
+        * f2fs_match_name() takes the exact-match path in that case anyway.
+        */
+       if (!IS_CASEFOLDED(dir) || !sbi->s_encoding) {
+               cf_name->name = NULL;
+               return;
+       }
+
+       cf_name->name = f2fs_kmalloc(sbi, F2FS_NAME_LEN, GFP_NOFS);
+       if (!cf_name->name)
+               return;
+
+       cf_name->len = utf8_casefold(sbi->s_encoding,
+                                       iname, cf_name->name,
+                                       F2FS_NAME_LEN);
+       /* The cast exposes a negative (error) result of utf8_casefold(). */
+       if ((int)cf_name->len <= 0) {
+               kvfree(cf_name->name);
+               cf_name->name = NULL;
+       }
+}
+#endif
+
+/*
+ * Decide whether directory entry @de (slot @bit_pos of @d) is the name being
+ * looked up.
+ *
+ * The stored hash must equal @namehash.  After that, for a casefolded
+ * directory on a filesystem with an encoding (CONFIG_UNICODE only), the name
+ * is compared case-insensitively: against the pre-folded @cf_str when it is
+ * available, otherwise against the user-supplied name.  Every other case
+ * goes through fscrypt_match_name().
+ */
+static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d,
+                                       struct f2fs_dir_entry *de,
+                                       struct fscrypt_name *fname,
+                                       struct fscrypt_str *cf_str,
+                                       unsigned long bit_pos,
+                                       f2fs_hash_t namehash)
+{
+#ifdef CONFIG_UNICODE
+       struct inode *parent = d->inode;
+       struct f2fs_sb_info *sbi = F2FS_I_SB(parent);
+       struct qstr entry;
+#endif
+
+       if (de->hash_code != namehash)
+               return false;
+
+#ifdef CONFIG_UNICODE
+       entry.name = d->filename[bit_pos];
+       /* name_len is a little-endian field; convert before using it. */
+       entry.len = le16_to_cpu(de->name_len);
+
+       if (sbi->s_encoding && IS_CASEFOLDED(parent)) {
+               if (cf_str->name) {
+                       struct qstr cf = {.name = cf_str->name,
+                                         .len = cf_str->len};
+                       return !f2fs_ci_compare(parent, &cf, &entry, true);
+               }
+               return !f2fs_ci_compare(parent, fname->usr_fname, &entry,
+                                       false);
+       }
+#endif
+       return fscrypt_match_name(fname, d->filename[bit_pos],
+                               le16_to_cpu(de->name_len));
+}
+
 struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname,
                        f2fs_hash_t namehash, int *max_slots,
                        struct f2fs_dentry_ptr *d)
 {
        struct f2fs_dir_entry *de;
+       struct fscrypt_str cf_str = { .name = NULL, .len = 0 };
        unsigned long bit_pos = 0;
        int max_len = 0;
 
+#ifdef CONFIG_UNICODE
+       f2fs_fname_setup_ci_filename(d->inode, fname->usr_fname, &cf_str);
+#endif
+
        if (max_slots)
                *max_slots = 0;
        while (bit_pos < d->max) {
@@ -125,9 +229,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname,
                        continue;
                }
 
-               if (de->hash_code == namehash &&
-                   fscrypt_match_name(fname, d->filename[bit_pos],
-                                      le16_to_cpu(de->name_len)))
+               if (f2fs_match_name(d, de, fname, &cf_str, bit_pos, namehash))
                        goto found;
 
                if (max_slots && max_len > *max_slots)
@@ -141,6 +243,10 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname,
 found:
        if (max_slots && max_len > *max_slots)
                *max_slots = max_len;
+
+#ifdef CONFIG_UNICODE
+       kvfree(cf_str.name);
+#endif
        return de;
 }
 
@@ -157,7 +263,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
        struct f2fs_dir_entry *de = NULL;
        bool room = false;
        int max_slots;
-       f2fs_hash_t namehash = f2fs_dentry_hash(&name, fname);
+       f2fs_hash_t namehash = f2fs_dentry_hash(dir, &name, fname);
 
        nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
        nblock = bucket_blocks(level);
@@ -179,8 +285,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
                        }
                }
 
-               de = find_in_block(dentry_page, fname, namehash, &max_slots,
-                                                               res_page);
+               de = find_in_block(dir, dentry_page, fname, namehash,
+                                                       &max_slots, res_page);
                if (de)
                        break;
 
@@ -250,6 +356,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
        struct fscrypt_name fname;
        int err;
 
+#ifdef CONFIG_UNICODE
+       if (f2fs_has_strict_mode(F2FS_I_SB(dir)) && IS_CASEFOLDED(dir) &&
+                       utf8_validate(F2FS_I_SB(dir)->s_encoding, child)) {
+               *res_page = ERR_PTR(-EINVAL);
+               return NULL;
+       }
+#endif
+
        err = fscrypt_setup_filename(dir, child, 1, &fname);
        if (err) {
                if (err == -ENOENT)
@@ -504,7 +618,7 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
 
        level = 0;
        slots = GET_DENTRY_SLOTS(new_name->len);
-       dentry_hash = f2fs_dentry_hash(new_name, NULL);
+       dentry_hash = f2fs_dentry_hash(dir, new_name, NULL);
 
        current_depth = F2FS_I(dir)->i_current_depth;
        if (F2FS_I(dir)->chash == dentry_hash) {
@@ -568,6 +682,11 @@ add_dentry:
 
        if (inode) {
                f2fs_i_pino_write(inode, dir->i_ino);
+
+               /* synchronize inode page's data from inode cache */
+               if (is_inode_flag_set(inode, FI_NEW_INODE))
+                       f2fs_update_inode(inode, page);
+
                f2fs_put_page(page, 1);
        }
 
@@ -943,3 +1062,50 @@ const struct file_operations f2fs_dir_operations = {
        .compat_ioctl   = f2fs_compat_ioctl,
 #endif
 };
+
+#ifdef CONFIG_UNICODE
+/*
+ * ->d_compare() for f2fs: compare the dentry name (@str, @len) with @name.
+ *
+ * When the parent directory is not casefolded this is an exact byte
+ * comparison; otherwise the decision is delegated to f2fs_ci_compare().
+ * Returns 0 on a match and non-zero otherwise (including any error value
+ * coming back from f2fs_ci_compare()).
+ */
+static int f2fs_d_compare(const struct dentry *dentry, unsigned int len,
+                         const char *str, const struct qstr *name)
+{
+       struct qstr qstr = {.name = str, .len = len };
+
+       if (!IS_CASEFOLDED(dentry->d_parent->d_inode)) {
+               if (len != name->len)
+                       return -1;
+               /* Compare against the name bytes, not the qstr object. */
+               return memcmp(str, name->name, len);
+       }
+
+       return f2fs_ci_compare(dentry->d_parent->d_inode, name, &qstr, false);
+}
+
+/*
+ * ->d_hash() for f2fs: for a casefolded directory, hash the casefolded
+ * form of @str so that names differing only in case hash alike.
+ *
+ * Returns 0 on success (str->hash is left untouched when the directory is
+ * not casefolded, or when folding fails outside strict mode), -ENOMEM if
+ * the scratch buffer cannot be allocated, and -EINVAL if folding fails in
+ * strict mode.
+ */
+static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str)
+{
+       struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
+       const struct unicode_map *um = sbi->s_encoding;
+       unsigned char *folded;
+       int folded_len;
+       int err = 0;
+
+       if (!IS_CASEFOLDED(dentry->d_inode))
+               return 0;
+
+       folded = f2fs_kmalloc(sbi, PATH_MAX, GFP_ATOMIC);
+       if (!folded)
+               return -ENOMEM;
+
+       folded_len = utf8_casefold(um, str, folded, PATH_MAX);
+       if (folded_len >= 0)
+               str->hash = full_name_hash(dentry, folded, folded_len);
+       else if (f2fs_has_strict_mode(sbi))
+               err = -EINVAL;
+
+       kvfree(folded);
+       return err;
+}
+
+/* Dentry callbacks providing casefold-aware name comparison and hashing. */
+const struct dentry_operations f2fs_dentry_ops = {
+       .d_compare      = f2fs_d_compare,
+       .d_hash         = f2fs_d_hash,
+};
+#endif
index 7c5f121..4024790 100644 (file)
@@ -154,6 +154,7 @@ struct f2fs_mount_info {
 #define F2FS_FEATURE_LOST_FOUND                0x0200
 #define F2FS_FEATURE_VERITY            0x0400
 #define F2FS_FEATURE_SB_CHKSUM         0x0800
+#define F2FS_FEATURE_CASEFOLD          0x1000
 
 #define __F2FS_HAS_FEATURE(raw_super, mask)                            \
        ((raw_super->feature & cpu_to_le32(mask)) != 0)
@@ -418,6 +419,9 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
 #define F2FS_IOC_PRECACHE_EXTENTS      _IO(F2FS_IOCTL_MAGIC, 15)
 #define F2FS_IOC_RESIZE_FS             _IOW(F2FS_IOCTL_MAGIC, 16, __u64)
 
+#define F2FS_IOC_GET_VOLUME_NAME       FS_IOC_GETFSLABEL
+#define F2FS_IOC_SET_VOLUME_NAME       FS_IOC_SETFSLABEL
+
 #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
 #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
 #define F2FS_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT
@@ -1172,6 +1176,10 @@ struct f2fs_sb_info {
        int valid_super_block;                  /* valid super block no */
        unsigned long s_flag;                           /* flags for sbi */
        struct mutex writepages;                /* mutex for writepages() */
+#ifdef CONFIG_UNICODE
+       struct unicode_map *s_encoding;
+       __u16 s_encoding_flags;
+#endif
 
 #ifdef CONFIG_BLK_DEV_ZONED
        unsigned int blocks_per_blkz;           /* F2FS blocks per zone */
@@ -1643,6 +1651,7 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
 static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
 {
        unsigned long flags;
+       unsigned char *nat_bits;
 
        /*
         * In order to re-enable nat_bits we need to call fsck.f2fs by
@@ -1653,10 +1662,12 @@ static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
        if (lock)
                spin_lock_irqsave(&sbi->cp_lock, flags);
        __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
-       kvfree(NM_I(sbi)->nat_bits);
+       nat_bits = NM_I(sbi)->nat_bits;
        NM_I(sbi)->nat_bits = NULL;
        if (lock)
                spin_unlock_irqrestore(&sbi->cp_lock, flags);
+
+       kvfree(nat_bits);
 }
 
 static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi,
@@ -1763,7 +1774,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
        if (time_to_inject(sbi, FAULT_BLOCK)) {
                f2fs_show_injection_info(FAULT_BLOCK);
                release = *count;
-               goto enospc;
+               goto release_quota;
        }
 
        /*
@@ -1808,6 +1819,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 
 enospc:
        percpu_counter_sub(&sbi->alloc_valid_block_count, release);
+release_quota:
        dquot_release_reservation_block(inode, release);
        return -ENOSPC;
 }
@@ -2362,13 +2374,16 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr)
 #define F2FS_INDEX_FL                  0x00001000 /* hash-indexed directory */
 #define F2FS_DIRSYNC_FL                        0x00010000 /* dirsync behaviour (directories only) */
 #define F2FS_PROJINHERIT_FL            0x20000000 /* Create with parents projid */
+#define F2FS_CASEFOLD_FL               0x40000000 /* Casefolded file */
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \
-                          F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL)
+                          F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \
+                          F2FS_CASEFOLD_FL)
 
 /* Flags that are appropriate for regular files (all but dir-specific ones). */
-#define F2FS_REG_FLMASK                (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL))
+#define F2FS_REG_FLMASK                (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \
+                               F2FS_CASEFOLD_FL))
 
 /* Flags that are appropriate for non-directories/regular files. */
 #define F2FS_OTHER_FLMASK      (F2FS_NODUMP_FL | F2FS_NOATIME_FL)
@@ -2935,6 +2950,11 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
                                                        bool hot, bool set);
 struct dentry *f2fs_get_parent(struct dentry *child);
 
+extern int f2fs_ci_compare(const struct inode *parent,
+                          const struct qstr *name,
+                          const struct qstr *entry,
+                          bool quick);
+
 /*
  * dir.c
  */
@@ -2998,8 +3018,8 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi);
 /*
  * hash.c
  */
-f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info,
-                               struct fscrypt_name *fname);
+f2fs_hash_t f2fs_dentry_hash(const struct inode *dir,
+               const struct qstr *name_info, struct fscrypt_name *fname);
 
 /*
  * node.c
@@ -3442,6 +3462,9 @@ static inline void f2fs_destroy_root_stats(void) { }
 #endif
 
 extern const struct file_operations f2fs_dir_operations;
+#ifdef CONFIG_UNICODE
+extern const struct dentry_operations f2fs_dentry_ops;
+#endif
 extern const struct file_operations f2fs_file_operations;
 extern const struct inode_operations f2fs_file_inode_operations;
 extern const struct address_space_operations f2fs_dblock_aops;
@@ -3576,6 +3599,7 @@ F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME);
 F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND);
 F2FS_FEATURE_FUNCS(verity, VERITY);
 F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
+F2FS_FEATURE_FUNCS(casefold, CASEFOLD);
 
 #ifdef CONFIG_BLK_DEV_ZONED
 static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
@@ -3694,11 +3718,14 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
         */
        if (f2fs_sb_has_blkzoned(sbi))
                return true;
-       if (test_opt(sbi, LFS) && (rw == WRITE) &&
-                               block_unaligned_IO(inode, iocb, iter))
-               return true;
+       if (test_opt(sbi, LFS) && (rw == WRITE)) {
+               if (block_unaligned_IO(inode, iocb, iter))
+                       return true;
+               if (F2FS_IO_ALIGNED(sbi))
+                       return true;
+       }
        if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED) &&
-                                       !(inode->i_flags & S_SWAPFILE))
+                                       !IS_SWAPFILE(inode))
                return true;
 
        return false;
index 56efde9..29bc0a5 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/uio.h>
 #include <linux/uuid.h>
 #include <linux/file.h>
+#include <linux/nls.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -57,6 +58,11 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
                goto err;
        }
 
+       if (!f2fs_is_checkpoint_ready(sbi)) {
+               err = -ENOSPC;
+               goto err;
+       }
+
        sb_start_pagefault(inode->i_sb);
 
        f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
@@ -819,14 +825,24 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
        }
 
        if (attr->ia_valid & ATTR_SIZE) {
-               bool to_smaller = (attr->ia_size <= i_size_read(inode));
+               loff_t old_size = i_size_read(inode);
+
+               if (attr->ia_size > MAX_INLINE_DATA(inode)) {
+                       /*
+                        * should convert inline inode before i_size_write to
+                        * keep smaller than inline_data size with inline flag.
+                        */
+                       err = f2fs_convert_inline_inode(inode);
+                       if (err)
+                               return err;
+               }
 
                down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
                down_write(&F2FS_I(inode)->i_mmap_sem);
 
                truncate_setsize(inode, attr->ia_size);
 
-               if (to_smaller)
+               if (attr->ia_size <= old_size)
                        err = f2fs_truncate(inode);
                /*
                 * do not trim all blocks after i_size if target size is
@@ -834,21 +850,11 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
                 */
                up_write(&F2FS_I(inode)->i_mmap_sem);
                up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
-
                if (err)
                        return err;
 
-               if (!to_smaller) {
-                       /* should convert inline inode here */
-                       if (!f2fs_may_inline_data(inode)) {
-                               err = f2fs_convert_inline_inode(inode);
-                               if (err)
-                                       return err;
-                       }
-                       inode->i_mtime = inode->i_ctime = current_time(inode);
-               }
-
                down_write(&F2FS_I(inode)->i_sem);
+               inode->i_mtime = inode->i_ctime = current_time(inode);
                F2FS_I(inode)->last_disk_size = i_size_read(inode);
                up_write(&F2FS_I(inode)->i_sem);
        }
@@ -1041,7 +1047,7 @@ next_dnode:
 
                        if (test_opt(sbi, LFS)) {
                                f2fs_put_dnode(&dn);
-                               return -ENOTSUPP;
+                               return -EOPNOTSUPP;
                        }
 
                        /* do not invalidate this block address */
@@ -1578,6 +1584,8 @@ static long f2fs_fallocate(struct file *file, int mode,
 
        if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
                return -EIO;
+       if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
+               return -ENOSPC;
 
        /* f2fs only support ->fallocate for regular file */
        if (!S_ISREG(inode->i_mode))
@@ -1669,6 +1677,13 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
        if (IS_NOQUOTA(inode))
                return -EPERM;
 
+       if ((iflags ^ fi->i_flags) & F2FS_CASEFOLD_FL) {
+               if (!f2fs_sb_has_casefold(F2FS_I_SB(inode)))
+                       return -EOPNOTSUPP;
+               if (!f2fs_empty_dir(inode))
+                       return -ENOTEMPTY;
+       }
+
        fi->i_flags = iflags | (fi->i_flags & ~mask);
 
        if (fi->i_flags & F2FS_PROJINHERIT_FL)
@@ -1703,6 +1718,7 @@ static const struct {
        { F2FS_INDEX_FL,        FS_INDEX_FL },
        { F2FS_DIRSYNC_FL,      FS_DIRSYNC_FL },
        { F2FS_PROJINHERIT_FL,  FS_PROJINHERIT_FL },
+       { F2FS_CASEFOLD_FL,     FS_CASEFOLD_FL },
 };
 
 #define F2FS_GETTABLE_FS_FL (          \
@@ -1717,7 +1733,8 @@ static const struct {
                FS_ENCRYPT_FL |         \
                FS_INLINE_DATA_FL |     \
                FS_NOCOW_FL |           \
-               FS_VERITY_FL)
+               FS_VERITY_FL |          \
+               FS_CASEFOLD_FL)
 
 #define F2FS_SETTABLE_FS_FL (          \
                FS_SYNC_FL |            \
@@ -1726,7 +1743,8 @@ static const struct {
                FS_NODUMP_FL |          \
                FS_NOATIME_FL |         \
                FS_DIRSYNC_FL |         \
-               FS_PROJINHERIT_FL)
+               FS_PROJINHERIT_FL |     \
+               FS_CASEFOLD_FL)
 
 /* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */
 static inline u32 f2fs_iflags_to_fsflags(u32 iflags)
@@ -1825,6 +1843,8 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
 static int f2fs_ioc_start_atomic_write(struct file *filp)
 {
        struct inode *inode = file_inode(filp);
+       struct f2fs_inode_info *fi = F2FS_I(inode);
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        int ret;
 
        if (!inode_owner_or_capable(inode))
@@ -1833,6 +1853,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
        if (!S_ISREG(inode->i_mode))
                return -EINVAL;
 
+       if (filp->f_flags & O_DIRECT)
+               return -EINVAL;
+
        ret = mnt_want_write_file(filp);
        if (ret)
                return ret;
@@ -1864,6 +1887,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
                goto out;
        }
 
+       spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+       if (list_empty(&fi->inmem_ilist))
+               list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
+       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+
+       /* add inode in inmem_list first and set atomic_file */
        set_inode_flag(inode, FI_ATOMIC_FILE);
        clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
        up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -1905,11 +1934,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
                        goto err_out;
 
                ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
-               if (!ret) {
-                       clear_inode_flag(inode, FI_ATOMIC_FILE);
-                       F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
-                       stat_dec_atomic_write(inode);
-               }
+               if (!ret)
+                       f2fs_drop_inmem_pages(inode);
        } else {
                ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
        }
@@ -2295,9 +2321,9 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
                return -EROFS;
 
        end = range.start + range.len;
-       if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) {
+       if (end < range.start || range.start < MAIN_BLKADDR(sbi) ||
+                                       end >= MAX_BLKADDR(sbi))
                return -EINVAL;
-       }
 
        ret = mnt_want_write_file(filp);
        if (ret)
@@ -2421,8 +2447,10 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
                map.m_lblk += map.m_len;
        }
 
-       if (!fragmented)
+       if (!fragmented) {
+               total = 0;
                goto out;
+       }
 
        sec_num = DIV_ROUND_UP(total, BLKS_PER_SEC(sbi));
 
@@ -2452,7 +2480,7 @@ do_map:
 
                if (!(map.m_flags & F2FS_MAP_FLAGS)) {
                        map.m_lblk = next_pgofs;
-                       continue;
+                       goto check;
                }
 
                set_inode_flag(inode, FI_DO_DEFRAG);
@@ -2476,8 +2504,8 @@ do_map:
                }
 
                map.m_lblk = idx;
-
-               if (idx < pg_end && cnt < blk_per_seg)
+check:
+               if (map.m_lblk < pg_end && cnt < blk_per_seg)
                        goto do_map;
 
                clear_inode_flag(inode, FI_DO_DEFRAG);
@@ -3141,10 +3169,74 @@ static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg)
        return fsverity_ioctl_measure(filp, (void __user *)arg);
 }
 
+/*
+ * Copy the volume label held in the in-memory raw superblock to the user
+ * buffer at @arg, converted from little-endian UTF-16 to UTF-8.
+ *
+ * Returns 0 on success, -ENOMEM if the scratch buffer cannot be allocated,
+ * or -EFAULT if the copy to user space fails.
+ *
+ * NOTE(review): only min(FSLABEL_MAX, converted length) bytes are copied.
+ * Whether that covers a terminating NUL depends on what utf16s_to_utf8s()
+ * returns - confirm that user space always sees a terminated string.
+ */
+static int f2fs_get_volume_name(struct file *filp, unsigned long arg)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
+       char __user *ubuf = (char __user *)arg;
+       char *label;
+       int nbytes;
+       int ret = 0;
+
+       /* zeroed scratch buffer that receives the UTF-8 form of the label */
+       label = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL);
+       if (!label)
+               return -ENOMEM;
+
+       /* sb_lock is held for reading only while raw_super is accessed */
+       down_read(&sbi->sb_lock);
+       nbytes = utf16s_to_utf8s(sbi->raw_super->volume_name,
+                       ARRAY_SIZE(sbi->raw_super->volume_name),
+                       UTF16_LITTLE_ENDIAN, label, MAX_VOLUME_NAME);
+       up_read(&sbi->sb_lock);
+
+       if (copy_to_user(ubuf, label, min(FSLABEL_MAX, nbytes)))
+               ret = -EFAULT;
+
+       kvfree(label);
+       return ret;
+}
+
+/*
+ * Replace the volume label in the raw superblock with the UTF-8 string at
+ * user address @arg (at most FSLABEL_MAX bytes are duplicated) and commit
+ * the superblock.
+ *
+ * Returns -EPERM without CAP_SYS_ADMIN, the strndup_user() or
+ * mnt_want_write_file() error if either fails, otherwise the result of
+ * f2fs_commit_super().
+ *
+ * NOTE(review): the return value of utf8s_to_utf16s() is not checked.  If
+ * it can fail part-way, the label has already been cleared and the
+ * superblock is committed regardless - confirm this is intended.
+ */
+static int f2fs_set_volume_name(struct file *filp, unsigned long arg)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
+       char *label;
+       int ret;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       label = strndup_user((const char __user *)arg, FSLABEL_MAX);
+       if (IS_ERR(label))
+               return PTR_ERR(label);
+
+       ret = mnt_want_write_file(filp);
+       if (!ret) {
+               /* sb_lock is held for writing across update and commit */
+               down_write(&sbi->sb_lock);
+
+               /* clear the old label so no stale tail survives */
+               memset(sbi->raw_super->volume_name, 0,
+                               sizeof(sbi->raw_super->volume_name));
+               utf8s_to_utf16s(label, strlen(label), UTF16_LITTLE_ENDIAN,
+                               sbi->raw_super->volume_name,
+                               ARRAY_SIZE(sbi->raw_super->volume_name));
+
+               ret = f2fs_commit_super(sbi, false);
+
+               up_write(&sbi->sb_lock);
+
+               mnt_drop_write_file(filp);
+       }
+
+       kfree(label);
+       return ret;
+}
+
 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
        if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
                return -EIO;
+       if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp))))
+               return -ENOSPC;
 
        switch (cmd) {
        case F2FS_IOC_GETFLAGS:
@@ -3213,6 +3305,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                return f2fs_ioc_enable_verity(filp, arg);
        case FS_IOC_MEASURE_VERITY:
                return f2fs_ioc_measure_verity(filp, arg);
+       case F2FS_IOC_GET_VOLUME_NAME:
+               return f2fs_get_volume_name(filp, arg);
+       case F2FS_IOC_SET_VOLUME_NAME:
+               return f2fs_set_volume_name(filp, arg);
        default:
                return -ENOTTY;
        }
@@ -3229,16 +3325,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                goto out;
        }
 
-       if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) {
-               ret = -EINVAL;
-               goto out;
-       }
-
-       if (!inode_trylock(inode)) {
-               if (iocb->ki_flags & IOCB_NOWAIT) {
+       if (iocb->ki_flags & IOCB_NOWAIT) {
+               if (!inode_trylock(inode)) {
                        ret = -EAGAIN;
                        goto out;
                }
+       } else {
                inode_lock(inode);
        }
 
@@ -3334,6 +3426,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case F2FS_IOC_RESIZE_FS:
        case FS_IOC_ENABLE_VERITY:
        case FS_IOC_MEASURE_VERITY:
+       case F2FS_IOC_GET_VOLUME_NAME:
+       case F2FS_IOC_SET_VOLUME_NAME:
                break;
        default:
                return -ENOIOCTLCMD;
index 8974672..5877bd7 100644 (file)
@@ -382,6 +382,16 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
                        nsearched++;
                }
 
+#ifdef CONFIG_F2FS_CHECK_FS
+               /*
+                * skip selecting an invalid segno (one that failed the block
+                * validity check during GC) to avoid an endless GC loop in
+                * such cases.
+                */
+               if (test_bit(segno, sm->invalid_segmap))
+                       goto next;
+#endif
+
                secno = GET_SEC_FROM_SEG(sbi, segno);
 
                if (sec_usage_check(sbi, secno))
@@ -627,8 +637,21 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
        source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node);
        f2fs_put_page(node_page, 1);
 
-       if (source_blkaddr != blkaddr)
+       if (source_blkaddr != blkaddr) {
+#ifdef CONFIG_F2FS_CHECK_FS
+               unsigned int segno = GET_SEGNO(sbi, blkaddr);
+               unsigned long offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
+
+               if (unlikely(check_valid_map(sbi, segno, offset))) {
+                       if (!test_and_set_bit(segno, SIT_I(sbi)->invalid_segmap)) {
+                               f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u\n",
+                                               blkaddr, source_blkaddr, segno);
+                               f2fs_bug_on(sbi, 1);
+                       }
+               }
+#endif
                return false;
+       }
        return true;
 }
 
@@ -1303,7 +1326,7 @@ gc_more:
                round++;
        }
 
-       if (gc_type == FG_GC)
+       if (gc_type == FG_GC && seg_freed)
                sbi->cur_victim_sec = NULL_SEGNO;
 
        if (sync)
index cc82f14..5bc4dcd 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/f2fs_fs.h>
 #include <linux/cryptohash.h>
 #include <linux/pagemap.h>
+#include <linux/unicode.h>
 
 #include "f2fs.h"
 
@@ -67,7 +68,7 @@ static void str2hashbuf(const unsigned char *msg, size_t len,
                *buf++ = pad;
 }
 
-f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info,
+static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info,
                                struct fscrypt_name *fname)
 {
        __u32 hash;
@@ -103,3 +104,37 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info,
        f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT);
        return f2fs_hash;
 }
+
+/*
+ * Compute the dentry hash of @name_info for directory @dir.
+ *
+ * With CONFIG_UNICODE, a non-empty name in a casefolded directory is first
+ * casefolded with the superblock's encoding, and the folded bytes are hashed
+ * instead of the original name.  In every other case - including a name
+ * that utf8_casefold() rejects - the name is hashed as an opaque byte
+ * sequence.  @fname is passed through unchanged to __f2fs_dentry_hash().
+ *
+ * NOTE(review): on allocation failure -ENOMEM is returned through
+ * f2fs_hash_t, so callers receive it as a hash value rather than as an
+ * error.  The signature has no error channel, so this is left unchanged
+ * here; fixing it needs an interface change.
+ */
+f2fs_hash_t f2fs_dentry_hash(const struct inode *dir,
+               const struct qstr *name_info, struct fscrypt_name *fname)
+{
+#ifdef CONFIG_UNICODE
+       struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+       const struct unicode_map *um = sbi->s_encoding;
+       struct qstr folded;
+       unsigned char *buff;
+       f2fs_hash_t hash;       /* kept in the hash type, not a plain int */
+       int dlen;
+
+       if (!name_info->len || !IS_CASEFOLDED(dir))
+               goto opaque_seq;
+
+       buff = f2fs_kzalloc(sbi, PATH_MAX, GFP_KERNEL);
+       if (!buff)
+               return -ENOMEM;
+
+       dlen = utf8_casefold(um, name_info, buff, PATH_MAX);
+       if (dlen < 0) {
+               /* name cannot be casefolded: hash the raw bytes instead */
+               kvfree(buff);
+               goto opaque_seq;
+       }
+
+       folded.name = buff;
+       folded.len = dlen;
+       hash = __f2fs_dentry_hash(&folded, fname);
+
+       kvfree(buff);
+       return hash;
+
+opaque_seq:
+#endif
+       return __f2fs_dentry_hash(name_info, fname);
+}
index 3613efc..896db04 100644 (file)
@@ -131,6 +131,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
 
        err = f2fs_get_node_info(fio.sbi, dn->nid, &ni);
        if (err) {
+               f2fs_truncate_data_blocks_range(dn, 1);
                f2fs_put_dnode(dn);
                return err;
        }
@@ -320,7 +321,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
                return NULL;
        }
 
-       namehash = f2fs_dentry_hash(&name, fname);
+       namehash = f2fs_dentry_hash(dir, &name, fname);
 
        inline_dentry = inline_data_addr(dir, ipage);
 
@@ -580,7 +581,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
 
        f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 
-       name_hash = f2fs_dentry_hash(new_name, NULL);
+       name_hash = f2fs_dentry_hash(dir, new_name, NULL);
        f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
 
        set_page_dirty(ipage);
@@ -588,6 +589,11 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
        /* we don't need to mark_inode_dirty now */
        if (inode) {
                f2fs_i_pino_write(inode, dir->i_ino);
+
+               /* synchronize inode page's data from inode cache */
+               if (is_inode_flag_set(inode, FI_NEW_INODE))
+                       f2fs_update_inode(inode, page);
+
                f2fs_put_page(page, 1);
        }
 
@@ -704,7 +710,13 @@ int f2fs_inline_data_fiemap(struct inode *inode,
        if (IS_ERR(ipage))
                return PTR_ERR(ipage);
 
-       if (!f2fs_has_inline_data(inode)) {
+       if ((S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
+                               !f2fs_has_inline_data(inode)) {
+               err = -EAGAIN;
+               goto out;
+       }
+
+       if (S_ISDIR(inode->i_mode) && !f2fs_has_inline_dentry(inode)) {
                err = -EAGAIN;
                goto out;
        }
index 06da75d..db4fec3 100644 (file)
@@ -48,9 +48,11 @@ void f2fs_set_inode_flags(struct inode *inode)
                new_fl |= S_ENCRYPTED;
        if (file_is_verity(inode))
                new_fl |= S_VERITY;
+       if (flags & F2FS_CASEFOLD_FL)
+               new_fl |= S_CASEFOLD;
        inode_set_flags(inode, new_fl,
                        S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|
-                       S_ENCRYPTED|S_VERITY);
+                       S_ENCRYPTED|S_VERITY|S_CASEFOLD);
 }
 
 static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -616,7 +618,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
        if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
                return 0;
 
-       if (f2fs_is_checkpoint_ready(sbi))
+       if (!f2fs_is_checkpoint_ready(sbi))
                return -ENOSPC;
 
        /*
@@ -695,7 +697,8 @@ retry:
 
        if (err) {
                f2fs_update_inode_page(inode);
-               set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+               if (dquot_initialize_needed(inode))
+                       set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
        }
        sb_end_intwrite(inode->i_sb);
 no_delete:
@@ -705,7 +708,7 @@ no_delete:
        stat_dec_inline_dir(inode);
        stat_dec_inline_inode(inode);
 
-       if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG) &&
+       if (likely(!f2fs_cp_error(sbi) &&
                                !is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
                f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
        else
index c5b9904..4faf06e 100644 (file)
@@ -272,9 +272,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
-       err = f2fs_is_checkpoint_ready(sbi);
-       if (err)
-               return err;
+       if (!f2fs_is_checkpoint_ready(sbi))
+               return -ENOSPC;
 
        err = dquot_initialize(dir);
        if (err)
@@ -321,9 +320,8 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
-       err = f2fs_is_checkpoint_ready(sbi);
-       if (err)
-               return err;
+       if (!f2fs_is_checkpoint_ready(sbi))
+               return -ENOSPC;
 
        err = fscrypt_prepare_link(old_dentry, dir, dentry);
        if (err)
@@ -489,6 +487,17 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
                goto out_iput;
        }
 out_splice:
+#ifdef CONFIG_UNICODE
+       if (!inode && IS_CASEFOLDED(dir)) {
+               /* Eventually we want to call d_add_ci(dentry, NULL)
+                * for negative dentries in the encoding case as
+                * well.  For now, prevent the negative dentry
+                * from being cached.
+                */
+               trace_f2fs_lookup_end(dir, dentry, ino, err);
+               return NULL;
+       }
+#endif
        new = d_splice_alias(inode, dentry);
        err = PTR_ERR_OR_ZERO(new);
        trace_f2fs_lookup_end(dir, dentry, ino, err);
@@ -537,6 +546,16 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
                goto fail;
        }
        f2fs_delete_entry(de, page, dir, inode);
+#ifdef CONFIG_UNICODE
+       /* VFS negative dentries are incompatible with encoding and
+        * case-insensitivity. Eventually we'll want to avoid
+        * invalidating the dentries here, along with returning the
+        * negative dentries at f2fs_lookup(), once that is better
+        * supported by the VFS for the CI case.
+        */
+       if (IS_CASEFOLDED(dir))
+               d_invalidate(dentry);
+#endif
        f2fs_unlock_op(sbi);
 
        if (IS_DIRSYNC(dir))
@@ -571,9 +590,8 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
-       err = f2fs_is_checkpoint_ready(sbi);
-       if (err)
-               return err;
+       if (!f2fs_is_checkpoint_ready(sbi))
+               return -ENOSPC;
 
        err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
                                      &disk_link);
@@ -703,9 +721,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
-       err = f2fs_is_checkpoint_ready(sbi);
-       if (err)
-               return err;
+       if (!f2fs_is_checkpoint_ready(sbi))
+               return -ENOSPC;
 
        err = dquot_initialize(dir);
        if (err)
@@ -804,6 +821,8 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       if (!f2fs_is_checkpoint_ready(sbi))
+               return -ENOSPC;
 
        if (IS_ENCRYPTED(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) {
                int err = fscrypt_get_encryption_info(dir);
@@ -840,9 +859,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
-       err = f2fs_is_checkpoint_ready(sbi);
-       if (err)
-               return err;
+       if (!f2fs_is_checkpoint_ready(sbi))
+               return -ENOSPC;
 
        if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
                        (!projid_eq(F2FS_I(new_dir)->i_projid,
@@ -1035,9 +1053,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
-       err = f2fs_is_checkpoint_ready(sbi);
-       if (err)
-               return err;
+       if (!f2fs_is_checkpoint_ready(sbi))
+               return -ENOSPC;
 
        if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
                        !projid_eq(F2FS_I(new_dir)->i_projid,
@@ -1250,6 +1267,7 @@ const struct inode_operations f2fs_dir_inode_operations = {
 #ifdef CONFIG_F2FS_FS_XATTR
        .listxattr      = f2fs_listxattr,
 #endif
+       .fiemap         = f2fs_fiemap,
 };
 
 const struct inode_operations f2fs_symlink_inode_operations = {
index a18b2a8..8b66bc4 100644 (file)
@@ -1524,7 +1524,8 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
        if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
                goto redirty_out;
 
-       if (wbc->sync_mode == WB_SYNC_NONE &&
+       if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+                       wbc->sync_mode == WB_SYNC_NONE &&
                        IS_DNODE(page) && is_cold_node(page))
                goto redirty_out;
 
@@ -1762,6 +1763,47 @@ out:
        return ret ? -EIO: 0;
 }
 
+/*
+ * Match callback passed to find_inode_nowait() by flush_dirty_inode().
+ *
+ * Returns 1, with a reference taken through igrab(), only when @inode has
+ * inode number @ino, has FI_DIRTY_INODE set and its gdirty_list entry is
+ * not empty.  Returns 0 otherwise, including when igrab() fails.  @data is
+ * unused.
+ */
+static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       bool queued;
+
+       if (inode->i_ino != ino ||
+                       !is_inode_flag_set(inode, FI_DIRTY_INODE))
+               return 0;
+
+       /* gdirty_list membership is sampled under the DIRTY_META lock */
+       spin_lock(&sbi->inode_lock[DIRTY_META]);
+       queued = !list_empty(&F2FS_I(inode)->gdirty_list);
+       spin_unlock(&sbi->inode_lock[DIRTY_META]);
+
+       if (!queued)
+               return 0;
+
+       /* pin the inode for the caller; a failed igrab() means no match */
+       return igrab(inode) ? 1 : 0;
+}
+
+/*
+ * Look up, via f2fs_match_ino(), the cached inode whose number is stored in
+ * node @page and, if one is found, call f2fs_update_inode() on it with
+ * @page.
+ *
+ * Returns true after the update, with @page unlocked and the inode
+ * reference dropped.  Returns false, without touching @page, when no
+ * matching inode is found.
+ */
+static bool flush_dirty_inode(struct page *page)
+{
+       struct f2fs_sb_info *sbi = F2FS_P_SB(page);
+       nid_t ino = ino_of_node(page);
+       struct inode *dirty;
+
+       dirty = find_inode_nowait(sbi->sb, ino, f2fs_match_ino, NULL);
+       if (dirty) {
+               f2fs_update_inode(dirty, page);
+               unlock_page(page);
+
+               /* drop the reference taken in f2fs_match_ino() */
+               iput(dirty);
+               return true;
+       }
+
+       return false;
+}
+
 int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
                                struct writeback_control *wbc,
                                bool do_balance, enum iostat_type io_type)
@@ -1785,6 +1827,7 @@ next_step:
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
                        bool submitted = false;
+                       bool may_dirty = true;
 
                        /* give a priority to WB_SYNC threads */
                        if (atomic_read(&sbi->wb_sync_req[NODE]) &&
@@ -1832,6 +1875,13 @@ continue_unlock:
                                goto lock_node;
                        }
 
+                       /* flush dirty inode */
+                       if (IS_INODE(page) && may_dirty) {
+                               may_dirty = false;
+                               if (flush_dirty_inode(page))
+                                       goto lock_node;
+                       }
+
                        f2fs_wait_on_page_writeback(page, NODE, true, true);
 
                        if (!clear_page_dirty_for_io(page))
@@ -1860,7 +1910,8 @@ continue_unlock:
        }
 
        if (step < 2) {
-               if (wbc->sync_mode == WB_SYNC_NONE && step == 1)
+               if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+                               wbc->sync_mode == WB_SYNC_NONE && step == 1)
                        goto out;
                step++;
                goto next_step;
@@ -2964,7 +3015,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 
        /* not used nids: 0, node, meta, (and root counted as valid node) */
        nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
-                               sbi->nquota_files - F2FS_RESERVED_NODE_NUM;
+                                               F2FS_RESERVED_NODE_NUM;
        nm_i->nid_cnt[FREE_NID] = 0;
        nm_i->nid_cnt[PREALLOC_NID] = 0;
        nm_i->nat_cnt = 0;
index a661ac3..8087095 100644 (file)
@@ -185,8 +185,6 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
 
 void f2fs_register_inmem_page(struct inode *inode, struct page *page)
 {
-       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-       struct f2fs_inode_info *fi = F2FS_I(inode);
        struct inmem_pages *new;
 
        f2fs_trace_pid(page);
@@ -200,15 +198,11 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page)
        INIT_LIST_HEAD(&new->list);
 
        /* increase reference count with clean state */
-       mutex_lock(&fi->inmem_lock);
        get_page(page);
-       list_add_tail(&new->list, &fi->inmem_pages);
-       spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
-       if (list_empty(&fi->inmem_ilist))
-               list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
-       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+       mutex_lock(&F2FS_I(inode)->inmem_lock);
+       list_add_tail(&new->list, &F2FS_I(inode)->inmem_pages);
        inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
-       mutex_unlock(&fi->inmem_lock);
+       mutex_unlock(&F2FS_I(inode)->inmem_lock);
 
        trace_f2fs_register_inmem_page(page, INMEM);
 }
@@ -330,19 +324,17 @@ void f2fs_drop_inmem_pages(struct inode *inode)
                mutex_lock(&fi->inmem_lock);
                __revoke_inmem_pages(inode, &fi->inmem_pages,
                                                true, false, true);
-
-               if (list_empty(&fi->inmem_pages)) {
-                       spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
-                       if (!list_empty(&fi->inmem_ilist))
-                               list_del_init(&fi->inmem_ilist);
-                       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
-               }
                mutex_unlock(&fi->inmem_lock);
        }
 
        clear_inode_flag(inode, FI_ATOMIC_FILE);
        fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
        stat_dec_atomic_write(inode);
+
+       spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+       if (!list_empty(&fi->inmem_ilist))
+               list_del_init(&fi->inmem_ilist);
+       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
 }
 
 void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
@@ -471,11 +463,6 @@ int f2fs_commit_inmem_pages(struct inode *inode)
 
        mutex_lock(&fi->inmem_lock);
        err = __f2fs_commit_inmem_pages(inode);
-
-       spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
-       if (!list_empty(&fi->inmem_ilist))
-               list_del_init(&fi->inmem_ilist);
-       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
        mutex_unlock(&fi->inmem_lock);
 
        clear_inode_flag(inode, FI_ATOMIC_COMMIT);
@@ -501,7 +488,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
        if (need && excess_cached_nats(sbi))
                f2fs_balance_fs_bg(sbi);
 
-       if (f2fs_is_checkpoint_ready(sbi))
+       if (!f2fs_is_checkpoint_ready(sbi))
                return;
 
        /*
@@ -817,9 +804,13 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
                if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
                        dirty_i->nr_dirty[t]--;
 
-               if (get_valid_blocks(sbi, segno, true) == 0)
+               if (get_valid_blocks(sbi, segno, true) == 0) {
                        clear_bit(GET_SEC_FROM_SEG(sbi, segno),
                                                dirty_i->victim_secmap);
+#ifdef CONFIG_F2FS_CHECK_FS
+                       clear_bit(segno, SIT_I(sbi)->invalid_segmap);
+#endif
+               }
        }
 }
 
@@ -2084,6 +2075,13 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
 
        f2fs_stop_discard_thread(sbi);
 
+       /*
+        * Recovery can cache discard commands, so in error path of
+        * fill_super(), it needs to give a chance to handle them.
+        */
+       if (unlikely(atomic_read(&dcc->discard_cmd_cnt)))
+               f2fs_issue_discard_timeout(sbi);
+
        kvfree(dcc);
        SM_I(sbi)->dcc_info = NULL;
 }
@@ -2156,9 +2154,11 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
                if (!f2fs_test_and_set_bit(offset, se->discard_map))
                        sbi->discard_blks--;
 
-               /* don't overwrite by SSR to keep node chain */
-               if (IS_NODESEG(se->type) &&
-                               !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
+               /*
+                * SSR should never reuse block which is checkpointed
+                * or newly invalidated.
+                */
+               if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
                        if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
                                se->ckpt_valid_blocks++;
                }
@@ -3116,12 +3116,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
                f2fs_inode_chksum_set(sbi, page);
        }
 
+       if (F2FS_IO_ALIGNED(sbi))
+               fio->retry = false;
+
        if (add_list) {
                struct f2fs_bio_info *io;
 
                INIT_LIST_HEAD(&fio->list);
                fio->in_list = true;
-               fio->retry = false;
                io = sbi->write_io[fio->type] + fio->temp;
                spin_lock(&io->io_lock);
                list_add_tail(&fio->list, &io->io_list);
@@ -3447,11 +3449,6 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi)
                seg_i = CURSEG_I(sbi, i);
                segno = le32_to_cpu(ckpt->cur_data_segno[i]);
                blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
-               if (blk_off > ENTRIES_IN_SUM) {
-                       f2fs_bug_on(sbi, 1);
-                       f2fs_put_page(page, 1);
-                       return -EFAULT;
-               }
                seg_i->next_segno = segno;
                reset_curseg(sbi, i, 0);
                seg_i->alloc_type = ckpt->alloc_type[i];
@@ -3941,8 +3938,8 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
        struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
        struct sit_info *sit_i;
        unsigned int sit_segs, start;
-       char *src_bitmap;
-       unsigned int bitmap_size;
+       char *src_bitmap, *bitmap;
+       unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
 
        /* allocate memory for SIT information */
        sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
@@ -3958,33 +3955,37 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
        if (!sit_i->sentries)
                return -ENOMEM;
 
-       bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
-       sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size,
+       main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
+       sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
                                                                GFP_KERNEL);
        if (!sit_i->dirty_sentries_bitmap)
                return -ENOMEM;
 
+#ifdef CONFIG_F2FS_CHECK_FS
+       bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 4;
+#else
+       bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 3;
+#endif
+       sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
+       if (!sit_i->bitmap)
+               return -ENOMEM;
+
+       bitmap = sit_i->bitmap;
+
        for (start = 0; start < MAIN_SEGS(sbi); start++) {
-               sit_i->sentries[start].cur_valid_map
-                       = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
-               sit_i->sentries[start].ckpt_valid_map
-                       = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
-               if (!sit_i->sentries[start].cur_valid_map ||
-                               !sit_i->sentries[start].ckpt_valid_map)
-                       return -ENOMEM;
+               sit_i->sentries[start].cur_valid_map = bitmap;
+               bitmap += SIT_VBLOCK_MAP_SIZE;
+
+               sit_i->sentries[start].ckpt_valid_map = bitmap;
+               bitmap += SIT_VBLOCK_MAP_SIZE;
 
 #ifdef CONFIG_F2FS_CHECK_FS
-               sit_i->sentries[start].cur_valid_map_mir
-                       = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
-               if (!sit_i->sentries[start].cur_valid_map_mir)
-                       return -ENOMEM;
+               sit_i->sentries[start].cur_valid_map_mir = bitmap;
+               bitmap += SIT_VBLOCK_MAP_SIZE;
 #endif
 
-               sit_i->sentries[start].discard_map
-                       = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
-                                                       GFP_KERNEL);
-               if (!sit_i->sentries[start].discard_map)
-                       return -ENOMEM;
+               sit_i->sentries[start].discard_map = bitmap;
+               bitmap += SIT_VBLOCK_MAP_SIZE;
        }
 
        sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
@@ -4004,17 +4005,23 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
        sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
 
        /* setup SIT bitmap from ckeckpoint pack */
-       bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
+       sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
        src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
 
-       sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
+       sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
        if (!sit_i->sit_bitmap)
                return -ENOMEM;
 
 #ifdef CONFIG_F2FS_CHECK_FS
-       sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
+       sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
+                                       sit_bitmap_size, GFP_KERNEL);
        if (!sit_i->sit_bitmap_mir)
                return -ENOMEM;
+
+       sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
+                                       main_bitmap_size, GFP_KERNEL);
+       if (!sit_i->invalid_segmap)
+               return -ENOMEM;
 #endif
 
        /* init SIT information */
@@ -4023,7 +4030,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
        sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
        sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
        sit_i->written_valid_blocks = 0;
-       sit_i->bitmap_size = bitmap_size;
+       sit_i->bitmap_size = sit_bitmap_size;
        sit_i->dirty_sentries = 0;
        sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
        sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
@@ -4161,7 +4168,6 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
                if (start >= MAIN_SEGS(sbi)) {
                        f2fs_err(sbi, "Wrong journal entry on segno %u",
                                 start);
-                       set_sbi_flag(sbi, SBI_NEED_FSCK);
                        err = -EFSCORRUPTED;
                        break;
                }
@@ -4201,7 +4207,6 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
        if (!err && total_node_blocks != valid_node_count(sbi)) {
                f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
                         total_node_blocks, valid_node_count(sbi));
-               set_sbi_flag(sbi, SBI_NEED_FSCK);
                err = -EFSCORRUPTED;
        }
 
@@ -4492,21 +4497,12 @@ static void destroy_free_segmap(struct f2fs_sb_info *sbi)
 static void destroy_sit_info(struct f2fs_sb_info *sbi)
 {
        struct sit_info *sit_i = SIT_I(sbi);
-       unsigned int start;
 
        if (!sit_i)
                return;
 
-       if (sit_i->sentries) {
-               for (start = 0; start < MAIN_SEGS(sbi); start++) {
-                       kvfree(sit_i->sentries[start].cur_valid_map);
-#ifdef CONFIG_F2FS_CHECK_FS
-                       kvfree(sit_i->sentries[start].cur_valid_map_mir);
-#endif
-                       kvfree(sit_i->sentries[start].ckpt_valid_map);
-                       kvfree(sit_i->sentries[start].discard_map);
-               }
-       }
+       if (sit_i->sentries)
+               kvfree(sit_i->bitmap);
        kvfree(sit_i->tmp_map);
 
        kvfree(sit_i->sentries);
@@ -4517,6 +4513,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
        kvfree(sit_i->sit_bitmap);
 #ifdef CONFIG_F2FS_CHECK_FS
        kvfree(sit_i->sit_bitmap_mir);
+       kvfree(sit_i->invalid_segmap);
 #endif
        kvfree(sit_i);
 }
index b746028..325781a 100644 (file)
@@ -226,9 +226,13 @@ struct sit_info {
        block_t sit_base_addr;          /* start block address of SIT area */
        block_t sit_blocks;             /* # of blocks used by SIT area */
        block_t written_valid_blocks;   /* # of valid blocks in main area */
+       char *bitmap;                   /* all bitmaps pointer */
        char *sit_bitmap;               /* SIT bitmap pointer */
 #ifdef CONFIG_F2FS_CHECK_FS
        char *sit_bitmap_mir;           /* SIT bitmap mirror */
+
+       /* bitmap of segments to be ignored by GC in case of errors */
+       unsigned long *invalid_segmap;
 #endif
        unsigned int bitmap_size;       /* SIT bitmap size */
 
@@ -582,13 +586,13 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
                reserved_sections(sbi) + needed);
 }
 
-static inline int f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
+static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
 {
        if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
-               return 0;
+               return true;
        if (likely(!has_not_enough_free_secs(sbi, 0, 0)))
-               return 0;
-       return -ENOSPC;
+               return true;
+       return false;
 }
 
 static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
index f43befd..1443cee 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/f2fs_fs.h>
 #include <linux/sysfs.h>
 #include <linux/quota.h>
+#include <linux/unicode.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -222,6 +223,36 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
        va_end(args);
 }
 
+#ifdef CONFIG_UNICODE
+static const struct f2fs_sb_encodings {
+       __u16 magic;
+       char *name;
+       char *version;
+} f2fs_sb_encoding_map[] = {
+       {F2FS_ENC_UTF8_12_1, "utf8", "12.1.0"},
+};
+
+static int f2fs_sb_read_encoding(const struct f2fs_super_block *sb,
+                                const struct f2fs_sb_encodings **encoding,
+                                __u16 *flags)
+{
+       __u16 magic = le16_to_cpu(sb->s_encoding);
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(f2fs_sb_encoding_map); i++)
+               if (magic == f2fs_sb_encoding_map[i].magic)
+                       break;
+
+       if (i >= ARRAY_SIZE(f2fs_sb_encoding_map))
+               return -EINVAL;
+
+       *encoding = &f2fs_sb_encoding_map[i];
+       *flags = le16_to_cpu(sb->s_encoding_flags);
+
+       return 0;
+}
+#endif
+
 static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
 {
        block_t limit = min((sbi->user_block_count << 1) / 1000,
@@ -798,6 +829,13 @@ static int parse_options(struct super_block *sb, char *options)
                return -EINVAL;
        }
 #endif
+#ifndef CONFIG_UNICODE
+       if (f2fs_sb_has_casefold(sbi)) {
+               f2fs_err(sbi,
+                       "Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE");
+               return -EINVAL;
+       }
+#endif
 
        if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
                f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO",
@@ -873,7 +911,21 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
 
 static int f2fs_drop_inode(struct inode *inode)
 {
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        int ret;
+
+       /*
+        * during filesystem shutdown, if checkpoint is disabled,
+        * drop useless meta/node dirty pages.
+        */
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+               if (inode->i_ino == F2FS_NODE_INO(sbi) ||
+                       inode->i_ino == F2FS_META_INO(sbi)) {
+                       trace_f2fs_drop_inode(inode, 1);
+                       return 1;
+               }
+       }
+
        /*
         * This is to avoid a deadlock condition like below.
         * writeback_single_inode(inode)
@@ -1091,6 +1143,9 @@ static void f2fs_put_super(struct super_block *sb)
        destroy_percpu_info(sbi);
        for (i = 0; i < NR_PAGE_TYPE; i++)
                kvfree(sbi->write_io[i]);
+#ifdef CONFIG_UNICODE
+       utf8_unload(sbi->s_encoding);
+#endif
        kvfree(sbi);
 }
 
@@ -1216,8 +1271,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
        else
                buf->f_bavail = 0;
 
-       avail_node_count = sbi->total_node_count - sbi->nquota_files -
-                                               F2FS_RESERVED_NODE_NUM;
+       avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
 
        if (avail_node_count > user_block_count) {
                buf->f_files = user_block_count;
@@ -1524,6 +1578,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
        bool need_stop_gc = false;
        bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
        bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
+       bool no_io_align = !F2FS_IO_ALIGNED(sbi);
        bool checkpoint_changed;
 #ifdef CONFIG_QUOTA
        int i, j;
@@ -1603,6 +1658,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
                goto restore_opts;
        }
 
+       if (no_io_align == !!F2FS_IO_ALIGNED(sbi)) {
+               err = -EINVAL;
+               f2fs_warn(sbi, "switch io_bits option is not allowed");
+               goto restore_opts;
+       }
+
        if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
                err = -EINVAL;
                f2fs_warn(sbi, "disabling checkpoint not compatible with read-only");
@@ -1981,6 +2042,12 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
        struct inode *inode;
        int err;
 
+       /* if quota sysfile exists, deny enabling quota with specific file */
+       if (f2fs_sb_has_quota_ino(F2FS_SB(sb))) {
+               f2fs_err(F2FS_SB(sb), "quota sysfile already exists");
+               return -EBUSY;
+       }
+
        err = f2fs_quota_sync(sb, type);
        if (err)
                return err;
@@ -2000,7 +2067,7 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
        return 0;
 }
 
-static int f2fs_quota_off(struct super_block *sb, int type)
+static int __f2fs_quota_off(struct super_block *sb, int type)
 {
        struct inode *inode = sb_dqopt(sb)->files[type];
        int err;
@@ -2026,13 +2093,30 @@ out_put:
        return err;
 }
 
+static int f2fs_quota_off(struct super_block *sb, int type)
+{
+       struct f2fs_sb_info *sbi = F2FS_SB(sb);
+       int err;
+
+       err = __f2fs_quota_off(sb, type);
+
+       /*
+        * quotactl can shutdown journalled quota, result in inconsistence
+        * between quota record and fs data by following updates, tag the
+        * flag to let fsck be aware of it.
+        */
+       if (is_journalled_quota(sbi))
+               set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+       return err;
+}
+
 void f2fs_quota_off_umount(struct super_block *sb)
 {
        int type;
        int err;
 
        for (type = 0; type < MAXQUOTAS; type++) {
-               err = f2fs_quota_off(sb, type);
+               err = __f2fs_quota_off(sb, type);
                if (err) {
                        int ret = dquot_quota_off(sb, type);
 
@@ -2617,8 +2701,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
        }
 
        valid_node_count = le32_to_cpu(ckpt->valid_node_count);
-       avail_node_count = sbi->total_node_count - sbi->nquota_files -
-                                               F2FS_RESERVED_NODE_NUM;
+       avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
        if (valid_node_count > avail_node_count) {
                f2fs_err(sbi, "Wrong valid_node_count: %u, avail_node_count: %u",
                         valid_node_count, avail_node_count);
@@ -2657,10 +2740,10 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
                }
        }
        for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
-               for (j = i; j < NR_CURSEG_DATA_TYPE; j++) {
+               for (j = 0; j < NR_CURSEG_DATA_TYPE; j++) {
                        if (le32_to_cpu(ckpt->cur_node_segno[i]) ==
                                le32_to_cpu(ckpt->cur_data_segno[j])) {
-                               f2fs_err(sbi, "Data segment (%u) and Data segment (%u) has the same segno: %u",
+                               f2fs_err(sbi, "Node segment (%u) and Data segment (%u) has the same segno: %u",
                                         i, j,
                                         le32_to_cpu(ckpt->cur_node_segno[i]));
                                return 1;
@@ -3033,6 +3116,53 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
        return 0;
 }
 
+static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
+{
+#ifdef CONFIG_UNICODE
+       if (f2fs_sb_has_casefold(sbi) && !sbi->s_encoding) {
+               const struct f2fs_sb_encodings *encoding_info;
+               struct unicode_map *encoding;
+               __u16 encoding_flags;
+
+               if (f2fs_sb_has_encrypt(sbi)) {
+                       f2fs_err(sbi,
+                               "Can't mount with encoding and encryption");
+                       return -EINVAL;
+               }
+
+               if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info,
+                                         &encoding_flags)) {
+                       f2fs_err(sbi,
+                                "Encoding requested by superblock is unknown");
+                       return -EINVAL;
+               }
+
+               encoding = utf8_load(encoding_info->version);
+               if (IS_ERR(encoding)) {
+                       f2fs_err(sbi,
+                                "can't mount with superblock charset: %s-%s "
+                                "not supported by the kernel. flags: 0x%x.",
+                                encoding_info->name, encoding_info->version,
+                                encoding_flags);
+                       return PTR_ERR(encoding);
+               }
+               f2fs_info(sbi, "Using encoding defined by superblock: "
+                        "%s-%s with flags 0x%hx", encoding_info->name,
+                        encoding_info->version?:"\b", encoding_flags);
+
+               sbi->s_encoding = encoding;
+               sbi->s_encoding_flags = encoding_flags;
+               sbi->sb->s_d_op = &f2fs_dentry_ops;
+       }
+#else
+       if (f2fs_sb_has_casefold(sbi)) {
+               f2fs_err(sbi, "Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE");
+               return -EINVAL;
+       }
+#endif
+       return 0;
+}
+
 static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
 {
        struct f2fs_sm_info *sm_i = SM_I(sbi);
@@ -3129,6 +3259,10 @@ try_onemore:
                                le32_to_cpu(raw_super->log_blocksize);
        sb->s_max_links = F2FS_LINK_MAX;
 
+       err = f2fs_setup_casefold(sbi);
+       if (err)
+               goto free_options;
+
 #ifdef CONFIG_QUOTA
        sb->dq_op = &f2fs_quota_operations;
        sb->s_qcop = &f2fs_quotactl_ops;
@@ -3207,7 +3341,7 @@ try_onemore:
        if (err)
                goto free_bio_info;
 
-       if (F2FS_IO_SIZE(sbi) > 1) {
+       if (F2FS_IO_ALIGNED(sbi)) {
                sbi->write_io_dummy =
                        mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
                if (!sbi->write_io_dummy) {
@@ -3482,6 +3616,10 @@ free_percpu:
 free_bio_info:
        for (i = 0; i < NR_PAGE_TYPE; i++)
                kvfree(sbi->write_io[i]);
+
+#ifdef CONFIG_UNICODE
+       utf8_unload(sbi->s_encoding);
+#endif
 free_options:
 #ifdef CONFIG_QUOTA
        for (i = 0; i < MAXQUOTAS; i++)
index 0cd64f9..b558b64 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/proc_fs.h>
 #include <linux/f2fs_fs.h>
 #include <linux/seq_file.h>
+#include <linux/unicode.h>
 
 #include "f2fs.h"
 #include "segment.h"
@@ -81,6 +82,19 @@ static ssize_t unusable_show(struct f2fs_attr *a,
                (unsigned long long)unusable);
 }
 
+static ssize_t encoding_show(struct f2fs_attr *a,
+               struct f2fs_sb_info *sbi, char *buf)
+{
+#ifdef CONFIG_UNICODE
+       if (f2fs_sb_has_casefold(sbi))
+               return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n",
+                       sbi->s_encoding->charset,
+                       (sbi->s_encoding->version >> 16) & 0xff,
+                       (sbi->s_encoding->version >> 8) & 0xff,
+                       sbi->s_encoding->version & 0xff);
+#endif
+       return snprintf(buf, PAGE_SIZE, "(none)");
+}
 
 static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
                struct f2fs_sb_info *sbi, char *buf)
@@ -137,6 +151,9 @@ static ssize_t features_show(struct f2fs_attr *a,
        if (f2fs_sb_has_sb_chksum(sbi))
                len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
                                len ? ", " : "", "sb_checksum");
+       if (f2fs_sb_has_casefold(sbi))
+               len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
+                               len ? ", " : "", "casefold");
        len += snprintf(buf + len, PAGE_SIZE - len, "\n");
        return len;
 }
@@ -369,6 +386,7 @@ enum feat_id {
        FEAT_LOST_FOUND,
        FEAT_VERITY,
        FEAT_SB_CHECKSUM,
+       FEAT_CASEFOLD,
 };
 
 static ssize_t f2fs_feature_show(struct f2fs_attr *a,
@@ -387,6 +405,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a,
        case FEAT_LOST_FOUND:
        case FEAT_VERITY:
        case FEAT_SB_CHECKSUM:
+       case FEAT_CASEFOLD:
                return snprintf(buf, PAGE_SIZE, "supported\n");
        }
        return 0;
@@ -460,6 +479,7 @@ F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
 F2FS_GENERAL_RO_ATTR(features);
 F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
 F2FS_GENERAL_RO_ATTR(unusable);
+F2FS_GENERAL_RO_ATTR(encoding);
 
 #ifdef CONFIG_FS_ENCRYPTION
 F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO);
@@ -479,6 +499,7 @@ F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND);
 F2FS_FEATURE_RO_ATTR(verity, FEAT_VERITY);
 #endif
 F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM);
+F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD);
 
 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
 static struct attribute *f2fs_attrs[] = {
@@ -523,6 +544,7 @@ static struct attribute *f2fs_attrs[] = {
        ATTR_LIST(features),
        ATTR_LIST(reserved_blocks),
        ATTR_LIST(current_reserved_blocks),
+       ATTR_LIST(encoding),
        NULL,
 };
 ATTRIBUTE_GROUPS(f2fs);
@@ -546,6 +568,7 @@ static struct attribute *f2fs_feat_attrs[] = {
        ATTR_LIST(verity),
 #endif
        ATTR_LIST(sb_checksum),
+       ATTR_LIST(casefold),
        NULL,
 };
 ATTRIBUTE_GROUPS(f2fs_feat);
index b32c456..181900a 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/posix_acl_xattr.h>
 #include "f2fs.h"
 #include "xattr.h"
+#include "segment.h"
 
 static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
                struct dentry *unused, struct inode *inode,
@@ -729,6 +730,11 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        int err;
 
+       if (unlikely(f2fs_cp_error(sbi)))
+               return -EIO;
+       if (!f2fs_is_checkpoint_ready(sbi))
+               return -ENOSPC;
+
        err = dquot_initialize(inode);
        if (err)
                return err;
index 1bda2ab..054acd9 100644 (file)
@@ -88,9 +88,7 @@ static int fat__get_entry(struct inode *dir, loff_t *pos,
        int err, offset;
 
 next:
-       if (*bh)
-               brelse(*bh);
-
+       brelse(*bh);
        *bh = NULL;
        iblock = *pos >> sb->s_blocksize_bits;
        err = fat_bmap(dir, iblock, &phys, &mapped_blocks, 0, false);
@@ -1100,8 +1098,11 @@ static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used,
                        err = -ENOMEM;
                        goto error;
                }
+               /* Avoid race with userspace read via bdev */
+               lock_buffer(bhs[n]);
                memset(bhs[n]->b_data, 0, sb->s_blocksize);
                set_buffer_uptodate(bhs[n]);
+               unlock_buffer(bhs[n]);
                mark_buffer_dirty_inode(bhs[n], dir);
 
                n++;
@@ -1158,6 +1159,8 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts)
        fat_time_unix2fat(sbi, ts, &time, &date, &time_cs);
 
        de = (struct msdos_dir_entry *)bhs[0]->b_data;
+       /* Avoid race with userspace read via bdev */
+       lock_buffer(bhs[0]);
        /* filling the new directory slots ("." and ".." entries) */
        memcpy(de[0].name, MSDOS_DOT, MSDOS_NAME);
        memcpy(de[1].name, MSDOS_DOTDOT, MSDOS_NAME);
@@ -1180,6 +1183,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts)
        de[0].size = de[1].size = 0;
        memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de));
        set_buffer_uptodate(bhs[0]);
+       unlock_buffer(bhs[0]);
        mark_buffer_dirty_inode(bhs[0], dir);
 
        err = fat_zeroed_cluster(dir, blknr, 1, bhs, MAX_BUF_PER_PAGE);
@@ -1237,11 +1241,14 @@ static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
 
                        /* fill the directory entry */
                        copy = min(size, sb->s_blocksize);
+                       /* Avoid race with userspace read via bdev */
+                       lock_buffer(bhs[n]);
                        memcpy(bhs[n]->b_data, slots, copy);
-                       slots += copy;
-                       size -= copy;
                        set_buffer_uptodate(bhs[n]);
+                       unlock_buffer(bhs[n]);
                        mark_buffer_dirty_inode(bhs[n], dir);
+                       slots += copy;
+                       size -= copy;
                        if (!size)
                                break;
                        n++;
index 2659836..3647c65 100644 (file)
@@ -388,8 +388,11 @@ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs,
                                err = -ENOMEM;
                                goto error;
                        }
+                       /* Avoid race with userspace read via bdev */
+                       lock_buffer(c_bh);
                        memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize);
                        set_buffer_uptodate(c_bh);
+                       unlock_buffer(c_bh);
                        mark_buffer_dirty_inode(c_bh, sbi->fat_inode);
                        if (sb->s_flags & SB_SYNCHRONOUS)
                                err = sync_dirty_buffer(c_bh);
index b07b53f..30d55c9 100644 (file)
@@ -327,6 +327,7 @@ void flush_delayed_fput(void)
 {
        delayed_fput(NULL);
 }
+EXPORT_SYMBOL_GPL(flush_delayed_fput);
 
 static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
 
index 87c2c96..138b5b4 100644 (file)
@@ -504,7 +504,6 @@ void put_fs_context(struct fs_context *fc)
        put_net(fc->net_ns);
        put_user_ns(fc->user_ns);
        put_cred(fc->cred);
-       kfree(fc->subtype);
        put_fc_log(fc);
        put_filesystem(fc->fs_type);
        kfree(fc->source);
@@ -571,17 +570,6 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
                return 0;
        }
 
-       if ((fc->fs_type->fs_flags & FS_HAS_SUBTYPE) &&
-           strcmp(param->key, "subtype") == 0) {
-               if (param->type != fs_value_is_string)
-                       return invalf(fc, "VFS: Legacy: Non-string subtype");
-               if (fc->subtype)
-                       return invalf(fc, "VFS: Legacy: Multiple subtype");
-               fc->subtype = param->string;
-               param->string = NULL;
-               return 0;
-       }
-
        if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS)
                return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options");
 
@@ -738,8 +726,6 @@ void vfs_clean_context(struct fs_context *fc)
        fc->s_fs_info = NULL;
        fc->sb_flags = 0;
        security_free_mnt_opts(&fc->security);
-       kfree(fc->subtype);
-       fc->subtype = NULL;
        kfree(fc->source);
        fc->source = NULL;
 
index 24fc5a5..0635cba 100644 (file)
@@ -27,3 +27,14 @@ config CUSE
 
          If you want to develop or use a userspace character device
          based on CUSE, answer Y or M.
+
+config VIRTIO_FS
+       tristate "Virtio Filesystem"
+       depends on FUSE_FS
+       select VIRTIO
+       help
+         The Virtio Filesystem allows guests to mount file systems from the
+          host.
+
+         If you want to share files between guests or with the host, answer Y
+          or M.
index 9485019..6419a2b 100644 (file)
@@ -5,5 +5,6 @@
 
 obj-$(CONFIG_FUSE_FS) += fuse.o
 obj-$(CONFIG_CUSE) += cuse.o
+obj-$(CONFIG_VIRTIO_FS) += virtio_fs.o
 
 fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
index bab7a0d..00015d8 100644 (file)
@@ -142,11 +142,10 @@ static int cuse_open(struct inode *inode, struct file *file)
 
 static int cuse_release(struct inode *inode, struct file *file)
 {
-       struct fuse_inode *fi = get_fuse_inode(inode);
        struct fuse_file *ff = file->private_data;
        struct fuse_conn *fc = ff->fc;
 
-       fuse_sync_release(fi, ff, file->f_flags);
+       fuse_sync_release(NULL, ff, file->f_flags);
        fuse_conn_put(fc);
 
        return 0;
@@ -299,6 +298,14 @@ static void cuse_gendev_release(struct device *dev)
        kfree(dev);
 }
 
+struct cuse_init_args {
+       struct fuse_args_pages ap;
+       struct cuse_init_in in;
+       struct cuse_init_out out;
+       struct page *page;
+       struct fuse_page_desc desc;
+};
+
 /**
  * cuse_process_init_reply - finish initializing CUSE channel
  *
@@ -306,21 +313,22 @@ static void cuse_gendev_release(struct device *dev)
  * required data structures for it.  Please read the comment at the
  * top of this file for high level overview.
  */
-static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
+static void cuse_process_init_reply(struct fuse_conn *fc,
+                                   struct fuse_args *args, int error)
 {
+       struct cuse_init_args *ia = container_of(args, typeof(*ia), ap.args);
+       struct fuse_args_pages *ap = &ia->ap;
        struct cuse_conn *cc = fc_to_cc(fc), *pos;
-       struct cuse_init_out *arg = req->out.args[0].value;
-       struct page *page = req->pages[0];
+       struct cuse_init_out *arg = &ia->out;
+       struct page *page = ap->pages[0];
        struct cuse_devinfo devinfo = { };
        struct device *dev;
        struct cdev *cdev;
        dev_t devt;
        int rc, i;
 
-       if (req->out.h.error ||
-           arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
+       if (error || arg->major != FUSE_KERNEL_VERSION || arg->minor < 11)
                goto err;
-       }
 
        fc->minor = arg->minor;
        fc->max_read = max_t(unsigned, arg->max_read, 4096);
@@ -329,7 +337,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
        /* parse init reply */
        cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL;
 
-       rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size,
+       rc = cuse_parse_devinfo(page_address(page), ap->args.out_args[1].size,
                                &devinfo);
        if (rc)
                goto err;
@@ -396,7 +404,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
        dev_set_uevent_suppress(dev, 0);
        kobject_uevent(&dev->kobj, KOBJ_ADD);
 out:
-       kfree(arg);
+       kfree(ia);
        __free_page(page);
        return;
 
@@ -415,55 +423,49 @@ err:
 static int cuse_send_init(struct cuse_conn *cc)
 {
        int rc;
-       struct fuse_req *req;
        struct page *page;
        struct fuse_conn *fc = &cc->fc;
-       struct cuse_init_in *arg;
-       void *outarg;
+       struct cuse_init_args *ia;
+       struct fuse_args_pages *ap;
 
        BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
 
-       req = fuse_get_req_for_background(fc, 1);
-       if (IS_ERR(req)) {
-               rc = PTR_ERR(req);
-               goto err;
-       }
-
        rc = -ENOMEM;
        page = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!page)
-               goto err_put_req;
+               goto err;
 
-       outarg = kzalloc(sizeof(struct cuse_init_out), GFP_KERNEL);
-       if (!outarg)
+       ia = kzalloc(sizeof(*ia), GFP_KERNEL);
+       if (!ia)
                goto err_free_page;
 
-       arg = &req->misc.cuse_init_in;
-       arg->major = FUSE_KERNEL_VERSION;
-       arg->minor = FUSE_KERNEL_MINOR_VERSION;
-       arg->flags |= CUSE_UNRESTRICTED_IOCTL;
-       req->in.h.opcode = CUSE_INIT;
-       req->in.numargs = 1;
-       req->in.args[0].size = sizeof(struct cuse_init_in);
-       req->in.args[0].value = arg;
-       req->out.numargs = 2;
-       req->out.args[0].size = sizeof(struct cuse_init_out);
-       req->out.args[0].value = outarg;
-       req->out.args[1].size = CUSE_INIT_INFO_MAX;
-       req->out.argvar = 1;
-       req->out.argpages = 1;
-       req->pages[0] = page;
-       req->page_descs[0].length = req->out.args[1].size;
-       req->num_pages = 1;
-       req->end = cuse_process_init_reply;
-       fuse_request_send_background(fc, req);
-
-       return 0;
-
+       ap = &ia->ap;
+       ia->in.major = FUSE_KERNEL_VERSION;
+       ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
+       ia->in.flags |= CUSE_UNRESTRICTED_IOCTL;
+       ap->args.opcode = CUSE_INIT;
+       ap->args.in_numargs = 1;
+       ap->args.in_args[0].size = sizeof(ia->in);
+       ap->args.in_args[0].value = &ia->in;
+       ap->args.out_numargs = 2;
+       ap->args.out_args[0].size = sizeof(ia->out);
+       ap->args.out_args[0].value = &ia->out;
+       ap->args.out_args[1].size = CUSE_INIT_INFO_MAX;
+       ap->args.out_argvar = 1;
+       ap->args.out_pages = 1;
+       ap->num_pages = 1;
+       ap->pages = &ia->page;
+       ap->descs = &ia->desc;
+       ia->page = page;
+       ia->desc.length = ap->args.out_args[1].size;
+       ap->args.end = cuse_process_init_reply;
+
+       rc = fuse_simple_background(fc, &ap->args, GFP_KERNEL);
+       if (rc) {
+               kfree(ia);
 err_free_page:
-       __free_page(page);
-err_put_req:
-       fuse_put_request(fc, req);
+               __free_page(page);
+       }
 err:
        return rc;
 }
@@ -504,9 +506,9 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
         * Limit the cuse channel to requests that can
         * be represented in file->f_cred->user_ns.
         */
-       fuse_conn_init(&cc->fc, file->f_cred->user_ns);
+       fuse_conn_init(&cc->fc, file->f_cred->user_ns, &fuse_dev_fiq_ops, NULL);
 
-       fud = fuse_dev_alloc(&cc->fc);
+       fud = fuse_dev_alloc_install(&cc->fc);
        if (!fud) {
                kfree(cc);
                return -ENOMEM;
@@ -519,6 +521,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
        rc = cuse_send_init(cc);
        if (rc) {
                fuse_dev_free(fud);
+               fuse_conn_put(&cc->fc);
                return rc;
        }
        file->private_data = fud;
index ea82375..dadd617 100644 (file)
@@ -40,107 +40,30 @@ static struct fuse_dev *fuse_get_dev(struct file *file)
        return READ_ONCE(file->private_data);
 }
 
-static void fuse_request_init(struct fuse_req *req, struct page **pages,
-                             struct fuse_page_desc *page_descs,
-                             unsigned npages)
+static void fuse_request_init(struct fuse_req *req)
 {
        INIT_LIST_HEAD(&req->list);
        INIT_LIST_HEAD(&req->intr_entry);
        init_waitqueue_head(&req->waitq);
        refcount_set(&req->count, 1);
-       req->pages = pages;
-       req->page_descs = page_descs;
-       req->max_pages = npages;
        __set_bit(FR_PENDING, &req->flags);
 }
 
-static struct page **fuse_req_pages_alloc(unsigned int npages, gfp_t flags,
-                                         struct fuse_page_desc **desc)
-{
-       struct page **pages;
-
-       pages = kzalloc(npages * (sizeof(struct page *) +
-                                 sizeof(struct fuse_page_desc)), flags);
-       *desc = (void *) pages + npages * sizeof(struct page *);
-
-       return pages;
-}
-
-static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
+static struct fuse_req *fuse_request_alloc(gfp_t flags)
 {
        struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
-       if (req) {
-               struct page **pages = NULL;
-               struct fuse_page_desc *page_descs = NULL;
-
-               WARN_ON(npages > FUSE_MAX_MAX_PAGES);
-               if (npages > FUSE_REQ_INLINE_PAGES) {
-                       pages = fuse_req_pages_alloc(npages, flags,
-                                                    &page_descs);
-                       if (!pages) {
-                               kmem_cache_free(fuse_req_cachep, req);
-                               return NULL;
-                       }
-               } else if (npages) {
-                       pages = req->inline_pages;
-                       page_descs = req->inline_page_descs;
-               }
+       if (req)
+               fuse_request_init(req);
 
-               fuse_request_init(req, pages, page_descs, npages);
-       }
        return req;
 }
 
-struct fuse_req *fuse_request_alloc(unsigned npages)
-{
-       return __fuse_request_alloc(npages, GFP_KERNEL);
-}
-EXPORT_SYMBOL_GPL(fuse_request_alloc);
-
-struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
-{
-       return __fuse_request_alloc(npages, GFP_NOFS);
-}
-
-static void fuse_req_pages_free(struct fuse_req *req)
-{
-       if (req->pages != req->inline_pages)
-               kfree(req->pages);
-}
-
-bool fuse_req_realloc_pages(struct fuse_conn *fc, struct fuse_req *req,
-                           gfp_t flags)
-{
-       struct page **pages;
-       struct fuse_page_desc *page_descs;
-       unsigned int npages = min_t(unsigned int,
-                                   max_t(unsigned int, req->max_pages * 2,
-                                         FUSE_DEFAULT_MAX_PAGES_PER_REQ),
-                                   fc->max_pages);
-       WARN_ON(npages <= req->max_pages);
-
-       pages = fuse_req_pages_alloc(npages, flags, &page_descs);
-       if (!pages)
-               return false;
-
-       memcpy(pages, req->pages, sizeof(struct page *) * req->max_pages);
-       memcpy(page_descs, req->page_descs,
-              sizeof(struct fuse_page_desc) * req->max_pages);
-       fuse_req_pages_free(req);
-       req->pages = pages;
-       req->page_descs = page_descs;
-       req->max_pages = npages;
-
-       return true;
-}
-
-void fuse_request_free(struct fuse_req *req)
+static void fuse_request_free(struct fuse_req *req)
 {
-       fuse_req_pages_free(req);
        kmem_cache_free(fuse_req_cachep, req);
 }
 
-void __fuse_get_request(struct fuse_req *req)
+static void __fuse_get_request(struct fuse_req *req)
 {
        refcount_inc(&req->count);
 }
@@ -177,8 +100,9 @@ static void fuse_drop_waiting(struct fuse_conn *fc)
        }
 }
 
-static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
-                                      bool for_background)
+static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
+
+static struct fuse_req *fuse_get_req(struct fuse_conn *fc, bool for_background)
 {
        struct fuse_req *req;
        int err;
@@ -201,7 +125,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
        if (fc->conn_error)
                goto out;
 
-       req = fuse_request_alloc(npages);
+       req = fuse_request_alloc(GFP_KERNEL);
        err = -ENOMEM;
        if (!req) {
                if (for_background)
@@ -229,101 +153,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
        return ERR_PTR(err);
 }
 
-struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
-{
-       return __fuse_get_req(fc, npages, false);
-}
-EXPORT_SYMBOL_GPL(fuse_get_req);
-
-struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
-                                            unsigned npages)
-{
-       return __fuse_get_req(fc, npages, true);
-}
-EXPORT_SYMBOL_GPL(fuse_get_req_for_background);
-
-/*
- * Return request in fuse_file->reserved_req.  However that may
- * currently be in use.  If that is the case, wait for it to become
- * available.
- */
-static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
-                                        struct file *file)
-{
-       struct fuse_req *req = NULL;
-       struct fuse_inode *fi = get_fuse_inode(file_inode(file));
-       struct fuse_file *ff = file->private_data;
-
-       do {
-               wait_event(fc->reserved_req_waitq, ff->reserved_req);
-               spin_lock(&fi->lock);
-               if (ff->reserved_req) {
-                       req = ff->reserved_req;
-                       ff->reserved_req = NULL;
-                       req->stolen_file = get_file(file);
-               }
-               spin_unlock(&fi->lock);
-       } while (!req);
-
-       return req;
-}
-
-/*
- * Put stolen request back into fuse_file->reserved_req
- */
-static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
-{
-       struct file *file = req->stolen_file;
-       struct fuse_inode *fi = get_fuse_inode(file_inode(file));
-       struct fuse_file *ff = file->private_data;
-
-       WARN_ON(req->max_pages);
-       spin_lock(&fi->lock);
-       memset(req, 0, sizeof(*req));
-       fuse_request_init(req, NULL, NULL, 0);
-       BUG_ON(ff->reserved_req);
-       ff->reserved_req = req;
-       wake_up_all(&fc->reserved_req_waitq);
-       spin_unlock(&fi->lock);
-       fput(file);
-}
-
-/*
- * Gets a requests for a file operation, always succeeds
- *
- * This is used for sending the FLUSH request, which must get to
- * userspace, due to POSIX locks which may need to be unlocked.
- *
- * If allocation fails due to OOM, use the reserved request in
- * fuse_file.
- *
- * This is very unlikely to deadlock accidentally, since the
- * filesystem should not have it's own file open.  If deadlock is
- * intentional, it can still be broken by "aborting" the filesystem.
- */
-struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
-                                            struct file *file)
-{
-       struct fuse_req *req;
-
-       atomic_inc(&fc->num_waiting);
-       wait_event(fc->blocked_waitq, fc->initialized);
-       /* Matches smp_wmb() in fuse_set_initialized() */
-       smp_rmb();
-       req = fuse_request_alloc(0);
-       if (!req)
-               req = get_reserved_req(fc, file);
-
-       req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
-       req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
-       req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
-
-       __set_bit(FR_WAITING, &req->flags);
-       __clear_bit(FR_BACKGROUND, &req->flags);
-       return req;
-}
-
-void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 {
        if (refcount_dec_and_test(&req->count)) {
                if (test_bit(FR_BACKGROUND, &req->flags)) {
@@ -342,15 +172,11 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
                        fuse_drop_waiting(fc);
                }
 
-               if (req->stolen_file)
-                       put_reserved_req(fc, req);
-               else
-                       fuse_request_free(req);
+               fuse_request_free(req);
        }
 }
-EXPORT_SYMBOL_GPL(fuse_put_request);
 
-static unsigned len_args(unsigned numargs, struct fuse_arg *args)
+unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
 {
        unsigned nbytes = 0;
        unsigned i;
@@ -360,25 +186,47 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args)
 
        return nbytes;
 }
+EXPORT_SYMBOL_GPL(fuse_len_args);
 
-static u64 fuse_get_unique(struct fuse_iqueue *fiq)
+u64 fuse_get_unique(struct fuse_iqueue *fiq)
 {
        fiq->reqctr += FUSE_REQ_ID_STEP;
        return fiq->reqctr;
 }
+EXPORT_SYMBOL_GPL(fuse_get_unique);
 
 static unsigned int fuse_req_hash(u64 unique)
 {
        return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
 }
 
-static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req)
+/*
+ * A new request is available, wake fiq->waitq
+ */
+static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq)
+__releases(fiq->lock)
+{
+       wake_up(&fiq->waitq);
+       kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
+       spin_unlock(&fiq->lock);
+}
+
+const struct fuse_iqueue_ops fuse_dev_fiq_ops = {
+       .wake_forget_and_unlock         = fuse_dev_wake_and_unlock,
+       .wake_interrupt_and_unlock      = fuse_dev_wake_and_unlock,
+       .wake_pending_and_unlock        = fuse_dev_wake_and_unlock,
+};
+EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops);
+
+static void queue_request_and_unlock(struct fuse_iqueue *fiq,
+                                    struct fuse_req *req)
+__releases(fiq->lock)
 {
        req->in.h.len = sizeof(struct fuse_in_header) +
-               len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
+               fuse_len_args(req->args->in_numargs,
+                             (struct fuse_arg *) req->args->in_args);
        list_add_tail(&req->list, &fiq->pending);
-       wake_up_locked(&fiq->waitq);
-       kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
+       fiq->ops->wake_pending_and_unlock(fiq);
 }
 
 void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
@@ -389,16 +237,15 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
        forget->forget_one.nodeid = nodeid;
        forget->forget_one.nlookup = nlookup;
 
-       spin_lock(&fiq->waitq.lock);
+       spin_lock(&fiq->lock);
        if (fiq->connected) {
                fiq->forget_list_tail->next = forget;
                fiq->forget_list_tail = forget;
-               wake_up_locked(&fiq->waitq);
-               kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
+               fiq->ops->wake_forget_and_unlock(fiq);
        } else {
                kfree(forget);
+               spin_unlock(&fiq->lock);
        }
-       spin_unlock(&fiq->waitq.lock);
 }
 
 static void flush_bg_queue(struct fuse_conn *fc)
@@ -412,10 +259,9 @@ static void flush_bg_queue(struct fuse_conn *fc)
                req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
                list_del(&req->list);
                fc->active_background++;
-               spin_lock(&fiq->waitq.lock);
+               spin_lock(&fiq->lock);
                req->in.h.unique = fuse_get_unique(fiq);
-               queue_request(fiq, req);
-               spin_unlock(&fiq->waitq.lock);
+               queue_request_and_unlock(fiq, req);
        }
 }
 
@@ -427,9 +273,10 @@ static void flush_bg_queue(struct fuse_conn *fc)
  * the 'end' callback is called if given, else the reference to the
  * request is released
  */
-static void request_end(struct fuse_conn *fc, struct fuse_req *req)
+void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req)
 {
        struct fuse_iqueue *fiq = &fc->iq;
+       bool async = req->args->end;
 
        if (test_and_set_bit(FR_FINISHED, &req->flags))
                goto put_request;
@@ -439,9 +286,9 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
         * smp_mb() from queue_interrupt().
         */
        if (!list_empty(&req->intr_entry)) {
-               spin_lock(&fiq->waitq.lock);
+               spin_lock(&fiq->lock);
                list_del_init(&req->intr_entry);
-               spin_unlock(&fiq->waitq.lock);
+               spin_unlock(&fiq->lock);
        }
        WARN_ON(test_bit(FR_PENDING, &req->flags));
        WARN_ON(test_bit(FR_SENT, &req->flags));
@@ -475,18 +322,19 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
                wake_up(&req->waitq);
        }
 
-       if (req->end)
-               req->end(fc, req);
+       if (async)
+               req->args->end(fc, req->args, req->out.h.error);
 put_request:
        fuse_put_request(fc, req);
 }
+EXPORT_SYMBOL_GPL(fuse_request_end);
 
 static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
 {
-       spin_lock(&fiq->waitq.lock);
+       spin_lock(&fiq->lock);
        /* Check for we've sent request to interrupt this req */
        if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) {
-               spin_unlock(&fiq->waitq.lock);
+               spin_unlock(&fiq->lock);
                return -EINVAL;
        }
 
@@ -499,13 +347,13 @@ static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
                smp_mb();
                if (test_bit(FR_FINISHED, &req->flags)) {
                        list_del_init(&req->intr_entry);
-                       spin_unlock(&fiq->waitq.lock);
+                       spin_unlock(&fiq->lock);
                        return 0;
                }
-               wake_up_locked(&fiq->waitq);
-               kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
+               fiq->ops->wake_interrupt_and_unlock(fiq);
+       } else {
+               spin_unlock(&fiq->lock);
        }
-       spin_unlock(&fiq->waitq.lock);
        return 0;
 }
 
@@ -535,16 +383,16 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
                if (!err)
                        return;
 
-               spin_lock(&fiq->waitq.lock);
+               spin_lock(&fiq->lock);
                /* Request is not yet in userspace, bail out */
                if (test_bit(FR_PENDING, &req->flags)) {
                        list_del(&req->list);
-                       spin_unlock(&fiq->waitq.lock);
+                       spin_unlock(&fiq->lock);
                        __fuse_put_request(req);
                        req->out.h.error = -EINTR;
                        return;
                }
-               spin_unlock(&fiq->waitq.lock);
+               spin_unlock(&fiq->lock);
        }
 
        /*
@@ -559,101 +407,110 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
        struct fuse_iqueue *fiq = &fc->iq;
 
        BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
-       spin_lock(&fiq->waitq.lock);
+       spin_lock(&fiq->lock);
        if (!fiq->connected) {
-               spin_unlock(&fiq->waitq.lock);
+               spin_unlock(&fiq->lock);
                req->out.h.error = -ENOTCONN;
        } else {
                req->in.h.unique = fuse_get_unique(fiq);
-               queue_request(fiq, req);
                /* acquire extra reference, since request is still needed
-                  after request_end() */
+                  after fuse_request_end() */
                __fuse_get_request(req);
-               spin_unlock(&fiq->waitq.lock);
+               queue_request_and_unlock(fiq, req);
 
                request_wait_answer(fc, req);
-               /* Pairs with smp_wmb() in request_end() */
+               /* Pairs with smp_wmb() in fuse_request_end() */
                smp_rmb();
        }
 }
 
-void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
-{
-       __set_bit(FR_ISREPLY, &req->flags);
-       if (!test_bit(FR_WAITING, &req->flags)) {
-               __set_bit(FR_WAITING, &req->flags);
-               atomic_inc(&fc->num_waiting);
-       }
-       __fuse_request_send(fc, req);
-}
-EXPORT_SYMBOL_GPL(fuse_request_send);
-
 static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
 {
-       if (fc->minor < 4 && args->in.h.opcode == FUSE_STATFS)
-               args->out.args[0].size = FUSE_COMPAT_STATFS_SIZE;
+       if (fc->minor < 4 && args->opcode == FUSE_STATFS)
+               args->out_args[0].size = FUSE_COMPAT_STATFS_SIZE;
 
        if (fc->minor < 9) {
-               switch (args->in.h.opcode) {
+               switch (args->opcode) {
                case FUSE_LOOKUP:
                case FUSE_CREATE:
                case FUSE_MKNOD:
                case FUSE_MKDIR:
                case FUSE_SYMLINK:
                case FUSE_LINK:
-                       args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+                       args->out_args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
                        break;
                case FUSE_GETATTR:
                case FUSE_SETATTR:
-                       args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+                       args->out_args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
                        break;
                }
        }
        if (fc->minor < 12) {
-               switch (args->in.h.opcode) {
+               switch (args->opcode) {
                case FUSE_CREATE:
-                       args->in.args[0].size = sizeof(struct fuse_open_in);
+                       args->in_args[0].size = sizeof(struct fuse_open_in);
                        break;
                case FUSE_MKNOD:
-                       args->in.args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
+                       args->in_args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
                        break;
                }
        }
 }
 
+static void fuse_force_creds(struct fuse_conn *fc, struct fuse_req *req)
+{
+       req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
+       req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
+       req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
+}
+
+static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
+{
+       req->in.h.opcode = args->opcode;
+       req->in.h.nodeid = args->nodeid;
+       req->args = args;
+}
+
 ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
 {
        struct fuse_req *req;
        ssize_t ret;
 
-       req = fuse_get_req(fc, 0);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
+       if (args->force) {
+               atomic_inc(&fc->num_waiting);
+               req = fuse_request_alloc(GFP_KERNEL | __GFP_NOFAIL);
+
+               if (!args->nocreds)
+                       fuse_force_creds(fc, req);
+
+               __set_bit(FR_WAITING, &req->flags);
+               __set_bit(FR_FORCE, &req->flags);
+       } else {
+               WARN_ON(args->nocreds);
+               req = fuse_get_req(fc, false);
+               if (IS_ERR(req))
+                       return PTR_ERR(req);
+       }
 
        /* Needs to be done after fuse_get_req() so that fc->minor is valid */
        fuse_adjust_compat(fc, args);
+       fuse_args_to_req(req, args);
 
-       req->in.h.opcode = args->in.h.opcode;
-       req->in.h.nodeid = args->in.h.nodeid;
-       req->in.numargs = args->in.numargs;
-       memcpy(req->in.args, args->in.args,
-              args->in.numargs * sizeof(struct fuse_in_arg));
-       req->out.argvar = args->out.argvar;
-       req->out.numargs = args->out.numargs;
-       memcpy(req->out.args, args->out.args,
-              args->out.numargs * sizeof(struct fuse_arg));
-       fuse_request_send(fc, req);
+       if (!args->noreply)
+               __set_bit(FR_ISREPLY, &req->flags);
+       __fuse_request_send(fc, req);
        ret = req->out.h.error;
-       if (!ret && args->out.argvar) {
-               BUG_ON(args->out.numargs != 1);
-               ret = req->out.args[0].size;
+       if (!ret && args->out_argvar) {
+               BUG_ON(args->out_numargs == 0);
+               ret = args->out_args[args->out_numargs - 1].size;
        }
        fuse_put_request(fc, req);
 
        return ret;
 }
 
-bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req)
+static bool fuse_request_queue_background(struct fuse_conn *fc,
+                                         struct fuse_req *req)
 {
        bool queued = false;
 
@@ -681,56 +538,63 @@ bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req)
        return queued;
 }
 
-void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
+int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args,
+                           gfp_t gfp_flags)
 {
-       WARN_ON(!req->end);
+       struct fuse_req *req;
+
+       if (args->force) {
+               WARN_ON(!args->nocreds);
+               req = fuse_request_alloc(gfp_flags);
+               if (!req)
+                       return -ENOMEM;
+               __set_bit(FR_BACKGROUND, &req->flags);
+       } else {
+               WARN_ON(args->nocreds);
+               req = fuse_get_req(fc, true);
+               if (IS_ERR(req))
+                       return PTR_ERR(req);
+       }
+
+       fuse_args_to_req(req, args);
+
        if (!fuse_request_queue_background(fc, req)) {
-               req->out.h.error = -ENOTCONN;
-               req->end(fc, req);
                fuse_put_request(fc, req);
+               return -ENOTCONN;
        }
+
+       return 0;
 }
-EXPORT_SYMBOL_GPL(fuse_request_send_background);
+EXPORT_SYMBOL_GPL(fuse_simple_background);
 
-static int fuse_request_send_notify_reply(struct fuse_conn *fc,
-                                         struct fuse_req *req, u64 unique)
+static int fuse_simple_notify_reply(struct fuse_conn *fc,
+                                   struct fuse_args *args, u64 unique)
 {
-       int err = -ENODEV;
+       struct fuse_req *req;
        struct fuse_iqueue *fiq = &fc->iq;
+       int err = 0;
+
+       req = fuse_get_req(fc, false);
+       if (IS_ERR(req))
+               return PTR_ERR(req);
 
        __clear_bit(FR_ISREPLY, &req->flags);
        req->in.h.unique = unique;
-       spin_lock(&fiq->waitq.lock);
+
+       fuse_args_to_req(req, args);
+
+       spin_lock(&fiq->lock);
        if (fiq->connected) {
-               queue_request(fiq, req);
-               err = 0;
+               queue_request_and_unlock(fiq, req);
+       } else {
+               err = -ENODEV;
+               spin_unlock(&fiq->lock);
+               fuse_put_request(fc, req);
        }
-       spin_unlock(&fiq->waitq.lock);
 
        return err;
 }
 
-void fuse_force_forget(struct file *file, u64 nodeid)
-{
-       struct inode *inode = file_inode(file);
-       struct fuse_conn *fc = get_fuse_conn(inode);
-       struct fuse_req *req;
-       struct fuse_forget_in inarg;
-
-       memset(&inarg, 0, sizeof(inarg));
-       inarg.nlookup = 1;
-       req = fuse_get_req_nofail_nopages(fc, file);
-       req->in.h.opcode = FUSE_FORGET;
-       req->in.h.nodeid = nodeid;
-       req->in.numargs = 1;
-       req->in.args[0].size = sizeof(inarg);
-       req->in.args[0].value = &inarg;
-       __clear_bit(FR_ISREPLY, &req->flags);
-       __fuse_request_send(fc, req);
-       /* ignore errors */
-       fuse_put_request(fc, req);
-}
-
 /*
  * Lock the request.  Up to the next unlock_request() there mustn't be
  * anything that could cause a page-fault.  If the request was already
@@ -1084,14 +948,15 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
 {
        unsigned i;
        struct fuse_req *req = cs->req;
+       struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
+
 
-       for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
+       for (i = 0; i < ap->num_pages && (nbytes || zeroing); i++) {
                int err;
-               unsigned offset = req->page_descs[i].offset;
-               unsigned count = min(nbytes, req->page_descs[i].length);
+               unsigned int offset = ap->descs[i].offset;
+               unsigned int count = min(nbytes, ap->descs[i].length);
 
-               err = fuse_copy_page(cs, &req->pages[i], offset, count,
-                                    zeroing);
+               err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing);
                if (err)
                        return err;
 
@@ -1149,12 +1014,12 @@ static int request_pending(struct fuse_iqueue *fiq)
  * Unlike other requests this is assembled on demand, without a need
  * to allocate a separate fuse_req structure.
  *
- * Called with fiq->waitq.lock held, releases it
+ * Called with fiq->lock held, releases it
  */
 static int fuse_read_interrupt(struct fuse_iqueue *fiq,
                               struct fuse_copy_state *cs,
                               size_t nbytes, struct fuse_req *req)
-__releases(fiq->waitq.lock)
+__releases(fiq->lock)
 {
        struct fuse_in_header ih;
        struct fuse_interrupt_in arg;
@@ -1169,7 +1034,7 @@ __releases(fiq->waitq.lock)
        ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT);
        arg.unique = req->in.h.unique;
 
-       spin_unlock(&fiq->waitq.lock);
+       spin_unlock(&fiq->lock);
        if (nbytes < reqsize)
                return -EINVAL;
 
@@ -1181,9 +1046,9 @@ __releases(fiq->waitq.lock)
        return err ? err : reqsize;
 }
 
-static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq,
-                                              unsigned max,
-                                              unsigned *countp)
+struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
+                                            unsigned int max,
+                                            unsigned int *countp)
 {
        struct fuse_forget_link *head = fiq->forget_list_head.next;
        struct fuse_forget_link **newhead = &head;
@@ -1202,14 +1067,15 @@ static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq,
 
        return head;
 }
+EXPORT_SYMBOL(fuse_dequeue_forget);
 
 static int fuse_read_single_forget(struct fuse_iqueue *fiq,
                                   struct fuse_copy_state *cs,
                                   size_t nbytes)
-__releases(fiq->waitq.lock)
+__releases(fiq->lock)
 {
        int err;
-       struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL);
+       struct fuse_forget_link *forget = fuse_dequeue_forget(fiq, 1, NULL);
        struct fuse_forget_in arg = {
                .nlookup = forget->forget_one.nlookup,
        };
@@ -1220,7 +1086,7 @@ __releases(fiq->waitq.lock)
                .len = sizeof(ih) + sizeof(arg),
        };
 
-       spin_unlock(&fiq->waitq.lock);
+       spin_unlock(&fiq->lock);
        kfree(forget);
        if (nbytes < ih.len)
                return -EINVAL;
@@ -1238,7 +1104,7 @@ __releases(fiq->waitq.lock)
 
 static int fuse_read_batch_forget(struct fuse_iqueue *fiq,
                                   struct fuse_copy_state *cs, size_t nbytes)
-__releases(fiq->waitq.lock)
+__releases(fiq->lock)
 {
        int err;
        unsigned max_forgets;
@@ -1252,13 +1118,13 @@ __releases(fiq->waitq.lock)
        };
 
        if (nbytes < ih.len) {
-               spin_unlock(&fiq->waitq.lock);
+               spin_unlock(&fiq->lock);
                return -EINVAL;
        }
 
        max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
-       head = dequeue_forget(fiq, max_forgets, &count);
-       spin_unlock(&fiq->waitq.lock);
+       head = fuse_dequeue_forget(fiq, max_forgets, &count);
+       spin_unlock(&fiq->lock);
 
        arg.count = count;
        ih.len += count * sizeof(struct fuse_forget_one);
@@ -1288,7 +1154,7 @@ __releases(fiq->waitq.lock)
 static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq,
                            struct fuse_copy_state *cs,
                            size_t nbytes)
-__releases(fiq->waitq.lock)
+__releases(fiq->lock)
 {
        if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL)
                return fuse_read_single_forget(fiq, cs, nbytes);
@@ -1302,7 +1168,7 @@ __releases(fiq->waitq.lock)
  * the pending list and copies request data to userspace buffer.  If
  * no reply is needed (FORGET) or request has been aborted or there
  * was an error during the copying then it's finished by calling
- * request_end().  Otherwise add it to the processing list, and set
+ * fuse_request_end().  Otherwise add it to the processing list, and set
  * the 'sent' flag.
  */
 static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
@@ -1313,21 +1179,42 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
        struct fuse_iqueue *fiq = &fc->iq;
        struct fuse_pqueue *fpq = &fud->pq;
        struct fuse_req *req;
-       struct fuse_in *in;
+       struct fuse_args *args;
        unsigned reqsize;
        unsigned int hash;
 
+       /*
+        * Require a sane minimum read buffer: one with capacity for the fixed
+        * part of any request header plus negotiated max_write room for data.
+        *
+        * Historically libfuse reserves 4K for fixed header room, but e.g.
+        * GlusterFS reserves only 80 bytes
+        *
+        *      = `sizeof(fuse_in_header) + sizeof(fuse_write_in)`
+        *
+        * which is the absolute minimum any sane filesystem should be using
+        * for header room.
+        */
+       if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER,
+                          sizeof(struct fuse_in_header) +
+                          sizeof(struct fuse_write_in) +
+                          fc->max_write))
+               return -EINVAL;
+
  restart:
-       spin_lock(&fiq->waitq.lock);
-       err = -EAGAIN;
-       if ((file->f_flags & O_NONBLOCK) && fiq->connected &&
-           !request_pending(fiq))
-               goto err_unlock;
+       for (;;) {
+               spin_lock(&fiq->lock);
+               if (!fiq->connected || request_pending(fiq))
+                       break;
+               spin_unlock(&fiq->lock);
 
-       err = wait_event_interruptible_exclusive_locked(fiq->waitq,
+               if (file->f_flags & O_NONBLOCK)
+                       return -EAGAIN;
+               err = wait_event_interruptible_exclusive(fiq->waitq,
                                !fiq->connected || request_pending(fiq));
-       if (err)
-               goto err_unlock;
+               if (err)
+                       return err;
+       }
 
        if (!fiq->connected) {
                err = fc->aborted ? -ECONNABORTED : -ENODEV;
@@ -1351,28 +1238,28 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
        req = list_entry(fiq->pending.next, struct fuse_req, list);
        clear_bit(FR_PENDING, &req->flags);
        list_del_init(&req->list);
-       spin_unlock(&fiq->waitq.lock);
+       spin_unlock(&fiq->lock);
 
-       in = &req->in;
-       reqsize = in->h.len;
+       args = req->args;
+       reqsize = req->in.h.len;
 
        /* If request is too large, reply with an error and restart the read */
        if (nbytes < reqsize) {
                req->out.h.error = -EIO;
                /* SETXATTR is special, since it may contain too large data */
-               if (in->h.opcode == FUSE_SETXATTR)
+               if (args->opcode == FUSE_SETXATTR)
                        req->out.h.error = -E2BIG;
-               request_end(fc, req);
+               fuse_request_end(fc, req);
                goto restart;
        }
        spin_lock(&fpq->lock);
        list_add(&req->list, &fpq->io);
        spin_unlock(&fpq->lock);
        cs->req = req;
-       err = fuse_copy_one(cs, &in->h, sizeof(in->h));
+       err = fuse_copy_one(cs, &req->in.h, sizeof(req->in.h));
        if (!err)
-               err = fuse_copy_args(cs, in->numargs, in->argpages,
-                                    (struct fuse_arg *) in->args, 0);
+               err = fuse_copy_args(cs, args->in_numargs, args->in_pages,
+                                    (struct fuse_arg *) args->in_args, 0);
        fuse_copy_finish(cs);
        spin_lock(&fpq->lock);
        clear_bit(FR_LOCKED, &req->flags);
@@ -1405,11 +1292,11 @@ out_end:
        if (!test_bit(FR_PRIVATE, &req->flags))
                list_del_init(&req->list);
        spin_unlock(&fpq->lock);
-       request_end(fc, req);
+       fuse_request_end(fc, req);
        return err;
 
  err_unlock:
-       spin_unlock(&fiq->waitq.lock);
+       spin_unlock(&fiq->lock);
        return err;
 }
 
@@ -1728,9 +1615,19 @@ out_finish:
        return err;
 }
 
-static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
+struct fuse_retrieve_args {
+       struct fuse_args_pages ap;
+       struct fuse_notify_retrieve_in inarg;
+};
+
+static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_args *args,
+                             int error)
 {
-       release_pages(req->pages, req->num_pages);
+       struct fuse_retrieve_args *ra =
+               container_of(args, typeof(*ra), ap.args);
+
+       release_pages(ra->ap.pages, ra->ap.num_pages);
+       kfree(ra);
 }
 
 static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
@@ -1738,13 +1635,16 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
 {
        int err;
        struct address_space *mapping = inode->i_mapping;
-       struct fuse_req *req;
        pgoff_t index;
        loff_t file_size;
        unsigned int num;
        unsigned int offset;
        size_t total_len = 0;
        unsigned int num_pages;
+       struct fuse_retrieve_args *ra;
+       size_t args_size = sizeof(*ra);
+       struct fuse_args_pages *ap;
+       struct fuse_args *args;
 
        offset = outarg->offset & ~PAGE_MASK;
        file_size = i_size_read(inode);
@@ -1758,19 +1658,26 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
        num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
        num_pages = min(num_pages, fc->max_pages);
 
-       req = fuse_get_req(fc, num_pages);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
+       args_size += num_pages * (sizeof(ap->pages[0]) + sizeof(ap->descs[0]));
 
-       req->in.h.opcode = FUSE_NOTIFY_REPLY;
-       req->in.h.nodeid = outarg->nodeid;
-       req->in.numargs = 2;
-       req->in.argpages = 1;
-       req->end = fuse_retrieve_end;
+       ra = kzalloc(args_size, GFP_KERNEL);
+       if (!ra)
+               return -ENOMEM;
+
+       ap = &ra->ap;
+       ap->pages = (void *) (ra + 1);
+       ap->descs = (void *) (ap->pages + num_pages);
+
+       args = &ap->args;
+       args->nodeid = outarg->nodeid;
+       args->opcode = FUSE_NOTIFY_REPLY;
+       args->in_numargs = 2;
+       args->in_pages = true;
+       args->end = fuse_retrieve_end;
 
        index = outarg->offset >> PAGE_SHIFT;
 
-       while (num && req->num_pages < num_pages) {
+       while (num && ap->num_pages < num_pages) {
                struct page *page;
                unsigned int this_num;
 
@@ -1779,27 +1686,25 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
                        break;
 
                this_num = min_t(unsigned, num, PAGE_SIZE - offset);
-               req->pages[req->num_pages] = page;
-               req->page_descs[req->num_pages].offset = offset;
-               req->page_descs[req->num_pages].length = this_num;
-               req->num_pages++;
+               ap->pages[ap->num_pages] = page;
+               ap->descs[ap->num_pages].offset = offset;
+               ap->descs[ap->num_pages].length = this_num;
+               ap->num_pages++;
 
                offset = 0;
                num -= this_num;
                total_len += this_num;
                index++;
        }
-       req->misc.retrieve_in.offset = outarg->offset;
-       req->misc.retrieve_in.size = total_len;
-       req->in.args[0].size = sizeof(req->misc.retrieve_in);
-       req->in.args[0].value = &req->misc.retrieve_in;
-       req->in.args[1].size = total_len;
+       ra->inarg.offset = outarg->offset;
+       ra->inarg.size = total_len;
+       args->in_args[0].size = sizeof(ra->inarg);
+       args->in_args[0].value = &ra->inarg;
+       args->in_args[1].size = total_len;
 
-       err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
-       if (err) {
-               fuse_retrieve_end(fc, req);
-               fuse_put_request(fc, req);
-       }
+       err = fuse_simple_notify_reply(fc, args, outarg->notify_unique);
+       if (err)
+               fuse_retrieve_end(fc, args, err);
 
        return err;
 }
@@ -1885,27 +1790,25 @@ static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
        return NULL;
 }
 
-static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
+static int copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
                         unsigned nbytes)
 {
        unsigned reqsize = sizeof(struct fuse_out_header);
 
-       if (out->h.error)
-               return nbytes != reqsize ? -EINVAL : 0;
-
-       reqsize += len_args(out->numargs, out->args);
+       reqsize += fuse_len_args(args->out_numargs, args->out_args);
 
-       if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
+       if (reqsize < nbytes || (reqsize > nbytes && !args->out_argvar))
                return -EINVAL;
        else if (reqsize > nbytes) {
-               struct fuse_arg *lastarg = &out->args[out->numargs-1];
+               struct fuse_arg *lastarg = &args->out_args[args->out_numargs-1];
                unsigned diffsize = reqsize - nbytes;
+
                if (diffsize > lastarg->size)
                        return -EINVAL;
                lastarg->size -= diffsize;
        }
-       return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
-                             out->page_zeroing);
+       return fuse_copy_args(cs, args->out_numargs, args->out_pages,
+                             args->out_args, args->page_zeroing);
 }
 
 /*
@@ -1913,7 +1816,7 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
  * the write buffer.  The request is then searched on the processing
  * list by the unique ID found in the header.  If found, then remove
  * it from the list and copy the rest of the buffer to the request.
- * The request is finished by calling request_end()
+ * The request is finished by calling fuse_request_end().
  */
 static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
                                 struct fuse_copy_state *cs, size_t nbytes)
@@ -1984,10 +1887,13 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
        set_bit(FR_LOCKED, &req->flags);
        spin_unlock(&fpq->lock);
        cs->req = req;
-       if (!req->out.page_replace)
+       if (!req->args->page_replace)
                cs->move_pages = 0;
 
-       err = copy_out_args(cs, &req->out, nbytes);
+       if (oh.error)
+               err = nbytes != sizeof(oh) ? -EINVAL : 0;
+       else
+               err = copy_out_args(cs, req->args, nbytes);
        fuse_copy_finish(cs);
 
        spin_lock(&fpq->lock);
@@ -2000,7 +1906,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
                list_del_init(&req->list);
        spin_unlock(&fpq->lock);
 
-       request_end(fc, req);
+       fuse_request_end(fc, req);
 out:
        return err ? err : nbytes;
 
@@ -2121,12 +2027,12 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
        fiq = &fud->fc->iq;
        poll_wait(file, &fiq->waitq, wait);
 
-       spin_lock(&fiq->waitq.lock);
+       spin_lock(&fiq->lock);
        if (!fiq->connected)
                mask = EPOLLERR;
        else if (request_pending(fiq))
                mask |= EPOLLIN | EPOLLRDNORM;
-       spin_unlock(&fiq->waitq.lock);
+       spin_unlock(&fiq->lock);
 
        return mask;
 }
@@ -2140,7 +2046,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
                req->out.h.error = -ECONNABORTED;
                clear_bit(FR_SENT, &req->flags);
                list_del_init(&req->list);
-               request_end(fc, req);
+               fuse_request_end(fc, req);
        }
 }
 
@@ -2221,15 +2127,15 @@ void fuse_abort_conn(struct fuse_conn *fc)
                flush_bg_queue(fc);
                spin_unlock(&fc->bg_lock);
 
-               spin_lock(&fiq->waitq.lock);
+               spin_lock(&fiq->lock);
                fiq->connected = 0;
                list_for_each_entry(req, &fiq->pending, list)
                        clear_bit(FR_PENDING, &req->flags);
                list_splice_tail_init(&fiq->pending, &to_end);
                while (forget_pending(fiq))
-                       kfree(dequeue_forget(fiq, 1, NULL));
-               wake_up_all_locked(&fiq->waitq);
-               spin_unlock(&fiq->waitq.lock);
+                       kfree(fuse_dequeue_forget(fiq, 1, NULL));
+               wake_up_all(&fiq->waitq);
+               spin_unlock(&fiq->lock);
                kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
                end_polls(fc);
                wake_up_all(&fc->blocked_waitq);
@@ -2296,7 +2202,7 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
        if (new->private_data)
                return -EINVAL;
 
-       fud = fuse_dev_alloc(fc);
+       fud = fuse_dev_alloc_install(fc);
        if (!fud)
                return -ENOMEM;
 
index dd0f64f..d572c90 100644 (file)
@@ -24,20 +24,54 @@ static void fuse_advise_use_readdirplus(struct inode *dir)
        set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
 }
 
+#if BITS_PER_LONG >= 64
+static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
+{
+       entry->d_fsdata = (void *) time;
+}
+
+static inline u64 fuse_dentry_time(const struct dentry *entry)
+{
+       return (u64)entry->d_fsdata;
+}
+
+#else
 union fuse_dentry {
        u64 time;
        struct rcu_head rcu;
 };
 
-static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
+static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
 {
-       ((union fuse_dentry *) entry->d_fsdata)->time = time;
+       ((union fuse_dentry *) dentry->d_fsdata)->time = time;
 }
 
-static inline u64 fuse_dentry_time(struct dentry *entry)
+static inline u64 fuse_dentry_time(const struct dentry *entry)
 {
        return ((union fuse_dentry *) entry->d_fsdata)->time;
 }
+#endif
+
+static void fuse_dentry_settime(struct dentry *dentry, u64 time)
+{
+       struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
+       bool delete = !time && fc->delete_stale;
+       /*
+        * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
+        * Don't care about races; either way it's just an optimization.
+        */
+       if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
+           (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
+               spin_lock(&dentry->d_lock);
+               if (!delete)
+                       dentry->d_flags &= ~DCACHE_OP_DELETE;
+               else
+                       dentry->d_flags |= DCACHE_OP_DELETE;
+               spin_unlock(&dentry->d_lock);
+       }
+
+       __fuse_dentry_settime(dentry, time);
+}
 
 /*
  * FUSE caches dentries and attributes with separate timeout.  The
@@ -139,14 +173,14 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
                             struct fuse_entry_out *outarg)
 {
        memset(outarg, 0, sizeof(struct fuse_entry_out));
-       args->in.h.opcode = FUSE_LOOKUP;
-       args->in.h.nodeid = nodeid;
-       args->in.numargs = 1;
-       args->in.args[0].size = name->len + 1;
-       args->in.args[0].value = name->name;
-       args->out.numargs = 1;
-       args->out.args[0].size = sizeof(struct fuse_entry_out);
-       args->out.args[0].value = outarg;
+       args->opcode = FUSE_LOOKUP;
+       args->nodeid = nodeid;
+       args->in_numargs = 1;
+       args->in_args[0].size = name->len + 1;
+       args->in_args[0].value = name->name;
+       args->out_numargs = 1;
+       args->out_args[0].size = sizeof(struct fuse_entry_out);
+       args->out_args[0].value = outarg;
 }
 
 /*
@@ -242,9 +276,11 @@ invalid:
        goto out;
 }
 
+#if BITS_PER_LONG < 64
 static int fuse_dentry_init(struct dentry *dentry)
 {
-       dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL);
+       dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
+                                  GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
 
        return dentry->d_fsdata ? 0 : -ENOMEM;
 }
@@ -254,16 +290,27 @@ static void fuse_dentry_release(struct dentry *dentry)
 
        kfree_rcu(fd, rcu);
 }
+#endif
+
+static int fuse_dentry_delete(const struct dentry *dentry)
+{
+       return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
+}
 
 const struct dentry_operations fuse_dentry_operations = {
        .d_revalidate   = fuse_dentry_revalidate,
+       .d_delete       = fuse_dentry_delete,
+#if BITS_PER_LONG < 64
        .d_init         = fuse_dentry_init,
        .d_release      = fuse_dentry_release,
+#endif
 };
 
 const struct dentry_operations fuse_root_dentry_operations = {
+#if BITS_PER_LONG < 64
        .d_init         = fuse_dentry_init,
        .d_release      = fuse_dentry_release,
+#endif
 };
 
 int fuse_valid_type(int m)
@@ -410,18 +457,18 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
        inarg.flags = flags;
        inarg.mode = mode;
        inarg.umask = current_umask();
-       args.in.h.opcode = FUSE_CREATE;
-       args.in.h.nodeid = get_node_id(dir);
-       args.in.numargs = 2;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.in.args[1].size = entry->d_name.len + 1;
-       args.in.args[1].value = entry->d_name.name;
-       args.out.numargs = 2;
-       args.out.args[0].size = sizeof(outentry);
-       args.out.args[0].value = &outentry;
-       args.out.args[1].size = sizeof(outopen);
-       args.out.args[1].value = &outopen;
+       args.opcode = FUSE_CREATE;
+       args.nodeid = get_node_id(dir);
+       args.in_numargs = 2;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.in_args[1].size = entry->d_name.len + 1;
+       args.in_args[1].value = entry->d_name.name;
+       args.out_numargs = 2;
+       args.out_args[0].size = sizeof(outentry);
+       args.out_args[0].value = &outentry;
+       args.out_args[1].size = sizeof(outopen);
+       args.out_args[1].value = &outopen;
        err = fuse_simple_request(fc, &args);
        if (err)
                goto out_free_ff;
@@ -526,10 +573,10 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
                return -ENOMEM;
 
        memset(&outarg, 0, sizeof(outarg));
-       args->in.h.nodeid = get_node_id(dir);
-       args->out.numargs = 1;
-       args->out.args[0].size = sizeof(outarg);
-       args->out.args[0].value = &outarg;
+       args->nodeid = get_node_id(dir);
+       args->out_numargs = 1;
+       args->out_args[0].size = sizeof(outarg);
+       args->out_args[0].value = &outarg;
        err = fuse_simple_request(fc, args);
        if (err)
                goto out_put_forget_req;
@@ -582,12 +629,12 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
        inarg.mode = mode;
        inarg.rdev = new_encode_dev(rdev);
        inarg.umask = current_umask();
-       args.in.h.opcode = FUSE_MKNOD;
-       args.in.numargs = 2;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.in.args[1].size = entry->d_name.len + 1;
-       args.in.args[1].value = entry->d_name.name;
+       args.opcode = FUSE_MKNOD;
+       args.in_numargs = 2;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.in_args[1].size = entry->d_name.len + 1;
+       args.in_args[1].value = entry->d_name.name;
        return create_new_entry(fc, &args, dir, entry, mode);
 }
 
@@ -609,12 +656,12 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
        memset(&inarg, 0, sizeof(inarg));
        inarg.mode = mode;
        inarg.umask = current_umask();
-       args.in.h.opcode = FUSE_MKDIR;
-       args.in.numargs = 2;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.in.args[1].size = entry->d_name.len + 1;
-       args.in.args[1].value = entry->d_name.name;
+       args.opcode = FUSE_MKDIR;
+       args.in_numargs = 2;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.in_args[1].size = entry->d_name.len + 1;
+       args.in_args[1].value = entry->d_name.name;
        return create_new_entry(fc, &args, dir, entry, S_IFDIR);
 }
 
@@ -625,12 +672,12 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
        unsigned len = strlen(link) + 1;
        FUSE_ARGS(args);
 
-       args.in.h.opcode = FUSE_SYMLINK;
-       args.in.numargs = 2;
-       args.in.args[0].size = entry->d_name.len + 1;
-       args.in.args[0].value = entry->d_name.name;
-       args.in.args[1].size = len;
-       args.in.args[1].value = link;
+       args.opcode = FUSE_SYMLINK;
+       args.in_numargs = 2;
+       args.in_args[0].size = entry->d_name.len + 1;
+       args.in_args[0].value = entry->d_name.name;
+       args.in_args[1].size = len;
+       args.in_args[1].value = link;
        return create_new_entry(fc, &args, dir, entry, S_IFLNK);
 }
 
@@ -648,11 +695,11 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
        struct fuse_conn *fc = get_fuse_conn(dir);
        FUSE_ARGS(args);
 
-       args.in.h.opcode = FUSE_UNLINK;
-       args.in.h.nodeid = get_node_id(dir);
-       args.in.numargs = 1;
-       args.in.args[0].size = entry->d_name.len + 1;
-       args.in.args[0].value = entry->d_name.name;
+       args.opcode = FUSE_UNLINK;
+       args.nodeid = get_node_id(dir);
+       args.in_numargs = 1;
+       args.in_args[0].size = entry->d_name.len + 1;
+       args.in_args[0].value = entry->d_name.name;
        err = fuse_simple_request(fc, &args);
        if (!err) {
                struct inode *inode = d_inode(entry);
@@ -684,11 +731,11 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
        struct fuse_conn *fc = get_fuse_conn(dir);
        FUSE_ARGS(args);
 
-       args.in.h.opcode = FUSE_RMDIR;
-       args.in.h.nodeid = get_node_id(dir);
-       args.in.numargs = 1;
-       args.in.args[0].size = entry->d_name.len + 1;
-       args.in.args[0].value = entry->d_name.name;
+       args.opcode = FUSE_RMDIR;
+       args.nodeid = get_node_id(dir);
+       args.in_numargs = 1;
+       args.in_args[0].size = entry->d_name.len + 1;
+       args.in_args[0].value = entry->d_name.name;
        err = fuse_simple_request(fc, &args);
        if (!err) {
                clear_nlink(d_inode(entry));
@@ -711,15 +758,15 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
        memset(&inarg, 0, argsize);
        inarg.newdir = get_node_id(newdir);
        inarg.flags = flags;
-       args.in.h.opcode = opcode;
-       args.in.h.nodeid = get_node_id(olddir);
-       args.in.numargs = 3;
-       args.in.args[0].size = argsize;
-       args.in.args[0].value = &inarg;
-       args.in.args[1].size = oldent->d_name.len + 1;
-       args.in.args[1].value = oldent->d_name.name;
-       args.in.args[2].size = newent->d_name.len + 1;
-       args.in.args[2].value = newent->d_name.name;
+       args.opcode = opcode;
+       args.nodeid = get_node_id(olddir);
+       args.in_numargs = 3;
+       args.in_args[0].size = argsize;
+       args.in_args[0].value = &inarg;
+       args.in_args[1].size = oldent->d_name.len + 1;
+       args.in_args[1].value = oldent->d_name.name;
+       args.in_args[2].size = newent->d_name.len + 1;
+       args.in_args[2].value = newent->d_name.name;
        err = fuse_simple_request(fc, &args);
        if (!err) {
                /* ctime changes */
@@ -796,12 +843,12 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
 
        memset(&inarg, 0, sizeof(inarg));
        inarg.oldnodeid = get_node_id(inode);
-       args.in.h.opcode = FUSE_LINK;
-       args.in.numargs = 2;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.in.args[1].size = newent->d_name.len + 1;
-       args.in.args[1].value = newent->d_name.name;
+       args.opcode = FUSE_LINK;
+       args.in_numargs = 2;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.in_args[1].size = newent->d_name.len + 1;
+       args.in_args[1].value = newent->d_name.name;
        err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
        /* Contrary to "normal" filesystems it can happen that link
           makes two "logical" inodes point to the same "physical"
@@ -884,14 +931,14 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
                inarg.getattr_flags |= FUSE_GETATTR_FH;
                inarg.fh = ff->fh;
        }
-       args.in.h.opcode = FUSE_GETATTR;
-       args.in.h.nodeid = get_node_id(inode);
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.out.numargs = 1;
-       args.out.args[0].size = sizeof(outarg);
-       args.out.args[0].value = &outarg;
+       args.opcode = FUSE_GETATTR;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(outarg);
+       args.out_args[0].value = &outarg;
        err = fuse_simple_request(fc, &args);
        if (!err) {
                if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
@@ -1056,11 +1103,11 @@ static int fuse_access(struct inode *inode, int mask)
 
        memset(&inarg, 0, sizeof(inarg));
        inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
-       args.in.h.opcode = FUSE_ACCESS;
-       args.in.h.nodeid = get_node_id(inode);
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
+       args.opcode = FUSE_ACCESS;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
        err = fuse_simple_request(fc, &args);
        if (err == -ENOSYS) {
                fc->no_access = 1;
@@ -1152,38 +1199,36 @@ static int fuse_permission(struct inode *inode, int mask)
 static int fuse_readlink_page(struct inode *inode, struct page *page)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
-       struct fuse_req *req;
-       int err;
+       struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
+       struct fuse_args_pages ap = {
+               .num_pages = 1,
+               .pages = &page,
+               .descs = &desc,
+       };
+       char *link;
+       ssize_t res;
+
+       ap.args.opcode = FUSE_READLINK;
+       ap.args.nodeid = get_node_id(inode);
+       ap.args.out_pages = true;
+       ap.args.out_argvar = true;
+       ap.args.page_zeroing = true;
+       ap.args.out_numargs = 1;
+       ap.args.out_args[0].size = desc.length;
+       res = fuse_simple_request(fc, &ap.args);
 
-       req = fuse_get_req(fc, 1);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
-
-       req->out.page_zeroing = 1;
-       req->out.argpages = 1;
-       req->num_pages = 1;
-       req->pages[0] = page;
-       req->page_descs[0].length = PAGE_SIZE - 1;
-       req->in.h.opcode = FUSE_READLINK;
-       req->in.h.nodeid = get_node_id(inode);
-       req->out.argvar = 1;
-       req->out.numargs = 1;
-       req->out.args[0].size = PAGE_SIZE - 1;
-       fuse_request_send(fc, req);
-       err = req->out.h.error;
+       fuse_invalidate_atime(inode);
 
-       if (!err) {
-               char *link = page_address(page);
-               size_t len = req->out.args[0].size;
+       if (res < 0)
+               return res;
 
-               BUG_ON(len >= PAGE_SIZE);
-               link[len] = '\0';
-       }
+       if (WARN_ON(res >= PAGE_SIZE))
+               return -EIO;
 
-       fuse_put_request(fc, req);
-       fuse_invalidate_atime(inode);
+       link = page_address(page);
+       link[res] = '\0';
 
-       return err;
+       return 0;
 }
 
 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
@@ -1383,14 +1428,14 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
                              struct fuse_setattr_in *inarg_p,
                              struct fuse_attr_out *outarg_p)
 {
-       args->in.h.opcode = FUSE_SETATTR;
-       args->in.h.nodeid = get_node_id(inode);
-       args->in.numargs = 1;
-       args->in.args[0].size = sizeof(*inarg_p);
-       args->in.args[0].value = inarg_p;
-       args->out.numargs = 1;
-       args->out.args[0].size = sizeof(*outarg_p);
-       args->out.args[0].value = outarg_p;
+       args->opcode = FUSE_SETATTR;
+       args->nodeid = get_node_id(inode);
+       args->in_numargs = 1;
+       args->in_args[0].size = sizeof(*inarg_p);
+       args->in_args[0].value = inarg_p;
+       args->out_numargs = 1;
+       args->out_args[0].size = sizeof(*outarg_p);
+       args->out_args[0].value = outarg_p;
 }
 
 /*
index 5ae2828..0f02256 100644 (file)
 #include <linux/falloc.h>
 #include <linux/uio.h>
 
+static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
+                                     struct fuse_page_desc **desc)
+{
+       struct page **pages;
+
+       pages = kzalloc(npages * (sizeof(struct page *) +
+                                 sizeof(struct fuse_page_desc)), flags);
+       *desc = (void *) (pages + npages);
+
+       return pages;
+}
+
 static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
                          int opcode, struct fuse_open_out *outargp)
 {
@@ -29,29 +41,36 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
        inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
        if (!fc->atomic_o_trunc)
                inarg.flags &= ~O_TRUNC;
-       args.in.h.opcode = opcode;
-       args.in.h.nodeid = nodeid;
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.out.numargs = 1;
-       args.out.args[0].size = sizeof(*outargp);
-       args.out.args[0].value = outargp;
+       args.opcode = opcode;
+       args.nodeid = nodeid;
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(*outargp);
+       args.out_args[0].value = outargp;
 
        return fuse_simple_request(fc, &args);
 }
 
+struct fuse_release_args {
+       struct fuse_args args;
+       struct fuse_release_in inarg;
+       struct inode *inode;
+};
+
 struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
 {
        struct fuse_file *ff;
 
-       ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL);
+       ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL_ACCOUNT);
        if (unlikely(!ff))
                return NULL;
 
        ff->fc = fc;
-       ff->reserved_req = fuse_request_alloc(0);
-       if (unlikely(!ff->reserved_req)) {
+       ff->release_args = kzalloc(sizeof(*ff->release_args),
+                                  GFP_KERNEL_ACCOUNT);
+       if (!ff->release_args) {
                kfree(ff);
                return NULL;
        }
@@ -69,7 +88,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
 
 void fuse_file_free(struct fuse_file *ff)
 {
-       fuse_request_free(ff->reserved_req);
+       kfree(ff->release_args);
        mutex_destroy(&ff->readdir.lock);
        kfree(ff);
 }
@@ -80,34 +99,31 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff)
        return ff;
 }
 
-static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_release_end(struct fuse_conn *fc, struct fuse_args *args,
+                            int error)
 {
-       iput(req->misc.release.inode);
+       struct fuse_release_args *ra = container_of(args, typeof(*ra), args);
+
+       iput(ra->inode);
+       kfree(ra);
 }
 
 static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
 {
        if (refcount_dec_and_test(&ff->count)) {
-               struct fuse_req *req = ff->reserved_req;
+               struct fuse_args *args = &ff->release_args->args;
 
                if (isdir ? ff->fc->no_opendir : ff->fc->no_open) {
-                       /*
-                        * Drop the release request when client does not
-                        * implement 'open'
-                        */
-                       __clear_bit(FR_BACKGROUND, &req->flags);
-                       iput(req->misc.release.inode);
-                       fuse_put_request(ff->fc, req);
+                       /* Do nothing when client does not implement 'open' */
+                       fuse_release_end(ff->fc, args, 0);
                } else if (sync) {
-                       __set_bit(FR_FORCE, &req->flags);
-                       __clear_bit(FR_BACKGROUND, &req->flags);
-                       fuse_request_send(ff->fc, req);
-                       iput(req->misc.release.inode);
-                       fuse_put_request(ff->fc, req);
+                       fuse_simple_request(ff->fc, args);
+                       fuse_release_end(ff->fc, args, 0);
                } else {
-                       req->end = fuse_release_end;
-                       __set_bit(FR_BACKGROUND, &req->flags);
-                       fuse_request_send_background(ff->fc, req);
+                       args->end = fuse_release_end;
+                       if (fuse_simple_background(ff->fc, args,
+                                                  GFP_KERNEL | __GFP_NOFAIL))
+                               fuse_release_end(ff->fc, args, -ENOTCONN);
                }
                kfree(ff);
        }
@@ -227,8 +243,7 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
                                 int flags, int opcode)
 {
        struct fuse_conn *fc = ff->fc;
-       struct fuse_req *req = ff->reserved_req;
-       struct fuse_release_in *inarg = &req->misc.release.in;
+       struct fuse_release_args *ra = ff->release_args;
 
        /* Inode is NULL on error path of fuse_create_open() */
        if (likely(fi)) {
@@ -243,32 +258,33 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
 
        wake_up_interruptible_all(&ff->poll_wait);
 
-       inarg->fh = ff->fh;
-       inarg->flags = flags;
-       req->in.h.opcode = opcode;
-       req->in.h.nodeid = ff->nodeid;
-       req->in.numargs = 1;
-       req->in.args[0].size = sizeof(struct fuse_release_in);
-       req->in.args[0].value = inarg;
+       ra->inarg.fh = ff->fh;
+       ra->inarg.flags = flags;
+       ra->args.in_numargs = 1;
+       ra->args.in_args[0].size = sizeof(struct fuse_release_in);
+       ra->args.in_args[0].value = &ra->inarg;
+       ra->args.opcode = opcode;
+       ra->args.nodeid = ff->nodeid;
+       ra->args.force = true;
+       ra->args.nocreds = true;
 }
 
 void fuse_release_common(struct file *file, bool isdir)
 {
        struct fuse_inode *fi = get_fuse_inode(file_inode(file));
        struct fuse_file *ff = file->private_data;
-       struct fuse_req *req = ff->reserved_req;
+       struct fuse_release_args *ra = ff->release_args;
        int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
 
        fuse_prepare_release(fi, ff, file->f_flags, opcode);
 
        if (ff->flock) {
-               struct fuse_release_in *inarg = &req->misc.release.in;
-               inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
-               inarg->lock_owner = fuse_lock_owner_id(ff->fc,
-                                                      (fl_owner_t) file);
+               ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
+               ra->inarg.lock_owner = fuse_lock_owner_id(ff->fc,
+                                                         (fl_owner_t) file);
        }
        /* Hold inode until release is finished */
-       req->misc.release.inode = igrab(file_inode(file));
+       ra->inode = igrab(file_inode(file));
 
        /*
         * Normally this will send the RELEASE request, however if
@@ -279,7 +295,7 @@ void fuse_release_common(struct file *file, bool isdir)
         * synchronous RELEASE is allowed (and desirable) in this case
         * because the server can be trusted not to screw up.
         */
-       fuse_file_put(ff, ff->fc->destroy_req != NULL, isdir);
+       fuse_file_put(ff, ff->fc->destroy, isdir);
 }
 
 static int fuse_open(struct inode *inode, struct file *file)
@@ -335,19 +351,27 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
        return (u64) v0 + ((u64) v1 << 32);
 }
 
-static struct fuse_req *fuse_find_writeback(struct fuse_inode *fi,
+struct fuse_writepage_args {
+       struct fuse_io_args ia;
+       struct list_head writepages_entry;
+       struct list_head queue_entry;
+       struct fuse_writepage_args *next;
+       struct inode *inode;
+};
+
+static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
                                            pgoff_t idx_from, pgoff_t idx_to)
 {
-       struct fuse_req *req;
+       struct fuse_writepage_args *wpa;
 
-       list_for_each_entry(req, &fi->writepages, writepages_entry) {
+       list_for_each_entry(wpa, &fi->writepages, writepages_entry) {
                pgoff_t curr_index;
 
-               WARN_ON(get_fuse_inode(req->inode) != fi);
-               curr_index = req->misc.write.in.offset >> PAGE_SHIFT;
-               if (idx_from < curr_index + req->num_pages &&
+               WARN_ON(get_fuse_inode(wpa->inode) != fi);
+               curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
+               if (idx_from < curr_index + wpa->ia.ap.num_pages &&
                    curr_index <= idx_to) {
-                       return req;
+                       return wpa;
                }
        }
        return NULL;
@@ -383,12 +407,11 @@ static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
  * Since fuse doesn't rely on the VM writeback tracking, this has to
  * use some other means.
  */
-static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
+static void fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
 {
        struct fuse_inode *fi = get_fuse_inode(inode);
 
        wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
-       return 0;
 }
 
 /*
@@ -411,8 +434,8 @@ static int fuse_flush(struct file *file, fl_owner_t id)
        struct inode *inode = file_inode(file);
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_file *ff = file->private_data;
-       struct fuse_req *req;
        struct fuse_flush_in inarg;
+       FUSE_ARGS(args);
        int err;
 
        if (is_bad_inode(inode))
@@ -433,19 +456,17 @@ static int fuse_flush(struct file *file, fl_owner_t id)
        if (err)
                return err;
 
-       req = fuse_get_req_nofail_nopages(fc, file);
        memset(&inarg, 0, sizeof(inarg));
        inarg.fh = ff->fh;
        inarg.lock_owner = fuse_lock_owner_id(fc, id);
-       req->in.h.opcode = FUSE_FLUSH;
-       req->in.h.nodeid = get_node_id(inode);
-       req->in.numargs = 1;
-       req->in.args[0].size = sizeof(inarg);
-       req->in.args[0].value = &inarg;
-       __set_bit(FR_FORCE, &req->flags);
-       fuse_request_send(fc, req);
-       err = req->out.h.error;
-       fuse_put_request(fc, req);
+       args.opcode = FUSE_FLUSH;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.force = true;
+
+       err = fuse_simple_request(fc, &args);
        if (err == -ENOSYS) {
                fc->no_flush = 1;
                err = 0;
@@ -465,11 +486,11 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
        memset(&inarg, 0, sizeof(inarg));
        inarg.fh = ff->fh;
        inarg.fsync_flags = datasync ? FUSE_FSYNC_FDATASYNC : 0;
-       args.in.h.opcode = opcode;
-       args.in.h.nodeid = get_node_id(inode);
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
+       args.opcode = opcode;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
        return fuse_simple_request(fc, &args);
 }
 
@@ -523,35 +544,35 @@ out:
        return err;
 }
 
-void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
-                   size_t count, int opcode)
+void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
+                        size_t count, int opcode)
 {
-       struct fuse_read_in *inarg = &req->misc.read.in;
        struct fuse_file *ff = file->private_data;
+       struct fuse_args *args = &ia->ap.args;
 
-       inarg->fh = ff->fh;
-       inarg->offset = pos;
-       inarg->size = count;
-       inarg->flags = file->f_flags;
-       req->in.h.opcode = opcode;
-       req->in.h.nodeid = ff->nodeid;
-       req->in.numargs = 1;
-       req->in.args[0].size = sizeof(struct fuse_read_in);
-       req->in.args[0].value = inarg;
-       req->out.argvar = 1;
-       req->out.numargs = 1;
-       req->out.args[0].size = count;
+       ia->read.in.fh = ff->fh;
+       ia->read.in.offset = pos;
+       ia->read.in.size = count;
+       ia->read.in.flags = file->f_flags;
+       args->opcode = opcode;
+       args->nodeid = ff->nodeid;
+       args->in_numargs = 1;
+       args->in_args[0].size = sizeof(ia->read.in);
+       args->in_args[0].value = &ia->read.in;
+       args->out_argvar = true;
+       args->out_numargs = 1;
+       args->out_args[0].size = count;
 }
 
-static void fuse_release_user_pages(struct fuse_req *req, bool should_dirty)
+static void fuse_release_user_pages(struct fuse_args_pages *ap,
+                                   bool should_dirty)
 {
-       unsigned i;
+       unsigned int i;
 
-       for (i = 0; i < req->num_pages; i++) {
-               struct page *page = req->pages[i];
+       for (i = 0; i < ap->num_pages; i++) {
                if (should_dirty)
-                       set_page_dirty_lock(page);
-               put_page(page);
+                       set_page_dirty_lock(ap->pages[i]);
+               put_page(ap->pages[i]);
        }
 }
 
@@ -621,64 +642,94 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
        kref_put(&io->refcnt, fuse_io_release);
 }
 
-static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req)
+static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
+                                         unsigned int npages)
+{
+       struct fuse_io_args *ia;
+
+       ia = kzalloc(sizeof(*ia), GFP_KERNEL);
+       if (ia) {
+               ia->io = io;
+               ia->ap.pages = fuse_pages_alloc(npages, GFP_KERNEL,
+                                               &ia->ap.descs);
+               if (!ia->ap.pages) {
+                       kfree(ia);
+                       ia = NULL;
+               }
+       }
+       return ia;
+}
+
+static void fuse_io_free(struct fuse_io_args *ia)
+{
+       kfree(ia->ap.pages);
+       kfree(ia);
+}
+
+static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_args *args,
+                                 int err)
 {
-       struct fuse_io_priv *io = req->io;
+       struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
+       struct fuse_io_priv *io = ia->io;
        ssize_t pos = -1;
 
-       fuse_release_user_pages(req, io->should_dirty);
+       fuse_release_user_pages(&ia->ap, io->should_dirty);
 
-       if (io->write) {
-               if (req->misc.write.in.size != req->misc.write.out.size)
-                       pos = req->misc.write.in.offset - io->offset +
-                               req->misc.write.out.size;
+       if (err) {
+               /* Nothing */
+       } else if (io->write) {
+               if (ia->write.out.size > ia->write.in.size) {
+                       err = -EIO;
+               } else if (ia->write.in.size != ia->write.out.size) {
+                       pos = ia->write.in.offset - io->offset +
+                               ia->write.out.size;
+               }
        } else {
-               if (req->misc.read.in.size != req->out.args[0].size)
-                       pos = req->misc.read.in.offset - io->offset +
-                               req->out.args[0].size;
+               u32 outsize = args->out_args[0].size;
+
+               if (ia->read.in.size != outsize)
+                       pos = ia->read.in.offset - io->offset + outsize;
        }
 
-       fuse_aio_complete(io, req->out.h.error, pos);
+       fuse_aio_complete(io, err, pos);
+       fuse_io_free(ia);
 }
 
-static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req,
-               size_t num_bytes, struct fuse_io_priv *io)
+static ssize_t fuse_async_req_send(struct fuse_conn *fc,
+                                  struct fuse_io_args *ia, size_t num_bytes)
 {
+       ssize_t err;
+       struct fuse_io_priv *io = ia->io;
+
        spin_lock(&io->lock);
        kref_get(&io->refcnt);
        io->size += num_bytes;
        io->reqs++;
        spin_unlock(&io->lock);
 
-       req->io = io;
-       req->end = fuse_aio_complete_req;
+       ia->ap.args.end = fuse_aio_complete_req;
+       err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL);
 
-       __fuse_get_request(req);
-       fuse_request_send_background(fc, req);
-
-       return num_bytes;
+       return err ?: num_bytes;
 }
 
-static size_t fuse_send_read(struct fuse_req *req, struct fuse_io_priv *io,
-                            loff_t pos, size_t count, fl_owner_t owner)
+static ssize_t fuse_send_read(struct fuse_io_args *ia, loff_t pos, size_t count,
+                             fl_owner_t owner)
 {
-       struct file *file = io->iocb->ki_filp;
+       struct file *file = ia->io->iocb->ki_filp;
        struct fuse_file *ff = file->private_data;
        struct fuse_conn *fc = ff->fc;
 
-       fuse_read_fill(req, file, pos, count, FUSE_READ);
+       fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
        if (owner != NULL) {
-               struct fuse_read_in *inarg = &req->misc.read.in;
-
-               inarg->read_flags |= FUSE_READ_LOCKOWNER;
-               inarg->lock_owner = fuse_lock_owner_id(fc, owner);
+               ia->read.in.read_flags |= FUSE_READ_LOCKOWNER;
+               ia->read.in.lock_owner = fuse_lock_owner_id(fc, owner);
        }
 
-       if (io->async)
-               return fuse_async_req_send(fc, req, count, io);
+       if (ia->io->async)
+               return fuse_async_req_send(fc, ia, count);
 
-       fuse_request_send(fc, req);
-       return req->out.args[0].size;
+       return fuse_simple_request(fc, &ia->ap.args);
 }
 
 static void fuse_read_update_size(struct inode *inode, loff_t size,
@@ -696,10 +747,9 @@ static void fuse_read_update_size(struct inode *inode, loff_t size,
        spin_unlock(&fi->lock);
 }
 
-static void fuse_short_read(struct fuse_req *req, struct inode *inode,
-                           u64 attr_ver)
+static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read,
+                           struct fuse_args_pages *ap)
 {
-       size_t num_read = req->out.args[0].size;
        struct fuse_conn *fc = get_fuse_conn(inode);
 
        if (fc->writeback_cache) {
@@ -712,28 +762,31 @@ static void fuse_short_read(struct fuse_req *req, struct inode *inode,
                int start_idx = num_read >> PAGE_SHIFT;
                size_t off = num_read & (PAGE_SIZE - 1);
 
-               for (i = start_idx; i < req->num_pages; i++) {
-                       zero_user_segment(req->pages[i], off, PAGE_SIZE);
+               for (i = start_idx; i < ap->num_pages; i++) {
+                       zero_user_segment(ap->pages[i], off, PAGE_SIZE);
                        off = 0;
                }
        } else {
-               loff_t pos = page_offset(req->pages[0]) + num_read;
+               loff_t pos = page_offset(ap->pages[0]) + num_read;
                fuse_read_update_size(inode, pos, attr_ver);
        }
 }
 
 static int fuse_do_readpage(struct file *file, struct page *page)
 {
-       struct kiocb iocb;
-       struct fuse_io_priv io;
        struct inode *inode = page->mapping->host;
        struct fuse_conn *fc = get_fuse_conn(inode);
-       struct fuse_req *req;
-       size_t num_read;
        loff_t pos = page_offset(page);
-       size_t count = PAGE_SIZE;
+       struct fuse_page_desc desc = { .length = PAGE_SIZE };
+       struct fuse_io_args ia = {
+               .ap.args.page_zeroing = true,
+               .ap.args.out_pages = true,
+               .ap.num_pages = 1,
+               .ap.pages = &page,
+               .ap.descs = &desc,
+       };
+       ssize_t res;
        u64 attr_ver;
-       int err;
 
        /*
         * Page writeback can extend beyond the lifetime of the
@@ -742,35 +795,21 @@ static int fuse_do_readpage(struct file *file, struct page *page)
         */
        fuse_wait_on_page_writeback(inode, page->index);
 
-       req = fuse_get_req(fc, 1);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
-
        attr_ver = fuse_get_attr_version(fc);
 
-       req->out.page_zeroing = 1;
-       req->out.argpages = 1;
-       req->num_pages = 1;
-       req->pages[0] = page;
-       req->page_descs[0].length = count;
-       init_sync_kiocb(&iocb, file);
-       io = (struct fuse_io_priv) FUSE_IO_PRIV_SYNC(&iocb);
-       num_read = fuse_send_read(req, &io, pos, count, NULL);
-       err = req->out.h.error;
-
-       if (!err) {
-               /*
-                * Short read means EOF.  If file size is larger, truncate it
-                */
-               if (num_read < count)
-                       fuse_short_read(req, inode, attr_ver);
-
-               SetPageUptodate(page);
-       }
+       fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ);
+       res = fuse_simple_request(fc, &ia.ap.args);
+       if (res < 0)
+               return res;
+       /*
+        * Short read means EOF.  If file size is larger, truncate it
+        */
+       if (res < desc.length)
+               fuse_short_read(inode, attr_ver, res, &ia.ap);
 
-       fuse_put_request(fc, req);
+       SetPageUptodate(page);
 
-       return err;
+       return 0;
 }
 
 static int fuse_readpage(struct file *file, struct page *page)
@@ -789,15 +828,18 @@ static int fuse_readpage(struct file *file, struct page *page)
        return err;
 }
 
-static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_args *args,
+                              int err)
 {
        int i;
-       size_t count = req->misc.read.in.size;
-       size_t num_read = req->out.args[0].size;
+       struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
+       struct fuse_args_pages *ap = &ia->ap;
+       size_t count = ia->read.in.size;
+       size_t num_read = args->out_args[0].size;
        struct address_space *mapping = NULL;
 
-       for (i = 0; mapping == NULL && i < req->num_pages; i++)
-               mapping = req->pages[i]->mapping;
+       for (i = 0; mapping == NULL && i < ap->num_pages; i++)
+               mapping = ap->pages[i]->mapping;
 
        if (mapping) {
                struct inode *inode = mapping->host;
@@ -805,93 +847,97 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
                /*
                 * Short read means EOF. If file size is larger, truncate it
                 */
-               if (!req->out.h.error && num_read < count)
-                       fuse_short_read(req, inode, req->misc.read.attr_ver);
+               if (!err && num_read < count)
+                       fuse_short_read(inode, ia->read.attr_ver, num_read, ap);
 
                fuse_invalidate_atime(inode);
        }
 
-       for (i = 0; i < req->num_pages; i++) {
-               struct page *page = req->pages[i];
-               if (!req->out.h.error)
+       for (i = 0; i < ap->num_pages; i++) {
+               struct page *page = ap->pages[i];
+
+               if (!err)
                        SetPageUptodate(page);
                else
                        SetPageError(page);
                unlock_page(page);
                put_page(page);
        }
-       if (req->ff)
-               fuse_file_put(req->ff, false, false);
+       if (ia->ff)
+               fuse_file_put(ia->ff, false, false);
+
+       fuse_io_free(ia);
 }
 
-static void fuse_send_readpages(struct fuse_req *req, struct file *file)
+static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
 {
        struct fuse_file *ff = file->private_data;
        struct fuse_conn *fc = ff->fc;
-       loff_t pos = page_offset(req->pages[0]);
-       size_t count = req->num_pages << PAGE_SHIFT;
-
-       req->out.argpages = 1;
-       req->out.page_zeroing = 1;
-       req->out.page_replace = 1;
-       fuse_read_fill(req, file, pos, count, FUSE_READ);
-       req->misc.read.attr_ver = fuse_get_attr_version(fc);
+       struct fuse_args_pages *ap = &ia->ap;
+       loff_t pos = page_offset(ap->pages[0]);
+       size_t count = ap->num_pages << PAGE_SHIFT;
+       int err;
+
+       ap->args.out_pages = true;
+       ap->args.page_zeroing = true;
+       ap->args.page_replace = true;
+       fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
+       ia->read.attr_ver = fuse_get_attr_version(fc);
        if (fc->async_read) {
-               req->ff = fuse_file_get(ff);
-               req->end = fuse_readpages_end;
-               fuse_request_send_background(fc, req);
+               ia->ff = fuse_file_get(ff);
+               ap->args.end = fuse_readpages_end;
+               err = fuse_simple_background(fc, &ap->args, GFP_KERNEL);
+               if (!err)
+                       return;
        } else {
-               fuse_request_send(fc, req);
-               fuse_readpages_end(fc, req);
-               fuse_put_request(fc, req);
+               err = fuse_simple_request(fc, &ap->args);
        }
+       fuse_readpages_end(fc, &ap->args, err);
 }
 
 struct fuse_fill_data {
-       struct fuse_req *req;
+       struct fuse_io_args *ia;
        struct file *file;
        struct inode *inode;
-       unsigned nr_pages;
+       unsigned int nr_pages;
+       unsigned int max_pages;
 };
 
 static int fuse_readpages_fill(void *_data, struct page *page)
 {
        struct fuse_fill_data *data = _data;
-       struct fuse_req *req = data->req;
+       struct fuse_io_args *ia = data->ia;
+       struct fuse_args_pages *ap = &ia->ap;
        struct inode *inode = data->inode;
        struct fuse_conn *fc = get_fuse_conn(inode);
 
        fuse_wait_on_page_writeback(inode, page->index);
 
-       if (req->num_pages &&
-           (req->num_pages == fc->max_pages ||
-            (req->num_pages + 1) * PAGE_SIZE > fc->max_read ||
-            req->pages[req->num_pages - 1]->index + 1 != page->index)) {
-               unsigned int nr_alloc = min_t(unsigned int, data->nr_pages,
-                                             fc->max_pages);
-               fuse_send_readpages(req, data->file);
-               if (fc->async_read)
-                       req = fuse_get_req_for_background(fc, nr_alloc);
-               else
-                       req = fuse_get_req(fc, nr_alloc);
-
-               data->req = req;
-               if (IS_ERR(req)) {
+       if (ap->num_pages &&
+           (ap->num_pages == fc->max_pages ||
+            (ap->num_pages + 1) * PAGE_SIZE > fc->max_read ||
+            ap->pages[ap->num_pages - 1]->index + 1 != page->index)) {
+               data->max_pages = min_t(unsigned int, data->nr_pages,
+                                       fc->max_pages);
+               fuse_send_readpages(ia, data->file);
+               data->ia = ia = fuse_io_alloc(NULL, data->max_pages);
+               if (!ia) {
                        unlock_page(page);
-                       return PTR_ERR(req);
+                       return -ENOMEM;
                }
+               ap = &ia->ap;
        }
 
-       if (WARN_ON(req->num_pages >= req->max_pages)) {
+       if (WARN_ON(ap->num_pages >= data->max_pages)) {
                unlock_page(page);
-               fuse_put_request(fc, req);
+               fuse_io_free(ia);
                return -EIO;
        }
 
        get_page(page);
-       req->pages[req->num_pages] = page;
-       req->page_descs[req->num_pages].length = PAGE_SIZE;
-       req->num_pages++;
+       ap->pages[ap->num_pages] = page;
+       ap->descs[ap->num_pages].length = PAGE_SIZE;
+       ap->num_pages++;
        data->nr_pages--;
        return 0;
 }
@@ -903,7 +949,6 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_fill_data data;
        int err;
-       unsigned int nr_alloc = min_t(unsigned int, nr_pages, fc->max_pages);
 
        err = -EIO;
        if (is_bad_inode(inode))
@@ -911,21 +956,20 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
 
        data.file = file;
        data.inode = inode;
-       if (fc->async_read)
-               data.req = fuse_get_req_for_background(fc, nr_alloc);
-       else
-               data.req = fuse_get_req(fc, nr_alloc);
        data.nr_pages = nr_pages;
-       err = PTR_ERR(data.req);
-       if (IS_ERR(data.req))
+       data.max_pages = min_t(unsigned int, nr_pages, fc->max_pages);
+;
+       data.ia = fuse_io_alloc(NULL, data.max_pages);
+       err = -ENOMEM;
+       if (!data.ia)
                goto out;
 
        err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
        if (!err) {
-               if (data.req->num_pages)
-                       fuse_send_readpages(data.req, file);
+               if (data.ia->ap.num_pages)
+                       fuse_send_readpages(data.ia, file);
                else
-                       fuse_put_request(fc, data.req);
+                       fuse_io_free(data.ia);
        }
 out:
        return err;
@@ -952,54 +996,65 @@ static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
        return generic_file_read_iter(iocb, to);
 }
 
-static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
-                           loff_t pos, size_t count)
+static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff,
+                                loff_t pos, size_t count)
 {
-       struct fuse_write_in *inarg = &req->misc.write.in;
-       struct fuse_write_out *outarg = &req->misc.write.out;
+       struct fuse_args *args = &ia->ap.args;
 
-       inarg->fh = ff->fh;
-       inarg->offset = pos;
-       inarg->size = count;
-       req->in.h.opcode = FUSE_WRITE;
-       req->in.h.nodeid = ff->nodeid;
-       req->in.numargs = 2;
+       ia->write.in.fh = ff->fh;
+       ia->write.in.offset = pos;
+       ia->write.in.size = count;
+       args->opcode = FUSE_WRITE;
+       args->nodeid = ff->nodeid;
+       args->in_numargs = 2;
        if (ff->fc->minor < 9)
-               req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
+               args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
        else
-               req->in.args[0].size = sizeof(struct fuse_write_in);
-       req->in.args[0].value = inarg;
-       req->in.args[1].size = count;
-       req->out.numargs = 1;
-       req->out.args[0].size = sizeof(struct fuse_write_out);
-       req->out.args[0].value = outarg;
+               args->in_args[0].size = sizeof(ia->write.in);
+       args->in_args[0].value = &ia->write.in;
+       args->in_args[1].size = count;
+       args->out_numargs = 1;
+       args->out_args[0].size = sizeof(ia->write.out);
+       args->out_args[0].value = &ia->write.out;
 }
 
-static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io,
-                             loff_t pos, size_t count, fl_owner_t owner)
+static unsigned int fuse_write_flags(struct kiocb *iocb)
 {
-       struct kiocb *iocb = io->iocb;
+       unsigned int flags = iocb->ki_filp->f_flags;
+
+       if (iocb->ki_flags & IOCB_DSYNC)
+               flags |= O_DSYNC;
+       if (iocb->ki_flags & IOCB_SYNC)
+               flags |= O_SYNC;
+
+       return flags;
+}
+
+static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos,
+                              size_t count, fl_owner_t owner)
+{
+       struct kiocb *iocb = ia->io->iocb;
        struct file *file = iocb->ki_filp;
        struct fuse_file *ff = file->private_data;
        struct fuse_conn *fc = ff->fc;
-       struct fuse_write_in *inarg = &req->misc.write.in;
+       struct fuse_write_in *inarg = &ia->write.in;
+       ssize_t err;
 
-       fuse_write_fill(req, ff, pos, count);
-       inarg->flags = file->f_flags;
-       if (iocb->ki_flags & IOCB_DSYNC)
-               inarg->flags |= O_DSYNC;
-       if (iocb->ki_flags & IOCB_SYNC)
-               inarg->flags |= O_SYNC;
+       fuse_write_args_fill(ia, ff, pos, count);
+       inarg->flags = fuse_write_flags(iocb);
        if (owner != NULL) {
                inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
                inarg->lock_owner = fuse_lock_owner_id(fc, owner);
        }
 
-       if (io->async)
-               return fuse_async_req_send(fc, req, count, io);
+       if (ia->io->async)
+               return fuse_async_req_send(fc, ia, count);
+
+       err = fuse_simple_request(fc, &ia->ap.args);
+       if (!err && ia->write.out.size > count)
+               err = -EIO;
 
-       fuse_request_send(fc, req);
-       return req->misc.write.out.size;
+       return err ?: ia->write.out.size;
 }
 
 bool fuse_write_update_size(struct inode *inode, loff_t pos)
@@ -1019,26 +1074,31 @@ bool fuse_write_update_size(struct inode *inode, loff_t pos)
        return ret;
 }
 
-static size_t fuse_send_write_pages(struct fuse_req *req, struct kiocb *iocb,
-                                   struct inode *inode, loff_t pos,
-                                   size_t count)
+static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
+                                    struct kiocb *iocb, struct inode *inode,
+                                    loff_t pos, size_t count)
 {
-       size_t res;
-       unsigned offset;
-       unsigned i;
-       struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
+       struct fuse_args_pages *ap = &ia->ap;
+       struct file *file = iocb->ki_filp;
+       struct fuse_file *ff = file->private_data;
+       struct fuse_conn *fc = ff->fc;
+       unsigned int offset, i;
+       int err;
 
-       for (i = 0; i < req->num_pages; i++)
-               fuse_wait_on_page_writeback(inode, req->pages[i]->index);
+       for (i = 0; i < ap->num_pages; i++)
+               fuse_wait_on_page_writeback(inode, ap->pages[i]->index);
 
-       res = fuse_send_write(req, &io, pos, count, NULL);
+       fuse_write_args_fill(ia, ff, pos, count);
+       ia->write.in.flags = fuse_write_flags(iocb);
 
-       offset = req->page_descs[0].offset;
-       count = res;
-       for (i = 0; i < req->num_pages; i++) {
-               struct page *page = req->pages[i];
+       err = fuse_simple_request(fc, &ap->args);
 
-               if (!req->out.h.error && !offset && count >= PAGE_SIZE)
+       offset = ap->descs[0].offset;
+       count = ia->write.out.size;
+       for (i = 0; i < ap->num_pages; i++) {
+               struct page *page = ap->pages[i];
+
+               if (!err && !offset && count >= PAGE_SIZE)
                        SetPageUptodate(page);
 
                if (count > PAGE_SIZE - offset)
@@ -1051,20 +1111,21 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct kiocb *iocb,
                put_page(page);
        }
 
-       return res;
+       return err;
 }
 
-static ssize_t fuse_fill_write_pages(struct fuse_req *req,
-                              struct address_space *mapping,
-                              struct iov_iter *ii, loff_t pos)
+static ssize_t fuse_fill_write_pages(struct fuse_args_pages *ap,
+                                    struct address_space *mapping,
+                                    struct iov_iter *ii, loff_t pos,
+                                    unsigned int max_pages)
 {
        struct fuse_conn *fc = get_fuse_conn(mapping->host);
        unsigned offset = pos & (PAGE_SIZE - 1);
        size_t count = 0;
        int err;
 
-       req->in.argpages = 1;
-       req->page_descs[0].offset = offset;
+       ap->args.in_pages = true;
+       ap->descs[0].offset = offset;
 
        do {
                size_t tmp;
@@ -1100,9 +1161,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
                }
 
                err = 0;
-               req->pages[req->num_pages] = page;
-               req->page_descs[req->num_pages].length = tmp;
-               req->num_pages++;
+               ap->pages[ap->num_pages] = page;
+               ap->descs[ap->num_pages].length = tmp;
+               ap->num_pages++;
 
                count += tmp;
                pos += tmp;
@@ -1113,7 +1174,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
                if (!fc->big_writes)
                        break;
        } while (iov_iter_count(ii) && count < fc->max_write &&
-                req->num_pages < req->max_pages && offset == 0);
+                ap->num_pages < max_pages && offset == 0);
 
        return count > 0 ? count : err;
 }
@@ -1141,27 +1202,27 @@ static ssize_t fuse_perform_write(struct kiocb *iocb,
                set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
 
        do {
-               struct fuse_req *req;
                ssize_t count;
+               struct fuse_io_args ia = {};
+               struct fuse_args_pages *ap = &ia.ap;
                unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii),
                                                      fc->max_pages);
 
-               req = fuse_get_req(fc, nr_pages);
-               if (IS_ERR(req)) {
-                       err = PTR_ERR(req);
+               ap->pages = fuse_pages_alloc(nr_pages, GFP_KERNEL, &ap->descs);
+               if (!ap->pages) {
+                       err = -ENOMEM;
                        break;
                }
 
-               count = fuse_fill_write_pages(req, mapping, ii, pos);
+               count = fuse_fill_write_pages(ap, mapping, ii, pos, nr_pages);
                if (count <= 0) {
                        err = count;
                } else {
-                       size_t num_written;
-
-                       num_written = fuse_send_write_pages(req, iocb, inode,
-                                                           pos, count);
-                       err = req->out.h.error;
+                       err = fuse_send_write_pages(&ia, iocb, inode,
+                                                   pos, count);
                        if (!err) {
+                               size_t num_written = ia.write.out.size;
+
                                res += num_written;
                                pos += num_written;
 
@@ -1170,7 +1231,7 @@ static ssize_t fuse_perform_write(struct kiocb *iocb,
                                        err = -EIO;
                        }
                }
-               fuse_put_request(fc, req);
+               kfree(ap->pages);
        } while (!err && iov_iter_count(ii));
 
        if (res > 0)
@@ -1258,14 +1319,14 @@ out:
        return written ? written : err;
 }
 
-static inline void fuse_page_descs_length_init(struct fuse_req *req,
-               unsigned index, unsigned nr_pages)
+static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs,
+                                              unsigned int index,
+                                              unsigned int nr_pages)
 {
        int i;
 
        for (i = index; i < index + nr_pages; i++)
-               req->page_descs[i].length = PAGE_SIZE -
-                       req->page_descs[i].offset;
+               descs[i].length = PAGE_SIZE - descs[i].offset;
 }
 
 static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
@@ -1279,8 +1340,9 @@ static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
        return min(iov_iter_single_seg_count(ii), max_size);
 }
 
-static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
-                              size_t *nbytesp, int write)
+static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
+                              size_t *nbytesp, int write,
+                              unsigned int max_pages)
 {
        size_t nbytes = 0;  /* # bytes already packed in req */
        ssize_t ret = 0;
@@ -1291,21 +1353,21 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
                size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
 
                if (write)
-                       req->in.args[1].value = (void *) user_addr;
+                       ap->args.in_args[1].value = (void *) user_addr;
                else
-                       req->out.args[0].value = (void *) user_addr;
+                       ap->args.out_args[0].value = (void *) user_addr;
 
                iov_iter_advance(ii, frag_size);
                *nbytesp = frag_size;
                return 0;
        }
 
-       while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
+       while (nbytes < *nbytesp && ap->num_pages < max_pages) {
                unsigned npages;
                size_t start;
-               ret = iov_iter_get_pages(ii, &req->pages[req->num_pages],
+               ret = iov_iter_get_pages(ii, &ap->pages[ap->num_pages],
                                        *nbytesp - nbytes,
-                                       req->max_pages - req->num_pages,
+                                       max_pages - ap->num_pages,
                                        &start);
                if (ret < 0)
                        break;
@@ -1316,18 +1378,18 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
                ret += start;
                npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
 
-               req->page_descs[req->num_pages].offset = start;
-               fuse_page_descs_length_init(req, req->num_pages, npages);
+               ap->descs[ap->num_pages].offset = start;
+               fuse_page_descs_length_init(ap->descs, ap->num_pages, npages);
 
-               req->num_pages += npages;
-               req->page_descs[req->num_pages - 1].length -=
+               ap->num_pages += npages;
+               ap->descs[ap->num_pages - 1].length -=
                        (PAGE_SIZE - ret) & (PAGE_SIZE - 1);
        }
 
        if (write)
-               req->in.argpages = 1;
+               ap->args.in_pages = 1;
        else
-               req->out.argpages = 1;
+               ap->args.out_pages = 1;
 
        *nbytesp = nbytes;
 
@@ -1349,17 +1411,16 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
        pgoff_t idx_from = pos >> PAGE_SHIFT;
        pgoff_t idx_to = (pos + count - 1) >> PAGE_SHIFT;
        ssize_t res = 0;
-       struct fuse_req *req;
        int err = 0;
+       struct fuse_io_args *ia;
+       unsigned int max_pages;
 
-       if (io->async)
-               req = fuse_get_req_for_background(fc, iov_iter_npages(iter,
-                                                               fc->max_pages));
-       else
-               req = fuse_get_req(fc, iov_iter_npages(iter, fc->max_pages));
-       if (IS_ERR(req))
-               return PTR_ERR(req);
+       max_pages = iov_iter_npages(iter, fc->max_pages);
+       ia = fuse_io_alloc(io, max_pages);
+       if (!ia)
+               return -ENOMEM;
 
+       ia->io = io;
        if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
                if (!write)
                        inode_lock(inode);
@@ -1370,54 +1431,49 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 
        io->should_dirty = !write && iter_is_iovec(iter);
        while (count) {
-               size_t nres;
+               ssize_t nres;
                fl_owner_t owner = current->files;
                size_t nbytes = min(count, nmax);
-               err = fuse_get_user_pages(req, iter, &nbytes, write);
+
+               err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write,
+                                         max_pages);
                if (err && !nbytes)
                        break;
 
                if (write) {
-                       if (!capable(CAP_FSETID)) {
-                               struct fuse_write_in *inarg;
+                       if (!capable(CAP_FSETID))
+                               ia->write.in.write_flags |= FUSE_WRITE_KILL_PRIV;
 
-                               inarg = &req->misc.write.in;
-                               inarg->write_flags |= FUSE_WRITE_KILL_PRIV;
-                       }
-                       nres = fuse_send_write(req, io, pos, nbytes, owner);
+                       nres = fuse_send_write(ia, pos, nbytes, owner);
                } else {
-                       nres = fuse_send_read(req, io, pos, nbytes, owner);
+                       nres = fuse_send_read(ia, pos, nbytes, owner);
                }
 
-               if (!io->async)
-                       fuse_release_user_pages(req, io->should_dirty);
-               if (req->out.h.error) {
-                       err = req->out.h.error;
-                       break;
-               } else if (nres > nbytes) {
-                       res = 0;
-                       err = -EIO;
+               if (!io->async || nres < 0) {
+                       fuse_release_user_pages(&ia->ap, io->should_dirty);
+                       fuse_io_free(ia);
+               }
+               ia = NULL;
+               if (nres < 0) {
+                       err = nres;
                        break;
                }
+               WARN_ON(nres > nbytes);
+
                count -= nres;
                res += nres;
                pos += nres;
                if (nres != nbytes)
                        break;
                if (count) {
-                       fuse_put_request(fc, req);
-                       if (io->async)
-                               req = fuse_get_req_for_background(fc,
-                                       iov_iter_npages(iter, fc->max_pages));
-                       else
-                               req = fuse_get_req(fc, iov_iter_npages(iter,
-                                                               fc->max_pages));
-                       if (IS_ERR(req))
+                       max_pages = iov_iter_npages(iter, fc->max_pages);
+                       ia = fuse_io_alloc(io, max_pages);
+                       if (!ia)
                                break;
                }
        }
-       if (!IS_ERR(req))
-               fuse_put_request(fc, req);
+       if (ia)
+               fuse_io_free(ia);
        if (res > 0)
                *ppos = pos;
 
@@ -1509,45 +1565,53 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                return fuse_direct_write_iter(iocb, from);
 }
 
-static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_writepage_free(struct fuse_writepage_args *wpa)
 {
+       struct fuse_args_pages *ap = &wpa->ia.ap;
        int i;
 
-       for (i = 0; i < req->num_pages; i++)
-               __free_page(req->pages[i]);
+       for (i = 0; i < ap->num_pages; i++)
+               __free_page(ap->pages[i]);
+
+       if (wpa->ia.ff)
+               fuse_file_put(wpa->ia.ff, false, false);
 
-       if (req->ff)
-               fuse_file_put(req->ff, false, false);
+       kfree(ap->pages);
+       kfree(wpa);
 }
 
-static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_writepage_finish(struct fuse_conn *fc,
+                                 struct fuse_writepage_args *wpa)
 {
-       struct inode *inode = req->inode;
+       struct fuse_args_pages *ap = &wpa->ia.ap;
+       struct inode *inode = wpa->inode;
        struct fuse_inode *fi = get_fuse_inode(inode);
        struct backing_dev_info *bdi = inode_to_bdi(inode);
        int i;
 
-       list_del(&req->writepages_entry);
-       for (i = 0; i < req->num_pages; i++) {
+       list_del(&wpa->writepages_entry);
+       for (i = 0; i < ap->num_pages; i++) {
                dec_wb_stat(&bdi->wb, WB_WRITEBACK);
-               dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP);
+               dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
                wb_writeout_inc(&bdi->wb);
        }
        wake_up(&fi->page_waitq);
 }
 
 /* Called under fi->lock, may release and reacquire it */
-static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req,
-                               loff_t size)
+static void fuse_send_writepage(struct fuse_conn *fc,
+                               struct fuse_writepage_args *wpa, loff_t size)
 __releases(fi->lock)
 __acquires(fi->lock)
 {
-       struct fuse_req *aux, *next;
-       struct fuse_inode *fi = get_fuse_inode(req->inode);
-       struct fuse_write_in *inarg = &req->misc.write.in;
-       __u64 data_size = req->num_pages * PAGE_SIZE;
-       bool queued;
+       struct fuse_writepage_args *aux, *next;
+       struct fuse_inode *fi = get_fuse_inode(wpa->inode);
+       struct fuse_write_in *inarg = &wpa->ia.write.in;
+       struct fuse_args *args = &wpa->ia.ap.args;
+       __u64 data_size = wpa->ia.ap.num_pages * PAGE_SIZE;
+       int err;
 
+       fi->writectr++;
        if (inarg->offset + data_size <= size) {
                inarg->size = data_size;
        } else if (inarg->offset < size) {
@@ -1557,29 +1621,36 @@ __acquires(fi->lock)
                goto out_free;
        }
 
-       req->in.args[1].size = inarg->size;
-       queued = fuse_request_queue_background(fc, req);
+       args->in_args[1].size = inarg->size;
+       args->force = true;
+       args->nocreds = true;
+
+       err = fuse_simple_background(fc, args, GFP_ATOMIC);
+       if (err == -ENOMEM) {
+               spin_unlock(&fi->lock);
+               err = fuse_simple_background(fc, args, GFP_NOFS | __GFP_NOFAIL);
+               spin_lock(&fi->lock);
+       }
+
        /* Fails on broken connection only */
-       if (unlikely(!queued))
+       if (unlikely(err))
                goto out_free;
 
-       fi->writectr++;
        return;
 
  out_free:
-       fuse_writepage_finish(fc, req);
+       fi->writectr--;
+       fuse_writepage_finish(fc, wpa);
        spin_unlock(&fi->lock);
 
        /* After fuse_writepage_finish() aux request list is private */
-       for (aux = req->misc.write.next; aux; aux = next) {
-               next = aux->misc.write.next;
-               aux->misc.write.next = NULL;
-               fuse_writepage_free(fc, aux);
-               fuse_put_request(fc, aux);
+       for (aux = wpa->next; aux; aux = next) {
+               next = aux->next;
+               aux->next = NULL;
+               fuse_writepage_free(aux);
        }
 
-       fuse_writepage_free(fc, req);
-       fuse_put_request(fc, req);
+       fuse_writepage_free(wpa);
        spin_lock(&fi->lock);
 }
 
@@ -1596,29 +1667,34 @@ __acquires(fi->lock)
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);
        loff_t crop = i_size_read(inode);
-       struct fuse_req *req;
+       struct fuse_writepage_args *wpa;
 
        while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
-               req = list_entry(fi->queued_writes.next, struct fuse_req, list);
-               list_del_init(&req->list);
-               fuse_send_writepage(fc, req, crop);
+               wpa = list_entry(fi->queued_writes.next,
+                                struct fuse_writepage_args, queue_entry);
+               list_del_init(&wpa->queue_entry);
+               fuse_send_writepage(fc, wpa, crop);
        }
 }
 
-static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
+                              int error)
 {
-       struct inode *inode = req->inode;
+       struct fuse_writepage_args *wpa =
+               container_of(args, typeof(*wpa), ia.ap.args);
+       struct inode *inode = wpa->inode;
        struct fuse_inode *fi = get_fuse_inode(inode);
 
-       mapping_set_error(inode->i_mapping, req->out.h.error);
+       mapping_set_error(inode->i_mapping, error);
        spin_lock(&fi->lock);
-       while (req->misc.write.next) {
+       while (wpa->next) {
                struct fuse_conn *fc = get_fuse_conn(inode);
-               struct fuse_write_in *inarg = &req->misc.write.in;
-               struct fuse_req *next = req->misc.write.next;
-               req->misc.write.next = next->misc.write.next;
-               next->misc.write.next = NULL;
-               next->ff = fuse_file_get(req->ff);
+               struct fuse_write_in *inarg = &wpa->ia.write.in;
+               struct fuse_writepage_args *next = wpa->next;
+
+               wpa->next = next->next;
+               next->next = NULL;
+               next->ia.ff = fuse_file_get(wpa->ia.ff);
                list_add(&next->writepages_entry, &fi->writepages);
 
                /*
@@ -1647,9 +1723,9 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
                fuse_send_writepage(fc, next, inarg->offset + inarg->size);
        }
        fi->writectr--;
-       fuse_writepage_finish(fc, req);
+       fuse_writepage_finish(fc, wpa);
        spin_unlock(&fi->lock);
-       fuse_writepage_free(fc, req);
+       fuse_writepage_free(wpa);
 }
 
 static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc,
@@ -1691,52 +1767,71 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
        return err;
 }
 
+static struct fuse_writepage_args *fuse_writepage_args_alloc(void)
+{
+       struct fuse_writepage_args *wpa;
+       struct fuse_args_pages *ap;
+
+       wpa = kzalloc(sizeof(*wpa), GFP_NOFS);
+       if (wpa) {
+               ap = &wpa->ia.ap;
+               ap->num_pages = 0;
+               ap->pages = fuse_pages_alloc(1, GFP_NOFS, &ap->descs);
+               if (!ap->pages) {
+                       kfree(wpa);
+                       wpa = NULL;
+               }
+       }
+       return wpa;
+
+}
+
 static int fuse_writepage_locked(struct page *page)
 {
        struct address_space *mapping = page->mapping;
        struct inode *inode = mapping->host;
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);
-       struct fuse_req *req;
+       struct fuse_writepage_args *wpa;
+       struct fuse_args_pages *ap;
        struct page *tmp_page;
        int error = -ENOMEM;
 
        set_page_writeback(page);
 
-       req = fuse_request_alloc_nofs(1);
-       if (!req)
+       wpa = fuse_writepage_args_alloc();
+       if (!wpa)
                goto err;
+       ap = &wpa->ia.ap;
 
-       /* writeback always goes to bg_queue */
-       __set_bit(FR_BACKGROUND, &req->flags);
        tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
        if (!tmp_page)
                goto err_free;
 
        error = -EIO;
-       req->ff = fuse_write_file_get(fc, fi);
-       if (!req->ff)
+       wpa->ia.ff = fuse_write_file_get(fc, fi);
+       if (!wpa->ia.ff)
                goto err_nofile;
 
-       fuse_write_fill(req, req->ff, page_offset(page), 0);
+       fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0);
 
        copy_highpage(tmp_page, page);
-       req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
-       req->misc.write.next = NULL;
-       req->in.argpages = 1;
-       req->num_pages = 1;
-       req->pages[0] = tmp_page;
-       req->page_descs[0].offset = 0;
-       req->page_descs[0].length = PAGE_SIZE;
-       req->end = fuse_writepage_end;
-       req->inode = inode;
+       wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
+       wpa->next = NULL;
+       ap->args.in_pages = true;
+       ap->num_pages = 1;
+       ap->pages[0] = tmp_page;
+       ap->descs[0].offset = 0;
+       ap->descs[0].length = PAGE_SIZE;
+       ap->args.end = fuse_writepage_end;
+       wpa->inode = inode;
 
        inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
        inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
 
        spin_lock(&fi->lock);
-       list_add(&req->writepages_entry, &fi->writepages);
-       list_add_tail(&req->list, &fi->queued_writes);
+       list_add(&wpa->writepages_entry, &fi->writepages);
+       list_add_tail(&wpa->queue_entry, &fi->queued_writes);
        fuse_flush_writepages(inode);
        spin_unlock(&fi->lock);
 
@@ -1747,7 +1842,7 @@ static int fuse_writepage_locked(struct page *page)
 err_nofile:
        __free_page(tmp_page);
 err_free:
-       fuse_request_free(req);
+       kfree(wpa);
 err:
        mapping_set_error(page->mapping, error);
        end_page_writeback(page);
@@ -1767,6 +1862,7 @@ static int fuse_writepage(struct page *page, struct writeback_control *wbc)
                WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
 
                redirty_page_for_writepage(wbc, page);
+               unlock_page(page);
                return 0;
        }
 
@@ -1777,23 +1873,50 @@ static int fuse_writepage(struct page *page, struct writeback_control *wbc)
 }
 
 struct fuse_fill_wb_data {
-       struct fuse_req *req;
+       struct fuse_writepage_args *wpa;
        struct fuse_file *ff;
        struct inode *inode;
        struct page **orig_pages;
+       unsigned int max_pages;
 };
 
+static bool fuse_pages_realloc(struct fuse_fill_wb_data *data)
+{
+       struct fuse_args_pages *ap = &data->wpa->ia.ap;
+       struct fuse_conn *fc = get_fuse_conn(data->inode);
+       struct page **pages;
+       struct fuse_page_desc *descs;
+       unsigned int npages = min_t(unsigned int,
+                                   max_t(unsigned int, data->max_pages * 2,
+                                         FUSE_DEFAULT_MAX_PAGES_PER_REQ),
+                                   fc->max_pages);
+       WARN_ON(npages <= data->max_pages);
+
+       pages = fuse_pages_alloc(npages, GFP_NOFS, &descs);
+       if (!pages)
+               return false;
+
+       memcpy(pages, ap->pages, sizeof(struct page *) * ap->num_pages);
+       memcpy(descs, ap->descs, sizeof(struct fuse_page_desc) * ap->num_pages);
+       kfree(ap->pages);
+       ap->pages = pages;
+       ap->descs = descs;
+       data->max_pages = npages;
+
+       return true;
+}
+
 static void fuse_writepages_send(struct fuse_fill_wb_data *data)
 {
-       struct fuse_req *req = data->req;
+       struct fuse_writepage_args *wpa = data->wpa;
        struct inode *inode = data->inode;
        struct fuse_inode *fi = get_fuse_inode(inode);
-       int num_pages = req->num_pages;
+       int num_pages = wpa->ia.ap.num_pages;
        int i;
 
-       req->ff = fuse_file_get(data->ff);
+       wpa->ia.ff = fuse_file_get(data->ff);
        spin_lock(&fi->lock);
-       list_add_tail(&req->list, &fi->queued_writes);
+       list_add_tail(&wpa->queue_entry, &fi->queued_writes);
        fuse_flush_writepages(inode);
        spin_unlock(&fi->lock);
 
@@ -1808,54 +1931,52 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
  * this new request onto the auxiliary list, otherwise reuse the existing one by
  * copying the new page contents over to the old temporary page.
  */
-static bool fuse_writepage_in_flight(struct fuse_req *new_req,
+static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa,
                                     struct page *page)
 {
-       struct fuse_conn *fc = get_fuse_conn(new_req->inode);
-       struct fuse_inode *fi = get_fuse_inode(new_req->inode);
-       struct fuse_req *tmp;
-       struct fuse_req *old_req;
+       struct fuse_inode *fi = get_fuse_inode(new_wpa->inode);
+       struct fuse_writepage_args *tmp;
+       struct fuse_writepage_args *old_wpa;
+       struct fuse_args_pages *new_ap = &new_wpa->ia.ap;
 
-       WARN_ON(new_req->num_pages != 0);
+       WARN_ON(new_ap->num_pages != 0);
 
        spin_lock(&fi->lock);
-       list_del(&new_req->writepages_entry);
-       old_req = fuse_find_writeback(fi, page->index, page->index);
-       if (!old_req) {
-               list_add(&new_req->writepages_entry, &fi->writepages);
+       list_del(&new_wpa->writepages_entry);
+       old_wpa = fuse_find_writeback(fi, page->index, page->index);
+       if (!old_wpa) {
+               list_add(&new_wpa->writepages_entry, &fi->writepages);
                spin_unlock(&fi->lock);
                return false;
        }
 
-       new_req->num_pages = 1;
-       for (tmp = old_req->misc.write.next; tmp; tmp = tmp->misc.write.next) {
+       new_ap->num_pages = 1;
+       for (tmp = old_wpa->next; tmp; tmp = tmp->next) {
                pgoff_t curr_index;
 
-               WARN_ON(tmp->inode != new_req->inode);
-               curr_index = tmp->misc.write.in.offset >> PAGE_SHIFT;
+               WARN_ON(tmp->inode != new_wpa->inode);
+               curr_index = tmp->ia.write.in.offset >> PAGE_SHIFT;
                if (curr_index == page->index) {
-                       WARN_ON(tmp->num_pages != 1);
-                       WARN_ON(!test_bit(FR_PENDING, &tmp->flags));
-                       swap(tmp->pages[0], new_req->pages[0]);
+                       WARN_ON(tmp->ia.ap.num_pages != 1);
+                       swap(tmp->ia.ap.pages[0], new_ap->pages[0]);
                        break;
                }
        }
 
        if (!tmp) {
-               new_req->misc.write.next = old_req->misc.write.next;
-               old_req->misc.write.next = new_req;
+               new_wpa->next = old_wpa->next;
+               old_wpa->next = new_wpa;
        }
 
        spin_unlock(&fi->lock);
 
        if (tmp) {
-               struct backing_dev_info *bdi = inode_to_bdi(new_req->inode);
+               struct backing_dev_info *bdi = inode_to_bdi(new_wpa->inode);
 
                dec_wb_stat(&bdi->wb, WB_WRITEBACK);
-               dec_node_page_state(new_req->pages[0], NR_WRITEBACK_TEMP);
+               dec_node_page_state(new_ap->pages[0], NR_WRITEBACK_TEMP);
                wb_writeout_inc(&bdi->wb);
-               fuse_writepage_free(fc, new_req);
-               fuse_request_free(new_req);
+               fuse_writepage_free(new_wpa);
        }
 
        return true;
@@ -1865,7 +1986,8 @@ static int fuse_writepages_fill(struct page *page,
                struct writeback_control *wbc, void *_data)
 {
        struct fuse_fill_wb_data *data = _data;
-       struct fuse_req *req = data->req;
+       struct fuse_writepage_args *wpa = data->wpa;
+       struct fuse_args_pages *ap = &wpa->ia.ap;
        struct inode *inode = data->inode;
        struct fuse_inode *fi = get_fuse_inode(inode);
        struct fuse_conn *fc = get_fuse_conn(inode);
@@ -1888,16 +2010,16 @@ static int fuse_writepages_fill(struct page *page,
         */
        is_writeback = fuse_page_is_writeback(inode, page->index);
 
-       if (req && req->num_pages &&
-           (is_writeback || req->num_pages == fc->max_pages ||
-            (req->num_pages + 1) * PAGE_SIZE > fc->max_write ||
-            data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
+       if (wpa && ap->num_pages &&
+           (is_writeback || ap->num_pages == fc->max_pages ||
+            (ap->num_pages + 1) * PAGE_SIZE > fc->max_write ||
+            data->orig_pages[ap->num_pages - 1]->index + 1 != page->index)) {
                fuse_writepages_send(data);
-               data->req = NULL;
-       } else if (req && req->num_pages == req->max_pages) {
-               if (!fuse_req_realloc_pages(fc, req, GFP_NOFS)) {
+               data->wpa = NULL;
+       } else if (wpa && ap->num_pages == data->max_pages) {
+               if (!fuse_pages_realloc(data)) {
                        fuse_writepages_send(data);
-                       req = data->req = NULL;
+                       data->wpa = NULL;
                }
        }
 
@@ -1915,59 +2037,60 @@ static int fuse_writepages_fill(struct page *page,
         * This is ensured by holding the page lock in page_mkwrite() while
         * checking fuse_page_is_writeback().  We already hold the page lock
         * since clear_page_dirty_for_io() and keep it held until we add the
-        * request to the fi->writepages list and increment req->num_pages.
+        * request to the fi->writepages list and increment ap->num_pages.
         * After this fuse_page_is_writeback() will indicate that the page is
         * under writeback, so we can release the page lock.
         */
-       if (data->req == NULL) {
+       if (data->wpa == NULL) {
                struct fuse_inode *fi = get_fuse_inode(inode);
 
                err = -ENOMEM;
-               req = fuse_request_alloc_nofs(FUSE_REQ_INLINE_PAGES);
-               if (!req) {
+               wpa = fuse_writepage_args_alloc();
+               if (!wpa) {
                        __free_page(tmp_page);
                        goto out_unlock;
                }
+               data->max_pages = 1;
 
-               fuse_write_fill(req, data->ff, page_offset(page), 0);
-               req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
-               req->misc.write.next = NULL;
-               req->in.argpages = 1;
-               __set_bit(FR_BACKGROUND, &req->flags);
-               req->num_pages = 0;
-               req->end = fuse_writepage_end;
-               req->inode = inode;
+               ap = &wpa->ia.ap;
+               fuse_write_args_fill(&wpa->ia, data->ff, page_offset(page), 0);
+               wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
+               wpa->next = NULL;
+               ap->args.in_pages = true;
+               ap->args.end = fuse_writepage_end;
+               ap->num_pages = 0;
+               wpa->inode = inode;
 
                spin_lock(&fi->lock);
-               list_add(&req->writepages_entry, &fi->writepages);
+               list_add(&wpa->writepages_entry, &fi->writepages);
                spin_unlock(&fi->lock);
 
-               data->req = req;
+               data->wpa = wpa;
        }
        set_page_writeback(page);
 
        copy_highpage(tmp_page, page);
-       req->pages[req->num_pages] = tmp_page;
-       req->page_descs[req->num_pages].offset = 0;
-       req->page_descs[req->num_pages].length = PAGE_SIZE;
+       ap->pages[ap->num_pages] = tmp_page;
+       ap->descs[ap->num_pages].offset = 0;
+       ap->descs[ap->num_pages].length = PAGE_SIZE;
 
        inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
        inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
 
        err = 0;
-       if (is_writeback && fuse_writepage_in_flight(req, page)) {
+       if (is_writeback && fuse_writepage_in_flight(wpa, page)) {
                end_page_writeback(page);
-               data->req = NULL;
+               data->wpa = NULL;
                goto out_unlock;
        }
-       data->orig_pages[req->num_pages] = page;
+       data->orig_pages[ap->num_pages] = page;
 
        /*
         * Protected by fi->lock against concurrent access by
         * fuse_page_is_writeback().
         */
        spin_lock(&fi->lock);
-       req->num_pages++;
+       ap->num_pages++;
        spin_unlock(&fi->lock);
 
 out_unlock:
@@ -1989,7 +2112,7 @@ static int fuse_writepages(struct address_space *mapping,
                goto out;
 
        data.inode = inode;
-       data.req = NULL;
+       data.wpa = NULL;
        data.ff = NULL;
 
        err = -ENOMEM;
@@ -2000,9 +2123,9 @@ static int fuse_writepages(struct address_space *mapping,
                goto out;
 
        err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
-       if (data.req) {
+       if (data.wpa) {
                /* Ignore errors if we can write at least one page */
-               BUG_ON(!data.req->num_pages);
+               WARN_ON(!data.wpa->ia.ap.num_pages);
                fuse_writepages_send(&data);
                err = 0;
        }
@@ -2222,11 +2345,11 @@ static void fuse_lk_fill(struct fuse_args *args, struct file *file,
        inarg->lk.pid = pid;
        if (flock)
                inarg->lk_flags |= FUSE_LK_FLOCK;
-       args->in.h.opcode = opcode;
-       args->in.h.nodeid = get_node_id(inode);
-       args->in.numargs = 1;
-       args->in.args[0].size = sizeof(*inarg);
-       args->in.args[0].value = inarg;
+       args->opcode = opcode;
+       args->nodeid = get_node_id(inode);
+       args->in_numargs = 1;
+       args->in_args[0].size = sizeof(*inarg);
+       args->in_args[0].value = inarg;
 }
 
 static int fuse_getlk(struct file *file, struct file_lock *fl)
@@ -2239,9 +2362,9 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
        int err;
 
        fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg);
-       args.out.numargs = 1;
-       args.out.args[0].size = sizeof(outarg);
-       args.out.args[0].value = &outarg;
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(outarg);
+       args.out_args[0].value = &outarg;
        err = fuse_simple_request(fc, &args);
        if (!err)
                err = convert_fuse_file_lock(fc, &outarg.lk, fl);
@@ -2336,14 +2459,14 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
        memset(&inarg, 0, sizeof(inarg));
        inarg.block = block;
        inarg.blocksize = inode->i_sb->s_blocksize;
-       args.in.h.opcode = FUSE_BMAP;
-       args.in.h.nodeid = get_node_id(inode);
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.out.numargs = 1;
-       args.out.args[0].size = sizeof(outarg);
-       args.out.args[0].value = &outarg;
+       args.opcode = FUSE_BMAP;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(outarg);
+       args.out_args[0].value = &outarg;
        err = fuse_simple_request(fc, &args);
        if (err == -ENOSYS)
                fc->no_bmap = 1;
@@ -2368,14 +2491,14 @@ static loff_t fuse_lseek(struct file *file, loff_t offset, int whence)
        if (fc->no_lseek)
                goto fallback;
 
-       args.in.h.opcode = FUSE_LSEEK;
-       args.in.h.nodeid = ff->nodeid;
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.out.numargs = 1;
-       args.out.args[0].size = sizeof(outarg);
-       args.out.args[0].value = &outarg;
+       args.opcode = FUSE_LSEEK;
+       args.nodeid = ff->nodeid;
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(outarg);
+       args.out_args[0].value = &outarg;
        err = fuse_simple_request(fc, &args);
        if (err) {
                if (err == -ENOSYS) {
@@ -2573,14 +2696,14 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
                .flags = flags
        };
        struct fuse_ioctl_out outarg;
-       struct fuse_req *req = NULL;
-       struct page **pages = NULL;
        struct iovec *iov_page = NULL;
        struct iovec *in_iov = NULL, *out_iov = NULL;
-       unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
-       size_t in_size, out_size, transferred, c;
+       unsigned int in_iovs = 0, out_iovs = 0, max_pages;
+       size_t in_size, out_size, c;
+       ssize_t transferred;
        int err, i;
        struct iov_iter ii;
+       struct fuse_args_pages ap = {};
 
 #if BITS_PER_LONG == 32
        inarg.flags |= FUSE_IOCTL_32BIT;
@@ -2598,11 +2721,13 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
        BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
 
        err = -ENOMEM;
-       pages = kcalloc(fc->max_pages, sizeof(pages[0]), GFP_KERNEL);
+       ap.pages = fuse_pages_alloc(fc->max_pages, GFP_KERNEL, &ap.descs);
        iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
-       if (!pages || !iov_page)
+       if (!ap.pages || !iov_page)
                goto out;
 
+       fuse_page_descs_length_init(ap.descs, 0, fc->max_pages);
+
        /*
         * If restricted, initialize IO parameters as encoded in @cmd.
         * RETRY from server is not allowed.
@@ -2639,56 +2764,44 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
        err = -ENOMEM;
        if (max_pages > fc->max_pages)
                goto out;
-       while (num_pages < max_pages) {
-               pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
-               if (!pages[num_pages])
+       while (ap.num_pages < max_pages) {
+               ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+               if (!ap.pages[ap.num_pages])
                        goto out;
-               num_pages++;
+               ap.num_pages++;
        }
 
-       req = fuse_get_req(fc, num_pages);
-       if (IS_ERR(req)) {
-               err = PTR_ERR(req);
-               req = NULL;
-               goto out;
-       }
-       memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
-       req->num_pages = num_pages;
-       fuse_page_descs_length_init(req, 0, req->num_pages);
 
        /* okay, let's send it to the client */
-       req->in.h.opcode = FUSE_IOCTL;
-       req->in.h.nodeid = ff->nodeid;
-       req->in.numargs = 1;
-       req->in.args[0].size = sizeof(inarg);
-       req->in.args[0].value = &inarg;
+       ap.args.opcode = FUSE_IOCTL;
+       ap.args.nodeid = ff->nodeid;
+       ap.args.in_numargs = 1;
+       ap.args.in_args[0].size = sizeof(inarg);
+       ap.args.in_args[0].value = &inarg;
        if (in_size) {
-               req->in.numargs++;
-               req->in.args[1].size = in_size;
-               req->in.argpages = 1;
+               ap.args.in_numargs++;
+               ap.args.in_args[1].size = in_size;
+               ap.args.in_pages = true;
 
                err = -EFAULT;
                iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size);
-               for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) {
-                       c = copy_page_from_iter(pages[i], 0, PAGE_SIZE, &ii);
+               for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
+                       c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
                        if (c != PAGE_SIZE && iov_iter_count(&ii))
                                goto out;
                }
        }
 
-       req->out.numargs = 2;
-       req->out.args[0].size = sizeof(outarg);
-       req->out.args[0].value = &outarg;
-       req->out.args[1].size = out_size;
-       req->out.argpages = 1;
-       req->out.argvar = 1;
+       ap.args.out_numargs = 2;
+       ap.args.out_args[0].size = sizeof(outarg);
+       ap.args.out_args[0].value = &outarg;
+       ap.args.out_args[1].size = out_size;
+       ap.args.out_pages = true;
+       ap.args.out_argvar = true;
 
-       fuse_request_send(fc, req);
-       err = req->out.h.error;
-       transferred = req->out.args[1].size;
-       fuse_put_request(fc, req);
-       req = NULL;
-       if (err)
+       transferred = fuse_simple_request(fc, &ap.args);
+       err = transferred;
+       if (transferred < 0)
                goto out;
 
        /* did it ask for retry? */
@@ -2713,7 +2826,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
                    in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
                        goto out;
 
-               vaddr = kmap_atomic(pages[0]);
+               vaddr = kmap_atomic(ap.pages[0]);
                err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr,
                                            transferred, in_iovs + out_iovs,
                                            (flags & FUSE_IOCTL_COMPAT) != 0);
@@ -2741,19 +2854,17 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 
        err = -EFAULT;
        iov_iter_init(&ii, READ, out_iov, out_iovs, transferred);
-       for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) {
-               c = copy_page_to_iter(pages[i], 0, PAGE_SIZE, &ii);
+       for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
+               c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
                if (c != PAGE_SIZE && iov_iter_count(&ii))
                        goto out;
        }
        err = 0;
  out:
-       if (req)
-               fuse_put_request(fc, req);
        free_page((unsigned long) iov_page);
-       while (num_pages)
-               __free_page(pages[--num_pages]);
-       kfree(pages);
+       while (ap.num_pages)
+               __free_page(ap.pages[--ap.num_pages]);
+       kfree(ap.pages);
 
        return err ? err : outarg.result;
 }
@@ -2861,14 +2972,14 @@ __poll_t fuse_file_poll(struct file *file, poll_table *wait)
                fuse_register_polled_file(fc, ff);
        }
 
-       args.in.h.opcode = FUSE_POLL;
-       args.in.h.nodeid = ff->nodeid;
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.out.numargs = 1;
-       args.out.args[0].size = sizeof(outarg);
-       args.out.args[0].value = &outarg;
+       args.opcode = FUSE_POLL;
+       args.nodeid = ff->nodeid;
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(outarg);
+       args.out_args[0].value = &outarg;
        err = fuse_simple_request(fc, &args);
 
        if (!err)
@@ -3076,11 +3187,11 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
        if (!(mode & FALLOC_FL_KEEP_SIZE))
                set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
 
-       args.in.h.opcode = FUSE_FALLOCATE;
-       args.in.h.nodeid = ff->nodeid;
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
+       args.opcode = FUSE_FALLOCATE;
+       args.nodeid = ff->nodeid;
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
        err = fuse_simple_request(fc, &args);
        if (err == -ENOSYS) {
                fc->no_fallocate = 1;
@@ -3168,14 +3279,14 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
        if (is_unstable)
                set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
 
-       args.in.h.opcode = FUSE_COPY_FILE_RANGE;
-       args.in.h.nodeid = ff_in->nodeid;
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.out.numargs = 1;
-       args.out.args[0].size = sizeof(outarg);
-       args.out.args[0].value = &outarg;
+       args.opcode = FUSE_COPY_FILE_RANGE;
+       args.nodeid = ff_in->nodeid;
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(outarg);
+       args.out_args[0].value = &outarg;
        err = fuse_simple_request(fc, &args);
        if (err == -ENOSYS) {
                fc->no_copy_file_range = 1;
index 24dbca7..956aeaf 100644 (file)
@@ -47,9 +47,6 @@
 /** Number of dentries for each connection in the control filesystem */
 #define FUSE_CTL_NUM_DENTRIES 5
 
-/** Number of page pointers embedded in fuse_req */
-#define FUSE_REQ_INLINE_PAGES 1
-
 /** List of active connections */
 extern struct list_head fuse_conn_list;
 
@@ -164,17 +161,15 @@ enum {
 };
 
 struct fuse_conn;
+struct fuse_release_args;
 
 /** FUSE specific file data */
 struct fuse_file {
        /** Fuse connection for this file */
        struct fuse_conn *fc;
 
-       /*
-        * Request reserved for flush and release.
-        * Modified under relative fuse_inode::lock.
-        */
-       struct fuse_req *reserved_req;
+       /* Argument space reserved for release */
+       struct fuse_release_args *release_args;
 
        /** Kernel file handle guaranteed to be unique */
        u64 kh;
@@ -229,57 +224,12 @@ struct fuse_in_arg {
        const void *value;
 };
 
-/** The request input */
-struct fuse_in {
-       /** The request header */
-       struct fuse_in_header h;
-
-       /** True if the data for the last argument is in req->pages */
-       unsigned argpages:1;
-
-       /** Number of arguments */
-       unsigned numargs;
-
-       /** Array of arguments */
-       struct fuse_in_arg args[3];
-};
-
 /** One output argument of a request */
 struct fuse_arg {
        unsigned size;
        void *value;
 };
 
-/** The request output */
-struct fuse_out {
-       /** Header returned from userspace */
-       struct fuse_out_header h;
-
-       /*
-        * The following bitfields are not changed during the request
-        * processing
-        */
-
-       /** Last argument is variable length (can be shorter than
-           arg->size) */
-       unsigned argvar:1;
-
-       /** Last argument is a list of pages to copy data to */
-       unsigned argpages:1;
-
-       /** Zero partially or not copied pages */
-       unsigned page_zeroing:1;
-
-       /** Pages may be replaced with new ones */
-       unsigned page_replace:1;
-
-       /** Number or arguments */
-       unsigned numargs;
-
-       /** Array of arguments */
-       struct fuse_arg args[2];
-};
-
 /** FUSE page descriptor */
 struct fuse_page_desc {
        unsigned int length;
@@ -287,20 +237,28 @@ struct fuse_page_desc {
 };
 
 struct fuse_args {
-       struct {
-               struct {
-                       uint32_t opcode;
-                       uint64_t nodeid;
-               } h;
-               unsigned numargs;
-               struct fuse_in_arg args[3];
+       uint64_t nodeid;
+       uint32_t opcode;
+       unsigned short in_numargs;
+       unsigned short out_numargs;
+       bool force:1;
+       bool noreply:1;
+       bool nocreds:1;
+       bool in_pages:1;
+       bool out_pages:1;
+       bool out_argvar:1;
+       bool page_zeroing:1;
+       bool page_replace:1;
+       struct fuse_in_arg in_args[3];
+       struct fuse_arg out_args[2];
+       void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error);
+};
 
-       } in;
-       struct {
-               unsigned argvar:1;
-               unsigned numargs;
-               struct fuse_arg args[2];
-       } out;
+struct fuse_args_pages {
+       struct fuse_args args;
+       struct page **pages;
+       struct fuse_page_desc *descs;
+       unsigned int num_pages;
 };
 
 #define FUSE_ARGS(args) struct fuse_args args = {}
@@ -373,83 +331,79 @@ struct fuse_req {
        /** Entry on the interrupts list  */
        struct list_head intr_entry;
 
+       /* Input/output arguments */
+       struct fuse_args *args;
+
        /** refcount */
        refcount_t count;
 
        /* Request flags, updated with test/set/clear_bit() */
        unsigned long flags;
 
-       /** The request input */
-       struct fuse_in in;
+       /* The request input header */
+       struct {
+               struct fuse_in_header h;
+       } in;
 
-       /** The request output */
-       struct fuse_out out;
+       /* The request output header */
+       struct {
+               struct fuse_out_header h;
+       } out;
 
        /** Used to wake up the task waiting for completion of request*/
        wait_queue_head_t waitq;
 
-       /** Data for asynchronous requests */
-       union {
-               struct {
-                       struct fuse_release_in in;
-                       struct inode *inode;
-               } release;
-               struct fuse_init_in init_in;
-               struct fuse_init_out init_out;
-               struct cuse_init_in cuse_init_in;
-               struct {
-                       struct fuse_read_in in;
-                       u64 attr_ver;
-               } read;
-               struct {
-                       struct fuse_write_in in;
-                       struct fuse_write_out out;
-                       struct fuse_req *next;
-               } write;
-               struct fuse_notify_retrieve_in retrieve_in;
-       } misc;
-
-       /** page vector */
-       struct page **pages;
-
-       /** page-descriptor vector */
-       struct fuse_page_desc *page_descs;
-
-       /** size of the 'pages' array */
-       unsigned max_pages;
-
-       /** inline page vector */
-       struct page *inline_pages[FUSE_REQ_INLINE_PAGES];
-
-       /** inline page-descriptor vector */
-       struct fuse_page_desc inline_page_descs[FUSE_REQ_INLINE_PAGES];
-
-       /** number of pages in vector */
-       unsigned num_pages;
-
-       /** File used in the request (or NULL) */
-       struct fuse_file *ff;
+#if IS_ENABLED(CONFIG_VIRTIO_FS)
+       /** virtio-fs's physically contiguous buffer for in and out args */
+       void *argbuf;
+#endif
+};
 
-       /** Inode used in the request or NULL */
-       struct inode *inode;
+struct fuse_iqueue;
 
-       /** AIO control block */
-       struct fuse_io_priv *io;
+/**
+ * Input queue callbacks
+ *
+ * Input queue signalling is device-specific.  For example, the /dev/fuse file
+ * uses fiq->waitq and fasync to wake processes that are waiting on queue
+ * readiness.  These callbacks allow other device types to respond to input
+ * queue activity.
+ */
+struct fuse_iqueue_ops {
+       /**
+        * Signal that a forget has been queued
+        */
+       void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq)
+               __releases(fiq->lock);
 
-       /** Link on fi->writepages */
-       struct list_head writepages_entry;
+       /**
+        * Signal that an INTERRUPT request has been queued
+        */
+       void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq)
+               __releases(fiq->lock);
 
-       /** Request completion callback */
-       void (*end)(struct fuse_conn *, struct fuse_req *);
+       /**
+        * Signal that a request has been queued
+        */
+       void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq)
+               __releases(fiq->lock);
 
-       /** Request is stolen from fuse_file->reserved_req */
-       struct file *stolen_file;
+       /**
+        * Clean up when fuse_iqueue is destroyed
+        */
+       void (*release)(struct fuse_iqueue *fiq);
 };
 
+/** /dev/fuse input queue operations */
+extern const struct fuse_iqueue_ops fuse_dev_fiq_ops;
+
 struct fuse_iqueue {
        /** Connection established */
        unsigned connected;
 
+       /** Lock protecting accesses to members of this structure */
+       spinlock_t lock;
+
        /** Readers of the connection are waiting on this */
        wait_queue_head_t waitq;
 
@@ -471,6 +425,12 @@ struct fuse_iqueue {
 
        /** O_ASYNC requests */
        struct fasync_struct *fasync;
+
+       /** Device-specific callbacks */
+       const struct fuse_iqueue_ops *ops;
+
+       /** Device-specific state */
+       void *priv;
 };
 
 #define FUSE_PQ_HASH_BITS 8
@@ -504,6 +464,29 @@ struct fuse_dev {
        struct list_head entry;
 };
 
+struct fuse_fs_context {
+       int fd;
+       unsigned int rootmode;
+       kuid_t user_id;
+       kgid_t group_id;
+       bool is_bdev:1;
+       bool fd_present:1;
+       bool rootmode_present:1;
+       bool user_id_present:1;
+       bool group_id_present:1;
+       bool default_permissions:1;
+       bool allow_other:1;
+       bool destroy:1;
+       bool no_control:1;
+       bool no_force_umount:1;
+       unsigned int max_read;
+       unsigned int blksize;
+       const char *subtype;
+
+       /* fuse_dev pointer to fill in, should contain NULL on entry */
+       void **fudptr;
+};
+
 /**
  * A Fuse connection.
  *
@@ -584,9 +567,6 @@ struct fuse_conn {
        /** waitq for blocked connection */
        wait_queue_head_t blocked_waitq;
 
-       /** waitq for reserved requests */
-       wait_queue_head_t reserved_req_waitq;
-
        /** Connection established, cleared on umount, connection
            abort and device release */
        unsigned connected;
@@ -721,6 +701,18 @@ struct fuse_conn {
        /** Does the filesystem support copy_file_range? */
        unsigned no_copy_file_range:1;
 
+       /* Send DESTROY request */
+       unsigned int destroy:1;
+
+       /* Delete dentries that have gone stale */
+       unsigned int delete_stale:1;
+
+       /** Do not create entry in fusectl fs */
+       unsigned int no_control:1;
+
+       /** Do not allow MNT_FORCE umount */
+       unsigned int no_force_umount:1;
+
        /** The number of requests waiting for completion */
        atomic_t num_waiting;
 
@@ -742,9 +734,6 @@ struct fuse_conn {
        /** Key for lock owner ID scrambling */
        u32 scramble_key[4];
 
-       /** Reserved request for the DESTROY message */
-       struct fuse_req *destroy_req;
-
        /** Version counter for attribute changes */
        atomic64_t attr_version;
 
@@ -820,14 +809,32 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
 
 struct fuse_forget_link *fuse_alloc_forget(void);
 
-/* Used by READDIRPLUS */
-void fuse_force_forget(struct file *file, u64 nodeid);
+struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
+                                            unsigned int max,
+                                            unsigned int *countp);
 
-/**
+/*
  * Initialize READ or READDIR request
  */
-void fuse_read_fill(struct fuse_req *req, struct file *file,
-                   loff_t pos, size_t count, int opcode);
+struct fuse_io_args {
+       union {
+               struct {
+                       struct fuse_read_in in;
+                       u64 attr_ver;
+               } read;
+               struct {
+                       struct fuse_write_in in;
+                       struct fuse_write_out out;
+               } write;
+       };
+       struct fuse_args_pages ap;
+       struct fuse_io_priv *io;
+       struct fuse_file *ff;
+};
+
+void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
+                        size_t count, int opcode);
+
 
 /**
  * Send OPEN or OPENDIR request
@@ -900,61 +907,16 @@ int fuse_ctl_init(void);
 void __exit fuse_ctl_cleanup(void);
 
 /**
- * Allocate a request
- */
-struct fuse_req *fuse_request_alloc(unsigned npages);
-
-struct fuse_req *fuse_request_alloc_nofs(unsigned npages);
-
-bool fuse_req_realloc_pages(struct fuse_conn *fc, struct fuse_req *req,
-                           gfp_t flags);
-
-
-/**
- * Free a request
- */
-void fuse_request_free(struct fuse_req *req);
-
-/**
- * Get a request, may fail with -ENOMEM,
- * caller should specify # elements in req->pages[] explicitly
- */
-struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages);
-struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
-                                            unsigned npages);
-
-/*
- * Increment reference count on request
- */
-void __fuse_get_request(struct fuse_req *req);
-
-/**
- * Gets a requests for a file operation, always succeeds
- */
-struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
-                                            struct file *file);
-
-/**
- * Decrement reference count of a request.  If count goes to zero free
- * the request.
- */
-void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
-
-/**
- * Send a request (synchronous)
- */
-void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req);
-
-/**
  * Simple request sending that does request allocation and freeing
  */
 ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args);
+int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args,
+                          gfp_t gfp_flags);
 
 /**
- * Send a request in the background
+ * End a finished request
  */
-void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
-bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req);
+void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req);
 
 /* Abort all requests */
 void fuse_abort_conn(struct fuse_conn *fc);
@@ -980,15 +942,33 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
 /**
  * Initialize fuse_conn
  */
-void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns);
+void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
+                   const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv);
 
 /**
  * Release reference to fuse_conn
  */
 void fuse_conn_put(struct fuse_conn *fc);
 
-struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc);
+struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc);
+struct fuse_dev *fuse_dev_alloc(void);
+void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc);
 void fuse_dev_free(struct fuse_dev *fud);
+void fuse_send_init(struct fuse_conn *fc);
+
+/**
+ * Fill in superblock and initialize fuse connection
+ * @sb: partially-initialized superblock to fill in
+ * @ctx: mount context
+ */
+int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx);
+
+/**
+ * Disassociate fuse connection from superblock and kill the superblock
+ *
+ * Calls kill_anon_super(), do not use with bdev mounts.
+ */
+void fuse_kill_sb_anon(struct super_block *sb);
 
 /**
  * Add connection to control filesystem
@@ -1093,4 +1073,15 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type);
 /* readdir.c */
 int fuse_readdir(struct file *file, struct dir_context *ctx);
 
+/**
+ * Return the number of bytes in an arguments list
+ */
+unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args);
+
+/**
+ * Get the next unique ID for a request
+ */
+u64 fuse_get_unique(struct fuse_iqueue *fiq);
+void fuse_free_conn(struct fuse_conn *fc);
+
 #endif /* _FS_FUSE_I_H */
index 4bb885b..e040e2a 100644 (file)
@@ -15,7 +15,8 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/parser.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
 #include <linux/statfs.h>
 #include <linux/random.h>
 #include <linux/sched.h>
@@ -59,24 +60,13 @@ MODULE_PARM_DESC(max_user_congthresh,
 /** Congestion starts at 75% of maximum */
 #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)
 
-struct fuse_mount_data {
-       int fd;
-       unsigned rootmode;
-       kuid_t user_id;
-       kgid_t group_id;
-       unsigned fd_present:1;
-       unsigned rootmode_present:1;
-       unsigned user_id_present:1;
-       unsigned group_id_present:1;
-       unsigned default_permissions:1;
-       unsigned allow_other:1;
-       unsigned max_read;
-       unsigned blksize;
-};
+#ifdef CONFIG_BLOCK
+static struct file_system_type fuseblk_fs_type;
+#endif
 
 struct fuse_forget_link *fuse_alloc_forget(void)
 {
-       return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL);
+       return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
 }
 
 static struct inode *fuse_alloc_inode(struct super_block *sb)
@@ -374,19 +364,21 @@ void fuse_unlock_inode(struct inode *inode, bool locked)
 
 static void fuse_umount_begin(struct super_block *sb)
 {
-       fuse_abort_conn(get_fuse_conn_super(sb));
+       struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+       if (!fc->no_force_umount)
+               fuse_abort_conn(fc);
 }
 
 static void fuse_send_destroy(struct fuse_conn *fc)
 {
-       struct fuse_req *req = fc->destroy_req;
-       if (req && fc->conn_init) {
-               fc->destroy_req = NULL;
-               req->in.h.opcode = FUSE_DESTROY;
-               __set_bit(FR_FORCE, &req->flags);
-               __clear_bit(FR_BACKGROUND, &req->flags);
-               fuse_request_send(fc, req);
-               fuse_put_request(fc, req);
+       if (fc->conn_init) {
+               FUSE_ARGS(args);
+
+               args.opcode = FUSE_DESTROY;
+               args.force = true;
+               args.nocreds = true;
+               fuse_simple_request(fc, &args);
        }
 }
 
@@ -430,12 +422,12 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
        }
 
        memset(&outarg, 0, sizeof(outarg));
-       args.in.numargs = 0;
-       args.in.h.opcode = FUSE_STATFS;
-       args.in.h.nodeid = get_node_id(d_inode(dentry));
-       args.out.numargs = 1;
-       args.out.args[0].size = sizeof(outarg);
-       args.out.args[0].value = &outarg;
+       args.in_numargs = 0;
+       args.opcode = FUSE_STATFS;
+       args.nodeid = get_node_id(d_inode(dentry));
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(outarg);
+       args.out_args[0].value = &outarg;
        err = fuse_simple_request(fc, &args);
        if (!err)
                convert_fuse_statfs(buf, &outarg.st);
@@ -443,6 +435,8 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
 }
 
 enum {
+       OPT_SOURCE,
+       OPT_SUBTYPE,
        OPT_FD,
        OPT_ROOTMODE,
        OPT_USER_ID,
@@ -454,111 +448,109 @@ enum {
        OPT_ERR
 };
 
-static const match_table_t tokens = {
-       {OPT_FD,                        "fd=%u"},
-       {OPT_ROOTMODE,                  "rootmode=%o"},
-       {OPT_USER_ID,                   "user_id=%u"},
-       {OPT_GROUP_ID,                  "group_id=%u"},
-       {OPT_DEFAULT_PERMISSIONS,       "default_permissions"},
-       {OPT_ALLOW_OTHER,               "allow_other"},
-       {OPT_MAX_READ,                  "max_read=%u"},
-       {OPT_BLKSIZE,                   "blksize=%u"},
-       {OPT_ERR,                       NULL}
+static const struct fs_parameter_spec fuse_param_specs[] = {
+       fsparam_string  ("source",              OPT_SOURCE),
+       fsparam_u32     ("fd",                  OPT_FD),
+       fsparam_u32oct  ("rootmode",            OPT_ROOTMODE),
+       fsparam_u32     ("user_id",             OPT_USER_ID),
+       fsparam_u32     ("group_id",            OPT_GROUP_ID),
+       fsparam_flag    ("default_permissions", OPT_DEFAULT_PERMISSIONS),
+       fsparam_flag    ("allow_other",         OPT_ALLOW_OTHER),
+       fsparam_u32     ("max_read",            OPT_MAX_READ),
+       fsparam_u32     ("blksize",             OPT_BLKSIZE),
+       fsparam_string  ("subtype",             OPT_SUBTYPE),
+       {}
+};
+
+static const struct fs_parameter_description fuse_fs_parameters = {
+       .name           = "fuse",
+       .specs          = fuse_param_specs,
 };
 
-static int fuse_match_uint(substring_t *s, unsigned int *res)
+static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
-       int err = -ENOMEM;
-       char *buf = match_strdup(s);
-       if (buf) {
-               err = kstrtouint(buf, 10, res);
-               kfree(buf);
+       struct fs_parse_result result;
+       struct fuse_fs_context *ctx = fc->fs_private;
+       int opt;
+
+       opt = fs_parse(fc, &fuse_fs_parameters, param, &result);
+       if (opt < 0)
+               return opt;
+
+       switch (opt) {
+       case OPT_SOURCE:
+               if (fc->source)
+                       return invalf(fc, "fuse: Multiple sources specified");
+               fc->source = param->string;
+               param->string = NULL;
+               break;
+
+       case OPT_SUBTYPE:
+               if (ctx->subtype)
+                       return invalf(fc, "fuse: Multiple subtypes specified");
+               ctx->subtype = param->string;
+               param->string = NULL;
+               return 0;
+
+       case OPT_FD:
+               ctx->fd = result.uint_32;
+               ctx->fd_present = 1;
+               break;
+
+       case OPT_ROOTMODE:
+               if (!fuse_valid_type(result.uint_32))
+                       return invalf(fc, "fuse: Invalid rootmode");
+               ctx->rootmode = result.uint_32;
+               ctx->rootmode_present = 1;
+               break;
+
+       case OPT_USER_ID:
+               ctx->user_id = make_kuid(fc->user_ns, result.uint_32);
+               if (!uid_valid(ctx->user_id))
+                       return invalf(fc, "fuse: Invalid user_id");
+               ctx->user_id_present = 1;
+               break;
+
+       case OPT_GROUP_ID:
+               ctx->group_id = make_kgid(fc->user_ns, result.uint_32);
+               if (!gid_valid(ctx->group_id))
+                       return invalf(fc, "fuse: Invalid group_id");
+               ctx->group_id_present = 1;
+               break;
+
+       case OPT_DEFAULT_PERMISSIONS:
+               ctx->default_permissions = 1;
+               break;
+
+       case OPT_ALLOW_OTHER:
+               ctx->allow_other = 1;
+               break;
+
+       case OPT_MAX_READ:
+               ctx->max_read = result.uint_32;
+               break;
+
+       case OPT_BLKSIZE:
+               if (!ctx->is_bdev)
+                       return invalf(fc, "fuse: blksize only supported for fuseblk");
+               ctx->blksize = result.uint_32;
+               break;
+
+       default:
+               return -EINVAL;
        }
-       return err;
+
+       return 0;
 }
 
-static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev,
-                         struct user_namespace *user_ns)
+static void fuse_free_fc(struct fs_context *fc)
 {
-       char *p;
-       memset(d, 0, sizeof(struct fuse_mount_data));
-       d->max_read = ~0;
-       d->blksize = FUSE_DEFAULT_BLKSIZE;
-
-       while ((p = strsep(&opt, ",")) != NULL) {
-               int token;
-               int value;
-               unsigned uv;
-               substring_t args[MAX_OPT_ARGS];
-               if (!*p)
-                       continue;
-
-               token = match_token(p, tokens, args);
-               switch (token) {
-               case OPT_FD:
-                       if (match_int(&args[0], &value))
-                               return 0;
-                       d->fd = value;
-                       d->fd_present = 1;
-                       break;
-
-               case OPT_ROOTMODE:
-                       if (match_octal(&args[0], &value))
-                               return 0;
-                       if (!fuse_valid_type(value))
-                               return 0;
-                       d->rootmode = value;
-                       d->rootmode_present = 1;
-                       break;
-
-               case OPT_USER_ID:
-                       if (fuse_match_uint(&args[0], &uv))
-                               return 0;
-                       d->user_id = make_kuid(user_ns, uv);
-                       if (!uid_valid(d->user_id))
-                               return 0;
-                       d->user_id_present = 1;
-                       break;
-
-               case OPT_GROUP_ID:
-                       if (fuse_match_uint(&args[0], &uv))
-                               return 0;
-                       d->group_id = make_kgid(user_ns, uv);
-                       if (!gid_valid(d->group_id))
-                               return 0;
-                       d->group_id_present = 1;
-                       break;
-
-               case OPT_DEFAULT_PERMISSIONS:
-                       d->default_permissions = 1;
-                       break;
-
-               case OPT_ALLOW_OTHER:
-                       d->allow_other = 1;
-                       break;
-
-               case OPT_MAX_READ:
-                       if (match_int(&args[0], &value))
-                               return 0;
-                       d->max_read = value;
-                       break;
-
-               case OPT_BLKSIZE:
-                       if (!is_bdev || match_int(&args[0], &value))
-                               return 0;
-                       d->blksize = value;
-                       break;
-
-               default:
-                       return 0;
-               }
-       }
+       struct fuse_fs_context *ctx = fc->fs_private;
 
-       if (!d->fd_present || !d->rootmode_present ||
-           !d->user_id_present || !d->group_id_present)
-               return 0;
-
-       return 1;
+       if (ctx) {
+               kfree(ctx->subtype);
+               kfree(ctx);
+       }
 }
 
 static int fuse_show_options(struct seq_file *m, struct dentry *root)
@@ -579,14 +571,19 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
        return 0;
 }
 
-static void fuse_iqueue_init(struct fuse_iqueue *fiq)
+static void fuse_iqueue_init(struct fuse_iqueue *fiq,
+                            const struct fuse_iqueue_ops *ops,
+                            void *priv)
 {
        memset(fiq, 0, sizeof(struct fuse_iqueue));
+       spin_lock_init(&fiq->lock);
        init_waitqueue_head(&fiq->waitq);
        INIT_LIST_HEAD(&fiq->pending);
        INIT_LIST_HEAD(&fiq->interrupts);
        fiq->forget_list_tail = &fiq->forget_list_head;
        fiq->connected = 1;
+       fiq->ops = ops;
+       fiq->priv = priv;
 }
 
 static void fuse_pqueue_init(struct fuse_pqueue *fpq)
@@ -600,7 +597,8 @@ static void fuse_pqueue_init(struct fuse_pqueue *fpq)
        fpq->connected = 1;
 }
 
-void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
+void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
+                   const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
 {
        memset(fc, 0, sizeof(*fc));
        spin_lock_init(&fc->lock);
@@ -609,8 +607,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
        refcount_set(&fc->count, 1);
        atomic_set(&fc->dev_count, 1);
        init_waitqueue_head(&fc->blocked_waitq);
-       init_waitqueue_head(&fc->reserved_req_waitq);
-       fuse_iqueue_init(&fc->iq);
+       fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
        INIT_LIST_HEAD(&fc->bg_queue);
        INIT_LIST_HEAD(&fc->entry);
        INIT_LIST_HEAD(&fc->devices);
@@ -633,8 +630,10 @@ EXPORT_SYMBOL_GPL(fuse_conn_init);
 void fuse_conn_put(struct fuse_conn *fc)
 {
        if (refcount_dec_and_test(&fc->count)) {
-               if (fc->destroy_req)
-                       fuse_request_free(fc->destroy_req);
+               struct fuse_iqueue *fiq = &fc->iq;
+
+               if (fiq->ops->release)
+                       fiq->ops->release(fiq);
                put_pid_ns(fc->pid_ns);
                put_user_ns(fc->user_ns);
                fc->release(fc);
@@ -822,9 +821,12 @@ static const struct super_operations fuse_super_operations = {
 
 static void sanitize_global_limit(unsigned *limit)
 {
+       /*
+        * The default maximum number of async requests is calculated to consume
+        * 1/2^13 of the total memory, assuming 392 bytes per request.
+        */
        if (*limit == 0)
-               *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) /
-                        sizeof(struct fuse_req);
+               *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392;
 
        if (*limit >= 1 << 16)
                *limit = (1 << 16) - 1;
@@ -870,11 +872,19 @@ static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
        spin_unlock(&fc->bg_lock);
 }
 
-static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
+struct fuse_init_args {
+       struct fuse_args args;
+       struct fuse_init_in in;
+       struct fuse_init_out out;
+};
+
+static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
+                              int error)
 {
-       struct fuse_init_out *arg = &req->misc.init_out;
+       struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
+       struct fuse_init_out *arg = &ia->out;
 
-       if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
+       if (error || arg->major != FUSE_KERNEL_VERSION)
                fc->conn_error = 1;
        else {
                unsigned long ra_pages;
@@ -951,18 +961,23 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
                fc->max_write = max_t(unsigned, 4096, fc->max_write);
                fc->conn_init = 1;
        }
+       kfree(ia);
+
        fuse_set_initialized(fc);
        wake_up_all(&fc->blocked_waitq);
 }
 
-static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
+void fuse_send_init(struct fuse_conn *fc)
 {
-       struct fuse_init_in *arg = &req->misc.init_in;
+       struct fuse_init_args *ia;
 
-       arg->major = FUSE_KERNEL_VERSION;
-       arg->minor = FUSE_KERNEL_MINOR_VERSION;
-       arg->max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE;
-       arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
+       ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL);
+
+       ia->in.major = FUSE_KERNEL_VERSION;
+       ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
+       ia->in.max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE;
+       ia->in.flags |=
+               FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
                FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
                FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
                FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
@@ -971,26 +986,32 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
                FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
                FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
                FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA;
-       req->in.h.opcode = FUSE_INIT;
-       req->in.numargs = 1;
-       req->in.args[0].size = sizeof(*arg);
-       req->in.args[0].value = arg;
-       req->out.numargs = 1;
+       ia->args.opcode = FUSE_INIT;
+       ia->args.in_numargs = 1;
+       ia->args.in_args[0].size = sizeof(ia->in);
+       ia->args.in_args[0].value = &ia->in;
+       ia->args.out_numargs = 1;
        /* Variable length argument used for backward compatibility
           with interface version < 7.5.  Rest of init_out is zeroed
           by do_get_request(), so a short reply is not a problem */
-       req->out.argvar = 1;
-       req->out.args[0].size = sizeof(struct fuse_init_out);
-       req->out.args[0].value = &req->misc.init_out;
-       req->end = process_init_reply;
-       fuse_request_send_background(fc, req);
+       ia->args.out_argvar = 1;
+       ia->args.out_args[0].size = sizeof(ia->out);
+       ia->args.out_args[0].value = &ia->out;
+       ia->args.force = true;
+       ia->args.nocreds = true;
+       ia->args.end = process_init_reply;
+
+       if (fuse_simple_background(fc, &ia->args, GFP_KERNEL) != 0)
+               process_init_reply(fc, &ia->args, -ENOTCONN);
 }
+EXPORT_SYMBOL_GPL(fuse_send_init);
 
-static void fuse_free_conn(struct fuse_conn *fc)
+void fuse_free_conn(struct fuse_conn *fc)
 {
        WARN_ON(!list_empty(&fc->devices));
        kfree_rcu(fc, rcu);
 }
+EXPORT_SYMBOL_GPL(fuse_free_conn);
 
 static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
 {
@@ -1032,7 +1053,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
        return 0;
 }
 
-struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc)
+struct fuse_dev *fuse_dev_alloc(void)
 {
        struct fuse_dev *fud;
        struct list_head *pq;
@@ -1048,16 +1069,33 @@ struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc)
        }
 
        fud->pq.processing = pq;
-       fud->fc = fuse_conn_get(fc);
        fuse_pqueue_init(&fud->pq);
 
+       return fud;
+}
+EXPORT_SYMBOL_GPL(fuse_dev_alloc);
+
+void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc)
+{
+       fud->fc = fuse_conn_get(fc);
        spin_lock(&fc->lock);
        list_add_tail(&fud->entry, &fc->devices);
        spin_unlock(&fc->lock);
+}
+EXPORT_SYMBOL_GPL(fuse_dev_install);
 
+struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc)
+{
+       struct fuse_dev *fud;
+
+       fud = fuse_dev_alloc();
+       if (!fud)
+               return NULL;
+
+       fuse_dev_install(fud, fc);
        return fud;
 }
-EXPORT_SYMBOL_GPL(fuse_dev_alloc);
+EXPORT_SYMBOL_GPL(fuse_dev_alloc_install);
 
 void fuse_dev_free(struct fuse_dev *fud)
 {
@@ -1075,17 +1113,13 @@ void fuse_dev_free(struct fuse_dev *fud)
 }
 EXPORT_SYMBOL_GPL(fuse_dev_free);
 
-static int fuse_fill_super(struct super_block *sb, void *data, int silent)
+int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
 {
        struct fuse_dev *fud;
-       struct fuse_conn *fc;
+       struct fuse_conn *fc = get_fuse_conn_super(sb);
        struct inode *root;
-       struct fuse_mount_data d;
-       struct file *file;
        struct dentry *root_dentry;
-       struct fuse_req *init_req;
        int err;
-       int is_bdev = sb->s_bdev != NULL;
 
        err = -EINVAL;
        if (sb->s_flags & SB_MANDLOCK)
@@ -1093,19 +1127,19 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
        sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
 
-       if (!parse_fuse_opt(data, &d, is_bdev, sb->s_user_ns))
-               goto err;
-
-       if (is_bdev) {
+       if (ctx->is_bdev) {
 #ifdef CONFIG_BLOCK
                err = -EINVAL;
-               if (!sb_set_blocksize(sb, d.blksize))
+               if (!sb_set_blocksize(sb, ctx->blksize))
                        goto err;
 #endif
        } else {
                sb->s_blocksize = PAGE_SIZE;
                sb->s_blocksize_bits = PAGE_SHIFT;
        }
+
+       sb->s_subtype = ctx->subtype;
+       ctx->subtype = NULL;
        sb->s_magic = FUSE_SUPER_MAGIC;
        sb->s_op = &fuse_super_operations;
        sb->s_xattr = fuse_xattr_handlers;
@@ -1116,19 +1150,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        if (sb->s_user_ns != &init_user_ns)
                sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
 
-       file = fget(d.fd);
-       err = -EINVAL;
-       if (!file)
-               goto err;
-
-       /*
-        * Require mount to happen from the same user namespace which
-        * opened /dev/fuse to prevent potential attacks.
-        */
-       if (file->f_op != &fuse_dev_operations ||
-           file->f_cred->user_ns != sb->s_user_ns)
-               goto err_fput;
-
        /*
         * If we are not in the initial user namespace posix
         * acls must be translated.
@@ -1136,17 +1157,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        if (sb->s_user_ns != &init_user_ns)
                sb->s_xattr = fuse_no_acl_xattr_handlers;
 
-       fc = kmalloc(sizeof(*fc), GFP_KERNEL);
-       err = -ENOMEM;
-       if (!fc)
-               goto err_fput;
-
-       fuse_conn_init(fc, sb->s_user_ns);
-       fc->release = fuse_free_conn;
-
-       fud = fuse_dev_alloc(fc);
+       fud = fuse_dev_alloc_install(fc);
        if (!fud)
-               goto err_put_conn;
+               goto err;
 
        fc->dev = sb->s_dev;
        fc->sb = sb;
@@ -1159,17 +1172,17 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
                fc->dont_mask = 1;
        sb->s_flags |= SB_POSIXACL;
 
-       fc->default_permissions = d.default_permissions;
-       fc->allow_other = d.allow_other;
-       fc->user_id = d.user_id;
-       fc->group_id = d.group_id;
-       fc->max_read = max_t(unsigned, 4096, d.max_read);
-
-       /* Used by get_root_inode() */
-       sb->s_fs_info = fc;
+       fc->default_permissions = ctx->default_permissions;
+       fc->allow_other = ctx->allow_other;
+       fc->user_id = ctx->user_id;
+       fc->group_id = ctx->group_id;
+       fc->max_read = max_t(unsigned, 4096, ctx->max_read);
+       fc->destroy = ctx->destroy;
+       fc->no_control = ctx->no_control;
+       fc->no_force_umount = ctx->no_force_umount;
 
        err = -ENOMEM;
-       root = fuse_get_root_inode(sb, d.rootmode);
+       root = fuse_get_root_inode(sb, ctx->rootmode);
        sb->s_d_op = &fuse_root_dentry_operations;
        root_dentry = d_make_root(root);
        if (!root_dentry)
@@ -1177,20 +1190,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        /* Root dentry doesn't have .d_revalidate */
        sb->s_d_op = &fuse_dentry_operations;
 
-       init_req = fuse_request_alloc(0);
-       if (!init_req)
-               goto err_put_root;
-       __set_bit(FR_BACKGROUND, &init_req->flags);
-
-       if (is_bdev) {
-               fc->destroy_req = fuse_request_alloc(0);
-               if (!fc->destroy_req)
-                       goto err_free_init_req;
-       }
-
        mutex_lock(&fuse_mutex);
        err = -EINVAL;
-       if (file->private_data)
+       if (*ctx->fudptr)
                goto err_unlock;
 
        err = fuse_ctl_add_conn(fc);
@@ -1199,27 +1201,62 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
        list_add_tail(&fc->entry, &fuse_conn_list);
        sb->s_root = root_dentry;
-       file->private_data = fud;
+       *ctx->fudptr = fud;
        mutex_unlock(&fuse_mutex);
+       return 0;
+
+ err_unlock:
+       mutex_unlock(&fuse_mutex);
+       dput(root_dentry);
+ err_dev_free:
+       fuse_dev_free(fud);
+ err:
+       return err;
+}
+EXPORT_SYMBOL_GPL(fuse_fill_super_common);
+
+static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
+{
+       struct fuse_fs_context *ctx = fsc->fs_private;
+       struct file *file;
+       int err;
+       struct fuse_conn *fc;
+
+       err = -EINVAL;
+       file = fget(ctx->fd);
+       if (!file)
+               goto err;
+
+       /*
+        * Require mount to happen from the same user namespace which
+        * opened /dev/fuse to prevent potential attacks.
+        */
+       if ((file->f_op != &fuse_dev_operations) ||
+           (file->f_cred->user_ns != sb->s_user_ns))
+               goto err_fput;
+       ctx->fudptr = &file->private_data;
+
+       fc = kmalloc(sizeof(*fc), GFP_KERNEL);
+       err = -ENOMEM;
+       if (!fc)
+               goto err_fput;
+
+       fuse_conn_init(fc, sb->s_user_ns, &fuse_dev_fiq_ops, NULL);
+       fc->release = fuse_free_conn;
+       sb->s_fs_info = fc;
+
+       err = fuse_fill_super_common(sb, ctx);
+       if (err)
+               goto err_put_conn;
        /*
         * atomic_dec_and_test() in fput() provides the necessary
         * memory barrier for file->private_data to be visible on all
         * CPUs after this
         */
        fput(file);
-
-       fuse_send_init(fc, init_req);
-
+       fuse_send_init(get_fuse_conn_super(sb));
        return 0;
 
- err_unlock:
-       mutex_unlock(&fuse_mutex);
- err_free_init_req:
-       fuse_request_free(init_req);
- err_put_root:
-       dput(root_dentry);
- err_dev_free:
-       fuse_dev_free(fud);
  err_put_conn:
        fuse_conn_put(fc);
        sb->s_fs_info = NULL;
@@ -1229,11 +1266,52 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        return err;
 }
 
-static struct dentry *fuse_mount(struct file_system_type *fs_type,
-                      int flags, const char *dev_name,
-                      void *raw_data)
+static int fuse_get_tree(struct fs_context *fc)
 {
-       return mount_nodev(fs_type, flags, raw_data, fuse_fill_super);
+       struct fuse_fs_context *ctx = fc->fs_private;
+
+       if (!ctx->fd_present || !ctx->rootmode_present ||
+           !ctx->user_id_present || !ctx->group_id_present)
+               return -EINVAL;
+
+#ifdef CONFIG_BLOCK
+       if (ctx->is_bdev)
+               return get_tree_bdev(fc, fuse_fill_super);
+#endif
+
+       return get_tree_nodev(fc, fuse_fill_super);
+}
+
+static const struct fs_context_operations fuse_context_ops = {
+       .free           = fuse_free_fc,
+       .parse_param    = fuse_parse_param,
+       .get_tree       = fuse_get_tree,
+};
+
+/*
+ * Set up the filesystem mount context.
+ */
+static int fuse_init_fs_context(struct fs_context *fc)
+{
+       struct fuse_fs_context *ctx;
+
+       ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
+       if (!ctx)
+               return -ENOMEM;
+
+       ctx->max_read = ~0;
+       ctx->blksize = FUSE_DEFAULT_BLKSIZE;
+
+#ifdef CONFIG_BLOCK
+       if (fc->fs_type == &fuseblk_fs_type) {
+               ctx->is_bdev = true;
+               ctx->destroy = true;
+       }
+#endif
+
+       fc->fs_private = ctx;
+       fc->ops = &fuse_context_ops;
+       return 0;
 }
 
 static void fuse_sb_destroy(struct super_block *sb)
@@ -1241,7 +1319,8 @@ static void fuse_sb_destroy(struct super_block *sb)
        struct fuse_conn *fc = get_fuse_conn_super(sb);
 
        if (fc) {
-               fuse_send_destroy(fc);
+               if (fc->destroy)
+                       fuse_send_destroy(fc);
 
                fuse_abort_conn(fc);
                fuse_wait_aborted(fc);
@@ -1252,29 +1331,24 @@ static void fuse_sb_destroy(struct super_block *sb)
        }
 }
 
-static void fuse_kill_sb_anon(struct super_block *sb)
+void fuse_kill_sb_anon(struct super_block *sb)
 {
        fuse_sb_destroy(sb);
        kill_anon_super(sb);
 }
+EXPORT_SYMBOL_GPL(fuse_kill_sb_anon);
 
 static struct file_system_type fuse_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "fuse",
        .fs_flags       = FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
-       .mount          = fuse_mount,
+       .init_fs_context = fuse_init_fs_context,
+       .parameters     = &fuse_fs_parameters,
        .kill_sb        = fuse_kill_sb_anon,
 };
 MODULE_ALIAS_FS("fuse");
 
 #ifdef CONFIG_BLOCK
-static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
-                          int flags, const char *dev_name,
-                          void *raw_data)
-{
-       return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super);
-}
-
 static void fuse_kill_sb_blk(struct super_block *sb)
 {
        fuse_sb_destroy(sb);
@@ -1284,7 +1358,8 @@ static void fuse_kill_sb_blk(struct super_block *sb)
 static struct file_system_type fuseblk_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "fuseblk",
-       .mount          = fuse_mount_blk,
+       .init_fs_context = fuse_init_fs_context,
+       .parameters     = &fuse_fs_parameters,
        .kill_sb        = fuse_kill_sb_blk,
        .fs_flags       = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
 };
index 574d03f..5c38b9d 100644 (file)
@@ -249,6 +249,27 @@ retry:
        return 0;
 }
 
+static void fuse_force_forget(struct file *file, u64 nodeid)
+{
+       struct inode *inode = file_inode(file);
+       struct fuse_conn *fc = get_fuse_conn(inode);
+       struct fuse_forget_in inarg;
+       FUSE_ARGS(args);
+
+       memset(&inarg, 0, sizeof(inarg));
+       inarg.nlookup = 1;
+       args.opcode = FUSE_FORGET;
+       args.nodeid = nodeid;
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.force = true;
+       args.noreply = true;
+
+       fuse_simple_request(fc, &args);
+       /* ignore errors */
+}
+
 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
                             struct dir_context *ctx, u64 attr_version)
 {
@@ -295,62 +316,55 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
 
 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
 {
-       int plus, err;
-       size_t nbytes;
+       int plus;
+       ssize_t res;
        struct page *page;
        struct inode *inode = file_inode(file);
        struct fuse_conn *fc = get_fuse_conn(inode);
-       struct fuse_req *req;
+       struct fuse_io_args ia = {};
+       struct fuse_args_pages *ap = &ia.ap;
+       struct fuse_page_desc desc = { .length = PAGE_SIZE };
        u64 attr_version = 0;
        bool locked;
 
-       req = fuse_get_req(fc, 1);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
-
        page = alloc_page(GFP_KERNEL);
-       if (!page) {
-               fuse_put_request(fc, req);
+       if (!page)
                return -ENOMEM;
-       }
 
        plus = fuse_use_readdirplus(inode, ctx);
-       req->out.argpages = 1;
-       req->num_pages = 1;
-       req->pages[0] = page;
-       req->page_descs[0].length = PAGE_SIZE;
+       ap->args.out_pages = 1;
+       ap->num_pages = 1;
+       ap->pages = &page;
+       ap->descs = &desc;
        if (plus) {
                attr_version = fuse_get_attr_version(fc);
-               fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
-                              FUSE_READDIRPLUS);
+               fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
+                                   FUSE_READDIRPLUS);
        } else {
-               fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
-                              FUSE_READDIR);
+               fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
+                                   FUSE_READDIR);
        }
        locked = fuse_lock_inode(inode);
-       fuse_request_send(fc, req);
+       res = fuse_simple_request(fc, &ap->args);
        fuse_unlock_inode(inode, locked);
-       nbytes = req->out.args[0].size;
-       err = req->out.h.error;
-       fuse_put_request(fc, req);
-       if (!err) {
-               if (!nbytes) {
+       if (res >= 0) {
+               if (!res) {
                        struct fuse_file *ff = file->private_data;
 
                        if (ff->open_flags & FOPEN_CACHE_DIR)
                                fuse_readdir_cache_end(file, ctx->pos);
                } else if (plus) {
-                       err = parse_dirplusfile(page_address(page), nbytes,
+                       res = parse_dirplusfile(page_address(page), res,
                                                file, ctx, attr_version);
                } else {
-                       err = parse_dirfile(page_address(page), nbytes, file,
+                       res = parse_dirfile(page_address(page), res, file,
                                            ctx);
                }
        }
 
        __free_page(page);
        fuse_invalidate_atime(inode);
-       return err;
+       return res;
 }
 
 enum fuse_parse_result {
@@ -372,11 +386,13 @@ static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
        for (;;) {
                struct fuse_dirent *dirent = addr + offset;
                unsigned int nbytes = size - offset;
-               size_t reclen = FUSE_DIRENT_SIZE(dirent);
+               size_t reclen;
 
                if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
                        break;
 
+               reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
+
                if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
                        return FOUND_ERR;
                if (WARN_ON(reclen > nbytes))
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
new file mode 100644 (file)
index 0000000..6af3f13
--- /dev/null
@@ -0,0 +1,1195 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * virtio-fs: Virtio Filesystem
+ * Copyright (C) 2018 Red Hat, Inc.
+ */
+
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/virtio.h>
+#include <linux/virtio_fs.h>
+#include <linux/delay.h>
+#include <linux/fs_context.h>
+#include <linux/highmem.h>
+#include "fuse_i.h"
+
+/* List of virtio-fs device instances and a lock for the list. The lock also
+ * provides mutual exclusion between the device removal and mount paths.
+ */
+static DEFINE_MUTEX(virtio_fs_mutex);
+static LIST_HEAD(virtio_fs_instances);
+
+enum {
+       VQ_HIPRIO,
+       VQ_REQUEST
+};
+
+/* Per-virtqueue state */
+struct virtio_fs_vq {
+       spinlock_t lock;
+       struct virtqueue *vq;     /* protected by ->lock */
+       struct work_struct done_work; /* scheduled from the vq callback */
+       struct list_head queued_reqs; /* FORGETs that could not be queued yet */
+       struct delayed_work dispatch_work; /* retries queued_reqs (hiprio) */
+       struct fuse_dev *fud;
+       bool connected;           /* cleared on ->remove; checked before submit */
+       long in_flight;           /* buffers added to vq, not yet completed */
+       char name[24];            /* "hiprio" or "requests.N" */
+} ____cacheline_aligned_in_smp;
+
+/* A virtio-fs device instance */
+struct virtio_fs {
+       struct kref refcount;
+       struct list_head list;    /* on virtio_fs_instances */
+       char *tag;
+       struct virtio_fs_vq *vqs;
+       unsigned int nvqs;               /* number of virtqueues */
+       unsigned int num_request_queues; /* number of request queues */
+};
+
+struct virtio_fs_forget {
+       struct fuse_in_header ih;
+       struct fuse_forget_in arg;
+       /* This request can be temporarily queued on virt queue */
+       struct list_head list;
+};
+
+static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
+{
+       struct virtio_fs *fs = vq->vdev->priv;
+
+       return &fs->vqs[vq->index];
+}
+
+static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
+{
+       return &vq_to_fsvq(vq)->fud->pq;
+}
+
+static void release_virtio_fs_obj(struct kref *ref)
+{
+       struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);
+
+       kfree(vfs->vqs);
+       kfree(vfs);
+}
+
+/* Drop a reference on @fs. Caller must hold virtio_fs_mutex. */
+static void virtio_fs_put(struct virtio_fs *fs)
+{
+       kref_put(&fs->refcount, release_virtio_fs_obj);
+}
+
+static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
+{
+       struct virtio_fs *vfs = fiq->priv;
+
+       mutex_lock(&virtio_fs_mutex);
+       virtio_fs_put(vfs);
+       mutex_unlock(&virtio_fs_mutex);
+}
+
+static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
+{
+       WARN_ON(fsvq->in_flight < 0); /* sanity check, read without ->lock */
+
+       /* Poll until no in-flight requests remain on this queue. */
+       while (1) {
+               spin_lock(&fsvq->lock);
+               if (!fsvq->in_flight) {
+                       spin_unlock(&fsvq->lock);
+                       break;
+               }
+               spin_unlock(&fsvq->lock);
+               /* TODO use completion instead of timeout */
+               usleep_range(1000, 2000);
+       }
+
+       flush_work(&fsvq->done_work); /* wait for the completion worker */
+       flush_delayed_work(&fsvq->dispatch_work); /* and the resubmit worker */
+}
+
+static inline void drain_hiprio_queued_reqs(struct virtio_fs_vq *fsvq)
+{
+       struct virtio_fs_forget *forget;
+
+       spin_lock(&fsvq->lock);
+       while (1) {
+               forget = list_first_entry_or_null(&fsvq->queued_reqs,
+                                               struct virtio_fs_forget, list);
+               if (!forget)
+                       break;
+               list_del(&forget->list);
+               kfree(forget);
+       }
+       spin_unlock(&fsvq->lock);
+}
+
+static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
+{
+       struct virtio_fs_vq *fsvq;
+       int i;
+
+       for (i = 0; i < fs->nvqs; i++) {
+               fsvq = &fs->vqs[i];
+               if (i == VQ_HIPRIO)
+                       drain_hiprio_queued_reqs(fsvq);
+
+               virtio_fs_drain_queue(fsvq);
+       }
+}
+
+static void virtio_fs_start_all_queues(struct virtio_fs *fs)
+{
+       struct virtio_fs_vq *fsvq;
+       int i;
+
+       for (i = 0; i < fs->nvqs; i++) {
+               fsvq = &fs->vqs[i];
+               spin_lock(&fsvq->lock);
+               fsvq->connected = true;
+               spin_unlock(&fsvq->lock);
+       }
+}
+
+/* Add @fs to virtio_fs_instances, or return -EEXIST if its tag is in use */
+static int virtio_fs_add_instance(struct virtio_fs *fs)
+{
+       struct virtio_fs *fs2;
+       bool duplicate = false;
+
+       mutex_lock(&virtio_fs_mutex);
+
+       list_for_each_entry(fs2, &virtio_fs_instances, list) {
+               if (strcmp(fs->tag, fs2->tag) == 0)
+                       duplicate = true; /* keeps scanning; no early exit */
+       }
+
+       if (!duplicate)
+               list_add_tail(&fs->list, &virtio_fs_instances);
+
+       mutex_unlock(&virtio_fs_mutex);
+
+       if (duplicate)
+               return -EEXIST;
+       return 0;
+}
+
+/* Return the virtio_fs with a given tag, or NULL */
+static struct virtio_fs *virtio_fs_find_instance(const char *tag)
+{
+       struct virtio_fs *fs;
+
+       mutex_lock(&virtio_fs_mutex);
+
+       list_for_each_entry(fs, &virtio_fs_instances, list) {
+               if (strcmp(fs->tag, tag) == 0) {
+                       kref_get(&fs->refcount);
+                       goto found;
+               }
+       }
+
+       fs = NULL; /* not found */
+
+found:
+       mutex_unlock(&virtio_fs_mutex);
+
+       return fs;
+}
+
+static void virtio_fs_free_devs(struct virtio_fs *fs)
+{
+       unsigned int i;
+
+       for (i = 0; i < fs->nvqs; i++) {
+               struct virtio_fs_vq *fsvq = &fs->vqs[i];
+
+               if (!fsvq->fud)
+                       continue;
+
+               fuse_dev_free(fsvq->fud);
+               fsvq->fud = NULL;
+       }
+}
+
+/* Read tag from virtio config into fs->tag (devm-managed, do not kfree()). */
+static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
+{
+       char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
+       char *end;
+       size_t len;
+
+       virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
+                          &tag_buf, sizeof(tag_buf));
+       end = memchr(tag_buf, '\0', sizeof(tag_buf));
+       if (end == tag_buf)
+               return -EINVAL; /* empty tag */
+       if (!end) /* no NUL found: the tag occupies the whole config field */
+               end = &tag_buf[sizeof(tag_buf)];
+
+       len = end - tag_buf;
+       fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL); /* +1: NUL */
+       if (!fs->tag)
+               return -ENOMEM;
+       memcpy(fs->tag, tag_buf, len);
+       fs->tag[len] = '\0';
+       return 0;
+}
+
+/* Work function for hiprio completion */
+static void virtio_fs_hiprio_done_work(struct work_struct *work)
+{
+       struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
+                                                done_work);
+       struct virtqueue *vq = fsvq->vq;
+
+       /* Free completed FUSE_FORGET requests */
+       spin_lock(&fsvq->lock);
+       do {
+               unsigned int len;
+               void *req;
+
+               virtqueue_disable_cb(vq);
+
+               while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
+                       kfree(req);
+                       fsvq->in_flight--;
+               }
+       } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
+       spin_unlock(&fsvq->lock);
+}
+
+static void virtio_fs_dummy_dispatch_work(struct work_struct *work)
+{
+}
+
+static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
+{
+       struct virtio_fs_forget *forget;
+       struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
+                                                dispatch_work.work);
+       struct virtqueue *vq = fsvq->vq;
+       struct scatterlist sg;
+       struct scatterlist *sgs[] = {&sg};
+       bool notify;
+       int ret;
+
+       pr_debug("virtio-fs: worker %s called.\n", __func__);
+       while (1) {
+               spin_lock(&fsvq->lock);
+               forget = list_first_entry_or_null(&fsvq->queued_reqs,
+                                       struct virtio_fs_forget, list);
+               if (!forget) {
+                       spin_unlock(&fsvq->lock);
+                       return;
+               }
+
+               list_del(&forget->list);
+               if (!fsvq->connected) {
+                       spin_unlock(&fsvq->lock);
+                       kfree(forget);
+                       continue;
+               }
+
+               sg_init_one(&sg, forget, sizeof(*forget));
+
+               /* Enqueue the request */
+               dev_dbg(&vq->vdev->dev, "%s\n", __func__);
+               ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
+               if (ret < 0) {
+                       if (ret == -ENOMEM || ret == -ENOSPC) {
+                               pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
+                                        ret);
+                               list_add_tail(&forget->list,
+                                               &fsvq->queued_reqs);
+                               schedule_delayed_work(&fsvq->dispatch_work,
+                                               msecs_to_jiffies(1));
+                       } else {
+                               pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
+                                        ret);
+                               kfree(forget);
+                       }
+                       spin_unlock(&fsvq->lock);
+                       return;
+               }
+
+               fsvq->in_flight++;
+               notify = virtqueue_kick_prepare(vq);
+               spin_unlock(&fsvq->lock);
+
+               if (notify)
+                       virtqueue_notify(vq);
+               pr_debug("virtio-fs: worker %s dispatched one forget request.\n",
+                        __func__);
+       }
+}
+
+/* Allocate and copy args into req->argbuf */
+static int copy_args_to_argbuf(struct fuse_req *req)
+{
+       struct fuse_args *args = req->args;
+       unsigned int offset = 0;
+       unsigned int num_in;
+       unsigned int num_out;
+       unsigned int len;
+       unsigned int i;
+
+       num_in = args->in_numargs - args->in_pages;
+       num_out = args->out_numargs - args->out_pages;
+       len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
+             fuse_len_args(num_out, args->out_args);
+
+       req->argbuf = kmalloc(len, GFP_ATOMIC);
+       if (!req->argbuf)
+               return -ENOMEM;
+
+       for (i = 0; i < num_in; i++) {
+               memcpy(req->argbuf + offset,
+                      args->in_args[i].value,
+                      args->in_args[i].size);
+               offset += args->in_args[i].size;
+       }
+
+       return 0;
+}
+
+/* Copy args out of and free req->argbuf */
+static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
+{
+       unsigned int remaining;
+       unsigned int offset;
+       unsigned int num_in;
+       unsigned int num_out;
+       unsigned int i;
+
+       remaining = req->out.h.len - sizeof(req->out.h);
+       num_in = args->in_numargs - args->in_pages;
+       num_out = args->out_numargs - args->out_pages;
+       offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);
+
+       for (i = 0; i < num_out; i++) {
+               unsigned int argsize = args->out_args[i].size;
+
+               if (args->out_argvar &&
+                   i == args->out_numargs - 1 &&
+                   argsize > remaining) {
+                       argsize = remaining;
+               }
+
+               memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
+               offset += argsize;
+
+               if (i != args->out_numargs - 1)
+                       remaining -= argsize;
+       }
+
+       /* Store the actual size of the variable-length arg */
+       if (args->out_argvar)
+               args->out_args[args->out_numargs - 1].size = remaining;
+
+       kfree(req->argbuf);
+       req->argbuf = NULL;
+}
+
+/* Work function for request completion */
+static void virtio_fs_requests_done_work(struct work_struct *work)
+{
+       struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
+                                                done_work);
+       struct fuse_pqueue *fpq = &fsvq->fud->pq;
+       struct fuse_conn *fc = fsvq->fud->fc;
+       struct virtqueue *vq = fsvq->vq;
+       struct fuse_req *req;
+       struct fuse_args_pages *ap;
+       struct fuse_req *next;
+       struct fuse_args *args;
+       unsigned int len, i, thislen;
+       struct page *page;
+       LIST_HEAD(reqs);
+
+       /* Collect completed requests off the virtqueue */
+       spin_lock(&fsvq->lock);
+       do {
+               virtqueue_disable_cb(vq);
+
+               while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
+                       spin_lock(&fpq->lock);
+                       list_move_tail(&req->list, &reqs);
+                       spin_unlock(&fpq->lock);
+               }
+       } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
+       spin_unlock(&fsvq->lock);
+
+       /* End requests */
+       list_for_each_entry_safe(req, next, &reqs, list) {
+               /*
+                * TODO verify that server properly follows FUSE protocol
+                * (oh.uniq, oh.len)
+                */
+               args = req->args;
+               copy_args_from_argbuf(args, req);
+
+               if (args->out_pages && args->page_zeroing) {
+                       len = args->out_args[args->out_numargs - 1].size;
+                       ap = container_of(args, typeof(*ap), args);
+                       for (i = 0; i < ap->num_pages; i++) {
+                               thislen = ap->descs[i].length;
+                               if (len < thislen) {
+                                       WARN_ON(ap->descs[i].offset);
+                                       page = ap->pages[i];
+                                       zero_user_segment(page, len, thislen);
+                                       len = 0;
+                               } else {
+                                       len -= thislen;
+                               }
+                       }
+               }
+
+               spin_lock(&fpq->lock);
+               clear_bit(FR_SENT, &req->flags);
+               list_del_init(&req->list);
+               spin_unlock(&fpq->lock);
+
+               fuse_request_end(fc, req);
+               spin_lock(&fsvq->lock);
+               fsvq->in_flight--;
+               spin_unlock(&fsvq->lock);
+       }
+}
+
+/* Virtqueue interrupt handler */
+static void virtio_fs_vq_done(struct virtqueue *vq)
+{
+       struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);
+
+       dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);
+
+       schedule_work(&fsvq->done_work);
+}
+
+/* Initialize virtqueues */
+static int virtio_fs_setup_vqs(struct virtio_device *vdev,
+                              struct virtio_fs *fs)
+{
+       struct virtqueue **vqs;
+       vq_callback_t **callbacks;
+       const char **names;
+       unsigned int i;
+       int ret = 0;
+
+       virtio_cread(vdev, struct virtio_fs_config, num_request_queues,
+                    &fs->num_request_queues);
+       if (fs->num_request_queues == 0)
+               return -EINVAL;
+
+       fs->nvqs = 1 + fs->num_request_queues;
+       fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
+       if (!fs->vqs)
+               return -ENOMEM;
+
+       vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
+       callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
+                                       GFP_KERNEL);
+       names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
+       if (!vqs || !callbacks || !names) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
+       snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name),
+                       "hiprio");
+       names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
+       INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
+       INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
+       INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
+                       virtio_fs_hiprio_dispatch_work);
+       spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);
+
+       /* Initialize the requests virtqueues */
+       for (i = VQ_REQUEST; i < fs->nvqs; i++) {
+               spin_lock_init(&fs->vqs[i].lock);
+               INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
+               INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
+                                       virtio_fs_dummy_dispatch_work);
+               INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
+               snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
+                        "requests.%u", i - VQ_REQUEST);
+               callbacks[i] = virtio_fs_vq_done;
+               names[i] = fs->vqs[i].name;
+       }
+
+       ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
+       if (ret < 0)
+               goto out;
+
+       for (i = 0; i < fs->nvqs; i++)
+               fs->vqs[i].vq = vqs[i];
+
+       virtio_fs_start_all_queues(fs);
+out:
+       kfree(names);
+       kfree(callbacks);
+       kfree(vqs);
+       if (ret)
+               kfree(fs->vqs);
+       return ret;
+}
+
+/* Free virtqueues (device must already be reset) */
+static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
+                                 struct virtio_fs *fs)
+{
+       vdev->config->del_vqs(vdev);
+}
+
+static int virtio_fs_probe(struct virtio_device *vdev)
+{
+       struct virtio_fs *fs;
+       int ret;
+
+       fs = kzalloc(sizeof(*fs), GFP_KERNEL);
+       if (!fs)
+               return -ENOMEM;
+       kref_init(&fs->refcount);
+       vdev->priv = fs;
+
+       ret = virtio_fs_read_tag(vdev, fs);
+       if (ret < 0)
+               goto out;
+
+       ret = virtio_fs_setup_vqs(vdev, fs); /* frees fs->vqs itself on error */
+       if (ret < 0)
+               goto out;
+
+       /* TODO vq affinity */
+
+       /* Bring the device online in case the filesystem is mounted and
+        * requests need to be sent before we return.
+        */
+       virtio_device_ready(vdev);
+
+       ret = virtio_fs_add_instance(fs);
+       if (ret < 0)
+               goto out_vqs;
+
+       return 0;
+
+out_vqs:
+       vdev->config->reset(vdev);
+       virtio_fs_cleanup_vqs(vdev, fs);
+       kfree(fs->vqs); /* allocated by virtio_fs_setup_vqs(); else leaked */
+out:
+       vdev->priv = NULL;
+       kfree(fs);
+       return ret;
+}
+
+static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
+{
+       struct virtio_fs_vq *fsvq;
+       int i;
+
+       for (i = 0; i < fs->nvqs; i++) {
+               fsvq = &fs->vqs[i];
+               spin_lock(&fsvq->lock);
+               fsvq->connected = false;
+               spin_unlock(&fsvq->lock);
+       }
+}
+
+static void virtio_fs_remove(struct virtio_device *vdev)
+{
+       struct virtio_fs *fs = vdev->priv;
+
+       mutex_lock(&virtio_fs_mutex);
+       /* This device is going away. No one should get new reference */
+       list_del_init(&fs->list);
+       virtio_fs_stop_all_queues(fs);
+       virtio_fs_drain_all_queues(fs);
+       vdev->config->reset(vdev);
+       virtio_fs_cleanup_vqs(vdev, fs);
+
+       vdev->priv = NULL;
+       /* Put device reference on virtio_fs object */
+       virtio_fs_put(fs);
+       mutex_unlock(&virtio_fs_mutex);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int virtio_fs_freeze(struct virtio_device *vdev)
+{
+       /* TODO need to save state here */
+       pr_warn("virtio-fs: suspend/resume not yet supported\n");
+       return -EOPNOTSUPP;
+}
+
+static int virtio_fs_restore(struct virtio_device *vdev)
+{
+        /* TODO need to restore state here */
+       return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct virtio_device_id id_table[] = { /* used by the driver */
+       { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
+       {},
+};
+
+static const unsigned int feature_table[] = {}; /* empty: no feature bits */
+
+static struct virtio_driver virtio_fs_driver = {
+       .driver.name            = KBUILD_MODNAME,
+       .driver.owner           = THIS_MODULE,
+       .id_table               = id_table,
+       .feature_table          = feature_table,
+       .feature_table_size     = ARRAY_SIZE(feature_table),
+       .probe                  = virtio_fs_probe,
+       .remove                 = virtio_fs_remove,
+#ifdef CONFIG_PM_SLEEP
+       .freeze                 = virtio_fs_freeze,
+       .restore                = virtio_fs_restore,
+#endif
+};
+
+static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
+__releases(fiq->lock)
+{
+       struct fuse_forget_link *link;
+       struct virtio_fs_forget *forget;
+       struct scatterlist sg;
+       struct scatterlist *sgs[] = {&sg};
+       struct virtio_fs *fs;
+       struct virtqueue *vq;
+       struct virtio_fs_vq *fsvq;
+       bool notify;
+       u64 unique;
+       int ret;
+
+       link = fuse_dequeue_forget(fiq, 1, NULL);
+       unique = fuse_get_unique(fiq);
+
+       fs = fiq->priv;
+       fsvq = &fs->vqs[VQ_HIPRIO];
+       spin_unlock(&fiq->lock);
+
+       /* Allocate a buffer for the request */
+       forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
+
+       forget->ih = (struct fuse_in_header){
+               .opcode = FUSE_FORGET,
+               .nodeid = link->forget_one.nodeid,
+               .unique = unique,
+               .len = sizeof(*forget),
+       };
+       forget->arg = (struct fuse_forget_in){
+               .nlookup = link->forget_one.nlookup,
+       };
+
+       sg_init_one(&sg, forget, sizeof(*forget));
+
+       /* Enqueue the request */
+       spin_lock(&fsvq->lock);
+
+       if (!fsvq->connected) {
+               kfree(forget);
+               spin_unlock(&fsvq->lock);
+               goto out;
+       }
+
+       vq = fsvq->vq;
+       dev_dbg(&vq->vdev->dev, "%s\n", __func__);
+
+       ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
+       if (ret < 0) {
+               if (ret == -ENOMEM || ret == -ENOSPC) {
+                       pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later.\n",
+                                ret);
+                       list_add_tail(&forget->list, &fsvq->queued_reqs);
+                       schedule_delayed_work(&fsvq->dispatch_work,
+                                       msecs_to_jiffies(1));
+               } else {
+                       pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
+                                ret);
+                       kfree(forget);
+               }
+               spin_unlock(&fsvq->lock);
+               goto out;
+       }
+
+       fsvq->in_flight++;
+       notify = virtqueue_kick_prepare(vq);
+
+       spin_unlock(&fsvq->lock);
+
+       if (notify)
+               virtqueue_notify(vq);
+out:
+       kfree(link);
+}
+
+static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
+__releases(fiq->lock)
+{
+       /*
+        * TODO interrupts.
+        *
+        * Normal fs operations on a local filesystems aren't interruptible.
+        * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
+        * with shared lock between host and guest.
+        */
+       spin_unlock(&fiq->lock);
+}
+
+/* Return the number of scatter-gather list elements required */
+static unsigned int sg_count_fuse_req(struct fuse_req *req)
+{
+       struct fuse_args *args = req->args;
+       struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
+       unsigned int total_sgs = 1 /* fuse_in_header */;
+
+       if (args->in_numargs - args->in_pages)
+               total_sgs += 1;
+
+       if (args->in_pages)
+               total_sgs += ap->num_pages;
+
+       if (!test_bit(FR_ISREPLY, &req->flags))
+               return total_sgs;
+
+       total_sgs += 1 /* fuse_out_header */;
+
+       if (args->out_numargs - args->out_pages)
+               total_sgs += 1;
+
+       if (args->out_pages)
+               total_sgs += ap->num_pages;
+
+       return total_sgs;
+}
+
+/* Add pages to scatter-gather list and return number of elements used */
+static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
+                                      struct page **pages,
+                                      struct fuse_page_desc *page_descs,
+                                      unsigned int num_pages,
+                                      unsigned int total_len)
+{
+       unsigned int i;
+       unsigned int this_len;
+
+       for (i = 0; i < num_pages && total_len; i++) {
+               sg_init_table(&sg[i], 1);
+               this_len =  min(page_descs[i].length, total_len);
+               sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
+               total_len -= this_len;
+       }
+
+       return i;
+}
+
+/* Add args to scatter-gather list and return number of elements used */
+static unsigned int sg_init_fuse_args(struct scatterlist *sg,
+                                     struct fuse_req *req,
+                                     struct fuse_arg *args,
+                                     unsigned int numargs,
+                                     bool argpages,
+                                     void *argbuf,
+                                     unsigned int *len_used)
+{
+       struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
+       unsigned int total_sgs = 0;
+       unsigned int len;
+
+       len = fuse_len_args(numargs - argpages, args);
+       if (len)
+               sg_init_one(&sg[total_sgs++], argbuf, len);
+
+       if (argpages)
+               total_sgs += sg_init_fuse_pages(&sg[total_sgs],
+                                               ap->pages, ap->descs,
+                                               ap->num_pages,
+                                               args[numargs - 1].size);
+
+       if (len_used)
+               *len_used = len;
+
+       return total_sgs;
+}
+
+/* Add a request to a virtqueue and kick the device */
+static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
+                                struct fuse_req *req)
+{
+       /* requests need at least 4 elements */
+       struct scatterlist *stack_sgs[6];
+       struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
+       struct scatterlist **sgs = stack_sgs;
+       struct scatterlist *sg = stack_sg;
+       struct virtqueue *vq;
+       struct fuse_args *args = req->args;
+       unsigned int argbuf_used = 0;
+       unsigned int out_sgs = 0;
+       unsigned int in_sgs = 0;
+       unsigned int total_sgs;
+       unsigned int i;
+       int ret;
+       bool notify;
+
+       /* Does the sglist fit on the stack? */
+       total_sgs = sg_count_fuse_req(req);
+       if (total_sgs > ARRAY_SIZE(stack_sgs)) {
+               sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
+               sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
+               if (!sgs || !sg) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+       }
+
+       /* Use a bounce buffer since stack args cannot be mapped */
+       ret = copy_args_to_argbuf(req);
+       if (ret < 0)
+               goto out;
+
+       /* Request elements */
+       sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
+       out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
+                                    (struct fuse_arg *)args->in_args,
+                                    args->in_numargs, args->in_pages,
+                                    req->argbuf, &argbuf_used);
+
+       /* Reply elements */
+       if (test_bit(FR_ISREPLY, &req->flags)) {
+               sg_init_one(&sg[out_sgs + in_sgs++],
+                           &req->out.h, sizeof(req->out.h));
+               in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
+                                           args->out_args, args->out_numargs,
+                                           args->out_pages,
+                                           req->argbuf + argbuf_used, NULL);
+       }
+
+       WARN_ON(out_sgs + in_sgs != total_sgs);
+
+       for (i = 0; i < total_sgs; i++)
+               sgs[i] = &sg[i];
+
+       spin_lock(&fsvq->lock);
+
+       if (!fsvq->connected) {
+               spin_unlock(&fsvq->lock);
+               ret = -ENOTCONN;
+               goto out;
+       }
+
+       vq = fsvq->vq;
+       ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
+       if (ret < 0) {
+               spin_unlock(&fsvq->lock);
+               goto out;
+       }
+
+       fsvq->in_flight++;
+       notify = virtqueue_kick_prepare(vq);
+
+       spin_unlock(&fsvq->lock);
+
+       if (notify)
+               virtqueue_notify(vq);
+
+out:
+       if (ret < 0 && req->argbuf) {
+               kfree(req->argbuf);
+               req->argbuf = NULL;
+       }
+       if (sgs != stack_sgs) {
+               kfree(sgs);
+               kfree(sg);
+       }
+
+       return ret;
+}
+
+static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
+__releases(fiq->lock)
+{
+       unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue: always the request queue for now */
+       struct virtio_fs *fs;
+       struct fuse_conn *fc;
+       struct fuse_req *req;
+       struct fuse_pqueue *fpq;
+       int ret;
+
+       WARN_ON(list_empty(&fiq->pending)); /* a pending request is expected here */
+       req = list_last_entry(&fiq->pending, struct fuse_req, list);
+       clear_bit(FR_PENDING, &req->flags);
+       list_del_init(&req->list);
+       WARN_ON(!list_empty(&fiq->pending)); /* ...and it must have been the only one */
+       spin_unlock(&fiq->lock); /* the unlock announced by __releases() above */
+
+       fs = fiq->priv;
+       fc = fs->vqs[queue_id].fud->fc;
+
+       pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
+                 __func__, req->in.h.opcode, req->in.h.unique,
+                req->in.h.nodeid, req->in.h.len,
+                fuse_len_args(req->args->out_numargs, req->args->out_args));
+
+       fpq = &fs->vqs[queue_id].fud->pq;
+       spin_lock(&fpq->lock);
+       if (!fpq->connected) { /* queue is disconnected: end the request with -ENODEV */
+               spin_unlock(&fpq->lock);
+               req->out.h.error = -ENODEV;
+               pr_err("virtio-fs: %s disconnected\n", __func__);
+               fuse_request_end(fc, req);
+               return;
+       }
+       list_add_tail(&req->list, fpq->processing); /* request now sits on the processing list */
+       spin_unlock(&fpq->lock);
+       set_bit(FR_SENT, &req->flags);
+       /* matches barrier in request_wait_answer() */
+       smp_mb__after_atomic();
+
+retry:
+       ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req);
+       if (ret < 0) {
+               if (ret == -ENOMEM || ret == -ENOSPC) {
+                       /* Virtqueue full. Retry submission */
+                       /* TODO use completion instead of timeout */
+                       usleep_range(20, 30); /* sleep, then retry; the loop has no retry limit */
+                       goto retry;
+               }
+               req->out.h.error = ret; /* any other failure ends the request with that error */
+               pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
+               spin_lock(&fpq->lock);
+               clear_bit(FR_SENT, &req->flags); /* undo the flag and list insertion done above */
+               list_del_init(&req->list);
+               spin_unlock(&fpq->lock);
+               fuse_request_end(fc, req);
+               return;
+       }
+}
+
+/*
+ * fuse_iqueue callbacks implemented by virtio-fs.
+ *
+ * "static" is written first: a storage-class specifier placed after a type
+ * qualifier is an obsolescent form and triggers -Wold-style-declaration.
+ */
+static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
+       .wake_forget_and_unlock         = virtio_fs_wake_forget_and_unlock,
+       .wake_interrupt_and_unlock      = virtio_fs_wake_interrupt_and_unlock,
+       .wake_pending_and_unlock        = virtio_fs_wake_pending_and_unlock,
+       .release                        = virtio_fs_fiq_release,
+};
+
+static int virtio_fs_fill_super(struct super_block *sb)
+{
+       struct fuse_conn *fc = get_fuse_conn_super(sb);
+       struct virtio_fs *fs = fc->iq.priv;
+       unsigned int i;
+       int err;
+       struct fuse_fs_context ctx = {
+               .rootmode = S_IFDIR,
+               .default_permissions = 1,
+               .allow_other = 1,
+               .max_read = UINT_MAX,
+               .blksize = 512,
+               .destroy = true,
+               .no_control = true,
+               .no_force_umount = true,
+       };
+
+       mutex_lock(&virtio_fs_mutex); /* released on every return path below */
+
+       /* After holding mutex, make sure virtiofs device is still there.
+        * Though we are holding a reference to it, driver ->remove might
+        * still have cleaned up virtual queues. In that case bail out.
+        * An empty fs->list is what signals that here.
+        */
+       err = -EINVAL;
+       if (list_empty(&fs->list)) {
+               pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
+               goto err;
+       }
+
+       err = -ENOMEM;
+       /* Allocate fuse_dev for hiprio and notification queues, i.e. for
+        * every queue whose index is below VQ_REQUEST.
+        */
+       for (i = 0; i < VQ_REQUEST; i++) {
+               struct virtio_fs_vq *fsvq = &fs->vqs[i];
+
+               fsvq->fud = fuse_dev_alloc();
+               if (!fsvq->fud)
+                       goto err_free_fuse_devs;
+       }
+
+       ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud; /* request queue's fuse_dev slot */
+       err = fuse_fill_super_common(sb, &ctx);
+       if (err < 0)
+               goto err_free_fuse_devs;
+
+       fc = fs->vqs[VQ_REQUEST].fud->fc; /* re-read fc through the request queue's fuse_dev */
+
+       for (i = 0; i < fs->nvqs; i++) {
+               struct virtio_fs_vq *fsvq = &fs->vqs[i];
+
+               if (i == VQ_REQUEST)
+                       continue; /* already initialized */
+               fuse_dev_install(fsvq->fud, fc);
+       }
+
+       /* Previous unmount will stop all queues. Start these again */
+       virtio_fs_start_all_queues(fs);
+       fuse_send_init(fc);
+       mutex_unlock(&virtio_fs_mutex);
+       return 0;
+
+err_free_fuse_devs:
+       virtio_fs_free_devs(fs);
+err:
+       mutex_unlock(&virtio_fs_mutex);
+       return err;
+}
+
+/*
+ * ->kill_sb() for virtiofs.
+ *
+ * Stops and drains the virtqueues around the generic fuse_kill_sb_anon()
+ * teardown, then frees the per-queue fuse devices.  When no fuse_conn is
+ * attached to the superblock, only fuse_kill_sb_anon() is called.
+ */
+static void virtio_kill_sb(struct super_block *sb)
+{
+       struct fuse_conn *fc = get_fuse_conn_super(sb);
+       struct virtio_fs *vfs;
+       struct virtio_fs_vq *fsvq;
+
+       /* If mount failed, we can still be called without any fc */
+       if (!fc) {
+               /*
+                * Call first, then return: "return <void expression>;" in a
+                * void function is not valid ISO C and is only accepted as a
+                * compiler extension.
+                */
+               fuse_kill_sb_anon(sb);
+               return;
+       }
+
+       vfs = fc->iq.priv;
+       fsvq = &vfs->vqs[VQ_HIPRIO];
+
+       /* Stop forget queue. Soon destroy will be sent */
+       spin_lock(&fsvq->lock);
+       fsvq->connected = false;
+       spin_unlock(&fsvq->lock);
+       virtio_fs_drain_all_queues(vfs);
+
+       fuse_kill_sb_anon(sb);
+
+       /* fuse_kill_sb_anon() must have sent destroy. Stop all queues
+        * and drain one more time and free fuse devices. Freeing fuse
+        * devices will drop their reference on fuse_conn and that in
+        * turn will drop its reference on virtio_fs object.
+        */
+       virtio_fs_stop_all_queues(vfs);
+       virtio_fs_drain_all_queues(vfs);
+       virtio_fs_free_devs(vfs);
+}
+
+/*
+ * sget_fc() test callback: an existing superblock matches when its fuse_conn
+ * and the one carried in the fs_context share the same iq.priv pointer.
+ */
+static int virtio_fs_test_super(struct super_block *sb,
+                               struct fs_context *fsc)
+{
+       struct fuse_conn *wanted = fsc->s_fs_info;
+       struct fuse_conn *existing = get_fuse_conn_super(sb);
+
+       return wanted->iq.priv == existing->iq.priv;
+}
+
+/*
+ * sget_fc() set callback: give the new superblock an anonymous device number
+ * and, only if that succeeds, take a reference on the fs_context's fuse_conn.
+ *
+ * Returns 0 on success or the error from get_anon_bdev().
+ */
+static int virtio_fs_set_super(struct super_block *sb,
+                              struct fs_context *fsc)
+{
+       int err = get_anon_bdev(&sb->s_dev);
+
+       if (err)
+               return err;
+
+       fuse_conn_get(fsc->s_fs_info);
+       return err;
+}
+
+/*
+ * ->get_tree() for virtiofs.
+ *
+ * Looks up the virtio_fs instance for the tag given in fsc->source, sets up
+ * a new fuse_conn for it and finds or creates the superblock with sget_fc().
+ * A superblock that has no root yet is filled in by virtio_fs_fill_super().
+ *
+ * Returns 0 with fsc->root set, -EINVAL if no instance is found for the tag,
+ * -ENOMEM if the fuse_conn cannot be allocated, or the error reported by
+ * sget_fc() or virtio_fs_fill_super().
+ */
+static int virtio_fs_get_tree(struct fs_context *fsc)
+{
+       struct virtio_fs *fs;
+       struct super_block *sb;
+       struct fuse_conn *fc;
+       int err;
+
+       /* This gets a reference on virtio_fs object. This ptr gets installed
+        * in fc->iq.priv. Once fuse_conn is going away, it calls the
+        * fuse_iqueue ->release() op to drop the reference to this object.
+        */
+       fs = virtio_fs_find_instance(fsc->source);
+       if (!fs) {
+               pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
+               return -EINVAL;
+       }
+
+       /* sizeof(*fc) keeps the allocation size tied to the pointer's type. */
+       fc = kzalloc(sizeof(*fc), GFP_KERNEL);
+       if (!fc) {
+               /* Give back the reference taken by virtio_fs_find_instance(). */
+               mutex_lock(&virtio_fs_mutex);
+               virtio_fs_put(fs);
+               mutex_unlock(&virtio_fs_mutex);
+               return -ENOMEM;
+       }
+
+       fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops,
+                      fs);
+       fc->release = fuse_free_conn;
+       fc->delete_stale = true;
+
+       fsc->s_fs_info = fc;
+       sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super);
+       /* A new superblock got its own reference in virtio_fs_set_super(). */
+       fuse_conn_put(fc);
+       if (IS_ERR(sb))
+               return PTR_ERR(sb);
+
+       if (!sb->s_root) {
+               err = virtio_fs_fill_super(sb);
+               if (err) {
+                       deactivate_locked_super(sb);
+                       return err;
+               }
+
+               sb->s_flags |= SB_ACTIVE;
+       }
+
+       WARN_ON(fsc->root);
+       fsc->root = dget(sb->s_root);
+       return 0;
+}
+
+static const struct fs_context_operations virtio_fs_context_ops = {
+       .get_tree       = virtio_fs_get_tree, /* the only fs_context operation set here */
+};
+
+static int virtio_fs_init_fs_context(struct fs_context *fsc)
+{
+       fsc->ops = &virtio_fs_context_ops; /* makes virtio_fs_get_tree() the ->get_tree op */
+       return 0; /* nothing here can fail */
+}
+
+static struct file_system_type virtio_fs_type = {
+       .owner          = THIS_MODULE,
+       .name           = "virtiofs", /* filesystem type name */
+       .init_fs_context = virtio_fs_init_fs_context, /* installs virtio_fs_context_ops */
+       .kill_sb        = virtio_kill_sb,
+};
+
+/*
+ * Module init: register the virtio driver first, then the filesystem type.
+ * If the filesystem type cannot be registered, the driver registration is
+ * undone before the error is returned.
+ */
+static int __init virtio_fs_init(void)
+{
+       int ret = register_virtio_driver(&virtio_fs_driver);
+
+       if (ret < 0)
+               return ret;
+
+       ret = register_filesystem(&virtio_fs_type);
+       if (ret >= 0)
+               return 0;
+
+       unregister_virtio_driver(&virtio_fs_driver);
+       return ret;
+}
+module_init(virtio_fs_init);
+
+static void __exit virtio_fs_exit(void)
+{
+       unregister_filesystem(&virtio_fs_type); /* reverse order of virtio_fs_init() */
+       unregister_virtio_driver(&virtio_fs_driver);
+}
+module_exit(virtio_fs_exit);
+
+MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
+MODULE_DESCRIPTION("Virtio Filesystem");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_FS(KBUILD_MODNAME);
+MODULE_DEVICE_TABLE(virtio, id_table);
index 4337176..20d052e 100644 (file)
@@ -25,15 +25,15 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value,
        memset(&inarg, 0, sizeof(inarg));
        inarg.size = size;
        inarg.flags = flags;
-       args.in.h.opcode = FUSE_SETXATTR;
-       args.in.h.nodeid = get_node_id(inode);
-       args.in.numargs = 3;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.in.args[1].size = strlen(name) + 1;
-       args.in.args[1].value = name;
-       args.in.args[2].size = size;
-       args.in.args[2].value = value;
+       args.opcode = FUSE_SETXATTR;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 3;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.in_args[1].size = strlen(name) + 1;
+       args.in_args[1].value = name;
+       args.in_args[2].size = size;
+       args.in_args[2].value = value;
        err = fuse_simple_request(fc, &args);
        if (err == -ENOSYS) {
                fc->no_setxattr = 1;
@@ -60,22 +60,22 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
 
        memset(&inarg, 0, sizeof(inarg));
        inarg.size = size;
-       args.in.h.opcode = FUSE_GETXATTR;
-       args.in.h.nodeid = get_node_id(inode);
-       args.in.numargs = 2;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
-       args.in.args[1].size = strlen(name) + 1;
-       args.in.args[1].value = name;
+       args.opcode = FUSE_GETXATTR;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 2;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.in_args[1].size = strlen(name) + 1;
+       args.in_args[1].value = name;
        /* This is really two different operations rolled into one */
-       args.out.numargs = 1;
+       args.out_numargs = 1;
        if (size) {
-               args.out.argvar = 1;
-               args.out.args[0].size = size;
-               args.out.args[0].value = value;
+               args.out_argvar = true;
+               args.out_args[0].size = size;
+               args.out_args[0].value = value;
        } else {
-               args.out.args[0].size = sizeof(outarg);
-               args.out.args[0].value = &outarg;
+               args.out_args[0].size = sizeof(outarg);
+               args.out_args[0].value = &outarg;
        }
        ret = fuse_simple_request(fc, &args);
        if (!ret && !size)
@@ -121,20 +121,20 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
 
        memset(&inarg, 0, sizeof(inarg));
        inarg.size = size;
-       args.in.h.opcode = FUSE_LISTXATTR;
-       args.in.h.nodeid = get_node_id(inode);
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
+       args.opcode = FUSE_LISTXATTR;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
        /* This is really two different operations rolled into one */
-       args.out.numargs = 1;
+       args.out_numargs = 1;
        if (size) {
-               args.out.argvar = 1;
-               args.out.args[0].size = size;
-               args.out.args[0].value = list;
+               args.out_argvar = true;
+               args.out_args[0].size = size;
+               args.out_args[0].value = list;
        } else {
-               args.out.args[0].size = sizeof(outarg);
-               args.out.args[0].value = &outarg;
+               args.out_args[0].size = sizeof(outarg);
+               args.out_args[0].value = &outarg;
        }
        ret = fuse_simple_request(fc, &args);
        if (!ret && !size)
@@ -157,11 +157,11 @@ int fuse_removexattr(struct inode *inode, const char *name)
        if (fc->no_removexattr)
                return -EOPNOTSUPP;
 
-       args.in.h.opcode = FUSE_REMOVEXATTR;
-       args.in.h.nodeid = get_node_id(inode);
-       args.in.numargs = 1;
-       args.in.args[0].size = strlen(name) + 1;
-       args.in.args[0].value = name;
+       args.opcode = FUSE_REMOVEXATTR;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = strlen(name) + 1;
+       args.in_args[0].value = name;
        err = fuse_simple_request(fc, &args);
        if (err == -ENOSYS) {
                fc->no_removexattr = 1;
index f42048c..b9fe975 100644 (file)
@@ -243,7 +243,7 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping,
 {
        struct inode *inode = mapping->host;
        struct gfs2_sbd *sdp = GFS2_SB(inode);
-       unsigned nrblocks = nr_pages * (PAGE_SIZE/inode->i_sb->s_blocksize);
+       unsigned nrblocks = nr_pages * (PAGE_SIZE >> inode->i_blkbits);
        int i;
        int ret;
 
@@ -552,7 +552,7 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
                        unsigned size)
 {
        struct address_space *mapping = ip->i_inode.i_mapping;
-       unsigned long index = *pos / PAGE_SIZE;
+       unsigned long index = *pos >> PAGE_SHIFT;
        unsigned offset = *pos & (PAGE_SIZE - 1);
        unsigned copied = 0;
        unsigned amt;
index 4f8b5fd..f63df54 100644 (file)
@@ -1065,54 +1065,38 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 {
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
-       unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
-       bool unstuff, alloc_required;
+       bool unstuff;
        int ret;
 
-       ret = gfs2_write_lock(inode);
-       if (ret)
-               return ret;
-
        unstuff = gfs2_is_stuffed(ip) &&
                  pos + length > gfs2_max_stuffed_size(ip);
 
-       ret = gfs2_iomap_get(inode, pos, length, flags, iomap, mp);
-       if (ret)
-               goto out_unlock;
-
-       alloc_required = unstuff || iomap->type == IOMAP_HOLE;
+       if (unstuff || iomap->type == IOMAP_HOLE) {
+               unsigned int data_blocks, ind_blocks;
+               struct gfs2_alloc_parms ap = {};
+               unsigned int rblocks;
+               struct gfs2_trans *tr;
 
-       if (alloc_required || gfs2_is_jdata(ip))
                gfs2_write_calc_reserv(ip, iomap->length, &data_blocks,
                                       &ind_blocks);
-
-       if (alloc_required) {
-               struct gfs2_alloc_parms ap = {
-                       .target = data_blocks + ind_blocks
-               };
-
+               ap.target = data_blocks + ind_blocks;
                ret = gfs2_quota_lock_check(ip, &ap);
                if (ret)
-                       goto out_unlock;
+                       return ret;
 
                ret = gfs2_inplace_reserve(ip, &ap);
                if (ret)
                        goto out_qunlock;
-       }
 
-       rblocks = RES_DINODE + ind_blocks;
-       if (gfs2_is_jdata(ip))
-               rblocks += data_blocks;
-       if (ind_blocks || data_blocks)
-               rblocks += RES_STATFS + RES_QUOTA;
-       if (inode == sdp->sd_rindex)
-               rblocks += 2 * RES_STATFS;
-       if (alloc_required)
+               rblocks = RES_DINODE + ind_blocks;
+               if (gfs2_is_jdata(ip))
+                       rblocks += data_blocks;
+               if (ind_blocks || data_blocks)
+                       rblocks += RES_STATFS + RES_QUOTA;
+               if (inode == sdp->sd_rindex)
+                       rblocks += 2 * RES_STATFS;
                rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
 
-       if (unstuff || iomap->type == IOMAP_HOLE) {
-               struct gfs2_trans *tr;
-
                ret = gfs2_trans_begin(sdp, rblocks,
                                       iomap->length >> inode->i_blkbits);
                if (ret)
@@ -1153,16 +1137,17 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 out_trans_end:
        gfs2_trans_end(sdp);
 out_trans_fail:
-       if (alloc_required)
-               gfs2_inplace_release(ip);
+       gfs2_inplace_release(ip);
 out_qunlock:
-       if (alloc_required)
-               gfs2_quota_unlock(ip);
-out_unlock:
-       gfs2_write_unlock(inode);
+       gfs2_quota_unlock(ip);
        return ret;
 }
 
+/* True for IOMAP_WRITE requests that are not also IOMAP_DIRECT. */
+static inline bool gfs2_iomap_need_write_lock(unsigned flags)
+{
+       return (flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE;
+}
+
 static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
                            unsigned flags, struct iomap *iomap)
 {
@@ -1173,20 +1158,43 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
        iomap->flags |= IOMAP_F_BUFFER_HEAD;
 
        trace_gfs2_iomap_start(ip, pos, length, flags);
-       if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) {
-               ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);
-       } else {
-               ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+       if (gfs2_iomap_need_write_lock(flags)) {
+               ret = gfs2_write_lock(inode);
+               if (ret)
+                       goto out;
+       }
 
-               /*
-                * Silently fall back to buffered I/O for stuffed files or if
-                * we've hot a hole (see gfs2_file_direct_write).
-                */
-               if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) &&
-                   iomap->type != IOMAP_MAPPED)
-                       ret = -ENOTBLK;
+       ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+       if (ret)
+               goto out_unlock;
+
+       switch(flags & (IOMAP_WRITE | IOMAP_ZERO)) {
+       case IOMAP_WRITE:
+               if (flags & IOMAP_DIRECT) {
+                       /*
+                        * Silently fall back to buffered I/O for stuffed files
+                        * or if we've got a hole (see gfs2_file_direct_write).
+                        */
+                       if (iomap->type != IOMAP_MAPPED)
+                               ret = -ENOTBLK;
+                       goto out_unlock;
+               }
+               break;
+       case IOMAP_ZERO:
+               if (iomap->type == IOMAP_HOLE)
+                       goto out_unlock;
+               break;
+       default:
+               goto out_unlock;
        }
+
+       ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);
+
+out_unlock:
+       if (ret && gfs2_iomap_need_write_lock(flags))
+               gfs2_write_unlock(inode);
        release_metapath(&mp);
+out:
        trace_gfs2_iomap_end(ip, iomap, ret);
        return ret;
 }
@@ -1197,8 +1205,18 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
 
-       if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE)
-               goto out;
+       switch (flags & (IOMAP_WRITE | IOMAP_ZERO)) {
+       case IOMAP_WRITE:
+               if (flags & IOMAP_DIRECT)
+                       return 0;
+               break;
+       case IOMAP_ZERO:
+                if (iomap->type == IOMAP_HOLE)
+                        return 0;
+                break;
+       default:
+                return 0;
+       }
 
        if (!gfs2_is_stuffed(ip))
                gfs2_ordered_add_inode(ip);
@@ -1231,8 +1249,8 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
        set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
 
 out_unlock:
-       gfs2_write_unlock(inode);
-out:
+       if (gfs2_iomap_need_write_lock(flags))
+               gfs2_write_unlock(inode);
        return 0;
 }
 
@@ -1330,76 +1348,10 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
        return ret;
 }
 
-/**
- * gfs2_block_zero_range - Deal with zeroing out data
- *
- * This is partly borrowed from ext3.
- */
 static int gfs2_block_zero_range(struct inode *inode, loff_t from,
                                 unsigned int length)
 {
-       struct address_space *mapping = inode->i_mapping;
-       struct gfs2_inode *ip = GFS2_I(inode);
-       unsigned long index = from >> PAGE_SHIFT;
-       unsigned offset = from & (PAGE_SIZE-1);
-       unsigned blocksize, iblock, pos;
-       struct buffer_head *bh;
-       struct page *page;
-       int err;
-
-       page = find_or_create_page(mapping, index, GFP_NOFS);
-       if (!page)
-               return 0;
-
-       blocksize = inode->i_sb->s_blocksize;
-       iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
-
-       if (!page_has_buffers(page))
-               create_empty_buffers(page, blocksize, 0);
-
-       /* Find the buffer that contains "offset" */
-       bh = page_buffers(page);
-       pos = blocksize;
-       while (offset >= pos) {
-               bh = bh->b_this_page;
-               iblock++;
-               pos += blocksize;
-       }
-
-       err = 0;
-
-       if (!buffer_mapped(bh)) {
-               gfs2_block_map(inode, iblock, bh, 0);
-               /* unmapped? It's a hole - nothing to do */
-               if (!buffer_mapped(bh))
-                       goto unlock;
-       }
-
-       /* Ok, it's mapped. Make sure it's up-to-date */
-       if (PageUptodate(page))
-               set_buffer_uptodate(bh);
-
-       if (!buffer_uptodate(bh)) {
-               err = -EIO;
-               ll_rw_block(REQ_OP_READ, 0, 1, &bh);
-               wait_on_buffer(bh);
-               /* Uhhuh. Read error. Complain and punt. */
-               if (!buffer_uptodate(bh))
-                       goto unlock;
-               err = 0;
-       }
-
-       if (gfs2_is_jdata(ip))
-               gfs2_trans_add_data(ip->i_gl, bh);
-       else
-               gfs2_ordered_add_inode(ip);
-
-       zero_user(page, offset, length);
-       mark_buffer_dirty(bh);
-unlock:
-       unlock_page(page);
-       put_page(page);
-       return err;
+       return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops);
 }
 
 #define GFS2_JTRUNC_REVOKES 8192
@@ -1680,6 +1632,7 @@ out_unlock:
                        brelse(dibh);
                        up_write(&ip->i_rw_mutex);
                        gfs2_trans_end(sdp);
+                       buf_in_tr = false;
                }
                gfs2_glock_dq_uninit(rd_gh);
                cond_resched();
@@ -2187,7 +2140,7 @@ static int do_grow(struct inode *inode, u64 size)
        if (error)
                goto do_end_trans;
 
-       i_size_write(inode, size);
+       truncate_setsize(inode, size);
        ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
        gfs2_trans_add_meta(ip->i_gl, dibh);
        gfs2_dinode_out(ip, dibh->b_data);
index a7bb76e..2e215e8 100644 (file)
@@ -38,7 +38,7 @@ static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags)
        struct inode *inode;
        struct gfs2_holder d_gh;
        struct gfs2_inode *ip = NULL;
-       int error;
+       int error, valid = 0;
        int had_lock = 0;
 
        if (flags & LOOKUP_RCU)
@@ -51,53 +51,30 @@ static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags)
 
        if (inode) {
                if (is_bad_inode(inode))
-                       goto invalid;
+                       goto out;
                ip = GFS2_I(inode);
        }
 
-       if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
-               goto valid;
+       if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) {
+               valid = 1;
+               goto out;
+       }
 
        had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL);
        if (!had_lock) {
                error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
                if (error)
-                       goto fail;
-       } 
-
-       error = gfs2_dir_check(d_inode(parent), &dentry->d_name, ip);
-       switch (error) {
-       case 0:
-               if (!inode)
-                       goto invalid_gunlock;
-               break;
-       case -ENOENT:
-               if (!inode)
-                       goto valid_gunlock;
-               goto invalid_gunlock;
-       default:
-               goto fail_gunlock;
+                       goto out;
        }
 
-valid_gunlock:
-       if (!had_lock)
-               gfs2_glock_dq_uninit(&d_gh);
-valid:
-       dput(parent);
-       return 1;
+       error = gfs2_dir_check(d_inode(parent), &dentry->d_name, ip);
+       valid = inode ? !error : (error == -ENOENT);
 
-invalid_gunlock:
        if (!had_lock)
                gfs2_glock_dq_uninit(&d_gh);
-invalid:
+out:
        dput(parent);
-       return 0;
-
-fail_gunlock:
-       gfs2_glock_dq_uninit(&d_gh);
-fail:
-       dput(parent);
-       return 0;
+       return valid;
 }
 
 static int gfs2_dhash(const struct dentry *dentry, struct qstr *str)
index 6f35d19..eb9c057 100644 (file)
@@ -1463,8 +1463,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
                                sort_offset : entries, copied);
 out_free:
        for(i = 0; i < leaf; i++)
-               if (larr[i])
-                       brelse(larr[i]);
+               brelse(larr[i]);
        kvfree(larr);
 out:
        return error;
index 0ac2dc8..5b76480 100644 (file)
@@ -32,8 +32,7 @@ extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
                        const struct gfs2_inode *ip, struct gfs2_diradd *da);
 static inline void gfs2_dir_no_add(struct gfs2_diradd *da)
 {
-       if (da->bh)
-               brelse(da->bh);
+       brelse(da->bh);
        da->bh = NULL;
 }
 extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry);
index 52fa1ef..997b326 100644 (file)
@@ -1049,7 +1049,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
                        rblocks += data_blocks ? data_blocks : 1;
 
                error = gfs2_trans_begin(sdp, rblocks,
-                                        PAGE_SIZE/sdp->sd_sb.sb_bsize);
+                                        PAGE_SIZE >> inode->i_blkbits);
                if (error)
                        goto out_trans_fail;
 
@@ -1065,11 +1065,10 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
                gfs2_quota_unlock(ip);
        }
 
-       if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size) {
+       if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size)
                i_size_write(inode, pos + count);
-               file_update_time(file);
-               mark_inode_dirty(inode);
-       }
+       file_update_time(file);
+       mark_inode_dirty(inode);
 
        if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host))
                return vfs_fsync_range(file, pos, pos + count - 1,
index e23fb8b..0290a22 100644 (file)
@@ -305,6 +305,11 @@ static void gfs2_holder_wake(struct gfs2_holder *gh)
        clear_bit(HIF_WAIT, &gh->gh_iflags);
        smp_mb__after_atomic();
        wake_up_bit(&gh->gh_iflags, HIF_WAIT);
+       if (gh->gh_flags & GL_ASYNC) {
+               struct gfs2_sbd *sdp = gh->gh_gl->gl_name.ln_sbd;
+
+               wake_up(&sdp->sd_async_glock_wait);
+       }
 }
 
 /**
@@ -931,6 +936,17 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
        gh->gh_ip = 0;
 }
 
+/*
+ * Lengthen a glock's minimum hold time, capped at GL_GLOCK_MAX_HOLD, when
+ * more than a second (HZ jiffies) has passed since @start_time.
+ */
+static void gfs2_glock_update_hold_time(struct gfs2_glock *gl,
+                                       unsigned long start_time)
+{
+       /* Waited a second or less: leave the hold time alone. */
+       if (!time_after(jiffies, start_time + HZ))
+               return;
+
+       gl->gl_hold_time = min(gl->gl_hold_time + GL_GLOCK_HOLD_INCR,
+                              GL_GLOCK_MAX_HOLD);
+}
+
 /**
  * gfs2_glock_wait - wait on a glock acquisition
  * @gh: the glock holder
@@ -940,18 +956,99 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
 
 int gfs2_glock_wait(struct gfs2_holder *gh)
 {
-       unsigned long time1 = jiffies;
+       unsigned long start_time = jiffies;
 
        might_sleep();
        wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
-       if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */
-               /* Lengthen the minimum hold time. */
-               gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
-                                             GL_GLOCK_HOLD_INCR,
-                                             GL_GLOCK_MAX_HOLD);
+       gfs2_glock_update_hold_time(gh->gh_gl, start_time);
        return gh->gh_error;
 }
 
+/*
+ * glocks_pending - check whether any holder is still waiting
+ * @num_gh: the number of holders in the array
+ * @ghs: the glock holder array
+ *
+ * Returns: 1 if at least one holder still has HIF_WAIT set, 0 otherwise.
+ */
+static int glocks_pending(unsigned int num_gh, struct gfs2_holder *ghs)
+{
+       unsigned int i; /* unsigned, so the comparison with num_gh is not mixed-sign */
+
+       for (i = 0; i < num_gh; i++)
+               if (test_bit(HIF_WAIT, &ghs[i].gh_iflags))
+                       return 1;
+       return 0;
+}
+
+/**
+ * gfs2_glock_async_wait - wait on multiple asynchronous glock acquisitions
+ * @num_gh: the number of holders in the array
+ * @ghs: the glock holder array
+ *
+ * ghs[0] is dereferenced unconditionally to find the superblock, so the
+ * array must contain at least one holder.  May sleep.
+ *
+ * Returns: 0 on success, meaning all glocks have been granted and are held.
+ *          -ESTALE if the request timed out, meaning all glocks were released,
+ *          and the caller should retry the operation.
+ *          Otherwise the first non-zero gh_error found among the holders.
+ */
+
+int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs)
+{
+       struct gfs2_sbd *sdp = ghs[0].gh_gl->gl_name.ln_sbd;
+       int i, ret = 0, timeout = 0;
+       unsigned long start_time = jiffies;
+       bool keep_waiting;
+
+       might_sleep();
+       /*
+        * Total up the (minimum hold time * 2) of all glocks and use that to
+        * determine the max amount of time we should wait.
+        */
+       for (i = 0; i < num_gh; i++)
+               timeout += ghs[i].gh_gl->gl_hold_time << 1;
+
+wait_for_dlm:
+       if (!wait_event_timeout(sdp->sd_async_glock_wait,
+                               !glocks_pending(num_gh, ghs), timeout))
+               ret = -ESTALE; /* request timed out. */
+
+       /*
+        * If dlm granted all our requests, we need to adjust the glock
+        * minimum hold time values according to how long we waited.
+        *
+        * If our request timed out, we need to repeatedly release any held
+        * glocks we acquired thus far to allow dlm to acquire the remaining
+        * glocks without deadlocking.  We cannot currently cancel outstanding
+        * glock acquisitions.
+        *
+        * The HIF_WAIT bit tells us which requests still need a response from
+        * dlm.
+        *
+        * If dlm sent us any errors, we return the first error we find.
+        */
+       keep_waiting = false;
+       for (i = 0; i < num_gh; i++) {
+               /* Skip holders we have already dequeued below. */
+               if (!gfs2_holder_queued(&ghs[i]))
+                       continue;
+               /* Skip holders with a pending DLM response. */
+               if (test_bit(HIF_WAIT, &ghs[i].gh_iflags)) {
+                       keep_waiting = true;
+                       continue;
+               }
+
+               if (test_bit(HIF_HOLDER, &ghs[i].gh_iflags)) {
+                       if (ret == -ESTALE)
+                               gfs2_glock_dq(&ghs[i]);
+                       else
+                               gfs2_glock_update_hold_time(ghs[i].gh_gl,
+                                                           start_time);
+               }
+               if (!ret) /* only the first error is kept; -ESTALE is never overwritten */
+                       ret = ghs[i].gh_error;
+       }
+
+       if (keep_waiting) /* wait again, with the same timeout, for the rest */
+               goto wait_for_dlm;
+
+       /*
+        * At this point, we've either acquired all locks or released them all.
+        * NOTE(review): when a holder reports an error and no timeout occurred,
+        * holders granted in the same pass are not dequeued here -- confirm
+        * that callers release them.
+        */
+       return ret;
+}
+
 /**
  * handle_callback - process a demote request
  * @gl: the glock
@@ -1018,9 +1115,9 @@ __acquires(&gl->gl_lockref.lock)
        struct gfs2_holder *gh2;
        int try_futile = 0;
 
-       BUG_ON(gh->gh_owner_pid == NULL);
+       GLOCK_BUG_ON(gl, gh->gh_owner_pid == NULL);
        if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
-               BUG();
+               GLOCK_BUG_ON(gl, true);
 
        if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
                if (test_bit(GLF_LOCK, &gl->gl_flags))
@@ -1788,8 +1885,8 @@ void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
        unsigned long long dtime;
        const struct gfs2_holder *gh;
        char gflags_buf[32];
-       char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2];
        struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+       char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
 
        memset(fs_id_buf, 0, sizeof(fs_id_buf));
        if (fsid && sdp) /* safety precaution */
index e4e0bed..b8adaf8 100644 (file)
@@ -190,6 +190,7 @@ extern void gfs2_holder_uninit(struct gfs2_holder *gh);
 extern int gfs2_glock_nq(struct gfs2_holder *gh);
 extern int gfs2_glock_poll(struct gfs2_holder *gh);
 extern int gfs2_glock_wait(struct gfs2_holder *gh);
+extern int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs);
 extern void gfs2_glock_dq(struct gfs2_holder *gh);
 extern void gfs2_glock_dq_wait(struct gfs2_holder *gh);
 extern void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
@@ -260,6 +261,11 @@ static inline bool gfs2_holder_initialized(struct gfs2_holder *gh)
        return gh->gh_gl;
 }
 
+/**
+ * gfs2_holder_queued - check whether a holder is linked on a list
+ * @gh: the holder
+ *
+ * Returns: true if @gh->gh_list is not an empty list head
+ */
+static inline bool gfs2_holder_queued(struct gfs2_holder *gh)
+{
+       return !list_empty(&gh->gh_list);
+}
+
 /**
  * glock_set_object - set the gl_object field of a glock
  * @gl: the glock
index 7a993d7..5f89c51 100644 (file)
@@ -584,10 +584,10 @@ struct gfs2_args {
        unsigned int ar_rgrplvb:1;              /* use lvbs for rgrp info */
        unsigned int ar_loccookie:1;            /* use location based readdir
                                                   cookies */
-       int ar_commit;                          /* Commit interval */
-       int ar_statfs_quantum;                  /* The fast statfs interval */
-       int ar_quota_quantum;                   /* The quota interval */
-       int ar_statfs_percent;                  /* The % change to force sync */
+       s32 ar_commit;                          /* Commit interval */
+       s32 ar_statfs_quantum;                  /* The fast statfs interval */
+       s32 ar_quota_quantum;                   /* The quota interval */
+       s32 ar_statfs_percent;                  /* The % change to force sync */
 };
 
 struct gfs2_tune {
@@ -725,6 +725,7 @@ struct gfs2_sbd {
        struct gfs2_glock *sd_freeze_gl;
        struct work_struct sd_freeze_work;
        wait_queue_head_t sd_glock_wait;
+       wait_queue_head_t sd_async_glock_wait;
        atomic_t sd_glock_disposal;
        struct completion sd_locking_init;
        struct completion sd_wdack;
index 2e2a8a2..e1e18fb 100644 (file)
@@ -1348,7 +1348,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
        struct gfs2_inode *ip = GFS2_I(d_inode(odentry));
        struct gfs2_inode *nip = NULL;
        struct gfs2_sbd *sdp = GFS2_SB(odir);
-       struct gfs2_holder ghs[5], r_gh;
+       struct gfs2_holder ghs[4], r_gh, rd_gh;
        struct gfs2_rgrpd *nrgd;
        unsigned int num_gh;
        int dir_rename = 0;
@@ -1357,6 +1357,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
        int error;
 
        gfs2_holder_mark_uninitialized(&r_gh);
+       gfs2_holder_mark_uninitialized(&rd_gh);
        if (d_really_is_positive(ndentry)) {
                nip = GFS2_I(d_inode(ndentry));
                if (ip == nip)
@@ -1387,24 +1388,19 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
        }
 
        num_gh = 1;
-       gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+       gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, GL_ASYNC, ghs);
        if (odip != ndip) {
-               gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+               gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE,GL_ASYNC,
+                                ghs + num_gh);
                num_gh++;
        }
-       gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+       gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ASYNC, ghs + num_gh);
        num_gh++;
 
        if (nip) {
-               gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+               gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, GL_ASYNC,
+                                ghs + num_gh);
                num_gh++;
-               /* grab the resource lock for unlink flag twiddling 
-                * this is the case of the target file already existing
-                * so we unlink before doing the rename
-                */
-               nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr, 1);
-               if (nrgd)
-                       gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
        }
 
        for (x = 0; x < num_gh; x++) {
@@ -1412,6 +1408,25 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                if (error)
                        goto out_gunlock;
        }
+       error = gfs2_glock_async_wait(num_gh, ghs);
+       if (error)
+               goto out_gunlock;
+
+       if (nip) {
+               /* Grab the resource group glock for unlink flag twiddling.
+                * This is the case where the target dinode already exists
+                * so we unlink before doing the rename.
+                */
+               nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr, 1);
+               if (!nrgd) {
+                       error = -ENOENT;
+                       goto out_gunlock;
+               }
+               error = gfs2_glock_nq_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0,
+                                          &rd_gh);
+               if (error)
+                       goto out_gunlock;
+       }
 
        error = -ENOENT;
        if (ip->i_inode.i_nlink == 0)
@@ -1541,8 +1556,12 @@ out_gunlock_q:
                gfs2_quota_unlock(ndip);
 out_gunlock:
        gfs2_dir_no_add(&da);
+       if (gfs2_holder_initialized(&rd_gh))
+               gfs2_glock_dq_uninit(&rd_gh);
+
        while (x--) {
-               gfs2_glock_dq(ghs + x);
+               if (gfs2_holder_queued(ghs + x))
+                       gfs2_glock_dq(ghs + x);
                gfs2_holder_uninit(ghs + x);
        }
 out_gunlock_r:
@@ -1572,7 +1591,7 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry,
        struct gfs2_inode *oip = GFS2_I(odentry->d_inode);
        struct gfs2_inode *nip = GFS2_I(ndentry->d_inode);
        struct gfs2_sbd *sdp = GFS2_SB(odir);
-       struct gfs2_holder ghs[5], r_gh;
+       struct gfs2_holder ghs[4], r_gh;
        unsigned int num_gh;
        unsigned int x;
        umode_t old_mode = oip->i_inode.i_mode;
@@ -1606,15 +1625,16 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry,
        }
 
        num_gh = 1;
-       gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+       gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, GL_ASYNC, ghs);
        if (odip != ndip) {
-               gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+               gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, GL_ASYNC,
+                                ghs + num_gh);
                num_gh++;
        }
-       gfs2_holder_init(oip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+       gfs2_holder_init(oip->i_gl, LM_ST_EXCLUSIVE, GL_ASYNC, ghs + num_gh);
        num_gh++;
 
-       gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+       gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, GL_ASYNC, ghs + num_gh);
        num_gh++;
 
        for (x = 0; x < num_gh; x++) {
@@ -1623,6 +1643,10 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry,
                        goto out_gunlock;
        }
 
+       error = gfs2_glock_async_wait(num_gh, ghs);
+       if (error)
+               goto out_gunlock;
+
        error = -ENOENT;
        if (oip->i_inode.i_nlink == 0 || nip->i_inode.i_nlink == 0)
                goto out_gunlock;
@@ -1683,7 +1707,8 @@ out_end_trans:
        gfs2_trans_end(sdp);
 out_gunlock:
        while (x--) {
-               gfs2_glock_dq(ghs + x);
+               if (gfs2_holder_queued(ghs + x))
+                       gfs2_glock_dq(ghs + x);
                gfs2_holder_uninit(ghs + x);
        }
 out_gunlock_r:
index 4361804..7c71973 100644 (file)
@@ -1035,12 +1035,12 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
        }
 
        old_size = ls->ls_recover_size;
-
-       if (old_size >= max_jid + 1)
+       new_size = old_size;
+       while (new_size < max_jid + 1)
+               new_size += RECOVER_SIZE_INC;
+       if (new_size == old_size)
                return 0;
 
-       new_size = old_size + RECOVER_SIZE_INC;
-
        submit = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
        result = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
        if (!submit || !result) {
index 4a8e5a7..681b446 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/lockdep.h>
 #include <linux/module.h>
 #include <linux/backing-dev.h>
+#include <linux/fs_parser.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -87,6 +88,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
        gfs2_tune_init(&sdp->sd_tune);
 
        init_waitqueue_head(&sdp->sd_glock_wait);
+       init_waitqueue_head(&sdp->sd_async_glock_wait);
        atomic_set(&sdp->sd_glock_disposal, 0);
        init_completion(&sdp->sd_locking_init);
        init_completion(&sdp->sd_wdack);
@@ -1030,16 +1032,17 @@ void gfs2_online_uevent(struct gfs2_sbd *sdp)
 }
 
 /**
- * fill_super - Read in superblock
+ * gfs2_fill_super - Read in superblock
  * @sb: The VFS superblock
- * @data: Mount options
+ * @fc: The filesystem context (mount options are in fc->fs_private)
  * @silent: Don't complain if it's not a GFS2 filesystem
  *
- * Returns: errno
+ * Returns: -errno
  */
-
-static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent)
+static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
 {
+       struct gfs2_args *args = fc->fs_private;
+       int silent = fc->sb_flags & SB_SILENT;
        struct gfs2_sbd *sdp;
        struct gfs2_holder mount_gh;
        int error;
@@ -1204,161 +1207,411 @@ fail_debug:
        return error;
 }
 
-static int set_gfs2_super(struct super_block *s, void *data)
+/**
+ * gfs2_get_tree - Get the GFS2 superblock and root directory
+ * @fc: The filesystem context
+ *
+ * Returns: 0 or -errno on error
+ */
+static int gfs2_get_tree(struct fs_context *fc)
 {
-       s->s_bdev = data;
-       s->s_dev = s->s_bdev->bd_dev;
-       s->s_bdi = bdi_get(s->s_bdev->bd_bdi);
+       struct gfs2_args *args = fc->fs_private;
+       struct gfs2_sbd *sdp;
+       int error;
+
+       error = get_tree_bdev(fc, gfs2_fill_super);
+       if (error)
+               return error;
+
+       sdp = fc->root->d_sb->s_fs_info;
+       dput(fc->root);
+       if (args->ar_meta)
+               fc->root = dget(sdp->sd_master_dir);
+       else
+               fc->root = dget(sdp->sd_root_dir);
        return 0;
 }
 
-static int test_gfs2_super(struct super_block *s, void *ptr)
+static void gfs2_fc_free(struct fs_context *fc)
 {
-       struct block_device *bdev = ptr;
-       return (bdev == s->s_bdev);
+       struct gfs2_args *args = fc->fs_private;
+
+       kfree(args);
 }
 
-/**
- * gfs2_mount - Get the GFS2 superblock
- * @fs_type: The GFS2 filesystem type
- * @flags: Mount flags
- * @dev_name: The name of the device
- * @data: The mount arguments
- *
- * Q. Why not use get_sb_bdev() ?
- * A. We need to select one of two root directories to mount, independent
- *    of whether this is the initial, or subsequent, mount of this sb
- *
- * Returns: 0 or -ve on error
- */
+enum gfs2_param {
+       Opt_lockproto,
+       Opt_locktable,
+       Opt_hostdata,
+       Opt_spectator,
+       Opt_ignore_local_fs,
+       Opt_localflocks,
+       Opt_localcaching,
+       Opt_debug,
+       Opt_upgrade,
+       Opt_acl,
+       Opt_quota,
+       Opt_suiddir,
+       Opt_data,
+       Opt_meta,
+       Opt_discard,
+       Opt_commit,
+       Opt_errors,
+       Opt_statfs_quantum,
+       Opt_statfs_percent,
+       Opt_quota_quantum,
+       Opt_barrier,
+       Opt_rgrplvb,
+       Opt_loccookie,
+};
 
-static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
-                      const char *dev_name, void *data)
-{
-       struct block_device *bdev;
-       struct super_block *s;
-       fmode_t mode = FMODE_READ | FMODE_EXCL;
-       int error;
-       struct gfs2_args args;
-       struct gfs2_sbd *sdp;
+enum opt_quota {
+       Opt_quota_unset = 0,
+       Opt_quota_off,
+       Opt_quota_account,
+       Opt_quota_on,
+};
+
+static const unsigned int opt_quota_values[] = {
+       [Opt_quota_off]     = GFS2_QUOTA_OFF,
+       [Opt_quota_account] = GFS2_QUOTA_ACCOUNT,
+       [Opt_quota_on]      = GFS2_QUOTA_ON,
+};
 
-       if (!(flags & SB_RDONLY))
-               mode |= FMODE_WRITE;
+enum opt_data {
+       Opt_data_writeback = GFS2_DATA_WRITEBACK,
+       Opt_data_ordered   = GFS2_DATA_ORDERED,
+};
 
-       bdev = blkdev_get_by_path(dev_name, mode, fs_type);
-       if (IS_ERR(bdev))
-               return ERR_CAST(bdev);
+enum opt_errors {
+       Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW,
+       Opt_errors_panic    = GFS2_ERRORS_PANIC,
+};
 
-       /*
-        * once the super is inserted into the list by sget, s_umount
-        * will protect the lockfs code from trying to start a snapshot
-        * while we are mounting
-        */
-       mutex_lock(&bdev->bd_fsfreeze_mutex);
-       if (bdev->bd_fsfreeze_count > 0) {
-               mutex_unlock(&bdev->bd_fsfreeze_mutex);
-               error = -EBUSY;
-               goto error_bdev;
-       }
-       s = sget(fs_type, test_gfs2_super, set_gfs2_super, flags, bdev);
-       mutex_unlock(&bdev->bd_fsfreeze_mutex);
-       error = PTR_ERR(s);
-       if (IS_ERR(s))
-               goto error_bdev;
-
-       if (s->s_root) {
-               /*
-                * s_umount nests inside bd_mutex during
-                * __invalidate_device().  blkdev_put() acquires
-                * bd_mutex and can't be called under s_umount.  Drop
-                * s_umount temporarily.  This is safe as we're
-                * holding an active reference.
-                */
-               up_write(&s->s_umount);
-               blkdev_put(bdev, mode);
-               down_write(&s->s_umount);
-       } else {
-               /* s_mode must be set before deactivate_locked_super calls */
-               s->s_mode = mode;
-       }
+static const struct fs_parameter_spec gfs2_param_specs[] = {
+       fsparam_string ("lockproto",          Opt_lockproto),
+       fsparam_string ("locktable",          Opt_locktable),
+       fsparam_string ("hostdata",           Opt_hostdata),
+       fsparam_flag   ("spectator",          Opt_spectator),
+       fsparam_flag   ("norecovery",         Opt_spectator),
+       fsparam_flag   ("ignore_local_fs",    Opt_ignore_local_fs),
+       fsparam_flag   ("localflocks",        Opt_localflocks),
+       fsparam_flag   ("localcaching",       Opt_localcaching),
+       fsparam_flag_no("debug",              Opt_debug),
+       fsparam_flag   ("upgrade",            Opt_upgrade),
+       fsparam_flag_no("acl",                Opt_acl),
+       fsparam_flag_no("suiddir",            Opt_suiddir),
+       fsparam_enum   ("data",               Opt_data),
+       fsparam_flag   ("meta",               Opt_meta),
+       fsparam_flag_no("discard",            Opt_discard),
+       fsparam_s32    ("commit",             Opt_commit),
+       fsparam_enum   ("errors",             Opt_errors),
+       fsparam_s32    ("statfs_quantum",     Opt_statfs_quantum),
+       fsparam_s32    ("statfs_percent",     Opt_statfs_percent),
+       fsparam_s32    ("quota_quantum",      Opt_quota_quantum),
+       fsparam_flag_no("barrier",            Opt_barrier),
+       fsparam_flag_no("rgrplvb",            Opt_rgrplvb),
+       fsparam_flag_no("loccookie",          Opt_loccookie),
+       /* quota can be a flag or an enum so it gets special treatment */
+       __fsparam(fs_param_is_enum, "quota", Opt_quota, fs_param_neg_with_no|fs_param_v_optional),
+       {}
+};
 
-       memset(&args, 0, sizeof(args));
-       args.ar_quota = GFS2_QUOTA_DEFAULT;
-       args.ar_data = GFS2_DATA_DEFAULT;
-       args.ar_commit = 30;
-       args.ar_statfs_quantum = 30;
-       args.ar_quota_quantum = 60;
-       args.ar_errors = GFS2_ERRORS_DEFAULT;
+static const struct fs_parameter_enum gfs2_param_enums[] = {
+       { Opt_quota,    "off",        Opt_quota_off },
+       { Opt_quota,    "account",    Opt_quota_account },
+       { Opt_quota,    "on",         Opt_quota_on },
+       { Opt_data,     "writeback",  Opt_data_writeback },
+       { Opt_data,     "ordered",    Opt_data_ordered },
+       { Opt_errors,   "withdraw",   Opt_errors_withdraw },
+       { Opt_errors,   "panic",      Opt_errors_panic },
+       {}
+};
 
-       error = gfs2_mount_args(&args, data);
-       if (error) {
-               pr_warn("can't parse mount arguments\n");
-               goto error_super;
+const struct fs_parameter_description gfs2_fs_parameters = {
+       .name = "gfs2",
+       .specs = gfs2_param_specs,
+       .enums = gfs2_param_enums,
+};
+
+/**
+ * gfs2_parse_param - Parse a single mount parameter
+ * @fc: The filesystem context; fc->fs_private holds the gfs2_args being built
+ * @param: The parameter to parse
+ *
+ * Looks @param up in gfs2_fs_parameters and records the result in the
+ * gfs2_args attached to @fc.
+ *
+ * Returns: 0 on success, the negative value from fs_parse() if it rejects the
+ *          parameter, or the value returned by invalf() if an argument is out
+ *          of range or conflicts with an option that was parsed earlier
+ */
+static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+       struct gfs2_args *args = fc->fs_private;
+       struct fs_parse_result result;
+       int o;
+
+       o = fs_parse(fc, &gfs2_fs_parameters, param, &result);
+       if (o < 0)
+               return o;
+
+       switch (o) {
+       case Opt_lockproto:
+               strlcpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN);
+               break;
+       case Opt_locktable:
+               strlcpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN);
+               break;
+       case Opt_hostdata:
+               strlcpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN);
+               break;
+       case Opt_spectator:
+               args->ar_spectator = 1;
+               break;
+       case Opt_ignore_local_fs:
+               /* Retained for backwards compat only */
+               break;
+       case Opt_localflocks:
+               args->ar_localflocks = 1;
+               break;
+       case Opt_localcaching:
+               /* Retained for backwards compat only */
+               break;
+       case Opt_debug:
+               if (result.boolean && args->ar_errors == GFS2_ERRORS_PANIC)
+                       return invalf(fc, "gfs2: -o debug and -o errors=panic are mutually exclusive");
+               args->ar_debug = result.boolean;
+               break;
+       case Opt_upgrade:
+               /* Retained for backwards compat only */
+               break;
+       case Opt_acl:
+               args->ar_posix_acl = result.boolean;
+               break;
+       case Opt_quota:
+               /* The quota option can be a flag or an enum. A non-zero int_32
+                  result means that we have an enum index. Otherwise we have
+                  to rely on the 'negated' flag to tell us whether 'quota' or
+                  'noquota' was specified. */
+               if (result.negated)
+                       args->ar_quota = GFS2_QUOTA_OFF;
+               else if (result.int_32 > 0)
+                       args->ar_quota = opt_quota_values[result.int_32];
+               else
+                       args->ar_quota = GFS2_QUOTA_ON;
+               break;
+       case Opt_suiddir:
+               args->ar_suiddir = result.boolean;
+               break;
+       case Opt_data:
+               /* The uint_32 result maps directly to GFS2_DATA_* */
+               args->ar_data = result.uint_32;
+               break;
+       case Opt_meta:
+               args->ar_meta = 1;
+               break;
+       case Opt_discard:
+               args->ar_discard = result.boolean;
+               break;
+       case Opt_commit:
+               if (result.int_32 <= 0)
+                       return invalf(fc, "gfs2: commit mount option requires a positive numeric argument");
+               args->ar_commit = result.int_32;
+               break;
+       case Opt_statfs_quantum:
+               if (result.int_32 < 0)
+                       return invalf(fc, "gfs2: statfs_quantum mount option requires a non-negative numeric argument");
+               args->ar_statfs_quantum = result.int_32;
+               break;
+       case Opt_quota_quantum:
+               if (result.int_32 <= 0)
+                       return invalf(fc, "gfs2: quota_quantum mount option requires a positive numeric argument");
+               args->ar_quota_quantum = result.int_32;
+               break;
+       case Opt_statfs_percent:
+               if (result.int_32 < 0 || result.int_32 > 100)
+                       return invalf(fc, "gfs2: statfs_percent mount option requires a numeric argument between 0 and 100");
+               args->ar_statfs_percent = result.int_32;
+               break;
+       case Opt_errors:
+               if (args->ar_debug && result.uint_32 == GFS2_ERRORS_PANIC)
+                       return invalf(fc, "gfs2: -o debug and -o errors=panic are mutually exclusive");
+               args->ar_errors = result.uint_32;
+               break;
+       case Opt_barrier:
+               /* The field is the negation of the option: "barrier" must
+                  clear ar_nobarrier and "nobarrier" must set it. */
+               args->ar_nobarrier = !result.boolean;
+               break;
+       case Opt_rgrplvb:
+               args->ar_rgrplvb = result.boolean;
+               break;
+       case Opt_loccookie:
+               args->ar_loccookie = result.boolean;
+               break;
+       default:
+               return invalf(fc, "gfs2: invalid mount option: %s", param->key);
         }
+       return 0;
+}
 
-       if (s->s_root) {
-               error = -EBUSY;
-               if ((flags ^ s->s_flags) & SB_RDONLY)
-                       goto error_super;
-       } else {
-               snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
-               sb_set_blocksize(s, block_size(bdev));
-               error = fill_super(s, &args, flags & SB_SILENT ? 1 : 0);
-               if (error)
-                       goto error_super;
-               s->s_flags |= SB_ACTIVE;
-               bdev->bd_super = s;
+static int gfs2_reconfigure(struct fs_context *fc)
+{
+       struct super_block *sb = fc->root->d_sb;
+       struct gfs2_sbd *sdp = sb->s_fs_info;
+       struct gfs2_args *oldargs = &sdp->sd_args;
+       struct gfs2_args *newargs = fc->fs_private;
+       struct gfs2_tune *gt = &sdp->sd_tune;
+       int error = 0;
+
+       sync_filesystem(sb);
+
+       spin_lock(&gt->gt_spin);
+       oldargs->ar_commit = gt->gt_logd_secs;
+       oldargs->ar_quota_quantum = gt->gt_quota_quantum;
+       if (gt->gt_statfs_slow)
+               oldargs->ar_statfs_quantum = 0;
+       else
+               oldargs->ar_statfs_quantum = gt->gt_statfs_quantum;
+       spin_unlock(&gt->gt_spin);
+
+       if (strcmp(newargs->ar_lockproto, oldargs->ar_lockproto)) {
+               errorf(fc, "gfs2: reconfiguration of locking protocol not allowed");
+               return -EINVAL;
+       }
+       if (strcmp(newargs->ar_locktable, oldargs->ar_locktable)) {
+               errorf(fc, "gfs2: reconfiguration of lock table not allowed");
+               return -EINVAL;
+       }
+       if (strcmp(newargs->ar_hostdata, oldargs->ar_hostdata)) {
+               errorf(fc, "gfs2: reconfiguration of host data not allowed");
+               return -EINVAL;
+       }
+       if (newargs->ar_spectator != oldargs->ar_spectator) {
+               errorf(fc, "gfs2: reconfiguration of spectator mode not allowed");
+               return -EINVAL;
+       }
+       if (newargs->ar_localflocks != oldargs->ar_localflocks) {
+               errorf(fc, "gfs2: reconfiguration of localflocks not allowed");
+               return -EINVAL;
+       }
+       if (newargs->ar_meta != oldargs->ar_meta) {
+               errorf(fc, "gfs2: switching between gfs2 and gfs2meta not allowed");
+               return -EINVAL;
+       }
+       if (oldargs->ar_spectator)
+               fc->sb_flags |= SB_RDONLY;
+
+       if ((sb->s_flags ^ fc->sb_flags) & SB_RDONLY) {
+               if (fc->sb_flags & SB_RDONLY) {
+                       error = gfs2_make_fs_ro(sdp);
+                       if (error)
+                               errorf(fc, "gfs2: unable to remount read-only");
+               } else {
+                       error = gfs2_make_fs_rw(sdp);
+                       if (error)
+                               errorf(fc, "gfs2: unable to remount read-write");
+               }
        }
+       sdp->sd_args = *newargs;
 
-       sdp = s->s_fs_info;
-       if (args.ar_meta)
-               return dget(sdp->sd_master_dir);
+       if (sdp->sd_args.ar_posix_acl)
+               sb->s_flags |= SB_POSIXACL;
+       else
+               sb->s_flags &= ~SB_POSIXACL;
+       if (sdp->sd_args.ar_nobarrier)
+               set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
        else
-               return dget(sdp->sd_root_dir);
-
-error_super:
-       deactivate_locked_super(s);
-       return ERR_PTR(error);
-error_bdev:
-       blkdev_put(bdev, mode);
-       return ERR_PTR(error);
+               clear_bit(SDF_NOBARRIERS, &sdp->sd_flags);
+       spin_lock(&gt->gt_spin);
+       gt->gt_logd_secs = newargs->ar_commit;
+       gt->gt_quota_quantum = newargs->ar_quota_quantum;
+       if (newargs->ar_statfs_quantum) {
+               gt->gt_statfs_slow = 0;
+               gt->gt_statfs_quantum = newargs->ar_statfs_quantum;
+       }
+       else {
+               gt->gt_statfs_slow = 1;
+               gt->gt_statfs_quantum = 30;
+       }
+       spin_unlock(&gt->gt_spin);
+
+       gfs2_online_uevent(sdp);
+       return error;
+}
+
+static const struct fs_context_operations gfs2_context_ops = {
+       .free        = gfs2_fc_free,
+       .parse_param = gfs2_parse_param,
+       .get_tree    = gfs2_get_tree,
+       .reconfigure = gfs2_reconfigure,
+};
+
+/*
+ * Set up the filesystem mount context.
+ *
+ * Allocates a zeroed gfs2_args, fills in the default quota, data, commit,
+ * statfs_quantum, quota_quantum and errors settings, stores it in
+ * fc->fs_private and installs gfs2_context_ops as the context operations.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int gfs2_init_fs_context(struct fs_context *fc)
+{
+       struct gfs2_args *args;
+
+       args = kzalloc(sizeof(*args), GFP_KERNEL);
+       if (args == NULL)
+               return -ENOMEM;
+
+       args->ar_quota = GFS2_QUOTA_DEFAULT;
+       args->ar_data = GFS2_DATA_DEFAULT;
+       args->ar_commit = 30;
+       args->ar_statfs_quantum = 30;
+       args->ar_quota_quantum = 60;
+       args->ar_errors = GFS2_ERRORS_DEFAULT;
+
+       fc->fs_private = args;
+       fc->ops = &gfs2_context_ops;
+       return 0;
 }
 
-static int set_meta_super(struct super_block *s, void *ptr)
+static int set_meta_super(struct super_block *s, struct fs_context *fc)
 {
        return -EINVAL;
 }
 
-static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
-                       int flags, const char *dev_name, void *data)
+/*
+ * Passed to sget_fc() as the test function: reports a match when the
+ * superblock's block device is the one stored in fc->sget_key.
+ */
+static int test_meta_super(struct super_block *s, struct fs_context *fc)
+{
+       return (fc->sget_key == s->s_bdev);
+}
+
+static int gfs2_meta_get_tree(struct fs_context *fc)
 {
        struct super_block *s;
        struct gfs2_sbd *sdp;
        struct path path;
        int error;
 
-       if (!dev_name || !*dev_name)
-               return ERR_PTR(-EINVAL);
+       if (!fc->source || !*fc->source)
+               return -EINVAL;
 
-       error = kern_path(dev_name, LOOKUP_FOLLOW, &path);
+       error = kern_path(fc->source, LOOKUP_FOLLOW, &path);
        if (error) {
                pr_warn("path_lookup on %s returned error %d\n",
-                       dev_name, error);
-               return ERR_PTR(error);
+                       fc->source, error);
+               return error;
        }
-       s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags,
-                path.dentry->d_sb->s_bdev);
+       fc->fs_type = &gfs2_fs_type;
+       fc->sget_key = path.dentry->d_sb->s_bdev;
+       s = sget_fc(fc, test_meta_super, set_meta_super);
        path_put(&path);
        if (IS_ERR(s)) {
                pr_warn("gfs2 mount does not exist\n");
-               return ERR_CAST(s);
+               return PTR_ERR(s);
        }
-       if ((flags ^ s->s_flags) & SB_RDONLY) {
+       if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
                deactivate_locked_super(s);
-               return ERR_PTR(-EBUSY);
+               return -EBUSY;
        }
        sdp = s->s_fs_info;
-       return dget(sdp->sd_master_dir);
+       fc->root = dget(sdp->sd_master_dir);
+       return 0;
+}
+
+/*
+ * Context operations for gfs2meta mounts. The context is initialised through
+ * gfs2_init_fs_context(), which allocates fc->fs_private, so ->free must be
+ * set here as well or that allocation is leaked when the context is released.
+ */
+static const struct fs_context_operations gfs2_meta_context_ops = {
+       .free        = gfs2_fc_free,
+       .get_tree    = gfs2_meta_get_tree,
+};
+
+/*
+ * Set up the mount context for gfs2meta: run the regular gfs2 context setup
+ * (which allocates the args and installs gfs2_context_ops), then replace
+ * fc->ops with gfs2_meta_context_ops.
+ *
+ * Returns 0 on success or the error returned by gfs2_init_fs_context().
+ */
+static int gfs2_meta_init_fs_context(struct fs_context *fc)
+{
+       int ret = gfs2_init_fs_context(fc);
+
+       if (ret)
+               return ret;
+
+       fc->ops = &gfs2_meta_context_ops;
+       return 0;
 }
 
 static void gfs2_kill_sb(struct super_block *sb)
@@ -1382,7 +1635,8 @@ static void gfs2_kill_sb(struct super_block *sb)
 struct file_system_type gfs2_fs_type = {
        .name = "gfs2",
        .fs_flags = FS_REQUIRES_DEV,
-       .mount = gfs2_mount,
+       .init_fs_context = gfs2_init_fs_context,
+       .parameters = &gfs2_fs_parameters,
        .kill_sb = gfs2_kill_sb,
        .owner = THIS_MODULE,
 };
@@ -1391,7 +1645,7 @@ MODULE_ALIAS_FS("gfs2");
 struct file_system_type gfs2meta_fs_type = {
        .name = "gfs2meta",
        .fs_flags = FS_REQUIRES_DEV,
-       .mount = gfs2_mount_meta,
+       .init_fs_context = gfs2_meta_init_fs_context,
        .owner = THIS_MODULE,
 };
 MODULE_ALIAS_FS("gfs2meta");
index 69c4b77..7c016a0 100644 (file)
@@ -774,7 +774,7 @@ static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp,
        nbytes = sizeof(struct gfs2_quota);
 
        pg_beg = loc >> PAGE_SHIFT;
-       pg_off = loc % PAGE_SIZE;
+       pg_off = offset_in_page(loc);
 
        /* If the quota straddles a page boundary, split the write in two */
        if ((pg_off + nbytes) > PAGE_SIZE) {
index 49ac0a5..2466bb4 100644 (file)
@@ -2285,7 +2285,7 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl,
 static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
-       char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2];
+       char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
 
        fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
                (unsigned long long)rgd->rd_addr);
index 0acc583..5fa1eec 100644 (file)
 #include "xattr.h"
 #include "lops.h"
 
-#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
-
-enum {
-       Opt_lockproto,
-       Opt_locktable,
-       Opt_hostdata,
-       Opt_spectator,
-       Opt_ignore_local_fs,
-       Opt_localflocks,
-       Opt_localcaching,
-       Opt_debug,
-       Opt_nodebug,
-       Opt_upgrade,
-       Opt_acl,
-       Opt_noacl,
-       Opt_quota_off,
-       Opt_quota_account,
-       Opt_quota_on,
-       Opt_quota,
-       Opt_noquota,
-       Opt_suiddir,
-       Opt_nosuiddir,
-       Opt_data_writeback,
-       Opt_data_ordered,
-       Opt_meta,
-       Opt_discard,
-       Opt_nodiscard,
-       Opt_commit,
-       Opt_err_withdraw,
-       Opt_err_panic,
-       Opt_statfs_quantum,
-       Opt_statfs_percent,
-       Opt_quota_quantum,
-       Opt_barrier,
-       Opt_nobarrier,
-       Opt_rgrplvb,
-       Opt_norgrplvb,
-       Opt_loccookie,
-       Opt_noloccookie,
-       Opt_error,
-};
-
-static const match_table_t tokens = {
-       {Opt_lockproto, "lockproto=%s"},
-       {Opt_locktable, "locktable=%s"},
-       {Opt_hostdata, "hostdata=%s"},
-       {Opt_spectator, "spectator"},
-       {Opt_spectator, "norecovery"},
-       {Opt_ignore_local_fs, "ignore_local_fs"},
-       {Opt_localflocks, "localflocks"},
-       {Opt_localcaching, "localcaching"},
-       {Opt_debug, "debug"},
-       {Opt_nodebug, "nodebug"},
-       {Opt_upgrade, "upgrade"},
-       {Opt_acl, "acl"},
-       {Opt_noacl, "noacl"},
-       {Opt_quota_off, "quota=off"},
-       {Opt_quota_account, "quota=account"},
-       {Opt_quota_on, "quota=on"},
-       {Opt_quota, "quota"},
-       {Opt_noquota, "noquota"},
-       {Opt_suiddir, "suiddir"},
-       {Opt_nosuiddir, "nosuiddir"},
-       {Opt_data_writeback, "data=writeback"},
-       {Opt_data_ordered, "data=ordered"},
-       {Opt_meta, "meta"},
-       {Opt_discard, "discard"},
-       {Opt_nodiscard, "nodiscard"},
-       {Opt_commit, "commit=%d"},
-       {Opt_err_withdraw, "errors=withdraw"},
-       {Opt_err_panic, "errors=panic"},
-       {Opt_statfs_quantum, "statfs_quantum=%d"},
-       {Opt_statfs_percent, "statfs_percent=%d"},
-       {Opt_quota_quantum, "quota_quantum=%d"},
-       {Opt_barrier, "barrier"},
-       {Opt_nobarrier, "nobarrier"},
-       {Opt_rgrplvb, "rgrplvb"},
-       {Opt_norgrplvb, "norgrplvb"},
-       {Opt_loccookie, "loccookie"},
-       {Opt_noloccookie, "noloccookie"},
-       {Opt_error, NULL}
-};
-
-/**
- * gfs2_mount_args - Parse mount options
- * @args: The structure into which the parsed options will be written
- * @options: The options to parse
- *
- * Return: errno
- */
-
-int gfs2_mount_args(struct gfs2_args *args, char *options)
-{
-       char *o;
-       int token;
-       substring_t tmp[MAX_OPT_ARGS];
-       int rv;
-
-       /* Split the options into tokens with the "," character and
-          process them */
-
-       while (1) {
-               o = strsep(&options, ",");
-               if (o == NULL)
-                       break;
-               if (*o == '\0')
-                       continue;
-
-               token = match_token(o, tokens, tmp);
-               switch (token) {
-               case Opt_lockproto:
-                       match_strlcpy(args->ar_lockproto, &tmp[0],
-                                     GFS2_LOCKNAME_LEN);
-                       break;
-               case Opt_locktable:
-                       match_strlcpy(args->ar_locktable, &tmp[0],
-                                     GFS2_LOCKNAME_LEN);
-                       break;
-               case Opt_hostdata:
-                       match_strlcpy(args->ar_hostdata, &tmp[0],
-                                     GFS2_LOCKNAME_LEN);
-                       break;
-               case Opt_spectator:
-                       args->ar_spectator = 1;
-                       break;
-               case Opt_ignore_local_fs:
-                       /* Retained for backwards compat only */
-                       break;
-               case Opt_localflocks:
-                       args->ar_localflocks = 1;
-                       break;
-               case Opt_localcaching:
-                       /* Retained for backwards compat only */
-                       break;
-               case Opt_debug:
-                       if (args->ar_errors == GFS2_ERRORS_PANIC) {
-                               pr_warn("-o debug and -o errors=panic are mutually exclusive\n");
-                               return -EINVAL;
-                       }
-                       args->ar_debug = 1;
-                       break;
-               case Opt_nodebug:
-                       args->ar_debug = 0;
-                       break;
-               case Opt_upgrade:
-                       /* Retained for backwards compat only */
-                       break;
-               case Opt_acl:
-                       args->ar_posix_acl = 1;
-                       break;
-               case Opt_noacl:
-                       args->ar_posix_acl = 0;
-                       break;
-               case Opt_quota_off:
-               case Opt_noquota:
-                       args->ar_quota = GFS2_QUOTA_OFF;
-                       break;
-               case Opt_quota_account:
-                       args->ar_quota = GFS2_QUOTA_ACCOUNT;
-                       break;
-               case Opt_quota_on:
-               case Opt_quota:
-                       args->ar_quota = GFS2_QUOTA_ON;
-                       break;
-               case Opt_suiddir:
-                       args->ar_suiddir = 1;
-                       break;
-               case Opt_nosuiddir:
-                       args->ar_suiddir = 0;
-                       break;
-               case Opt_data_writeback:
-                       args->ar_data = GFS2_DATA_WRITEBACK;
-                       break;
-               case Opt_data_ordered:
-                       args->ar_data = GFS2_DATA_ORDERED;
-                       break;
-               case Opt_meta:
-                       args->ar_meta = 1;
-                       break;
-               case Opt_discard:
-                       args->ar_discard = 1;
-                       break;
-               case Opt_nodiscard:
-                       args->ar_discard = 0;
-                       break;
-               case Opt_commit:
-                       rv = match_int(&tmp[0], &args->ar_commit);
-                       if (rv || args->ar_commit <= 0) {
-                               pr_warn("commit mount option requires a positive numeric argument\n");
-                               return rv ? rv : -EINVAL;
-                       }
-                       break;
-               case Opt_statfs_quantum:
-                       rv = match_int(&tmp[0], &args->ar_statfs_quantum);
-                       if (rv || args->ar_statfs_quantum < 0) {
-                               pr_warn("statfs_quantum mount option requires a non-negative numeric argument\n");
-                               return rv ? rv : -EINVAL;
-                       }
-                       break;
-               case Opt_quota_quantum:
-                       rv = match_int(&tmp[0], &args->ar_quota_quantum);
-                       if (rv || args->ar_quota_quantum <= 0) {
-                               pr_warn("quota_quantum mount option requires a positive numeric argument\n");
-                               return rv ? rv : -EINVAL;
-                       }
-                       break;
-               case Opt_statfs_percent:
-                       rv = match_int(&tmp[0], &args->ar_statfs_percent);
-                       if (rv || args->ar_statfs_percent < 0 ||
-                           args->ar_statfs_percent > 100) {
-                               pr_warn("statfs_percent mount option requires a numeric argument between 0 and 100\n");
-                               return rv ? rv : -EINVAL;
-                       }
-                       break;
-               case Opt_err_withdraw:
-                       args->ar_errors = GFS2_ERRORS_WITHDRAW;
-                       break;
-               case Opt_err_panic:
-                       if (args->ar_debug) {
-                               pr_warn("-o debug and -o errors=panic are mutually exclusive\n");
-                               return -EINVAL;
-                       }
-                       args->ar_errors = GFS2_ERRORS_PANIC;
-                       break;
-               case Opt_barrier:
-                       args->ar_nobarrier = 0;
-                       break;
-               case Opt_nobarrier:
-                       args->ar_nobarrier = 1;
-                       break;
-               case Opt_rgrplvb:
-                       args->ar_rgrplvb = 1;
-                       break;
-               case Opt_norgrplvb:
-                       args->ar_rgrplvb = 0;
-                       break;
-               case Opt_loccookie:
-                       args->ar_loccookie = 1;
-                       break;
-               case Opt_noloccookie:
-                       args->ar_loccookie = 0;
-                       break;
-               case Opt_error:
-               default:
-                       pr_warn("invalid mount option: %s\n", o);
-                       return -EINVAL;
-               }
-       }
-
-       return 0;
-}
-
 /**
  * gfs2_jindex_free - Clear all the journal index information
  * @sdp: The GFS2 superblock
@@ -847,7 +595,7 @@ out:
  * Returns: errno
  */
 
-static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
+int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 {
        struct gfs2_holder freeze_gh;
        int error;
@@ -1227,84 +975,6 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
 }
 
 /**
- * gfs2_remount_fs - called when the FS is remounted
- * @sb:  the filesystem
- * @flags:  the remount flags
- * @data:  extra data passed in (not used right now)
- *
- * Returns: errno
- */
-
-static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
-{
-       struct gfs2_sbd *sdp = sb->s_fs_info;
-       struct gfs2_args args = sdp->sd_args; /* Default to current settings */
-       struct gfs2_tune *gt = &sdp->sd_tune;
-       int error;
-
-       sync_filesystem(sb);
-
-       spin_lock(&gt->gt_spin);
-       args.ar_commit = gt->gt_logd_secs;
-       args.ar_quota_quantum = gt->gt_quota_quantum;
-       if (gt->gt_statfs_slow)
-               args.ar_statfs_quantum = 0;
-       else
-               args.ar_statfs_quantum = gt->gt_statfs_quantum;
-       spin_unlock(&gt->gt_spin);
-       error = gfs2_mount_args(&args, data);
-       if (error)
-               return error;
-
-       /* Not allowed to change locking details */
-       if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) ||
-           strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) ||
-           strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata))
-               return -EINVAL;
-
-       /* Some flags must not be changed */
-       if (args_neq(&args, &sdp->sd_args, spectator) ||
-           args_neq(&args, &sdp->sd_args, localflocks) ||
-           args_neq(&args, &sdp->sd_args, meta))
-               return -EINVAL;
-
-       if (sdp->sd_args.ar_spectator)
-               *flags |= SB_RDONLY;
-
-       if ((sb->s_flags ^ *flags) & SB_RDONLY) {
-               if (*flags & SB_RDONLY)
-                       error = gfs2_make_fs_ro(sdp);
-               else
-                       error = gfs2_make_fs_rw(sdp);
-       }
-
-       sdp->sd_args = args;
-       if (sdp->sd_args.ar_posix_acl)
-               sb->s_flags |= SB_POSIXACL;
-       else
-               sb->s_flags &= ~SB_POSIXACL;
-       if (sdp->sd_args.ar_nobarrier)
-               set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
-       else
-               clear_bit(SDF_NOBARRIERS, &sdp->sd_flags);
-       spin_lock(&gt->gt_spin);
-       gt->gt_logd_secs = args.ar_commit;
-       gt->gt_quota_quantum = args.ar_quota_quantum;
-       if (args.ar_statfs_quantum) {
-               gt->gt_statfs_slow = 0;
-               gt->gt_statfs_quantum = args.ar_statfs_quantum;
-       }
-       else {
-               gt->gt_statfs_slow = 1;
-               gt->gt_statfs_quantum = 30;
-       }
-       spin_unlock(&gt->gt_spin);
-
-       gfs2_online_uevent(sdp);
-       return error;
-}
-
-/**
  * gfs2_drop_inode - Drop an inode (test for remote unlink)
  * @inode: The inode to drop
  *
@@ -1722,13 +1392,13 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
        struct gfs2_inode *ip;
 
        ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
-       if (ip) {
-               ip->i_flags = 0;
-               ip->i_gl = NULL;
-               memset(&ip->i_res, 0, sizeof(ip->i_res));
-               RB_CLEAR_NODE(&ip->i_res.rs_node);
-               ip->i_rahead = 0;
-       }
+       if (!ip)
+               return NULL;
+       ip->i_flags = 0;
+       ip->i_gl = NULL;
+       memset(&ip->i_res, 0, sizeof(ip->i_res));
+       RB_CLEAR_NODE(&ip->i_res.rs_node);
+       ip->i_rahead = 0;
        return &ip->i_inode;
 }
 
@@ -1748,7 +1418,6 @@ const struct super_operations gfs2_super_ops = {
        .freeze_super           = gfs2_freeze,
        .thaw_super             = gfs2_unfreeze,
        .statfs                 = gfs2_statfs,
-       .remount_fs             = gfs2_remount_fs,
        .drop_inode             = gfs2_drop_inode,
        .show_options           = gfs2_show_options,
 };
index 9d49eaa..b8bf811 100644 (file)
@@ -24,8 +24,6 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
 
 extern void gfs2_jindex_free(struct gfs2_sbd *sdp);
 
-extern int gfs2_mount_args(struct gfs2_args *args, char *data);
-
 extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
 extern int gfs2_jdesc_check(struct gfs2_jdesc *jd);
 
@@ -33,6 +31,7 @@ extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
                                     struct gfs2_inode **ipp);
 
 extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
+extern int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
 extern void gfs2_online_uevent(struct gfs2_sbd *sdp);
 extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
 extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
index 83f6c58..c451591 100644 (file)
@@ -178,7 +178,7 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
                         const char *function, char *file, unsigned int line)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
-       char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2];
+       char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
        int rv;
 
        sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
index 64bf28c..fef457a 100644 (file)
@@ -181,6 +181,9 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
        mapping->flags = 0;
        mapping->wb_err = 0;
        atomic_set(&mapping->i_mmap_writable, 0);
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+       atomic_set(&mapping->nr_thps, 0);
+#endif
        mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
        mapping->private_data = NULL;
        mapping->writeback_index = 0;
index 0dadbdb..aa8ac55 100644 (file)
@@ -200,6 +200,7 @@ struct io_ring_ctx {
                struct io_uring_sqe     *sq_sqes;
 
                struct list_head        defer_list;
+               struct list_head        timeout_list;
        } ____cacheline_aligned_in_smp;
 
        /* IO offload */
@@ -216,6 +217,7 @@ struct io_ring_ctx {
                struct wait_queue_head  cq_wait;
                struct fasync_struct    *cq_fasync;
                struct eventfd_ctx      *cq_ev_fd;
+               atomic_t                cq_timeouts;
        } ____cacheline_aligned_in_smp;
 
        struct io_rings *rings;
@@ -283,6 +285,11 @@ struct io_poll_iocb {
        struct wait_queue_entry         wait;
 };
 
+struct io_timeout {
+       struct file                     *file;
+       struct hrtimer                  timer;
+};
+
 /*
  * NOTE! Each of the iocb union members has the file pointer
  * as the first entry in their struct definition. So you can
@@ -294,6 +301,7 @@ struct io_kiocb {
                struct file             *file;
                struct kiocb            rw;
                struct io_poll_iocb     poll;
+               struct io_timeout       timeout;
        };
 
        struct sqe_submit       submit;
@@ -313,6 +321,7 @@ struct io_kiocb {
 #define REQ_F_LINK_DONE                128     /* linked sqes done */
 #define REQ_F_FAIL_LINK                256     /* fail rest of links */
 #define REQ_F_SHADOW_DRAIN     512     /* link-drain shadow req */
+#define REQ_F_TIMEOUT          1024    /* timeout request */
        u64                     user_data;
        u32                     result;
        u32                     sequence;
@@ -344,6 +353,8 @@ struct io_submit_state {
 };
 
 static void io_sq_wq_submit_work(struct work_struct *work);
+static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
+                                long res);
 static void __io_free_req(struct io_kiocb *req);
 
 static struct kmem_cache *req_cachep;
@@ -400,26 +411,30 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
        INIT_LIST_HEAD(&ctx->poll_list);
        INIT_LIST_HEAD(&ctx->cancel_list);
        INIT_LIST_HEAD(&ctx->defer_list);
+       INIT_LIST_HEAD(&ctx->timeout_list);
        return ctx;
 }
 
 static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
                                     struct io_kiocb *req)
 {
-       if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
+       /* timeout requests always honor sequence */
+       if (!(req->flags & REQ_F_TIMEOUT) &&
+           (req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
                return false;
 
        return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped;
 }
 
-static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
+static struct io_kiocb *__io_get_deferred_req(struct io_ring_ctx *ctx,
+                                             struct list_head *list)
 {
        struct io_kiocb *req;
 
-       if (list_empty(&ctx->defer_list))
+       if (list_empty(list))
                return NULL;
 
-       req = list_first_entry(&ctx->defer_list, struct io_kiocb, list);
+       req = list_first_entry(list, struct io_kiocb, list);
        if (!io_sequence_defer(ctx, req)) {
                list_del_init(&req->list);
                return req;
@@ -428,6 +443,16 @@ static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
        return NULL;
 }
 
+static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
+{
+       return __io_get_deferred_req(ctx, &ctx->defer_list);
+}
+
+static struct io_kiocb *io_get_timeout_req(struct io_ring_ctx *ctx)
+{
+       return __io_get_deferred_req(ctx, &ctx->timeout_list);
+}
+
 static void __io_commit_cqring(struct io_ring_ctx *ctx)
 {
        struct io_rings *rings = ctx->rings;
@@ -446,25 +471,50 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
 static inline void io_queue_async_work(struct io_ring_ctx *ctx,
                                       struct io_kiocb *req)
 {
-       int rw;
+       int rw = 0;
 
-       switch (req->submit.sqe->opcode) {
-       case IORING_OP_WRITEV:
-       case IORING_OP_WRITE_FIXED:
-               rw = !(req->rw.ki_flags & IOCB_DIRECT);
-               break;
-       default:
-               rw = 0;
-               break;
+       if (req->submit.sqe) {
+               switch (req->submit.sqe->opcode) {
+               case IORING_OP_WRITEV:
+               case IORING_OP_WRITE_FIXED:
+                       rw = !(req->rw.ki_flags & IOCB_DIRECT);
+                       break;
+               }
        }
 
        queue_work(ctx->sqo_wq[rw], &req->work);
 }
 
+static void io_kill_timeout(struct io_kiocb *req)
+{
+       int ret;
+
+       ret = hrtimer_try_to_cancel(&req->timeout.timer);
+       if (ret != -1) {
+               atomic_inc(&req->ctx->cq_timeouts);
+               list_del(&req->list);
+               io_cqring_fill_event(req->ctx, req->user_data, 0);
+               __io_free_req(req);
+       }
+}
+
+static void io_kill_timeouts(struct io_ring_ctx *ctx)
+{
+       struct io_kiocb *req, *tmp;
+
+       spin_lock_irq(&ctx->completion_lock);
+       list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list)
+               io_kill_timeout(req);
+       spin_unlock_irq(&ctx->completion_lock);
+}
+
 static void io_commit_cqring(struct io_ring_ctx *ctx)
 {
        struct io_kiocb *req;
 
+       while ((req = io_get_timeout_req(ctx)) != NULL)
+               io_kill_timeout(req);
+
        __io_commit_cqring(ctx);
 
        while ((req = io_get_deferred_req(ctx)) != NULL) {
@@ -1248,6 +1298,51 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
        }
 }
 
+/*
+ * For files that don't have ->read_iter() and ->write_iter(), handle them
+ * by looping over ->read() or ->write() manually.
+ */
+static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
+                          struct iov_iter *iter)
+{
+       ssize_t ret = 0;
+
+       /*
+        * Don't support polled IO through this interface, and we can't
+        * support non-blocking either. For the latter, this just causes
+        * the kiocb to be handled from an async context.
+        */
+       if (kiocb->ki_flags & IOCB_HIPRI)
+               return -EOPNOTSUPP;
+       if (kiocb->ki_flags & IOCB_NOWAIT)
+               return -EAGAIN;
+
+       while (iov_iter_count(iter)) {
+               struct iovec iovec = iov_iter_iovec(iter);
+               ssize_t nr;
+
+               if (rw == READ) {
+                       nr = file->f_op->read(file, iovec.iov_base,
+                                             iovec.iov_len, &kiocb->ki_pos);
+               } else {
+                       nr = file->f_op->write(file, iovec.iov_base,
+                                              iovec.iov_len, &kiocb->ki_pos);
+               }
+
+               if (nr < 0) {
+                       if (!ret)
+                               ret = nr;
+                       break;
+               }
+               ret += nr;
+               if (nr != iovec.iov_len)
+                       break;
+               iov_iter_advance(iter, nr);
+       }
+
+       return ret;
+}
+
 static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
                   bool force_nonblock)
 {
@@ -1265,8 +1360,6 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
 
        if (unlikely(!(file->f_mode & FMODE_READ)))
                return -EBADF;
-       if (unlikely(!file->f_op->read_iter))
-               return -EINVAL;
 
        ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter);
        if (ret < 0)
@@ -1281,7 +1374,11 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
        if (!ret) {
                ssize_t ret2;
 
-               ret2 = call_read_iter(file, kiocb, &iter);
+               if (file->f_op->read_iter)
+                       ret2 = call_read_iter(file, kiocb, &iter);
+               else
+                       ret2 = loop_rw_iter(READ, file, kiocb, &iter);
+
                /*
                 * In case of a short read, punt to async. This can happen
                 * if we have data partially cached. Alternatively we can
@@ -1326,8 +1423,6 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
        file = kiocb->ki_filp;
        if (unlikely(!(file->f_mode & FMODE_WRITE)))
                return -EBADF;
-       if (unlikely(!file->f_op->write_iter))
-               return -EINVAL;
 
        ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter);
        if (ret < 0)
@@ -1365,7 +1460,10 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
                }
                kiocb->ki_flags |= IOCB_WRITE;
 
-               ret2 = call_write_iter(file, kiocb, &iter);
+               if (file->f_op->write_iter)
+                       ret2 = call_write_iter(file, kiocb, &iter);
+               else
+                       ret2 = loop_rw_iter(WRITE, file, kiocb, &iter);
                if (!force_nonblock || ret2 != -EAGAIN) {
                        io_rw_done(kiocb, ret2);
                } else {
@@ -1714,6 +1812,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        if (!poll->file)
                return -EBADF;
 
+       req->submit.sqe = NULL;
        INIT_WORK(&req->work, io_poll_complete_work);
        events = READ_ONCE(sqe->poll_events);
        poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
@@ -1765,6 +1864,81 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return ipt.error;
 }
 
+static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
+{
+       struct io_ring_ctx *ctx;
+       struct io_kiocb *req;
+       unsigned long flags;
+
+       req = container_of(timer, struct io_kiocb, timeout.timer);
+       ctx = req->ctx;
+       atomic_inc(&ctx->cq_timeouts);
+
+       spin_lock_irqsave(&ctx->completion_lock, flags);
+       list_del(&req->list);
+
+       io_cqring_fill_event(ctx, req->user_data, -ETIME);
+       io_commit_cqring(ctx);
+       spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+       io_cqring_ev_posted(ctx);
+
+       io_put_req(req);
+       return HRTIMER_NORESTART;
+}
+
+static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+       unsigned count, req_dist, tail_index;
+       struct io_ring_ctx *ctx = req->ctx;
+       struct list_head *entry;
+       struct timespec ts;
+
+       if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+       if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->timeout_flags ||
+           sqe->len != 1)
+               return -EINVAL;
+       if (copy_from_user(&ts, (void __user *) (unsigned long) sqe->addr,
+           sizeof(ts)))
+               return -EFAULT;
+
+       /*
+        * sqe->off holds the number of events that need to occur for this
+        * timeout event to be satisfied.
+        */
+       count = READ_ONCE(sqe->off);
+       if (!count)
+               count = 1;
+
+       req->sequence = ctx->cached_sq_head + count - 1;
+       req->flags |= REQ_F_TIMEOUT;
+
+       /*
+        * Insertion sort, ensuring the first entry in the list is always
+        * the one we need first.
+        */
+       tail_index = ctx->cached_cq_tail - ctx->rings->sq_dropped;
+       req_dist = req->sequence - tail_index;
+       spin_lock_irq(&ctx->completion_lock);
+       list_for_each_prev(entry, &ctx->timeout_list) {
+               struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list);
+               unsigned dist;
+
+               dist = nxt->sequence - tail_index;
+               if (req_dist >= dist)
+                       break;
+       }
+       list_add(&req->list, entry);
+       spin_unlock_irq(&ctx->completion_lock);
+
+       hrtimer_init(&req->timeout.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       req->timeout.timer.function = io_timeout_fn;
+       hrtimer_start(&req->timeout.timer, timespec_to_ktime(ts),
+                       HRTIMER_MODE_REL);
+       return 0;
+}
+
 static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
                        const struct io_uring_sqe *sqe)
 {
@@ -1842,6 +2016,9 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
        case IORING_OP_RECVMSG:
                ret = io_recvmsg(req, s->sqe, force_nonblock);
                break;
+       case IORING_OP_TIMEOUT:
+               ret = io_timeout(req, s->sqe);
+               break;
        default:
                ret = -EINVAL;
                break;
@@ -2098,13 +2275,11 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
        if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
                struct io_uring_sqe *sqe_copy;
 
-               sqe_copy = kmalloc(sizeof(*sqe_copy), GFP_KERNEL);
+               sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);
                if (sqe_copy) {
                        struct async_list *list;
 
-                       memcpy(sqe_copy, s->sqe, sizeof(*sqe_copy));
                        s->sqe = sqe_copy;
-
                        memcpy(&req->submit, s, sizeof(*s));
                        list = io_async_list_from_sqe(ctx, s->sqe);
                        if (!io_add_to_prev_work(list, req)) {
@@ -2359,18 +2534,22 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
                        io_queue_link_head(ctx, link, &link->submit, shadow_req,
                                                true);
                        link = NULL;
+                       shadow_req = NULL;
                }
                prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0;
 
                if (link && (sqes[i].sqe->flags & IOSQE_IO_DRAIN)) {
                        if (!shadow_req) {
                                shadow_req = io_get_req(ctx, NULL);
+                               if (unlikely(!shadow_req))
+                                       goto out;
                                shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
                                refcount_dec(&shadow_req->refs);
                        }
                        shadow_req->sequence = sqes[i].sequence;
                }
 
+out:
                if (unlikely(mm_fault)) {
                        io_cqring_add_event(ctx, sqes[i].sqe->user_data,
                                                -EFAULT);
@@ -2436,7 +2615,7 @@ static int io_sq_thread(void *data)
                         * to sleep.
                         */
                        if (inflight || !time_after(jiffies, timeout)) {
-                               cpu_relax();
+                               cond_resched();
                                continue;
                        }
 
@@ -2545,18 +2724,22 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
                        io_queue_link_head(ctx, link, &link->submit, shadow_req,
                                                force_nonblock);
                        link = NULL;
+                       shadow_req = NULL;
                }
                prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0;
 
                if (link && (s.sqe->flags & IOSQE_IO_DRAIN)) {
                        if (!shadow_req) {
                                shadow_req = io_get_req(ctx, NULL);
+                               if (unlikely(!shadow_req))
+                                       goto out;
                                shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
                                refcount_dec(&shadow_req->refs);
                        }
                        shadow_req->sequence = s.sequence;
                }
 
+out:
                s.has_user = true;
                s.needs_lock = false;
                s.needs_fixed_file = false;
@@ -2585,6 +2768,38 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
        return submit;
 }
 
+struct io_wait_queue {
+       struct wait_queue_entry wq;
+       struct io_ring_ctx *ctx;
+       unsigned to_wait;
+       unsigned nr_timeouts;
+};
+
+static inline bool io_should_wake(struct io_wait_queue *iowq)
+{
+       struct io_ring_ctx *ctx = iowq->ctx;
+
+       /*
+        * Wake up if we have enough events, or if a timeout occurred since we
+        * started waiting. For timeouts, we always want to return to userspace,
+        * regardless of event count.
+        */
+       return io_cqring_events(ctx->rings) >= iowq->to_wait ||
+                       atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
+}
+
+static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
+                           int wake_flags, void *key)
+{
+       struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue,
+                                                       wq);
+
+       if (!io_should_wake(iowq))
+               return -1;
+
+       return autoremove_wake_function(curr, mode, wake_flags, key);
+}
+
 /*
  * Wait until events become available, if we don't already have some. The
  * application must reap them itself, as they reside on the shared cq ring.
@@ -2592,6 +2807,15 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
 static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                          const sigset_t __user *sig, size_t sigsz)
 {
+       struct io_wait_queue iowq = {
+               .wq = {
+                       .private        = current,
+                       .func           = io_wake_function,
+                       .entry          = LIST_HEAD_INIT(iowq.wq.entry),
+               },
+               .ctx            = ctx,
+               .to_wait        = min_events,
+       };
        struct io_rings *rings = ctx->rings;
        int ret;
 
@@ -2611,7 +2835,21 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                        return ret;
        }
 
-       ret = wait_event_interruptible(ctx->wait, io_cqring_events(rings) >= min_events);
+       ret = 0;
+       iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
+       do {
+               prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
+                                               TASK_INTERRUPTIBLE);
+               if (io_should_wake(&iowq))
+                       break;
+               schedule();
+               if (signal_pending(current)) {
+                       ret = -ERESTARTSYS;
+                       break;
+               }
+       } while (1);
+       finish_wait(&ctx->wait, &iowq.wq);
+
        restore_saved_sigmask_unless(ret == -ERESTARTSYS);
        if (ret == -ERESTARTSYS)
                ret = -EINTR;
@@ -3263,7 +3501,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
        if (READ_ONCE(ctx->rings->sq.tail) - ctx->cached_sq_head !=
            ctx->rings->sq_ring_entries)
                mask |= EPOLLOUT | EPOLLWRNORM;
-       if (READ_ONCE(ctx->rings->sq.head) != ctx->cached_cq_tail)
+       if (READ_ONCE(ctx->rings->cq.head) != ctx->cached_cq_tail)
                mask |= EPOLLIN | EPOLLRDNORM;
 
        return mask;
@@ -3282,6 +3520,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
        percpu_ref_kill(&ctx->refs);
        mutex_unlock(&ctx->uring_lock);
 
+       io_kill_timeouts(ctx);
        io_poll_remove_all(ctx);
        io_iopoll_reap_events(ctx);
        wait_for_completion(&ctx->ctx_done);
@@ -3319,7 +3558,7 @@ static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
        }
 
        page = virt_to_head_page(ptr);
-       if (sz > (PAGE_SIZE << compound_order(page)))
+       if (sz > page_size(page))
                return -EINVAL;
 
        pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
index 10517ce..1fc28c2 100644 (file)
@@ -24,7 +24,7 @@
 
 struct iomap_dio {
        struct kiocb            *iocb;
-       iomap_dio_end_io_t      *end_io;
+       const struct iomap_dio_ops *dops;
        loff_t                  i_size;
        loff_t                  size;
        atomic_t                ref;
@@ -72,18 +72,14 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
 
 static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
+       const struct iomap_dio_ops *dops = dio->dops;
        struct kiocb *iocb = dio->iocb;
        struct inode *inode = file_inode(iocb->ki_filp);
        loff_t offset = iocb->ki_pos;
-       ssize_t ret;
+       ssize_t ret = dio->error;
 
-       if (dio->end_io) {
-               ret = dio->end_io(iocb,
-                               dio->error ? dio->error : dio->size,
-                               dio->flags);
-       } else {
-               ret = dio->error;
-       }
+       if (dops && dops->end_io)
+               ret = dops->end_io(iocb, dio->size, ret, dio->flags);
 
        if (likely(!ret)) {
                ret = dio->size;
@@ -101,9 +97,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
         * one is a pretty crazy thing to do, so we don't support it 100%.  If
         * this invalidation fails, tough, the write still worked...
         *
-        * And this page cache invalidation has to be after dio->end_io(), as
-        * some filesystems convert unwritten extents to real allocations in
-        * end_io() when necessary, otherwise a racing buffer read would cache
+        * And this page cache invalidation has to be after ->end_io(), as some
+        * filesystems convert unwritten extents to real allocations in
+        * ->end_io() when necessary, otherwise a racing buffer read would cache
         * zeros from unwritten extents.
         */
        if (!dio->error &&
@@ -396,7 +392,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
  */
 ssize_t
 iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
-               const struct iomap_ops *ops, iomap_dio_end_io_t end_io)
+               const struct iomap_ops *ops, const struct iomap_dio_ops *dops)
 {
        struct address_space *mapping = iocb->ki_filp->f_mapping;
        struct inode *inode = file_inode(iocb->ki_filp);
@@ -421,7 +417,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        atomic_set(&dio->ref, 1);
        dio->size = 0;
        dio->i_size = i_size_read(inode);
-       dio->end_io = end_io;
+       dio->dops = dops;
        dio->error = 0;
        dio->flags = 0;
 
index 953990e..1c58859 100644 (file)
@@ -89,8 +89,6 @@ EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
 EXPORT_SYMBOL(jbd2_journal_invalidatepage);
 EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
 EXPORT_SYMBOL(jbd2_journal_force_commit);
-EXPORT_SYMBOL(jbd2_journal_inode_add_write);
-EXPORT_SYMBOL(jbd2_journal_inode_add_wait);
 EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
 EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
 EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
index 69b9bc3..f08073d 100644 (file)
@@ -638,10 +638,8 @@ static void flush_descriptor(journal_t *journal,
 {
        jbd2_journal_revoke_header_t *header;
 
-       if (is_journal_aborted(journal)) {
-               put_bh(descriptor);
+       if (is_journal_aborted(journal))
                return;
-       }
 
        header = (jbd2_journal_revoke_header_t *)descriptor->b_data;
        header->r_count = cpu_to_be32(offset);
index 990e7b5..bee8498 100644 (file)
@@ -569,6 +569,9 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type,
        }
        handle->h_type = type;
        handle->h_line_no = line_no;
+       trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
+                               handle->h_transaction->t_tid, type,
+                               line_no, handle->h_buffer_credits);
        return 0;
 }
 EXPORT_SYMBOL(jbd2_journal_start_reserved);
@@ -2619,18 +2622,6 @@ done:
        return 0;
 }
 
-int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode)
-{
-       return jbd2_journal_file_inode(handle, jinode,
-                       JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX);
-}
-
-int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode)
-{
-       return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0,
-                       LLONG_MAX);
-}
-
 int jbd2_journal_inode_ranged_write(handle_t *handle,
                struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
 {
index 05fe6cf..ab8cdd9 100644 (file)
@@ -682,33 +682,6 @@ struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
        return JFFS2_INODE_INFO(inode);
 }
 
-unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c,
-                                  struct jffs2_inode_info *f,
-                                  unsigned long offset,
-                                  unsigned long *priv)
-{
-       struct inode *inode = OFNI_EDONI_2SFFJ(f);
-       struct page *pg;
-
-       pg = read_cache_page(inode->i_mapping, offset >> PAGE_SHIFT,
-                            jffs2_do_readpage_unlock, inode);
-       if (IS_ERR(pg))
-               return (void *)pg;
-
-       *priv = (unsigned long)pg;
-       return kmap(pg);
-}
-
-void jffs2_gc_release_page(struct jffs2_sb_info *c,
-                          unsigned char *ptr,
-                          unsigned long *priv)
-{
-       struct page *pg = (void *)*priv;
-
-       kunmap(pg);
-       put_page(pg);
-}
-
 static int jffs2_flash_setup(struct jffs2_sb_info *c) {
        int ret = 0;
 
index 9ed0f26..373b3b7 100644 (file)
@@ -1165,12 +1165,13 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
                                       uint32_t start, uint32_t end)
 {
+       struct inode *inode = OFNI_EDONI_2SFFJ(f);
        struct jffs2_full_dnode *new_fn;
        struct jffs2_raw_inode ri;
        uint32_t alloclen, offset, orig_end, orig_start;
        int ret = 0;
        unsigned char *comprbuf = NULL, *writebuf;
-       unsigned long pg;
+       struct page *page;
        unsigned char *pg_ptr;
 
        memset(&ri, 0, sizeof(ri));
@@ -1325,15 +1326,18 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
         * end up here trying to GC the *same* page that jffs2_write_begin() is
         * trying to write out, read_cache_page() will not deadlock. */
        mutex_unlock(&f->sem);
-       pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
-       mutex_lock(&f->sem);
-
-       if (IS_ERR(pg_ptr)) {
+       page = read_cache_page(inode->i_mapping, start >> PAGE_SHIFT,
+                              jffs2_do_readpage_unlock, inode);
+       if (IS_ERR(page)) {
                pr_warn("read_cache_page() returned error: %ld\n",
-                       PTR_ERR(pg_ptr));
-               return PTR_ERR(pg_ptr);
+                       PTR_ERR(page));
+               mutex_lock(&f->sem);
+               return PTR_ERR(page);
        }
 
+       pg_ptr = kmap(page);
+       mutex_lock(&f->sem);
+
        offset = start;
        while(offset < orig_end) {
                uint32_t datalen;
@@ -1396,6 +1400,7 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
                }
        }
 
-       jffs2_gc_release_page(c, pg_ptr, &pg);
+       kunmap(page);
+       put_page(page);
        return ret;
 }
index b86c78d..021a4a2 100644 (file)
@@ -226,7 +226,7 @@ static int jffs2_add_frag_to_fragtree(struct jffs2_sb_info *c, struct rb_root *r
                lastend = this->ofs + this->size;
        } else {
                dbg_fragtree2("lookup gave no frag\n");
-               lastend = 0;
+               return -EINVAL;
        }
 
        /* See if we ran off the end of the fragtree */
index 21071fc..ef1cfa6 100644 (file)
@@ -183,9 +183,6 @@ unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c,
                                   struct jffs2_inode_info *f,
                                   unsigned long offset,
                                   unsigned long *priv);
-void jffs2_gc_release_page(struct jffs2_sb_info *c,
-                          unsigned char *pg,
-                          unsigned long *priv);
 void jffs2_flash_cleanup(struct jffs2_sb_info *c);
 
 
index 90431dd..5f7e284 100644 (file)
@@ -527,8 +527,11 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
                                        err = jffs2_fill_scan_buf(c, sumptr, 
                                                                  jeb->offset + c->sector_size - sumlen,
                                                                  sumlen - buf_len);                            
-                                       if (err)
+                                       if (err) {
+                                               if (sumlen > buf_size)
+                                                       kfree(sumptr);
                                                return err;
+                                       }
                                }
                        }
 
index cbe7063..0e6406c 100644 (file)
@@ -163,13 +163,11 @@ static const struct export_operations jffs2_export_ops = {
  * Opt_rp_size: size of reserved pool in KiB
  */
 enum {
-       Opt_source,
        Opt_override_compr,
        Opt_rp_size,
 };
 
 static const struct fs_parameter_spec jffs2_param_specs[] = {
-       fsparam_string  ("source",      Opt_source),
        fsparam_enum    ("compr",       Opt_override_compr),
        fsparam_u32     ("rp_size",     Opt_rp_size),
        {}
index a364ebc..6970f55 100644 (file)
@@ -212,6 +212,7 @@ struct file_lock_list_struct {
 static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
 DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
 
+
 /*
  * The blocked_hash is used to find POSIX lock loops for deadlock detection.
  * It is protected by blocked_lock_lock.
@@ -1991,6 +1992,64 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
 }
 EXPORT_SYMBOL(generic_setlease);
 
+#if IS_ENABLED(CONFIG_SRCU)
+/*
+ * Kernel subsystems can register to be notified on any attempt to set
+ * a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
+ * to close files that it may have cached when there is an attempt to set a
+ * conflicting lease.
+ */
+static struct srcu_notifier_head lease_notifier_chain;
+
+static inline void
+lease_notifier_chain_init(void)
+{
+       srcu_init_notifier_head(&lease_notifier_chain);
+}
+
+static inline void
+setlease_notifier(long arg, struct file_lock *lease)
+{
+       if (arg != F_UNLCK)
+               srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
+}
+
+int lease_register_notifier(struct notifier_block *nb)
+{
+       return srcu_notifier_chain_register(&lease_notifier_chain, nb);
+}
+EXPORT_SYMBOL_GPL(lease_register_notifier);
+
+void lease_unregister_notifier(struct notifier_block *nb)
+{
+       srcu_notifier_chain_unregister(&lease_notifier_chain, nb);
+}
+EXPORT_SYMBOL_GPL(lease_unregister_notifier);
+
+#else /* !IS_ENABLED(CONFIG_SRCU) */
+static inline void
+lease_notifier_chain_init(void)
+{
+}
+
+static inline void
+setlease_notifier(long arg, struct file_lock *lease)
+{
+}
+
+int lease_register_notifier(struct notifier_block *nb)
+{
+       return 0;
+}
+EXPORT_SYMBOL_GPL(lease_register_notifier);
+
+void lease_unregister_notifier(struct notifier_block *nb)
+{
+}
+EXPORT_SYMBOL_GPL(lease_unregister_notifier);
+
+#endif /* IS_ENABLED(CONFIG_SRCU) */
+
 /**
  * vfs_setlease        -       sets a lease on an open file
  * @filp:      file pointer
@@ -2011,6 +2070,8 @@ EXPORT_SYMBOL(generic_setlease);
 int
 vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
 {
+       if (lease)
+               setlease_notifier(arg, *lease);
        if (filp->f_op->setlease)
                return filp->f_op->setlease(filp, arg, lease, priv);
        else
@@ -2924,6 +2985,7 @@ static int __init filelock_init(void)
                INIT_HLIST_HEAD(&fll->hlist);
        }
 
+       lease_notifier_chain_init();
        return 0;
 }
 core_initcall(filelock_init);
index 93c0432..fe0e9e1 100644 (file)
@@ -2802,8 +2802,6 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
                                put_filesystem(type);
                                return -EINVAL;
                        }
-               } else {
-                       subtype = "";
                }
        }
 
@@ -3028,7 +3026,7 @@ void *copy_mount_options(const void __user * data)
         * the remainder of the page.
         */
        /* copy_from_user cannot cross TASK_SIZE ! */
-       size = TASK_SIZE - (unsigned long)data;
+       size = TASK_SIZE - (unsigned long)untagged_addr(data);
        if (size > PAGE_SIZE)
                size = PAGE_SIZE;
 
index 0adfd88..e180033 100644 (file)
@@ -1669,10 +1669,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 
 #endif /* CONFIG_NFSV4 */
 
-/*
- * Code common to create, mkdir, and mknod.
- */
-int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
+struct dentry *
+nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
                                struct nfs_fattr *fattr,
                                struct nfs4_label *label)
 {
@@ -1680,13 +1678,10 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
        struct inode *dir = d_inode(parent);
        struct inode *inode;
        struct dentry *d;
-       int error = -EACCES;
+       int error;
 
        d_drop(dentry);
 
-       /* We may have been initialized further down */
-       if (d_really_is_positive(dentry))
-               goto out;
        if (fhandle->size == 0) {
                error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL);
                if (error)
@@ -1702,18 +1697,32 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
        }
        inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
        d = d_splice_alias(inode, dentry);
-       if (IS_ERR(d)) {
-               error = PTR_ERR(d);
-               goto out_error;
-       }
-       dput(d);
 out:
        dput(parent);
-       return 0;
+       return d;
 out_error:
        nfs_mark_for_revalidate(dir);
-       dput(parent);
-       return error;
+       d = ERR_PTR(error);
+       goto out;
+}
+EXPORT_SYMBOL_GPL(nfs_add_or_obtain);
+
+/*
+ * Code common to create, mkdir, and mknod.
+ */
+int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
+                               struct nfs_fattr *fattr,
+                               struct nfs4_label *label)
+{
+       struct dentry *d;
+
+       d = nfs_add_or_obtain(dentry, fhandle, fattr, label);
+       if (IS_ERR(d))
+               return PTR_ERR(d);
+
+       /* Callers don't need the returned dentry; drop our reference */
+       dput(d);
+       return 0;
 }
 EXPORT_SYMBOL_GPL(nfs_instantiate);
 
index 3cb073c..c9b605f 100644 (file)
@@ -1164,6 +1164,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
        .id                     = LAYOUT_NFSV4_1_FILES,
        .name                   = "LAYOUT_NFSV4_1_FILES",
        .owner                  = THIS_MODULE,
+       .flags                  = PNFS_LAYOUTGET_ON_OPEN,
        .max_layoutget_response = 4096, /* 1 page or so... */
        .alloc_layout_hdr       = filelayout_alloc_layout_hdr,
        .free_layout_hdr        = filelayout_free_layout_hdr,
index e64f810..447a3c1 100644 (file)
@@ -16,14 +16,6 @@ extern const struct export_operations nfs_export_ops;
 
 struct nfs_string;
 
-/* Maximum number of readahead requests
- * FIXME: this should really be a sysctl so that users may tune it to suit
- *        their needs. People that do NFS over a slow network, might for
- *        instance want to reduce it to something closer to 1 for improved
- *        interactive response.
- */
-#define NFS_MAX_READAHEAD      (RPC_DEF_SLOT_TABLE - 1)
-
 static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr)
 {
        if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid))
index a3ad2d4..9eb2f1a 100644 (file)
@@ -279,15 +279,17 @@ static struct nfs3_createdata *nfs3_alloc_createdata(void)
        return data;
 }
 
-static int nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_createdata *data)
+static struct dentry *
+nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_createdata *data)
 {
        int status;
 
        status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0);
        nfs_post_op_update_inode(dir, data->res.dir_attr);
-       if (status == 0)
-               status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL);
-       return status;
+       if (status != 0)
+               return ERR_PTR(status);
+
+       return nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr, NULL);
 }
 
 static void nfs3_free_createdata(struct nfs3_createdata *data)
@@ -304,6 +306,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 {
        struct posix_acl *default_acl, *acl;
        struct nfs3_createdata *data;
+       struct dentry *d_alias;
        int status = -ENOMEM;
 
        dprintk("NFS call  create %pd\n", dentry);
@@ -330,7 +333,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                goto out;
 
        for (;;) {
-               status = nfs3_do_create(dir, dentry, data);
+               d_alias = nfs3_do_create(dir, dentry, data);
+               status = PTR_ERR_OR_ZERO(d_alias);
 
                if (status != -ENOTSUPP)
                        break;
@@ -355,6 +359,9 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
        if (status != 0)
                goto out_release_acls;
 
+       if (d_alias)
+               dentry = d_alias;
+
        /* When we created the file with exclusive semantics, make
         * sure we set the attributes afterwards. */
        if (data->arg.create.createmode == NFS3_CREATE_EXCLUSIVE) {
@@ -372,11 +379,13 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                nfs_post_op_update_inode(d_inode(dentry), data->res.fattr);
                dprintk("NFS reply setattr (post-create): %d\n", status);
                if (status != 0)
-                       goto out_release_acls;
+                       goto out_dput;
        }
 
        status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
 
+out_dput:
+       dput(d_alias);
 out_release_acls:
        posix_acl_release(acl);
        posix_acl_release(default_acl);
@@ -504,6 +513,7 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
                  unsigned int len, struct iattr *sattr)
 {
        struct nfs3_createdata *data;
+       struct dentry *d_alias;
        int status = -ENOMEM;
 
        if (len > NFS3_MAXPATHLEN)
@@ -522,7 +532,11 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
        data->arg.symlink.pathlen = len;
        data->arg.symlink.sattr = sattr;
 
-       status = nfs3_do_create(dir, dentry, data);
+       d_alias = nfs3_do_create(dir, dentry, data);
+       status = PTR_ERR_OR_ZERO(d_alias);
+
+       if (status == 0)
+               dput(d_alias);
 
        nfs3_free_createdata(data);
 out:
@@ -535,6 +549,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 {
        struct posix_acl *default_acl, *acl;
        struct nfs3_createdata *data;
+       struct dentry *d_alias;
        int status = -ENOMEM;
 
        dprintk("NFS call  mkdir %pd\n", dentry);
@@ -553,12 +568,18 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
        data->arg.mkdir.len = dentry->d_name.len;
        data->arg.mkdir.sattr = sattr;
 
-       status = nfs3_do_create(dir, dentry, data);
+       d_alias = nfs3_do_create(dir, dentry, data);
+       status = PTR_ERR_OR_ZERO(d_alias);
+
        if (status != 0)
                goto out_release_acls;
 
+       if (d_alias)
+               dentry = d_alias;
+
        status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
 
+       dput(d_alias);
 out_release_acls:
        posix_acl_release(acl);
        posix_acl_release(default_acl);
@@ -660,6 +681,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 {
        struct posix_acl *default_acl, *acl;
        struct nfs3_createdata *data;
+       struct dentry *d_alias;
        int status = -ENOMEM;
 
        dprintk("NFS call  mknod %pd %u:%u\n", dentry,
@@ -698,12 +720,17 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                goto out;
        }
 
-       status = nfs3_do_create(dir, dentry, data);
+       d_alias = nfs3_do_create(dir, dentry, data);
+       status = PTR_ERR_OR_ZERO(d_alias);
        if (status != 0)
                goto out_release_acls;
 
+       if (d_alias)
+               dentry = d_alias;
+
        status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
 
+       dput(d_alias);
 out_release_acls:
        posix_acl_release(acl);
        posix_acl_release(default_acl);
index 3564da1..16b2e5c 100644 (file)
@@ -491,8 +491,6 @@ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern int nfs4_select_rw_stateid(struct nfs4_state *, fmode_t,
                const struct nfs_lock_context *, nfs4_stateid *,
                const struct cred **);
-extern bool nfs4_refresh_open_stateid(nfs4_stateid *dst,
-               struct nfs4_state *state);
 extern bool nfs4_copy_open_stateid(nfs4_stateid *dst,
                struct nfs4_state *state);
 
@@ -574,6 +572,15 @@ static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stat
        return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0;
 }
 
+static inline void nfs4_stateid_seqid_inc(nfs4_stateid *s1)
+{
+       u32 seqid = be32_to_cpu(s1->seqid);
+
+       if (++seqid == 0)
+               ++seqid;
+       s1->seqid = cpu_to_be32(seqid);
+}
+
 static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
 {
        return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
index 1406858..11eafcf 100644 (file)
@@ -1073,14 +1073,26 @@ static const struct rpc_call_ops nfs40_call_sync_ops = {
        .rpc_call_done = nfs40_call_sync_done,
 };
 
+static int nfs4_call_sync_custom(struct rpc_task_setup *task_setup)
+{
+       int ret;
+       struct rpc_task *task;
+
+       task = rpc_run_task(task_setup);
+       if (IS_ERR(task))
+               return PTR_ERR(task);
+
+       ret = task->tk_status;
+       rpc_put_task(task);
+       return ret;
+}
+
 static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
                                   struct nfs_server *server,
                                   struct rpc_message *msg,
                                   struct nfs4_sequence_args *args,
                                   struct nfs4_sequence_res *res)
 {
-       int ret;
-       struct rpc_task *task;
        struct nfs_client *clp = server->nfs_client;
        struct nfs4_call_sync_data data = {
                .seq_server = server,
@@ -1094,14 +1106,7 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
                .callback_data = &data
        };
 
-       task = rpc_run_task(&task_setup);
-       if (IS_ERR(task))
-               ret = PTR_ERR(task);
-       else {
-               ret = task->tk_status;
-               rpc_put_task(task);
-       }
-       return ret;
+       return nfs4_call_sync_custom(&task_setup);
 }
 
 int nfs4_call_sync(struct rpc_clnt *clnt,
@@ -3308,6 +3313,75 @@ nfs4_wait_on_layoutreturn(struct inode *inode, struct rpc_task *task)
        return pnfs_wait_on_layoutreturn(inode, task);
 }
 
+/*
+ * Update the seqid of an open stateid
+ */
+static void nfs4_sync_open_stateid(nfs4_stateid *dst,
+               struct nfs4_state *state)
+{
+       __be32 seqid_open;
+       u32 dst_seqid;
+       int seq;
+
+       for (;;) {
+               if (!nfs4_valid_open_stateid(state))
+                       break;
+               seq = read_seqbegin(&state->seqlock);
+               if (!nfs4_state_match_open_stateid_other(state, dst)) {
+                       nfs4_stateid_copy(dst, &state->open_stateid);
+                       if (read_seqretry(&state->seqlock, seq))
+                               continue;
+                       break;
+               }
+               seqid_open = state->open_stateid.seqid;
+               if (read_seqretry(&state->seqlock, seq))
+                       continue;
+
+               dst_seqid = be32_to_cpu(dst->seqid);
+               if ((s32)(dst_seqid - be32_to_cpu(seqid_open)) < 0)
+                       dst->seqid = seqid_open;
+               break;
+       }
+}
+
+/*
+ * Update the seqid of an open stateid after receiving
+ * NFS4ERR_OLD_STATEID
+ */
+static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst,
+               struct nfs4_state *state)
+{
+       __be32 seqid_open;
+       u32 dst_seqid;
+       bool ret;
+       int seq;
+
+       for (;;) {
+               ret = false;
+               if (!nfs4_valid_open_stateid(state))
+                       break;
+               seq = read_seqbegin(&state->seqlock);
+               if (!nfs4_state_match_open_stateid_other(state, dst)) {
+                       if (read_seqretry(&state->seqlock, seq))
+                               continue;
+                       break;
+               }
+               seqid_open = state->open_stateid.seqid;
+               if (read_seqretry(&state->seqlock, seq))
+                       continue;
+
+               dst_seqid = be32_to_cpu(dst->seqid);
+               if ((s32)(dst_seqid - be32_to_cpu(seqid_open)) >= 0)
+                       dst->seqid = cpu_to_be32(dst_seqid + 1);
+               else
+                       dst->seqid = seqid_open;
+               ret = true;
+               break;
+       }
+
+       return ret;
+}
+
 struct nfs4_closedata {
        struct inode *inode;
        struct nfs4_state *state;
@@ -3358,32 +3432,11 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
        trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status);
 
        /* Handle Layoutreturn errors */
-       if (calldata->arg.lr_args && task->tk_status != 0) {
-               switch (calldata->res.lr_ret) {
-               default:
-                       calldata->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
-                       break;
-               case 0:
-                       calldata->arg.lr_args = NULL;
-                       calldata->res.lr_res = NULL;
-                       break;
-               case -NFS4ERR_OLD_STATEID:
-                       if (nfs4_layoutreturn_refresh_stateid(&calldata->arg.lr_args->stateid,
-                                               &calldata->arg.lr_args->range,
-                                               calldata->inode))
-                               goto lr_restart;
-                       /* Fallthrough */
-               case -NFS4ERR_ADMIN_REVOKED:
-               case -NFS4ERR_DELEG_REVOKED:
-               case -NFS4ERR_EXPIRED:
-               case -NFS4ERR_BAD_STATEID:
-               case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
-               case -NFS4ERR_WRONG_CRED:
-                       calldata->arg.lr_args = NULL;
-                       calldata->res.lr_res = NULL;
-                       goto lr_restart;
-               }
-       }
+       if (pnfs_roc_done(task, calldata->inode,
+                               &calldata->arg.lr_args,
+                               &calldata->res.lr_res,
+                               &calldata->res.lr_ret) == -EAGAIN)
+               goto out_restart;
 
        /* hmm. we are done with the inode, and in the process of freeing
         * the state_owner. we keep this around to process errors
@@ -3403,7 +3456,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
                        break;
                case -NFS4ERR_OLD_STATEID:
                        /* Did we race with OPEN? */
-                       if (nfs4_refresh_open_stateid(&calldata->arg.stateid,
+                       if (nfs4_refresh_open_old_stateid(&calldata->arg.stateid,
                                                state))
                                goto out_restart;
                        goto out_release;
@@ -3415,7 +3468,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
                                        task->tk_msg.rpc_cred);
                        /* Fallthrough */
                case -NFS4ERR_BAD_STATEID:
-                       break;
+                       if (calldata->arg.fmode == 0)
+                               break;
+                       /* Fallthrough */
                default:
                        task->tk_status = nfs4_async_handle_exception(task,
                                        server, task->tk_status, &exception);
@@ -3430,8 +3485,6 @@ out_release:
        nfs_refresh_inode(calldata->inode, &calldata->fattr);
        dprintk("%s: done, ret = %d!\n", __func__, task->tk_status);
        return;
-lr_restart:
-       calldata->res.lr_ret = 0;
 out_restart:
        task->tk_status = 0;
        rpc_restart_call_prepare(task);
@@ -3472,8 +3525,8 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
        } else if (is_rdwr)
                calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
 
-       if (!nfs4_valid_open_stateid(state) ||
-           !nfs4_refresh_open_stateid(&calldata->arg.stateid, state))
+       nfs4_sync_open_stateid(&calldata->arg.stateid, state);
+       if (!nfs4_valid_open_stateid(state))
                call_close = 0;
        spin_unlock(&state->owner->so_lock);
 
@@ -6018,7 +6071,6 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
                .rpc_resp = res,
                .rpc_cred = cred,
        };
-       struct rpc_task *task;
        struct rpc_task_setup task_setup_data = {
                .rpc_client = clp->cl_rpcclient,
                .rpc_message = &msg,
@@ -6051,17 +6103,12 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
        dprintk("NFS call  setclientid auth=%s, '%s'\n",
                clp->cl_rpcclient->cl_auth->au_ops->au_name,
                clp->cl_owner_id);
-       task = rpc_run_task(&task_setup_data);
-       if (IS_ERR(task)) {
-               status = PTR_ERR(task);
-               goto out;
-       }
-       status = task->tk_status;
+
+       status = nfs4_call_sync_custom(&task_setup_data);
        if (setclientid.sc_cred) {
                clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred);
                put_rpccred(setclientid.sc_cred);
        }
-       rpc_put_task(task);
 out:
        trace_nfs4_setclientid(clp, status);
        dprintk("NFS reply setclientid: %d\n", status);
@@ -6129,32 +6176,11 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
        trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status);
 
        /* Handle Layoutreturn errors */
-       if (data->args.lr_args && task->tk_status != 0) {
-               switch(data->res.lr_ret) {
-               default:
-                       data->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
-                       break;
-               case 0:
-                       data->args.lr_args = NULL;
-                       data->res.lr_res = NULL;
-                       break;
-               case -NFS4ERR_OLD_STATEID:
-                       if (nfs4_layoutreturn_refresh_stateid(&data->args.lr_args->stateid,
-                                               &data->args.lr_args->range,
-                                               data->inode))
-                               goto lr_restart;
-                       /* Fallthrough */
-               case -NFS4ERR_ADMIN_REVOKED:
-               case -NFS4ERR_DELEG_REVOKED:
-               case -NFS4ERR_EXPIRED:
-               case -NFS4ERR_BAD_STATEID:
-               case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
-               case -NFS4ERR_WRONG_CRED:
-                       data->args.lr_args = NULL;
-                       data->res.lr_res = NULL;
-                       goto lr_restart;
-               }
-       }
+       if (pnfs_roc_done(task, data->inode,
+                               &data->args.lr_args,
+                               &data->res.lr_res,
+                               &data->res.lr_ret) == -EAGAIN)
+               goto out_restart;
 
        switch (task->tk_status) {
        case 0:
@@ -6192,8 +6218,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
        }
        data->rpc_status = task->tk_status;
        return;
-lr_restart:
-       data->res.lr_ret = 0;
 out_restart:
        task->tk_status = 0;
        rpc_restart_call_prepare(task);
@@ -6386,6 +6410,42 @@ static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *
        return err;
 }
 
+/*
+ * Update the seqid of a lock stateid after receiving
+ * NFS4ERR_OLD_STATEID
+ */
+static bool nfs4_refresh_lock_old_stateid(nfs4_stateid *dst,
+               struct nfs4_lock_state *lsp)
+{
+       struct nfs4_state *state = lsp->ls_state;
+       bool ret = false;
+
+       spin_lock(&state->state_lock);
+       if (!nfs4_stateid_match_other(dst, &lsp->ls_stateid))
+               goto out;
+       if (!nfs4_stateid_is_newer(&lsp->ls_stateid, dst))
+               nfs4_stateid_seqid_inc(dst);
+       else
+               dst->seqid = lsp->ls_stateid.seqid;
+       ret = true;
+out:
+       spin_unlock(&state->state_lock);
+       return ret;
+}
+
+static bool nfs4_sync_lock_stateid(nfs4_stateid *dst,
+               struct nfs4_lock_state *lsp)
+{
+       struct nfs4_state *state = lsp->ls_state;
+       bool ret;
+
+       spin_lock(&state->state_lock);
+       ret = !nfs4_stateid_match_other(dst, &lsp->ls_stateid);
+       nfs4_stateid_copy(dst, &lsp->ls_stateid);
+       spin_unlock(&state->state_lock);
+       return ret;
+}
+
 struct nfs4_unlockdata {
        struct nfs_locku_args arg;
        struct nfs_locku_res res;
@@ -6403,7 +6463,8 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
                struct nfs_seqid *seqid)
 {
        struct nfs4_unlockdata *p;
-       struct inode *inode = lsp->ls_state->inode;
+       struct nfs4_state *state = lsp->ls_state;
+       struct inode *inode = state->inode;
 
        p = kzalloc(sizeof(*p), GFP_NOFS);
        if (p == NULL)
@@ -6419,6 +6480,9 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
        locks_init_lock(&p->fl);
        locks_copy_lock(&p->fl, fl);
        p->server = NFS_SERVER(inode);
+       spin_lock(&state->state_lock);
+       nfs4_stateid_copy(&p->arg.stateid, &lsp->ls_stateid);
+       spin_unlock(&state->state_lock);
        return p;
 }
 
@@ -6457,10 +6521,14 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
                                        task->tk_msg.rpc_cred);
                        /* Fall through */
                case -NFS4ERR_BAD_STATEID:
-               case -NFS4ERR_OLD_STATEID:
                case -NFS4ERR_STALE_STATEID:
-                       if (!nfs4_stateid_match(&calldata->arg.stateid,
-                                               &calldata->lsp->ls_stateid))
+                       if (nfs4_sync_lock_stateid(&calldata->arg.stateid,
+                                               calldata->lsp))
+                               rpc_restart_call_prepare(task);
+                       break;
+               case -NFS4ERR_OLD_STATEID:
+                       if (nfs4_refresh_lock_old_stateid(&calldata->arg.stateid,
+                                               calldata->lsp))
                                rpc_restart_call_prepare(task);
                        break;
                default:
@@ -6483,7 +6551,6 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
 
        if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
                goto out_wait;
-       nfs4_stateid_copy(&calldata->arg.stateid, &calldata->lsp->ls_stateid);
        if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) {
                /* Note: exit _without_ running nfs4_locku_done */
                goto out_no_action;
@@ -7645,6 +7712,8 @@ int nfs4_proc_fsid_present(struct inode *inode, const struct cred *cred)
 static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors, bool use_integrity)
 {
        int status;
+       struct rpc_clnt *clnt = NFS_SERVER(dir)->client;
+       struct nfs_client *clp = NFS_SERVER(dir)->nfs_client;
        struct nfs4_secinfo_arg args = {
                .dir_fh = NFS_FH(dir),
                .name   = name,
@@ -7657,26 +7726,37 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
                .rpc_argp = &args,
                .rpc_resp = &res,
        };
-       struct rpc_clnt *clnt = NFS_SERVER(dir)->client;
+       struct nfs4_call_sync_data data = {
+               .seq_server = NFS_SERVER(dir),
+               .seq_args = &args.seq_args,
+               .seq_res = &res.seq_res,
+       };
+       struct rpc_task_setup task_setup = {
+               .rpc_client = clnt,
+               .rpc_message = &msg,
+               .callback_ops = clp->cl_mvops->call_sync_ops,
+               .callback_data = &data,
+               .flags = RPC_TASK_NO_ROUND_ROBIN,
+       };
        const struct cred *cred = NULL;
 
        if (use_integrity) {
-               clnt = NFS_SERVER(dir)->nfs_client->cl_rpcclient;
-               cred = nfs4_get_clid_cred(NFS_SERVER(dir)->nfs_client);
+               clnt = clp->cl_rpcclient;
+               task_setup.rpc_client = clnt;
+
+               cred = nfs4_get_clid_cred(clp);
                msg.rpc_cred = cred;
        }
 
        dprintk("NFS call  secinfo %s\n", name->name);
 
-       nfs4_state_protect(NFS_SERVER(dir)->nfs_client,
-               NFS_SP4_MACH_CRED_SECINFO, &clnt, &msg);
+       nfs4_state_protect(clp, NFS_SP4_MACH_CRED_SECINFO, &clnt, &msg);
+       nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
+       status = nfs4_call_sync_custom(&task_setup);
 
-       status = nfs4_call_sync(clnt, NFS_SERVER(dir), &msg, &args.seq_args,
-                               &res.seq_res, RPC_TASK_NO_ROUND_ROBIN);
        dprintk("NFS reply  secinfo: %d\n", status);
 
        put_cred(cred);
-
        return status;
 }
 
@@ -8344,7 +8424,6 @@ static const struct rpc_call_ops nfs4_get_lease_time_ops = {
 
 int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
 {
-       struct rpc_task *task;
        struct nfs4_get_lease_time_args args;
        struct nfs4_get_lease_time_res res = {
                .lr_fsinfo = fsinfo,
@@ -8366,17 +8445,9 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
                .callback_data = &data,
                .flags = RPC_TASK_TIMEOUT,
        };
-       int status;
 
        nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0, 1);
-       task = rpc_run_task(&task_setup);
-
-       if (IS_ERR(task))
-               return PTR_ERR(task);
-
-       status = task->tk_status;
-       rpc_put_task(task);
-       return status;
+       return nfs4_call_sync_custom(&task_setup);
 }
 
 #ifdef CONFIG_NFS_V4_1
@@ -8845,7 +8916,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp,
                const struct cred *cred)
 {
        struct nfs4_reclaim_complete_data *calldata;
-       struct rpc_task *task;
        struct rpc_message msg = {
                .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RECLAIM_COMPLETE],
                .rpc_cred = cred,
@@ -8854,7 +8924,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp,
                .rpc_client = clp->cl_rpcclient,
                .rpc_message = &msg,
                .callback_ops = &nfs4_reclaim_complete_call_ops,
-               .flags = RPC_TASK_ASYNC | RPC_TASK_NO_ROUND_ROBIN,
+               .flags = RPC_TASK_NO_ROUND_ROBIN,
        };
        int status = -ENOMEM;
 
@@ -8869,15 +8939,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp,
        msg.rpc_argp = &calldata->arg;
        msg.rpc_resp = &calldata->res;
        task_setup_data.callback_data = calldata;
-       task = rpc_run_task(&task_setup_data);
-       if (IS_ERR(task)) {
-               status = PTR_ERR(task);
-               goto out;
-       }
-       status = rpc_wait_for_completion_task(task);
-       if (status == 0)
-               status = task->tk_status;
-       rpc_put_task(task);
+       status = nfs4_call_sync_custom(&task_setup_data);
 out:
        dprintk("<-- %s status=%d\n", __func__, status);
        return status;
@@ -9103,10 +9165,19 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
        if (!nfs41_sequence_process(task, &lrp->res.seq_res))
                return;
 
+       /*
+        * Was there an RPC level error? Assume the call succeeded,
+        * and that we need to release the layout
+        */
+       if (task->tk_rpc_status != 0 && RPC_WAS_SENT(task)) {
+               lrp->res.lrs_present = 0;
+               return;
+       }
+
        server = NFS_SERVER(lrp->args.inode);
        switch (task->tk_status) {
        case -NFS4ERR_OLD_STATEID:
-               if (nfs4_layoutreturn_refresh_stateid(&lrp->args.stateid,
+               if (nfs4_layout_refresh_old_stateid(&lrp->args.stateid,
                                        &lrp->args.range,
                                        lrp->args.inode))
                        goto out_restart;
@@ -9362,18 +9433,32 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
                .rpc_resp = &res,
        };
        struct rpc_clnt *clnt = server->client;
+       struct nfs4_call_sync_data data = {
+               .seq_server = server,
+               .seq_args = &args.seq_args,
+               .seq_res = &res.seq_res,
+       };
+       struct rpc_task_setup task_setup = {
+               .rpc_client = server->client,
+               .rpc_message = &msg,
+               .callback_ops = server->nfs_client->cl_mvops->call_sync_ops,
+               .callback_data = &data,
+               .flags = RPC_TASK_NO_ROUND_ROBIN,
+       };
        const struct cred *cred = NULL;
        int status;
 
        if (use_integrity) {
                clnt = server->nfs_client->cl_rpcclient;
+               task_setup.rpc_client = clnt;
+
                cred = nfs4_get_clid_cred(server->nfs_client);
                msg.rpc_cred = cred;
        }
 
        dprintk("--> %s\n", __func__);
-       status = nfs4_call_sync(clnt, server, &msg, &args.seq_args,
-                               &res.seq_res, RPC_TASK_NO_ROUND_ROBIN);
+       nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
+       status = nfs4_call_sync_custom(&task_setup);
        dprintk("<-- %s status=%d\n", __func__, status);
 
        put_cred(cred);
index cad4e06..0c6d53d 100644 (file)
@@ -1015,22 +1015,6 @@ out:
        return ret;
 }
 
-bool nfs4_refresh_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
-{
-       bool ret;
-       int seq;
-
-       do {
-               ret = false;
-               seq = read_seqbegin(&state->seqlock);
-               if (nfs4_state_match_open_stateid_other(state, dst)) {
-                       dst->seqid = state->open_stateid.seqid;
-                       ret = true;
-               }
-       } while (read_seqretry(&state->seqlock, seq));
-       return ret;
-}
-
 bool nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
 {
        bool ret;
@@ -2095,8 +2079,10 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
        }
 
        status = nfs4_begin_drain_session(clp);
-       if (status != 0)
-               return status;
+       if (status != 0) {
+               result = status;
+               goto out;
+       }
 
        status = nfs4_replace_transport(server, locations);
        if (status != 0) {
index 46a8d63..ab07db0 100644 (file)
@@ -1174,7 +1174,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
                } else
                        *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
        }
-       if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) {
+       if (label && (bmval[2] & FATTR4_WORD2_SECURITY_LABEL)) {
                *p++ = cpu_to_be32(label->lfs);
                *p++ = cpu_to_be32(label->pi);
                *p++ = cpu_to_be32(label->len);
index 4525d5a..bb80034 100644 (file)
@@ -359,9 +359,10 @@ pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg,
 }
 
 /*
- * Update the seqid of a layout stateid
+ * Update the seqid of a layout stateid after receiving
+ * NFS4ERR_OLD_STATEID
  */
-bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
+bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
                struct pnfs_layout_range *dst_range,
                struct inode *inode)
 {
@@ -377,7 +378,15 @@ bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
 
        spin_lock(&inode->i_lock);
        lo = NFS_I(inode)->layout;
-       if (lo && nfs4_stateid_match_other(dst, &lo->plh_stateid)) {
+       if (lo &&  pnfs_layout_is_valid(lo) &&
+           nfs4_stateid_match_other(dst, &lo->plh_stateid)) {
+               /* Is our call using the most recent seqid? If so, bump it */
+               if (!nfs4_stateid_is_newer(&lo->plh_stateid, dst)) {
+                       nfs4_stateid_seqid_inc(dst);
+                       ret = true;
+                       goto out;
+               }
+               /* Try to update the seqid to the most recent */
                err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
                if (err != -EBUSY) {
                        dst->seqid = lo->plh_stateid.seqid;
@@ -385,6 +394,7 @@ bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
                        ret = true;
                }
        }
+out:
        spin_unlock(&inode->i_lock);
        pnfs_free_lseg_list(&head);
        return ret;
@@ -1440,6 +1450,52 @@ out_noroc:
        return false;
 }
 
+int pnfs_roc_done(struct rpc_task *task, struct inode *inode,
+               struct nfs4_layoutreturn_args **argpp,
+               struct nfs4_layoutreturn_res **respp,
+               int *ret)
+{
+       struct nfs4_layoutreturn_args *arg = *argpp;
+       int retval = -EAGAIN;
+
+       if (!arg)
+               return 0;
+       /* Handle Layoutreturn errors */
+       switch (*ret) {
+       case 0:
+               retval = 0;
+               break;
+       case -NFS4ERR_NOMATCHING_LAYOUT:
+               /* Was there an RPC level error? If not, retry */
+               if (task->tk_rpc_status == 0)
+                       break;
+               /* If the call was not sent, let caller handle it */
+               if (!RPC_WAS_SENT(task))
+                       return 0;
+               /*
+                * Otherwise, assume the call succeeded and
+                * that we need to release the layout
+                */
+               *ret = 0;
+               (*respp)->lrs_present = 0;
+               retval = 0;
+               break;
+       case -NFS4ERR_DELAY:
+               /* Let the caller handle the retry */
+               *ret = -NFS4ERR_NOMATCHING_LAYOUT;
+               return 0;
+       case -NFS4ERR_OLD_STATEID:
+               if (!nfs4_layout_refresh_old_stateid(&arg->stateid,
+                                       &arg->range, inode))
+                       break;
+               *ret = -NFS4ERR_NOMATCHING_LAYOUT;
+               return -EAGAIN;
+       }
+       *argpp = NULL;
+       *respp = NULL;
+       return retval;
+}
+
 void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
                struct nfs4_layoutreturn_res *res,
                int ret)
@@ -1449,10 +1505,15 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
        const nfs4_stateid *res_stateid = NULL;
        struct nfs4_xdr_opaque_data *ld_private = args->ld_private;
 
-       if (ret == 0) {
-               arg_stateid = &args->stateid;
+       switch (ret) {
+       case -NFS4ERR_NOMATCHING_LAYOUT:
+               break;
+       case 0:
                if (res->lrs_present)
                        res_stateid = &res->stateid;
+               /* Fallthrough */
+       default:
+               arg_stateid = &args->stateid;
        }
        pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range,
                        res_stateid);
index f15609c..f8a3806 100644 (file)
@@ -261,7 +261,7 @@ int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
                bool is_recall);
 int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
                bool is_recall);
-bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
+bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
                struct pnfs_layout_range *dst_range,
                struct inode *inode);
 void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
@@ -282,6 +282,10 @@ bool pnfs_roc(struct inode *ino,
                struct nfs4_layoutreturn_args *args,
                struct nfs4_layoutreturn_res *res,
                const struct cred *cred);
+int pnfs_roc_done(struct rpc_task *task, struct inode *inode,
+               struct nfs4_layoutreturn_args **argpp,
+               struct nfs4_layoutreturn_res **respp,
+               int *ret);
 void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
                struct nfs4_layoutreturn_res *res,
                int ret);
@@ -701,6 +705,15 @@ pnfs_roc(struct inode *ino,
        return false;
 }
 
+static inline int
+pnfs_roc_done(struct rpc_task *task, struct inode *inode,
+               struct nfs4_layoutreturn_args **argpp,
+               struct nfs4_layoutreturn_res **respp,
+               int *ret)
+{
+       return 0;
+}
+
 static inline void
 pnfs_roc_release(struct nfs4_layoutreturn_args *args,
                struct nfs4_layoutreturn_res *res,
@@ -785,7 +798,7 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void)
 {
 }
 
-static inline bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
+static inline bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
                struct pnfs_layout_range *dst_range,
                struct inode *inode)
 {
index 19a76cf..a84df7d 100644 (file)
@@ -2645,6 +2645,13 @@ int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
 }
 EXPORT_SYMBOL_GPL(nfs_clone_sb_security);
 
+static void nfs_set_readahead(struct backing_dev_info *bdi,
+                             unsigned long iomax_pages)
+{
+       bdi->ra_pages = VM_READAHEAD_PAGES;
+       bdi->io_pages = iomax_pages;
+}
+
 struct dentry *nfs_fs_mount_common(struct nfs_server *server,
                                   int flags, const char *dev_name,
                                   struct nfs_mount_info *mount_info,
@@ -2687,7 +2694,7 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
                        mntroot = ERR_PTR(error);
                        goto error_splat_super;
                }
-               s->s_bdi->ra_pages = server->rpages * NFS_MAX_READAHEAD;
+               nfs_set_readahead(s->s_bdi, server->rpages);
                server->super = s;
        }
 
index d25f6bb..10cefb0 100644 (file)
@@ -3,6 +3,7 @@ config NFSD
        tristate "NFS server support"
        depends on INET
        depends on FILE_LOCKING
+       depends on FSNOTIFY
        select LOCKD
        select SUNRPC
        select EXPORTFS
@@ -147,7 +148,7 @@ config NFSD_V4_SECURITY_LABEL
 
 config NFSD_FAULT_INJECTION
        bool "NFS server manual fault injection"
-       depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS
+       depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS && BROKEN
        help
          This option enables support for manually injecting faults
          into the NFS server.  This is intended to be used for
index 2bfb58e..6a40b1a 100644 (file)
@@ -11,7 +11,8 @@ obj-$(CONFIG_NFSD)    += nfsd.o
 nfsd-y                 += trace.o
 
 nfsd-y                         += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
-                          export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+                          export.o auth.o lockd.o nfscache.o nfsxdr.o \
+                          stats.o filecache.o
 nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
 nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
 nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
index 4cd7c69..ba14d2f 100644 (file)
@@ -39,14 +39,6 @@ struct nfs4_acl;
 struct svc_fh;
 struct svc_rqst;
 
-/*
- * Maximum ACL we'll accept from a client; chosen (somewhat
- * arbitrarily) so that kmalloc'ing the ACL shouldn't require a
- * high-order allocation.  This allows 204 ACEs on x86_64:
- */
-#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \
-                       / sizeof(struct nfs4_ace))
-
 int nfs4_acl_bytes(int entries);
 int nfs4_acl_get_whotype(char *, u32);
 __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
index 66d4c55..9bbaa67 100644 (file)
@@ -15,6 +15,7 @@
 
 #include "blocklayoutxdr.h"
 #include "pnfs.h"
+#include "filecache.h"
 
 #define NFSDDBG_FACILITY       NFSDDBG_PNFS
 
@@ -404,7 +405,7 @@ static void
 nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
 {
        struct nfs4_client *clp = ls->ls_stid.sc_client;
-       struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev;
+       struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev;
 
        bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
                        nfsd4_scsi_pr_key(clp), 0, true);
index baa0195..15422c9 100644 (file)
@@ -22,6 +22,7 @@
 #include "nfsfh.h"
 #include "netns.h"
 #include "pnfs.h"
+#include "filecache.h"
 
 #define NFSDDBG_FACILITY       NFSDDBG_EXPORT
 
@@ -232,6 +233,17 @@ static struct cache_head *expkey_alloc(void)
                return NULL;
 }
 
+static void expkey_flush(void)
+{
+       /*
+        * Take the nfsd_mutex here to ensure that the file cache is not
+        * destroyed while we're in the middle of flushing.
+        */
+       mutex_lock(&nfsd_mutex);
+       nfsd_file_cache_purge(current->nsproxy->net_ns);
+       mutex_unlock(&nfsd_mutex);
+}
+
 static const struct cache_detail svc_expkey_cache_template = {
        .owner          = THIS_MODULE,
        .hash_size      = EXPKEY_HASHMAX,
@@ -244,6 +256,7 @@ static const struct cache_detail svc_expkey_cache_template = {
        .init           = expkey_init,
        .update         = expkey_update,
        .alloc          = expkey_alloc,
+       .flush          = expkey_flush,
 };
 
 static int
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
new file mode 100644 (file)
index 0000000..ef55e9b
--- /dev/null
@@ -0,0 +1,934 @@
+/*
+ * Open file cache.
+ *
+ * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
+ */
+
+#include <linux/hash.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/sched.h>
+#include <linux/list_lru.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/fsnotify.h>
+#include <linux/seq_file.h>
+
+#include "vfs.h"
+#include "nfsd.h"
+#include "nfsfh.h"
+#include "netns.h"
+#include "filecache.h"
+#include "trace.h"
+
+#define NFSDDBG_FACILITY       NFSDDBG_FH
+
+/* FIXME: dynamically size this for the machine somehow? */
+#define NFSD_FILE_HASH_BITS                   12
+#define NFSD_FILE_HASH_SIZE                  (1 << NFSD_FILE_HASH_BITS)
+/* Delay used when scheduling the laundrette while at or below the LRU threshold */
+#define NFSD_LAUNDRETTE_DELAY               (2 * HZ)
+
+/* Bit numbers within nfsd_file_lru_flags */
+#define NFSD_FILE_LRU_RESCAN                (0)
+#define NFSD_FILE_SHUTDOWN                  (1)
+/*
+ * Entry counts checked by nfsd_file_schedule_laundrette(): above the
+ * threshold the laundrette is queued with no delay; at the limit (four
+ * times the threshold) callers that allow it also flush the work.
+ */
+#define NFSD_FILE_LRU_THRESHOLD                     (4096UL)
+#define NFSD_FILE_LRU_LIMIT                 (NFSD_FILE_LRU_THRESHOLD << 2)
+
+/* We only care about NFSD_MAY_READ/WRITE for this cache */
+#define NFSD_FILE_MAY_MASK     (NFSD_MAY_READ|NFSD_MAY_WRITE)
+
+/*
+ * One bucket of the nfsd_file hashtable.
+ */
+struct nfsd_fcache_bucket {
+       /* chain of nfsd_file entries, linked through nf_node */
+       struct hlist_head       nfb_head;
+       /* held while entries are added to or removed from the chain */
+       spinlock_t              nfb_lock;
+       /* number of entries currently on the chain */
+       unsigned int            nfb_count;
+       /* largest nfb_count recorded at insertion time */
+       unsigned int            nfb_maxcount;
+};
+
+/* Per-cpu count of nfsd_file_acquire() calls that found a usable cached entry */
+static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
+
+/* Slab caches for struct nfsd_file and struct nfsd_file_mark */
+static struct kmem_cache               *nfsd_file_slab;
+static struct kmem_cache               *nfsd_file_mark_slab;
+/* Hashtable of NFSD_FILE_HASH_SIZE buckets; NULL while the cache is not set up */
+static struct nfsd_fcache_bucket       *nfsd_file_hashtbl;
+/* LRU of hashed entries, walked by the shrinker and the laundrette */
+static struct list_lru                 nfsd_file_lru;
+/* NFSD_FILE_LRU_RESCAN / NFSD_FILE_SHUTDOWN bits */
+static long                            nfsd_file_lru_flags;
+/* fsnotify group that owns every nfsd_file_mark */
+static struct fsnotify_group           *nfsd_file_fsnotify_group;
+/* Number of entries currently in the hashtable */
+static atomic_long_t                   nfsd_filecache_count;
+/* Delayed work running nfsd_file_delayed_close() */
+static struct delayed_work             nfsd_filecache_laundrette;
+
+/*
+ * Tells nfsd_file_schedule_laundrette() whether it is allowed to flush the
+ * laundrette work once the cache has reached NFSD_FILE_LRU_LIMIT entries.
+ */
+enum nfsd_file_laundrette_ctl {
+       NFSD_FILE_LAUNDRETTE_NOFLUSH = 0,
+       NFSD_FILE_LAUNDRETTE_MAY_FLUSH
+};
+
+/*
+ * Queue the laundrette work that closes unused cache entries.
+ *
+ * Does nothing when the cache is empty or NFSD_FILE_SHUTDOWN is set. With
+ * more than NFSD_FILE_LRU_THRESHOLD entries the work is (re)queued with no
+ * delay, otherwise it is scheduled after NFSD_LAUNDRETTE_DELAY. If @ctl is
+ * NFSD_FILE_LAUNDRETTE_MAY_FLUSH and the count has reached
+ * NFSD_FILE_LRU_LIMIT, the work is additionally flushed.
+ */
+static void
+nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl)
+{
+       long count = atomic_long_read(&nfsd_filecache_count);
+
+       if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
+               return;
+
+       /* Be more aggressive about scanning if over the threshold */
+       if (count > NFSD_FILE_LRU_THRESHOLD)
+               mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0);
+       else
+               schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY);
+
+       if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH)
+               return;
+
+       /* ...and don't delay flushing if we're out of control */
+       if (count >= NFSD_FILE_LRU_LIMIT)
+               flush_delayed_work(&nfsd_filecache_laundrette);
+}
+
+/*
+ * RCU callback: drop the cred reference held by the nfsd_file that embeds
+ * @rcu and hand the object back to nfsd_file_slab.
+ */
+static void
+nfsd_file_slab_free(struct rcu_head *rcu)
+{
+       struct nfsd_file *victim;
+
+       victim = container_of(rcu, struct nfsd_file, nf_rcu);
+       put_cred(victim->nf_cred);
+       kmem_cache_free(nfsd_file_slab, victim);
+}
+
+/*
+ * fsnotify ->free_mark handler: release the nfsd_file_mark that embeds @mark.
+ */
+static void
+nfsd_file_mark_free(struct fsnotify_mark *mark)
+{
+       kmem_cache_free(nfsd_file_mark_slab,
+                       container_of(mark, struct nfsd_file_mark, nfm_mark));
+}
+
+/*
+ * Take an nfm_ref on @nfm unless the count has already dropped to zero.
+ * Returns @nfm on success, NULL otherwise.
+ */
+static struct nfsd_file_mark *
+nfsd_file_mark_get(struct nfsd_file_mark *nfm)
+{
+       return atomic_inc_not_zero(&nfm->nfm_ref) ? nfm : NULL;
+}
+
+/*
+ * Drop an nfm_ref on @nfm. When the last one goes away, destroy the
+ * fsnotify mark and put the mark reference.
+ */
+static void
+nfsd_file_mark_put(struct nfsd_file_mark *nfm)
+{
+       if (!atomic_dec_and_test(&nfm->nfm_ref))
+               return;
+
+       fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
+       fsnotify_put_mark(&nfm->nfm_mark);
+}
+
+/*
+ * Find the nfsd_file_mark attached to nf->nf_inode for nfsd's fsnotify group
+ * and take an nfm_ref on it, or allocate and attach a new one.
+ *
+ * Returns the mark, or NULL if the allocation fails or the mark cannot be
+ * added for a reason other than -EEXIST. On -EEXIST the lookup is retried.
+ */
+static struct nfsd_file_mark *
+nfsd_file_mark_find_or_create(struct nfsd_file *nf)
+{
+       int                     err;
+       struct fsnotify_mark    *mark;
+       struct nfsd_file_mark   *nfm = NULL, *new;
+       struct inode *inode = nf->nf_inode;
+
+       do {
+               mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
+               mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
+                               nfsd_file_fsnotify_group);
+               if (mark) {
+                       /* nfm stays NULL if the existing mark's nfm_ref already hit zero */
+                       nfm = nfsd_file_mark_get(container_of(mark,
+                                                struct nfsd_file_mark,
+                                                nfm_mark));
+                       mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
+                       /* drop the reference returned by fsnotify_find_mark() */
+                       fsnotify_put_mark(mark);
+                       if (likely(nfm))
+                               break;
+               } else
+                       mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
+
+               /* allocate a new nfm */
+               new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
+               if (!new)
+                       return NULL;
+               fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
+               new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
+               atomic_set(&new->nfm_ref, 1);
+
+               err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);
+
+               /*
+                * If the add was successful, then return the object.
+                * Otherwise, we need to put the reference we hold on the
+                * nfm_mark. The fsnotify code will take a reference and put
+                * it on failure, so we can't just free it directly. It's also
+                * not safe to call fsnotify_destroy_mark on it as the
+                * mark->group will be NULL. Thus, we can't let the nfm_ref
+                * counter drive the destruction at this point.
+                */
+               if (likely(!err))
+                       nfm = new;
+               else
+                       fsnotify_put_mark(&new->nfm_mark);
+       } while (unlikely(err == -EEXIST));
+
+       return nfm;
+}
+
+/*
+ * Allocate and initialise a new, unhashed nfsd_file holding one reference
+ * and a reference to the current task's credentials. Returns NULL if the
+ * slab allocation fails.
+ */
+static struct nfsd_file *
+nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
+               struct net *net)
+{
+       struct nfsd_file *nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
+
+       if (!nf)
+               return NULL;
+
+       INIT_HLIST_NODE(&nf->nf_node);
+       INIT_LIST_HEAD(&nf->nf_lru);
+       nf->nf_file = NULL;
+       nf->nf_mark = NULL;
+       nf->nf_cred = get_current_cred();
+       nf->nf_net = net;
+       nf->nf_inode = inode;
+       nf->nf_hashval = hashval;
+       nf->nf_may = may & NFSD_FILE_MAY_MASK;
+       atomic_set(&nf->nf_ref, 1);
+
+       /*
+        * Opened without breaking leases: flag the entry so that a later
+        * acquire without NFSD_MAY_NOT_BREAK_LEASE performs the break.
+        */
+       nf->nf_flags = 0;
+       if (may & NFSD_MAY_NOT_BREAK_LEASE) {
+               if (may & NFSD_MAY_WRITE)
+                       __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
+               if (may & NFSD_MAY_READ)
+                       __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+       }
+
+       trace_nfsd_file_alloc(nf);
+       return nf;
+}
+
+/*
+ * Tear down an nfsd_file whose last reference has been dropped: put its
+ * mark, close its struct file (if any) and queue the object for freeing
+ * via RCU.
+ *
+ * Returns true if an open file was closed, which callers use to decide
+ * whether to call flush_delayed_fput().
+ */
+static bool
+nfsd_file_free(struct nfsd_file *nf)
+{
+       bool flush = false;
+
+       trace_nfsd_file_put_final(nf);
+       if (nf->nf_mark)
+               nfsd_file_mark_put(nf->nf_mark);
+       if (nf->nf_file) {
+               /* hold an extra reference across filp_close(), then drop it */
+               get_file(nf->nf_file);
+               filp_close(nf->nf_file, NULL);
+               fput(nf->nf_file);
+               flush = true;
+       }
+       call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
+       return flush;
+}
+
+/*
+ * Return true if @nf has a file open for write whose mapping still has
+ * pages tagged dirty or under writeback.
+ */
+static bool
+nfsd_file_check_writeback(struct nfsd_file *nf)
+{
+       struct address_space *mapping;
+       struct file *filp = nf->nf_file;
+
+       if (filp == NULL)
+               return false;
+       if ((filp->f_mode & FMODE_WRITE) == 0)
+               return false;
+
+       mapping = filp->f_mapping;
+       if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+               return true;
+       return mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
+}
+
+/*
+ * Check the mapping of a file opened for write against the file's f_wb_err.
+ * Returns the result of filemap_check_wb_err(), or 0 if there is no file or
+ * it was not opened for write.
+ */
+static int
+nfsd_file_check_write_error(struct nfsd_file *nf)
+{
+       struct file *filp = nf->nf_file;
+
+       if (filp == NULL)
+               return 0;
+       if ((filp->f_mode & FMODE_WRITE) == 0)
+               return 0;
+
+       return filemap_check_wb_err(filp->f_mapping, READ_ONCE(filp->f_wb_err));
+}
+
+/*
+ * An nfsd_file counts as in use while it has dirty/writeback pages or a
+ * pending write error.
+ */
+static bool
+nfsd_file_in_use(struct nfsd_file *nf)
+{
+       if (nfsd_file_check_writeback(nf))
+               return true;
+       return nfsd_file_check_write_error(nf) != 0;
+}
+
+/*
+ * Remove @nf from its hash bucket and from the LRU, and update the bucket
+ * and global entry counts. The caller must hold the bucket's nfb_lock and
+ * is responsible for the NFSD_FILE_HASHED bit.
+ */
+static void
+nfsd_file_do_unhash(struct nfsd_file *nf)
+{
+       lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+
+       trace_nfsd_file_unhash(nf);
+
+       /* a pending write error resets the boot verifier for this net */
+       if (nfsd_file_check_write_error(nf))
+               nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
+       --nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
+       hlist_del_rcu(&nf->nf_node);
+       if (!list_empty(&nf->nf_lru))
+               list_lru_del(&nfsd_file_lru, &nf->nf_lru);
+       atomic_long_dec(&nfsd_filecache_count);
+}
+
+/*
+ * Unhash @nf if it is still marked NFSD_FILE_HASHED. Returns true if this
+ * call cleared the bit and did the unhash, false if it was already clear.
+ */
+static bool
+nfsd_file_unhash(struct nfsd_file *nf)
+{
+       if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
+               return false;
+
+       nfsd_file_do_unhash(nf);
+       return true;
+}
+
+/*
+ * Unhash @nf and release the hashtable's reference to it. The caller must
+ * hold the bucket's nfb_lock.
+ *
+ * Return true if the file was unhashed.
+ */
+static bool
+nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
+{
+       lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+
+       trace_nfsd_file_unhash_and_release_locked(nf);
+       if (!nfsd_file_unhash(nf))
+               return false;
+       /*
+        * Drop the reference here unless it is the last one; the final
+        * reference is kept and the entry is queued on @dispose instead.
+        */
+       if (atomic_add_unless(&nf->nf_ref, -1, 1))
+               return true;
+
+       list_add(&nf->nf_lru, dispose);
+       return true;
+}
+
+/*
+ * Drop one reference on @nf and free it when the count reaches zero.
+ * Returns the reference count remaining after the decrement.
+ */
+static int
+nfsd_file_put_noref(struct nfsd_file *nf)
+{
+       int count;
+       trace_nfsd_file_put(nf);
+
+       count = atomic_dec_return(&nf->nf_ref);
+       if (!count) {
+               /* an entry losing its last reference should already be unhashed */
+               WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
+               nfsd_file_free(nf);
+       }
+       return count;
+}
+
+/*
+ * Drop a reference on @nf, marking it NFSD_FILE_REFERENCED first. If that
+ * leaves exactly one reference on an entry that was hashed and not in use,
+ * schedule the laundrette.
+ */
+void
+nfsd_file_put(struct nfsd_file *nf)
+{
+       /* sampled before the put below, which may free nf */
+       bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
+       bool unused = !nfsd_file_in_use(nf);
+
+       set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+       if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused)
+               nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH);
+}
+
+/*
+ * Take a reference on @nf unless its refcount has already reached zero.
+ * Returns @nf on success, NULL otherwise.
+ */
+struct nfsd_file *
+nfsd_file_get(struct nfsd_file *nf)
+{
+       if (unlikely(!atomic_inc_not_zero(&nf->nf_ref)))
+               return NULL;
+       return nf;
+}
+
+/*
+ * Empty @dispose, dropping one reference on every nfsd_file queued on it
+ * through nf_lru.
+ */
+static void
+nfsd_file_dispose_list(struct list_head *dispose)
+{
+       while (!list_empty(dispose)) {
+               struct nfsd_file *victim;
+
+               victim = list_first_entry(dispose, struct nfsd_file, nf_lru);
+               list_del(&victim->nf_lru);
+               nfsd_file_put_noref(victim);
+       }
+}
+
+/*
+ * Empty @dispose, dropping one reference on every queued nfsd_file and
+ * freeing those that hit zero. If any freed entry had an open file, call
+ * flush_delayed_fput() before returning.
+ */
+static void
+nfsd_file_dispose_list_sync(struct list_head *dispose)
+{
+       bool closed_any = false;
+
+       while (!list_empty(dispose)) {
+               struct nfsd_file *victim;
+
+               victim = list_first_entry(dispose, struct nfsd_file, nf_lru);
+               list_del(&victim->nf_lru);
+               if (atomic_dec_and_test(&victim->nf_ref) &&
+                   nfsd_file_free(victim))
+                       closed_any = true;
+       }
+
+       if (closed_any)
+               flush_delayed_fput();
+}
+
+/*
+ * list_lru walk callback, used by both the shrinker and the laundrette.
+ * Entries that can be reclaimed have NFSD_FILE_HASHED cleared and are moved
+ * to the list passed in @arg (LRU_REMOVED); everything else is left in
+ * place (LRU_SKIP).
+ *
+ * Note this can deadlock with nfsd_file_cache_purge.
+ */
+static enum lru_status
+nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
+                spinlock_t *lock, void *arg)
+       __releases(lock)
+       __acquires(lock)
+{
+       struct list_head *head = arg;
+       struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
+
+       /*
+        * Do a lockless refcount check. The hashtable holds one reference, so
+        * we look to see if anything else has a reference, or if any have
+        * been put since the shrinker last ran. Those don't get unhashed and
+        * released.
+        *
+        * Note that in the put path, we set the flag and then decrement the
+        * counter. Here we check the counter and then test and clear the flag.
+        * That order is deliberate to ensure that we can do this locklessly.
+        */
+       if (atomic_read(&nf->nf_ref) > 1)
+               goto out_skip;
+
+       /*
+        * Don't throw out files that are still undergoing I/O or
+        * that have uncleared errors pending.
+        */
+       if (nfsd_file_check_writeback(nf))
+               goto out_skip;
+
+       /* used since the last scan: keep it, but ask for another pass */
+       if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
+               goto out_rescan;
+
+       /* already unhashed by someone else: leave it to them */
+       if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
+               goto out_skip;
+
+       list_lru_isolate_move(lru, &nf->nf_lru, head);
+       return LRU_REMOVED;
+out_rescan:
+       set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags);
+out_skip:
+       return LRU_SKIP;
+}
+
+/*
+ * Finish off the entries isolated by nfsd_file_lru_cb(). That callback has
+ * already cleared NFSD_FILE_HASHED, so here each entry is removed from its
+ * hash bucket under the bucket lock and then has one reference dropped.
+ */
+static void
+nfsd_file_lru_dispose(struct list_head *head)
+{
+       while(!list_empty(head)) {
+               struct nfsd_file *nf = list_first_entry(head,
+                               struct nfsd_file, nf_lru);
+               /* leave nf_lru empty so nfsd_file_do_unhash() skips list_lru_del() */
+               list_del_init(&nf->nf_lru);
+               spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+               nfsd_file_do_unhash(nf);
+               spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+               nfsd_file_put_noref(nf);
+       }
+}
+
+/*
+ * Shrinker ->count_objects: report the number of entries on the LRU.
+ */
+static unsigned long
+nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
+{
+       unsigned long nr_entries = list_lru_count(&nfsd_file_lru);
+
+       return nr_entries;
+}
+
+/*
+ * Shrinker ->scan_objects: walk the LRU under @sc, dispose of whatever
+ * nfsd_file_lru_cb() isolated, and return the walk's result.
+ */
+static unsigned long
+nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
+{
+       LIST_HEAD(isolated);
+       unsigned long nr_removed;
+
+       nr_removed = list_lru_shrink_walk(&nfsd_file_lru, sc,
+                                         nfsd_file_lru_cb, &isolated);
+       nfsd_file_lru_dispose(&isolated);
+       return nr_removed;
+}
+
+/* Shrinker over nfsd_file_lru; registered in nfsd_file_cache_init() */
+static struct shrinker nfsd_file_shrinker = {
+       .scan_objects = nfsd_file_lru_scan,
+       .count_objects = nfsd_file_lru_count,
+       .seeks = 1,
+};
+
+/*
+ * Under the bucket lock, unhash every entry in bucket @hashval that refers
+ * to @inode. Entries whose last reference remains are queued on @dispose.
+ */
+static void
+__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
+                       struct list_head *dispose)
+{
+       struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[hashval];
+       struct hlist_node *next;
+       struct nfsd_file *nf;
+
+       spin_lock(&nfb->nfb_lock);
+       hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
+               if (nf->nf_inode != inode)
+                       continue;
+               nfsd_file_unhash_and_release_locked(nf, dispose);
+       }
+       spin_unlock(&nfb->nfb_lock);
+}
+
+/**
+ * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+ * Scan the hash bucket that @inode maps to and unhash every entry that
+ * refers to it, putting the hashtable's reference. Entries whose last
+ * reference was put are destroyed, and the final __fput of any file closed
+ * that way is completed before returning.
+ */
+void
+nfsd_file_close_inode_sync(struct inode *inode)
+{
+       LIST_HEAD(dispose);
+       unsigned int hashval;
+
+       hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
+
+       __nfsd_file_close_inode(inode, hashval, &dispose);
+       trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
+       nfsd_file_dispose_list_sync(&dispose);
+}
+
+/**
+ * nfsd_file_close_inode - attempt to forcibly close a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+ * Walk the whole hash bucket, looking for any files that correspond to "inode".
+ * If any do, then unhash them and put the hashtable reference to them and
+ * destroy any that had their last reference put. Unlike
+ * nfsd_file_close_inode_sync(), this does not call flush_delayed_fput().
+ */
+static void
+nfsd_file_close_inode(struct inode *inode)
+{
+       unsigned int            hashval = (unsigned int)hash_long(inode->i_ino,
+                                               NFSD_FILE_HASH_BITS);
+       LIST_HEAD(dispose);
+
+       __nfsd_file_close_inode(inode, hashval, &dispose);
+       trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
+       nfsd_file_dispose_list(&dispose);
+}
+
+/**
+ * nfsd_file_delayed_close - close unused nfsd_files
+ * @work: dummy
+ *
+ * Walk the LRU list and close any entries that have not been used since
+ * the last scan. This is the handler for the nfsd_filecache_laundrette
+ * delayed work.
+ *
+ * Note this can deadlock with nfsd_file_cache_purge.
+ */
+static void
+nfsd_file_delayed_close(struct work_struct *work)
+{
+       LIST_HEAD(head);
+
+       list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX);
+
+       /* the walk skipped recently-used entries: schedule another pass */
+       if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags))
+               nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH);
+
+       if (!list_empty(&head)) {
+               nfsd_file_lru_dispose(&head);
+               flush_delayed_fput();
+       }
+}
+
+/*
+ * Lease notifier callback: @data is the file_lock involved. For FL_LEASE
+ * locks, synchronously close any cached nfsd_files for the lock's inode.
+ * Always returns 0.
+ */
+static int
+nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
+                           void *data)
+{
+       struct file_lock *fl = data;
+
+       /* Only close files for F_SETLEASE leases */
+       if (!(fl->fl_flags & FL_LEASE))
+               return 0;
+
+       nfsd_file_close_inode_sync(file_inode(fl->fl_file));
+       return 0;
+}
+
+/* Registered with lease_register_notifier() in nfsd_file_cache_init() */
+static struct notifier_block nfsd_file_lease_notifier = {
+       .notifier_call = nfsd_file_lease_notifier_call,
+};
+
+/*
+ * fsnotify ->handle_event for nfsd's group: close the cached nfsd_files of
+ * @inode, except when the event is an attribute change on an inode that
+ * still has links. Always returns 0.
+ */
+static int
+nfsd_file_fsnotify_handle_event(struct fsnotify_group *group,
+                               struct inode *inode,
+                               u32 mask, const void *data, int data_type,
+                               const struct qstr *file_name, u32 cookie,
+                               struct fsnotify_iter_info *iter_info)
+{
+       trace_nfsd_file_fsnotify_handle_event(inode, mask);
+
+       /* Should be no marks on non-regular files */
+       if (!S_ISREG(inode->i_mode)) {
+               WARN_ON_ONCE(1);
+               return 0;
+       }
+
+       /* don't close files if this was not the last link */
+       if ((mask & FS_ATTRIB) && inode->i_nlink)
+               return 0;
+
+       nfsd_file_close_inode(inode);
+       return 0;
+}
+
+
+/* Operations for the fsnotify group allocated in nfsd_file_cache_init() */
+static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
+       .handle_event = nfsd_file_fsnotify_handle_event,
+       .free_mark = nfsd_file_mark_free,
+};
+
+/**
+ * nfsd_file_cache_init - set up the nfsd open-file cache
+ *
+ * Clears NFSD_FILE_SHUTDOWN and, unless the hashtable already exists,
+ * allocates the hashtable and slab caches, initialises the LRU, registers
+ * the shrinker and the lease notifier, allocates the fsnotify group and
+ * initialises the buckets and the laundrette work.
+ *
+ * Returns 0 on success (including when the cache was already set up) or a
+ * negative errno. On failure everything set up so far is undone.
+ */
+int
+nfsd_file_cache_init(void)
+{
+       int             ret = -ENOMEM;
+       unsigned int    i;
+
+       clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
+
+       if (nfsd_file_hashtbl)
+               return 0;
+
+       nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
+                               sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
+       if (!nfsd_file_hashtbl) {
+               pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
+               goto out_err;
+       }
+
+       nfsd_file_slab = kmem_cache_create("nfsd_file",
+                               sizeof(struct nfsd_file), 0, 0, NULL);
+       if (!nfsd_file_slab) {
+               pr_err("nfsd: unable to create nfsd_file_slab\n");
+               goto out_err;
+       }
+
+       nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
+                                       sizeof(struct nfsd_file_mark), 0, 0, NULL);
+       if (!nfsd_file_mark_slab) {
+               pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
+               goto out_err;
+       }
+
+
+       ret = list_lru_init(&nfsd_file_lru);
+       if (ret) {
+               pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
+               goto out_err;
+       }
+
+       ret = register_shrinker(&nfsd_file_shrinker);
+       if (ret) {
+               pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
+               goto out_lru;
+       }
+
+       ret = lease_register_notifier(&nfsd_file_lease_notifier);
+       if (ret) {
+               pr_err("nfsd: unable to register lease notifier: %d\n", ret);
+               goto out_shrinker;
+       }
+
+       nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
+       if (IS_ERR(nfsd_file_fsnotify_group)) {
+               pr_err("nfsd: unable to create fsnotify group: %ld\n",
+                       PTR_ERR(nfsd_file_fsnotify_group));
+               /*
+                * ret is 0 here from the successful registration above, so
+                * pick up the real error before unwinding.
+                */
+               ret = PTR_ERR(nfsd_file_fsnotify_group);
+               nfsd_file_fsnotify_group = NULL;
+               goto out_notifier;
+       }
+
+       for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+               INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
+               spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
+       }
+
+       INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close);
+out:
+       return ret;
+out_notifier:
+       lease_unregister_notifier(&nfsd_file_lease_notifier);
+out_shrinker:
+       unregister_shrinker(&nfsd_file_shrinker);
+out_lru:
+       list_lru_destroy(&nfsd_file_lru);
+out_err:
+       /* these pointers may still be NULL, depending on where we failed */
+       kmem_cache_destroy(nfsd_file_slab);
+       nfsd_file_slab = NULL;
+       kmem_cache_destroy(nfsd_file_mark_slab);
+       nfsd_file_mark_slab = NULL;
+       kfree(nfsd_file_hashtbl);
+       nfsd_file_hashtbl = NULL;
+       goto out;
+}
+
+/*
+ * Unhash and release every cached nfsd_file belonging to @net, or every
+ * entry regardless of namespace when @net is NULL. Does nothing if the
+ * hashtable has not been allocated.
+ *
+ * Note this can deadlock with nfsd_file_lru_cb.
+ */
+void
+nfsd_file_cache_purge(struct net *net)
+{
+       unsigned int            i;
+       struct nfsd_file        *nf;
+       struct hlist_node       *next;
+       LIST_HEAD(dispose);
+       bool del;
+
+       if (!nfsd_file_hashtbl)
+               return;
+
+       for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+               struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];
+
+               spin_lock(&nfb->nfb_lock);
+               hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
+                       if (net && nf->nf_net != net)
+                               continue;
+                       del = nfsd_file_unhash_and_release_locked(nf, &dispose);
+
+                       /*
+                        * Deadlock detected! Something marked this entry as
+                        * unhashed, but hasn't removed it from the hash list.
+                        */
+                       WARN_ON_ONCE(!del);
+               }
+               spin_unlock(&nfb->nfb_lock);
+               /* drop the queued final references outside the bucket lock */
+               nfsd_file_dispose_list(&dispose);
+       }
+}
+
+/**
+ * nfsd_file_cache_shutdown - tear down the nfsd open-file cache
+ *
+ * Sets NFSD_FILE_SHUTDOWN so the laundrette is no longer scheduled,
+ * unregisters the lease notifier and shrinker, cancels the laundrette work,
+ * purges every cached entry and then releases the LRU, fsnotify group, slab
+ * caches and hashtable.
+ */
+void
+nfsd_file_cache_shutdown(void)
+{
+       set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
+
+       lease_unregister_notifier(&nfsd_file_lease_notifier);
+       unregister_shrinker(&nfsd_file_shrinker);
+       /*
+        * make sure all callers of nfsd_file_lru_cb are done before
+        * calling nfsd_file_cache_purge
+        */
+       cancel_delayed_work_sync(&nfsd_filecache_laundrette);
+       nfsd_file_cache_purge(NULL);
+       list_lru_destroy(&nfsd_file_lru);
+       /* wait for pending nfsd_file_slab_free() RCU callbacks before destroying the slab */
+       rcu_barrier();
+       fsnotify_put_group(nfsd_file_fsnotify_group);
+       nfsd_file_fsnotify_group = NULL;
+       kmem_cache_destroy(nfsd_file_slab);
+       nfsd_file_slab = NULL;
+       fsnotify_wait_marks_destroyed();
+       kmem_cache_destroy(nfsd_file_mark_slab);
+       nfsd_file_mark_slab = NULL;
+       kfree(nfsd_file_hashtbl);
+       nfsd_file_hashtbl = NULL;
+}
+
+/*
+ * Compare two creds for the purposes of the file cache: fsuid, fsgid and
+ * the supplementary group list must all be equal. Two creds without a
+ * group_info match each other; one with and one without do not.
+ */
+static bool
+nfsd_match_cred(const struct cred *c1, const struct cred *c2)
+{
+       const struct group_info *g1 = c1->group_info;
+       const struct group_info *g2 = c2->group_info;
+       int idx;
+
+       if (!uid_eq(c1->fsuid, c2->fsuid) || !gid_eq(c1->fsgid, c2->fsgid))
+               return false;
+
+       if (!g1 || !g2)
+               return g1 == g2;
+
+       if (g1->ngroups != g2->ngroups)
+               return false;
+
+       for (idx = 0; idx < g1->ngroups; idx++)
+               if (!gid_eq(g1->gid[idx], g2->gid[idx]))
+                       return false;
+
+       return true;
+}
+
+/*
+ * Search bucket @hashval for an entry that covers the requested READ/WRITE
+ * access and matches @inode, @net and the current task's creds. Returns the
+ * first such entry on which a reference could be taken, or NULL.
+ *
+ * The callers in this file invoke it either under rcu_read_lock() or with
+ * the bucket's nfb_lock held.
+ */
+static struct nfsd_file *
+nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
+                       unsigned int hashval, struct net *net)
+{
+       unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
+       struct nfsd_file *nf;
+
+       hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
+                                nf_node) {
+               if ((nf->nf_may & need) != need ||
+                   nf->nf_inode != inode ||
+                   nf->nf_net != net ||
+                   !nfsd_match_cred(nf->nf_cred, current_cred()))
+                       continue;
+
+               /* skip entries whose refcount has already dropped to zero */
+               if (nfsd_file_get(nf))
+                       return nf;
+       }
+       return NULL;
+}
+
+/**
+ * nfsd_file_is_cached - are there any cached open files for this inode?
+ * @inode: inode of the file to check
+ *
+ * Scan, under RCU, the hash bucket that @inode maps to. Returns true if any
+ * entry in it refers to @inode, and false if not.
+ */
+bool
+nfsd_file_is_cached(struct inode *inode)
+{
+       struct nfsd_file        *nf;
+       unsigned int            hashval;
+       bool                    found = false;
+
+       hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
+                                nf_node) {
+               if (nf->nf_inode != inode)
+                       continue;
+               found = true;
+               break;
+       }
+       rcu_read_unlock();
+
+       trace_nfsd_file_is_cached(inode, hashval, (int)found);
+       return found;
+}
+
+/**
+ * nfsd_file_acquire - find or open a cached nfsd_file for a filehandle
+ * @rqstp: the svc_rqst being processed
+ * @fhp: filehandle to look up; it is verified as a regular file first
+ * @may_flags: NFSD_MAY_* access flags requested
+ * @pnf: on success, set to the referenced nfsd_file
+ *
+ * Looks for a matching entry in the cache and, failing that, inserts a new
+ * entry and opens the file. Lookups that find an entry still under
+ * construction wait for NFSD_FILE_PENDING to clear.
+ *
+ * Returns nfs_ok with a reference in *@pnf, or an nfserr status, in which
+ * case *@pnf is not written.
+ */
+__be32
+nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+                 unsigned int may_flags, struct nfsd_file **pnf)
+{
+       __be32  status;
+       struct net *net = SVC_NET(rqstp);
+       struct nfsd_file *nf, *new;
+       struct inode *inode;
+       unsigned int hashval;
+
+       /* FIXME: skip this if fh_dentry is already set? */
+       status = fh_verify(rqstp, fhp, S_IFREG,
+                               may_flags|NFSD_MAY_OWNER_OVERRIDE);
+       if (status != nfs_ok)
+               return status;
+
+       inode = d_inode(fhp->fh_dentry);
+       hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
+retry:
+       /* first try a lockless lookup */
+       rcu_read_lock();
+       nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
+       rcu_read_unlock();
+       if (nf)
+               goto wait_for_construction;
+
+       new = nfsd_file_alloc(inode, may_flags, hashval, net);
+       if (!new) {
+               trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
+                                       NULL, nfserr_jukebox);
+               return nfserr_jukebox;
+       }
+
+       /* search again under the bucket lock before inserting the new entry */
+       spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+       nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
+       if (nf == NULL)
+               goto open_file;
+       spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+       /* lost the race: "new" was never hashed, so free it directly */
+       nfsd_file_slab_free(&new->nf_rcu);
+
+wait_for_construction:
+       wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
+
+       /* Did construction of this file fail? */
+       if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+               nfsd_file_put_noref(nf);
+               goto retry;
+       }
+
+       this_cpu_inc(nfsd_file_cache_hits);
+
+       /* perform any lease break that was skipped when the entry was opened */
+       if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
+               bool write = (may_flags & NFSD_MAY_WRITE);
+
+               if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
+                   (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
+                       status = nfserrno(nfsd_open_break_lease(
+                                       file_inode(nf->nf_file), may_flags));
+                       if (status == nfs_ok) {
+                               clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+                               if (write)
+                                       clear_bit(NFSD_FILE_BREAK_WRITE,
+                                                 &nf->nf_flags);
+                       }
+               }
+       }
+out:
+       if (status == nfs_ok) {
+               *pnf = nf;
+       } else {
+               /* drop the reference held for the caller */
+               nfsd_file_put(nf);
+               nf = NULL;
+       }
+
+       trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
+       return status;
+open_file:
+       /* reached with the bucket lock held and no matching entry found */
+       nf = new;
+       /* Take reference for the hashtable */
+       atomic_inc(&nf->nf_ref);
+       __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+       __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+       list_lru_add(&nfsd_file_lru, &nf->nf_lru);
+       hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
+       ++nfsd_file_hashtbl[hashval].nfb_count;
+       nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
+                       nfsd_file_hashtbl[hashval].nfb_count);
+       spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+       atomic_long_inc(&nfsd_filecache_count);
+
+       /* the entry is hashed but PENDING; attach the mark and open the file */
+       nf->nf_mark = nfsd_file_mark_find_or_create(nf);
+       if (nf->nf_mark)
+               status = nfsd_open_verified(rqstp, fhp, S_IFREG,
+                               may_flags, &nf->nf_file);
+       else
+               status = nfserr_jukebox;
+       /*
+        * If construction failed, or we raced with a call to unlink()
+        * then unhash.
+        */
+       if (status != nfs_ok || inode->i_nlink == 0) {
+               bool do_free;
+               spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+               do_free = nfsd_file_unhash(nf);
+               spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+               /* we did the unhash, so drop the hashtable's reference too */
+               if (do_free)
+                       nfsd_file_put_noref(nf);
+       }
+       /* construction is finished either way: wake anyone waiting on PENDING */
+       clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
+       smp_mb__after_atomic();
+       wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
+       goto out;
+}
+
+/*
+ * seq_file show routine for the file cache statistics.
+ *
+ * Note that fields may be added, removed or reordered in the future. Programs
+ * scraping this file for info should test the labels to ensure they're
+ * getting the correct field.
+ */
+static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+{
+       unsigned int bucket, cpu;
+       unsigned int entries = 0, longest_chain = 0;
+       unsigned long total_hits = 0;
+
+       /*
+        * The bucket counters are read without their spinlocks, as accuracy
+        * is not terribly important here. nfsd_mutex is taken only so that
+        * we don't race with server shutdown.
+        */
+       mutex_lock(&nfsd_mutex);
+       if (nfsd_file_hashtbl) {
+               for (bucket = 0; bucket < NFSD_FILE_HASH_SIZE; bucket++) {
+                       unsigned int chain = nfsd_file_hashtbl[bucket].nfb_count;
+
+                       entries += chain;
+                       longest_chain = max(longest_chain, chain);
+               }
+       }
+       mutex_unlock(&nfsd_mutex);
+
+       for_each_possible_cpu(cpu)
+               total_hits += per_cpu(nfsd_file_cache_hits, cpu);
+
+       seq_printf(m, "total entries: %u\n", entries);
+       seq_printf(m, "longest chain: %u\n", longest_chain);
+       seq_printf(m, "cache hits:    %lu\n", total_hits);
+       return 0;
+}
+
+/* open routine for the stats file: bind it to nfsd_file_cache_stats_show() */
+int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, nfsd_file_cache_stats_show, NULL);
+}
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
new file mode 100644 (file)
index 0000000..851d9ab
--- /dev/null
@@ -0,0 +1,61 @@
+#ifndef _FS_NFSD_FILECACHE_H
+#define _FS_NFSD_FILECACHE_H
+
+#include <linux/fsnotify_backend.h>
+
+/*
+ * This is the fsnotify_mark container that nfsd attaches to the files that it
+ * is holding open. Note that we have a separate refcount here aside from the
+ * one in the fsnotify_mark. We only want a single fsnotify_mark attached to
+ * the inode, and for each nfsd_file to hold a reference to it.
+ *
+ * The fsnotify_mark is itself refcounted, but that's not sufficient to tell us
+ * how to put that reference. If there are still outstanding nfsd_files that
+ * reference the mark, then we would want to call fsnotify_put_mark on it.
+ * If there were not, then we'd need to call fsnotify_destroy_mark. Since we
+ * can't really tell the difference, we use the nfm_ref to keep track of how
+ * many nfsd_files hold references to the mark. When that counter goes to zero
+ * then we know to call fsnotify_destroy_mark on it.
+ */
+struct nfsd_file_mark {
+       struct fsnotify_mark    nfm_mark;       /* the mark attached to the inode */
+       atomic_t                nfm_ref;        /* nfsd_files holding a ref to nfm_mark */
+};
+
+/*
+ * A representation of a file that has been opened by knfsd. These are hashed
+ * in the hashtable by inode pointer value. Note that this object doesn't
+ * hold a reference to the inode by itself, so the nf_inode pointer should
+ * never be dereferenced, only used for comparison.
+ */
+struct nfsd_file {
+       struct hlist_node       nf_node;        /* hashtable chain linkage */
+       struct list_head        nf_lru;
+       struct rcu_head         nf_rcu;
+       struct file             *nf_file;       /* the underlying open struct file */
+       const struct cred       *nf_cred;
+       struct net              *nf_net;
+#define NFSD_FILE_HASHED       (0)
+#define NFSD_FILE_PENDING      (1)     /* waiters on this bit are woken when it is cleared */
+#define NFSD_FILE_BREAK_READ   (2)
+#define NFSD_FILE_BREAK_WRITE  (3)
+#define NFSD_FILE_REFERENCED   (4)
+       unsigned long           nf_flags;       /* bit numbers are the NFSD_FILE_* values above */
+       struct inode            *nf_inode;      /* comparison key only; never dereference */
+       unsigned int            nf_hashval;     /* presumably the hash bucket index - confirm */
+       atomic_t                nf_ref;         /* refcount; presumably nfsd_file_get/put - confirm */
+       unsigned char           nf_may;
+       struct nfsd_file_mark   *nf_mark;       /* container for the fsnotify mark */
+};
+
+int nfsd_file_cache_init(void);
+void nfsd_file_cache_purge(struct net *);
+void nfsd_file_cache_shutdown(void);
+void nfsd_file_put(struct nfsd_file *nf);
+struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
+void nfsd_file_close_inode_sync(struct inode *inode);
+bool nfsd_file_is_cached(struct inode *inode);
+__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+                 unsigned int may_flags, struct nfsd_file **nfp);
+int    nfsd_file_cache_stats_open(struct inode *, struct file *);
+#endif /* _FS_NFSD_FILECACHE_H */
index bdfe5bc..9a4ef81 100644 (file)
@@ -104,6 +104,7 @@ struct nfsd_net {
 
        /* Time of server startup */
        struct timespec64 nfssvc_boot;
+       seqlock_t boot_lock;
 
        /*
         * Max number of connections this nfsd container will allow. Defaults
@@ -179,4 +180,7 @@ struct nfsd_net {
 extern void nfsd_netns_free_versions(struct nfsd_net *nn);
 
 extern unsigned int nfsd_net_id;
+
+void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn);
+void nfsd_reset_boot_verifier(struct nfsd_net *nn);
 #endif /* __NFSD_NETNS_H__ */
index 9bc32af..cea68d8 100644 (file)
@@ -172,13 +172,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
        nfserr = nfsd_read(rqstp, &resp->fh,
                                  argp->offset,
                                  rqstp->rq_vec, argp->vlen,
-                                 &resp->count);
-       if (nfserr == 0) {
-               struct inode    *inode = d_inode(resp->fh.fh_dentry);
-               resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset,
-                                                       inode->i_size);
-       }
-
+                                 &resp->count,
+                                 &resp->eof);
        RETURN_STATUS(nfserr);
 }
 
index fcf3182..86e5658 100644 (file)
@@ -27,6 +27,7 @@ static u32    nfs3_ftypes[] = {
        NF3SOCK, NF3BAD,  NF3LNK, NF3BAD,
 };
 
+
 /*
  * XDR functions for basic NFS types
  */
@@ -751,14 +752,16 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
 {
        struct nfsd3_writeres *resp = rqstp->rq_resp;
        struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+       __be32 verf[2];
 
        p = encode_wcc_data(rqstp, p, &resp->fh);
        if (resp->status == 0) {
                *p++ = htonl(resp->count);
                *p++ = htonl(resp->committed);
                /* unique identifier, y2038 overflow can be ignored */
-               *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
-               *p++ = htonl(nn->nfssvc_boot.tv_nsec);
+               nfsd_copy_boot_verifier(verf, nn);
+               *p++ = verf[0];
+               *p++ = verf[1];
        }
        return xdr_ressize_check(rqstp, p);
 }
@@ -1125,13 +1128,15 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
 {
        struct nfsd3_commitres *resp = rqstp->rq_resp;
        struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+       __be32 verf[2];
 
        p = encode_wcc_data(rqstp, p, &resp->fh);
        /* Write verifier */
        if (resp->status == 0) {
                /* unique identifier, y2038 overflow can be ignored */
-               *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
-               *p++ = htonl(nn->nfssvc_boot.tv_nsec);
+               nfsd_copy_boot_verifier(verf, nn);
+               *p++ = verf[0];
+               *p++ = verf[1];
        }
        return xdr_ressize_check(rqstp, p);
 }
index 397eb78..5241114 100644 (file)
@@ -512,11 +512,9 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
        if (unlikely(status))
                return status;
 
-       if (cb != NULL) {
-               status = decode_cb_sequence4res(xdr, cb);
-               if (unlikely(status || cb->cb_seq_status))
-                       return status;
-       }
+       status = decode_cb_sequence4res(xdr, cb);
+       if (unlikely(status || cb->cb_seq_status))
+               return status;
 
        return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
 }
@@ -604,11 +602,10 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
        if (unlikely(status))
                return status;
 
-       if (cb) {
-               status = decode_cb_sequence4res(xdr, cb);
-               if (unlikely(status || cb->cb_seq_status))
-                       return status;
-       }
+       status = decode_cb_sequence4res(xdr, cb);
+       if (unlikely(status || cb->cb_seq_status))
+               return status;
+
        return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
 }
 #endif /* CONFIG_NFSD_PNFS */
@@ -663,11 +660,10 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
        if (unlikely(status))
                return status;
 
-       if (cb) {
-               status = decode_cb_sequence4res(xdr, cb);
-               if (unlikely(status || cb->cb_seq_status))
-                       return status;
-       }
+       status = decode_cb_sequence4res(xdr, cb);
+       if (unlikely(status || cb->cb_seq_status))
+               return status;
+
        return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status);
 }
 
@@ -759,11 +755,10 @@ static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp,
        if (unlikely(status))
                return status;
 
-       if (cb) {
-               status = decode_cb_sequence4res(xdr, cb);
-               if (unlikely(status || cb->cb_seq_status))
-                       return status;
-       }
+       status = decode_cb_sequence4res(xdr, cb);
+       if (unlikely(status || cb->cb_seq_status))
+               return status;
+
        return decode_cb_op_status(xdr, OP_CB_OFFLOAD, &cb->cb_status);
 }
 /*
index a79e24b..2681c70 100644 (file)
@@ -169,8 +169,8 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
        spin_unlock(&fp->fi_lock);
 
        if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
-               vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls);
-       fput(ls->ls_file);
+               vfs_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls);
+       nfsd_file_put(ls->ls_file);
 
        if (ls->ls_recalled)
                atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
@@ -197,7 +197,7 @@ nfsd4_layout_setlease(struct nfs4_layout_stateid *ls)
        fl->fl_end = OFFSET_MAX;
        fl->fl_owner = ls;
        fl->fl_pid = current->tgid;
-       fl->fl_file = ls->ls_file;
+       fl->fl_file = ls->ls_file->nf_file;
 
        status = vfs_setlease(fl->fl_file, fl->fl_type, &fl, NULL);
        if (status) {
@@ -236,13 +236,13 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
                        NFSPROC4_CLNT_CB_LAYOUT);
 
        if (parent->sc_type == NFS4_DELEG_STID)
-               ls->ls_file = get_file(fp->fi_deleg_file);
+               ls->ls_file = nfsd_file_get(fp->fi_deleg_file);
        else
                ls->ls_file = find_any_file(fp);
        BUG_ON(!ls->ls_file);
 
        if (nfsd4_layout_setlease(ls)) {
-               fput(ls->ls_file);
+               nfsd_file_put(ls->ls_file);
                put_nfs4_file(fp);
                kmem_cache_free(nfs4_layout_stateid_cache, ls);
                return NULL;
@@ -626,7 +626,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
 
        argv[0] = (char *)nfsd_recall_failed;
        argv[1] = addr_str;
-       argv[2] = ls->ls_file->f_path.mnt->mnt_sb->s_id;
+       argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id;
        argv[3] = NULL;
 
        error = call_usermodehelper(nfsd_recall_failed, argv, envp,
index 8beda99..4e3e77b 100644 (file)
@@ -568,17 +568,11 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
 {
-       __be32 verf[2];
-       struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+       __be32 *verf = (__be32 *)verifier->data;
 
-       /*
-        * This is opaque to client, so no need to byte-swap. Use
-        * __force to keep sparse happy. y2038 time_t overflow is
-        * irrelevant in this usage.
-        */
-       verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
-       verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
-       memcpy(verifier->data, verf, sizeof(verifier->data));
+       BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data));
+
+       nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id));
 }
 
 static __be32
@@ -761,7 +755,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        struct nfsd4_read *read = &u->read;
        __be32 status;
 
-       read->rd_filp = NULL;
+       read->rd_nf = NULL;
        if (read->rd_offset >= OFFSET_MAX)
                return nfserr_inval;
 
@@ -782,7 +776,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        /* check stateid */
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
                                        &read->rd_stateid, RD_STATE,
-                                       &read->rd_filp, &read->rd_tmp_file);
+                                       &read->rd_nf);
        if (status) {
                dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
                goto out;
@@ -798,8 +792,8 @@ out:
 static void
 nfsd4_read_release(union nfsd4_op_u *u)
 {
-       if (u->read.rd_filp)
-               fput(u->read.rd_filp);
+       if (u->read.rd_nf)
+               nfsd_file_put(u->read.rd_nf);
        trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
                             u->read.rd_offset, u->read.rd_length);
 }
@@ -954,7 +948,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
                status = nfs4_preprocess_stateid_op(rqstp, cstate,
                                &cstate->current_fh, &setattr->sa_stateid,
-                               WR_STATE, NULL, NULL);
+                               WR_STATE, NULL);
                if (status) {
                        dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
                        return status;
@@ -993,7 +987,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
        struct nfsd4_write *write = &u->write;
        stateid_t *stateid = &write->wr_stateid;
-       struct file *filp = NULL;
+       struct nfsd_file *nf = NULL;
        __be32 status = nfs_ok;
        unsigned long cnt;
        int nvecs;
@@ -1005,7 +999,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        trace_nfsd_write_start(rqstp, &cstate->current_fh,
                               write->wr_offset, cnt);
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
-                                               stateid, WR_STATE, &filp, NULL);
+                                               stateid, WR_STATE, &nf);
        if (status) {
                dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
                return status;
@@ -1018,10 +1012,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                                      &write->wr_head, write->wr_buflen);
        WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
 
-       status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
+       status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf->nf_file,
                                write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
                                write->wr_how_written);
-       fput(filp);
+       nfsd_file_put(nf);
 
        write->wr_bytes_written = cnt;
        trace_nfsd_write_done(rqstp, &cstate->current_fh,
@@ -1031,8 +1025,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-                 stateid_t *src_stateid, struct file **src,
-                 stateid_t *dst_stateid, struct file **dst)
+                 stateid_t *src_stateid, struct nfsd_file **src,
+                 stateid_t *dst_stateid, struct nfsd_file **dst)
 {
        __be32 status;
 
@@ -1040,22 +1034,22 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                return nfserr_nofilehandle;
 
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
-                                           src_stateid, RD_STATE, src, NULL);
+                                           src_stateid, RD_STATE, src);
        if (status) {
                dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
                goto out;
        }
 
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
-                                           dst_stateid, WR_STATE, dst, NULL);
+                                           dst_stateid, WR_STATE, dst);
        if (status) {
                dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
                goto out_put_src;
        }
 
        /* fix up for NFS-specific error code */
-       if (!S_ISREG(file_inode(*src)->i_mode) ||
-           !S_ISREG(file_inode(*dst)->i_mode)) {
+       if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) ||
+           !S_ISREG(file_inode((*dst)->nf_file)->i_mode)) {
                status = nfserr_wrong_type;
                goto out_put_dst;
        }
@@ -1063,9 +1057,9 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 out:
        return status;
 out_put_dst:
-       fput(*dst);
+       nfsd_file_put(*dst);
 out_put_src:
-       fput(*src);
+       nfsd_file_put(*src);
        goto out;
 }
 
@@ -1074,7 +1068,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                union nfsd4_op_u *u)
 {
        struct nfsd4_clone *clone = &u->clone;
-       struct file *src, *dst;
+       struct nfsd_file *src, *dst;
        __be32 status;
 
        status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
@@ -1082,11 +1076,11 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        if (status)
                goto out;
 
-       status = nfsd4_clone_file_range(src, clone->cl_src_pos,
-                       dst, clone->cl_dst_pos, clone->cl_count);
+       status = nfsd4_clone_file_range(src->nf_file, clone->cl_src_pos,
+                       dst->nf_file, clone->cl_dst_pos, clone->cl_count);
 
-       fput(dst);
-       fput(src);
+       nfsd_file_put(dst);
+       nfsd_file_put(src);
 out:
        return status;
 }
@@ -1176,8 +1170,9 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
        do {
                if (kthread_should_stop())
                        break;
-               bytes_copied = nfsd_copy_file_range(copy->file_src, src_pos,
-                               copy->file_dst, dst_pos, bytes_total);
+               bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file,
+                               src_pos, copy->nf_dst->nf_file, dst_pos,
+                               bytes_total);
                if (bytes_copied <= 0)
                        break;
                bytes_total -= bytes_copied;
@@ -1204,8 +1199,8 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
                status = nfs_ok;
        }
 
-       fput(copy->file_src);
-       fput(copy->file_dst);
+       nfsd_file_put(copy->nf_src);
+       nfsd_file_put(copy->nf_dst);
        return status;
 }
 
@@ -1218,16 +1213,16 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
        memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res));
        memcpy(&dst->fh, &src->fh, sizeof(src->fh));
        dst->cp_clp = src->cp_clp;
-       dst->file_dst = get_file(src->file_dst);
-       dst->file_src = get_file(src->file_src);
+       dst->nf_dst = nfsd_file_get(src->nf_dst);
+       dst->nf_src = nfsd_file_get(src->nf_src);
        memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid));
 }
 
 static void cleanup_async_copy(struct nfsd4_copy *copy)
 {
        nfs4_free_cp_state(copy);
-       fput(copy->file_dst);
-       fput(copy->file_src);
+       nfsd_file_put(copy->nf_dst);
+       nfsd_file_put(copy->nf_src);
        spin_lock(&copy->cp_clp->async_lock);
        list_del(&copy->copies);
        spin_unlock(&copy->cp_clp->async_lock);
@@ -1264,8 +1259,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        struct nfsd4_copy *async_copy = NULL;
 
        status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
-                                  &copy->file_src, &copy->cp_dst_stateid,
-                                  &copy->file_dst);
+                                  &copy->nf_src, &copy->cp_dst_stateid,
+                                  &copy->nf_dst);
        if (status)
                goto out;
 
@@ -1347,21 +1342,21 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                struct nfsd4_fallocate *fallocate, int flags)
 {
        __be32 status;
-       struct file *file;
+       struct nfsd_file *nf;
 
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
                                            &fallocate->falloc_stateid,
-                                           WR_STATE, &file, NULL);
+                                           WR_STATE, &nf);
        if (status != nfs_ok) {
                dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
                return status;
        }
 
-       status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
+       status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file,
                                     fallocate->falloc_offset,
                                     fallocate->falloc_length,
                                     flags);
-       fput(file);
+       nfsd_file_put(nf);
        return status;
 }
 static __be32
@@ -1406,11 +1401,11 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        struct nfsd4_seek *seek = &u->seek;
        int whence;
        __be32 status;
-       struct file *file;
+       struct nfsd_file *nf;
 
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
                                            &seek->seek_stateid,
-                                           RD_STATE, &file, NULL);
+                                           RD_STATE, &nf);
        if (status) {
                dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
                return status;
@@ -1432,14 +1427,14 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
         * Note:  This call does change file->f_pos, but nothing in NFSD
         *        should ever file->f_pos.
         */
-       seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence);
+       seek->seek_pos = vfs_llseek(nf->nf_file, seek->seek_offset, whence);
        if (seek->seek_pos < 0)
                status = nfserrno(seek->seek_pos);
-       else if (seek->seek_pos >= i_size_read(file_inode(file)))
+       else if (seek->seek_pos >= i_size_read(file_inode(nf->nf_file)))
                seek->seek_eof = true;
 
 out:
-       fput(file);
+       nfsd_file_put(nf);
        return status;
 }
 
index 8767955..cdc75ad 100644 (file)
@@ -59,8 +59,13 @@ struct nfsd4_client_tracking_ops {
        void (*remove)(struct nfs4_client *);
        int (*check)(struct nfs4_client *);
        void (*grace_done)(struct nfsd_net *);
+       uint8_t version;
+       size_t msglen;
 };
 
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops;
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2;
+
 /* Globals */
 static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
 
@@ -173,6 +178,7 @@ __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
                const char *dname, int len, struct nfsd_net *nn)
 {
        struct xdr_netobj name;
+       struct xdr_netobj princhash = { .len = 0, .data = NULL };
        struct nfs4_client_reclaim *crp;
 
        name.data = kmemdup(dname, len, GFP_KERNEL);
@@ -182,7 +188,7 @@ __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
                return;
        }
        name.len = len;
-       crp = nfs4_client_to_reclaim(name, nn);
+       crp = nfs4_client_to_reclaim(name, princhash, nn);
        if (!crp) {
                kfree(name.data);
                return;
@@ -482,6 +488,7 @@ static int
 load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
 {
        struct xdr_netobj name;
+       struct xdr_netobj princhash = { .len = 0, .data = NULL };
 
        if (child->d_name.len != HEXDIR_LEN - 1) {
                printk("%s: illegal name %pd in recovery directory\n",
@@ -496,7 +503,7 @@ load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
                goto out;
        }
        name.len = HEXDIR_LEN;
-       if (!nfs4_client_to_reclaim(name, nn))
+       if (!nfs4_client_to_reclaim(name, princhash, nn))
                kfree(name.data);
 out:
        return 0;
@@ -718,6 +725,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
        .remove         = nfsd4_remove_clid_dir,
        .check          = nfsd4_check_legacy_client,
        .grace_done     = nfsd4_recdir_purge_old,
+       .version        = 1,
+       .msglen         = 0,
 };
 
 /* Globals */
@@ -731,25 +740,32 @@ struct cld_net {
        struct list_head         cn_list;
        unsigned int             cn_xid;
        bool                     cn_has_legacy;
+       struct crypto_shash     *cn_tfm;
 };
 
 struct cld_upcall {
        struct list_head         cu_list;
        struct cld_net          *cu_net;
        struct completion        cu_done;
-       struct cld_msg           cu_msg;
+       union {
+               struct cld_msg_hdr       cu_hdr;
+               struct cld_msg           cu_msg;
+               struct cld_msg_v2        cu_msg_v2;
+       } cu_u;
 };
 
 static int
-__cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+__cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
 {
        int ret;
        struct rpc_pipe_msg msg;
-       struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_msg);
+       struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u);
+       struct nfsd_net *nn = net_generic(pipe->dentry->d_sb->s_fs_info,
+                                         nfsd_net_id);
 
        memset(&msg, 0, sizeof(msg));
        msg.data = cmsg;
-       msg.len = sizeof(*cmsg);
+       msg.len = nn->client_tracking_ops->msglen;
 
        ret = rpc_queue_upcall(pipe, &msg);
        if (ret < 0) {
@@ -765,7 +781,7 @@ out:
 }
 
 static int
-cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
 {
        int ret;
 
@@ -781,11 +797,11 @@ cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
 }
 
 static ssize_t
-__cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg,
+__cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
                struct nfsd_net *nn)
 {
-       uint8_t cmd;
-       struct xdr_netobj name;
+       uint8_t cmd, princhashlen;
+       struct xdr_netobj name, princhash = { .len = 0, .data = NULL };
        uint16_t namelen;
        struct cld_net *cn = nn->cld_net;
 
@@ -794,22 +810,57 @@ __cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg,
                 return -EFAULT;
         }
         if (cmd == Cld_GraceStart) {
-               if (get_user(namelen, &cmsg->cm_u.cm_name.cn_len))
-                       return -EFAULT;
-               name.data = memdup_user(&cmsg->cm_u.cm_name.cn_id, namelen);
-               if (IS_ERR_OR_NULL(name.data))
-                       return -EFAULT;
-               name.len = namelen;
+               if (nn->client_tracking_ops->version >= 2) {
+                       const struct cld_clntinfo __user *ci;
+
+                       ci = &cmsg->cm_u.cm_clntinfo;
+                       if (get_user(namelen, &ci->cc_name.cn_len))
+                               return -EFAULT;
+                       name.data = memdup_user(&ci->cc_name.cn_id, namelen);
+                       if (IS_ERR_OR_NULL(name.data))
+                               return -EFAULT;
+                       name.len = namelen;
+                       /*
+                        * name.data is now owned by this function: every
+                        * failure below must free it before returning.
+                        */
+                       if (get_user(princhashlen, &ci->cc_princhash.cp_len)) {
+                               kfree(name.data);
+                               return -EFAULT;
+                       }
+                       if (princhashlen > 0) {
+                               princhash.data = memdup_user(
+                                               &ci->cc_princhash.cp_data,
+                                               princhashlen);
+                               if (IS_ERR_OR_NULL(princhash.data)) {
+                                       kfree(name.data);
+                                       return -EFAULT;
+                               }
+                               princhash.len = princhashlen;
+                       } else
+                               princhash.len = 0;
+               } else {
+                       const struct cld_name __user *cnm;
+
+                       cnm = &cmsg->cm_u.cm_name;
+                       if (get_user(namelen, &cnm->cn_len))
+                               return -EFAULT;
+                       name.data = memdup_user(&cnm->cn_id, namelen);
+                       if (IS_ERR_OR_NULL(name.data))
+                               return -EFAULT;
+                       name.len = namelen;
+               }
                 if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) {
                         name.len = name.len - 5;
                         memmove(name.data, name.data + 5, name.len);
                         cn->cn_has_legacy = true;
                 }
-               if (!nfs4_client_to_reclaim(name, nn)) {
+               if (!nfs4_client_to_reclaim(name, princhash, nn)) {
                         kfree(name.data);
+                       kfree(princhash.data);
                         return -EFAULT;
                 }
-               return sizeof(*cmsg);
+               return nn->client_tracking_ops->msglen;
         }
         return -EFAULT;
 }
@@ -818,21 +860,22 @@ static ssize_t
 cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 {
        struct cld_upcall *tmp, *cup;
-       struct cld_msg __user *cmsg = (struct cld_msg __user *)src;
+       struct cld_msg_hdr __user *hdr = (struct cld_msg_hdr __user *)src;
+       struct cld_msg_v2 __user *cmsg = (struct cld_msg_v2 __user *)src;
        uint32_t xid;
        struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info,
                                                nfsd_net_id);
        struct cld_net *cn = nn->cld_net;
        int16_t status;
 
-       if (mlen != sizeof(*cmsg)) {
+       if (mlen != nn->client_tracking_ops->msglen) {
                dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen,
-                       sizeof(*cmsg));
+                       nn->client_tracking_ops->msglen);
                return -EINVAL;
        }
 
        /* copy just the xid so we can try to find that */
-       if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) {
+       if (copy_from_user(&xid, &hdr->cm_xid, sizeof(xid)) != 0) {
                dprintk("%s: error when copying xid from userspace", __func__);
                return -EFAULT;
        }
@@ -842,7 +885,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
         * list (for -EINPROGRESS, we just want to make sure the xid is
         * valid, not remove the upcall from the list)
         */
-       if (get_user(status, &cmsg->cm_status)) {
+       if (get_user(status, &hdr->cm_status)) {
                dprintk("%s: error when copying status from userspace", __func__);
                return -EFAULT;
        }
@@ -851,7 +894,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
        cup = NULL;
        spin_lock(&cn->cn_lock);
        list_for_each_entry(tmp, &cn->cn_list, cu_list) {
-               if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) {
+               if (get_unaligned(&tmp->cu_u.cu_hdr.cm_xid) == xid) {
                        cup = tmp;
                        if (status != -EINPROGRESS)
                                list_del_init(&cup->cu_list);
@@ -869,7 +912,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
        if (status == -EINPROGRESS)
                return __cld_pipe_inprogress_downcall(cmsg, nn);
 
-       if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
+       if (copy_from_user(&cup->cu_u.cu_msg_v2, src, mlen) != 0)
                return -EFAULT;
 
        complete(&cup->cu_done);
@@ -881,7 +924,7 @@ cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
 {
        struct cld_msg *cmsg = msg->data;
        struct cld_upcall *cup = container_of(cmsg, struct cld_upcall,
-                                                cu_msg);
+                                                cu_u.cu_msg);
 
        /* errno >= 0 means we got a downcall */
        if (msg->errno >= 0)
@@ -1007,14 +1050,17 @@ nfsd4_remove_cld_pipe(struct net *net)
 
        nfsd4_cld_unregister_net(net, cn->cn_pipe);
        rpc_destroy_pipe_data(cn->cn_pipe);
+       if (cn->cn_tfm)
+               crypto_free_shash(cn->cn_tfm);
        kfree(nn->cld_net);
        nn->cld_net = NULL;
 }
 
 static struct cld_upcall *
-alloc_cld_upcall(struct cld_net *cn)
+alloc_cld_upcall(struct nfsd_net *nn)
 {
        struct cld_upcall *new, *tmp;
+       struct cld_net *cn = nn->cld_net;
 
        new = kzalloc(sizeof(*new), GFP_KERNEL);
        if (!new)
@@ -1024,20 +1070,20 @@ alloc_cld_upcall(struct cld_net *cn)
 restart_search:
        spin_lock(&cn->cn_lock);
        list_for_each_entry(tmp, &cn->cn_list, cu_list) {
-               if (tmp->cu_msg.cm_xid == cn->cn_xid) {
+               if (tmp->cu_u.cu_msg.cm_xid == cn->cn_xid) {
                        cn->cn_xid++;
                        spin_unlock(&cn->cn_lock);
                        goto restart_search;
                }
        }
        init_completion(&new->cu_done);
-       new->cu_msg.cm_vers = CLD_UPCALL_VERSION;
-       put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid);
+       new->cu_u.cu_msg.cm_vers = nn->client_tracking_ops->version;
+       put_unaligned(cn->cn_xid++, &new->cu_u.cu_msg.cm_xid);
        new->cu_net = cn;
        list_add(&new->cu_list, &cn->cn_list);
        spin_unlock(&cn->cn_lock);
 
-       dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid);
+       dprintk("%s: allocated xid %u\n", __func__, new->cu_u.cu_msg.cm_xid);
 
        return new;
 }
@@ -1066,20 +1112,20 @@ nfsd4_cld_create(struct nfs4_client *clp)
        if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
                return;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                ret = -ENOMEM;
                goto out_err;
        }
 
-       cup->cu_msg.cm_cmd = Cld_Create;
-       cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
-       memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+       cup->cu_u.cu_msg.cm_cmd = Cld_Create;
+       cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+       memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
                        clp->cl_name.len);
 
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret) {
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
                set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
        }
 
@@ -1092,6 +1138,75 @@ out_err:
 
 /* Ask daemon to create a new record */
 static void
+nfsd4_cld_create_v2(struct nfs4_client *clp)
+{
+       int ret;
+       struct cld_upcall *cup;
+       struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+       struct cld_net *cn = nn->cld_net;
+       struct cld_msg_v2 *cmsg;
+       struct crypto_shash *tfm = cn->cn_tfm;
+       struct xdr_netobj cksum;
+       char *principal = NULL;
+       SHASH_DESC_ON_STACK(desc, tfm);
+
+       /* Don't upcall if it's already stored */
+       if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+               return;
+
+       cup = alloc_cld_upcall(nn);
+       if (!cup) {
+               ret = -ENOMEM;
+               goto out_err;
+       }
+
+       cmsg = &cup->cu_u.cu_msg_v2;
+       cmsg->cm_cmd = Cld_Create;
+       cmsg->cm_u.cm_clntinfo.cc_name.cn_len = clp->cl_name.len;
+       memcpy(cmsg->cm_u.cm_clntinfo.cc_name.cn_id, clp->cl_name.data,
+                       clp->cl_name.len);
+       if (clp->cl_cred.cr_raw_principal)
+               principal = clp->cl_cred.cr_raw_principal;
+       else if (clp->cl_cred.cr_principal)
+               principal = clp->cl_cred.cr_principal;
+       if (principal) {
+               desc->tfm = tfm;
+               cksum.len = crypto_shash_digestsize(tfm);
+               cksum.data = kmalloc(cksum.len, GFP_KERNEL);
+               if (cksum.data == NULL) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+               ret = crypto_shash_digest(desc, principal, strlen(principal),
+                                         cksum.data);
+               shash_desc_zero(desc);
+               if (ret) {
+                       kfree(cksum.data);
+                       goto out;
+               }
+               cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = cksum.len;
+               memcpy(cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data,
+                      cksum.data, cksum.len);
+               kfree(cksum.data);
+       } else
+               cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0;
+
+       ret = cld_pipe_upcall(cn->cn_pipe, cmsg);
+       if (!ret) {
+               ret = cmsg->cm_status;
+               set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+       }
+
+out:
+       free_cld_upcall(cup);
+out_err:
+       if (ret)
+               pr_err("NFSD: Unable to create client record on stable storage: %d\n",
+                               ret);
+}
+
+/* Ask daemon to remove a record */
+static void
 nfsd4_cld_remove(struct nfs4_client *clp)
 {
        int ret;
@@ -1103,20 +1218,20 @@ nfsd4_cld_remove(struct nfs4_client *clp)
        if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
                return;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                ret = -ENOMEM;
                goto out_err;
        }
 
-       cup->cu_msg.cm_cmd = Cld_Remove;
-       cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
-       memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+       cup->cu_u.cu_msg.cm_cmd = Cld_Remove;
+       cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+       memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
                        clp->cl_name.len);
 
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret) {
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
                clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
        }
 
@@ -1145,21 +1260,21 @@ nfsd4_cld_check_v0(struct nfs4_client *clp)
        if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
                return 0;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                printk(KERN_ERR "NFSD: Unable to check client record on "
                                "stable storage: %d\n", -ENOMEM);
                return -ENOMEM;
        }
 
-       cup->cu_msg.cm_cmd = Cld_Check;
-       cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
-       memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+       cup->cu_u.cu_msg.cm_cmd = Cld_Check;
+       cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+       memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
                        clp->cl_name.len);
 
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret) {
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
                set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
        }
 
@@ -1217,22 +1332,95 @@ found:
 }
 
 static int
+nfsd4_cld_check_v2(struct nfs4_client *clp)
+{
+       struct nfs4_client_reclaim *crp;
+       struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+       struct cld_net *cn = nn->cld_net;
+       int status;
+       char dname[HEXDIR_LEN];
+       struct xdr_netobj name;
+       struct crypto_shash *tfm = cn->cn_tfm;
+       struct xdr_netobj cksum;
+       char *principal = NULL;
+       SHASH_DESC_ON_STACK(desc, tfm);
+
+       /* did we already find that this client is stable? */
+       if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+               return 0;
+
+       /* look for it in the reclaim hashtable otherwise */
+       crp = nfsd4_find_reclaim_client(clp->cl_name, nn);
+       if (crp)
+               goto found;
+
+       if (cn->cn_has_legacy) {
+               status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+               if (status)
+                       return -ENOENT;
+
+               name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
+               if (!name.data) {
+                       dprintk("%s: failed to allocate memory for name.data\n",
+                                       __func__);
+                       return -ENOENT;
+               }
+               name.len = HEXDIR_LEN;
+               crp = nfsd4_find_reclaim_client(name, nn);
+               kfree(name.data);
+               if (crp)
+                       goto found;
+
+       }
+       return -ENOENT;
+found:
+       if (crp->cr_princhash.len) {
+               if (clp->cl_cred.cr_raw_principal)
+                       principal = clp->cl_cred.cr_raw_principal;
+               else if (clp->cl_cred.cr_principal)
+                       principal = clp->cl_cred.cr_principal;
+               if (principal == NULL)
+                       return -ENOENT;
+               desc->tfm = tfm;
+               cksum.len = crypto_shash_digestsize(tfm);
+               cksum.data = kmalloc(cksum.len, GFP_KERNEL);
+               if (cksum.data == NULL)
+                       return -ENOENT;
+               status = crypto_shash_digest(desc, principal, strlen(principal),
+                                            cksum.data);
+               shash_desc_zero(desc);
+               if (status) {
+                       kfree(cksum.data);
+                       return -ENOENT;
+               }
+               if (memcmp(crp->cr_princhash.data, cksum.data,
+                               crp->cr_princhash.len)) {
+                       kfree(cksum.data);
+                       return -ENOENT;
+               }
+               kfree(cksum.data);
+       }
+       crp->cr_clp = clp;
+       return 0;
+}
+
+static int
 nfsd4_cld_grace_start(struct nfsd_net *nn)
 {
        int ret;
        struct cld_upcall *cup;
        struct cld_net *cn = nn->cld_net;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                ret = -ENOMEM;
                goto out_err;
        }
 
-       cup->cu_msg.cm_cmd = Cld_GraceStart;
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       cup->cu_u.cu_msg.cm_cmd = Cld_GraceStart;
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret)
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
 
        free_cld_upcall(cup);
 out_err:
@@ -1250,17 +1438,17 @@ nfsd4_cld_grace_done_v0(struct nfsd_net *nn)
        struct cld_upcall *cup;
        struct cld_net *cn = nn->cld_net;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                ret = -ENOMEM;
                goto out_err;
        }
 
-       cup->cu_msg.cm_cmd = Cld_GraceDone;
-       cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+       cup->cu_u.cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret)
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
 
        free_cld_upcall(cup);
 out_err:
@@ -1279,16 +1467,16 @@ nfsd4_cld_grace_done(struct nfsd_net *nn)
        struct cld_upcall *cup;
        struct cld_net *cn = nn->cld_net;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                ret = -ENOMEM;
                goto out_err;
        }
 
-       cup->cu_msg.cm_cmd = Cld_GraceDone;
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret)
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
 
        free_cld_upcall(cup);
 out_err:
@@ -1337,6 +1525,53 @@ cld_running(struct nfsd_net *nn)
 }
 
 static int
+nfsd4_cld_get_version(struct nfsd_net *nn)
+{
+       int ret = 0;
+       struct cld_upcall *cup;
+       struct cld_net *cn = nn->cld_net;
+       uint8_t version;
+
+       cup = alloc_cld_upcall(nn);
+       if (!cup) {
+               ret = -ENOMEM;
+               goto out_err;
+       }
+       cup->cu_u.cu_msg.cm_cmd = Cld_GetVersion;
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
+       if (!ret) {
+               ret = cup->cu_u.cu_msg.cm_status;
+               if (ret)
+                       goto out_free;
+               version = cup->cu_u.cu_msg.cm_u.cm_version;
+               dprintk("%s: userspace returned version %u\n",
+                               __func__, version);
+               if (version < 1)
+                       version = 1;
+               else if (version > CLD_UPCALL_VERSION)
+                       version = CLD_UPCALL_VERSION;
+
+               switch (version) {
+               case 1:
+                       nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
+                       break;
+               case 2:
+                       nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v2;
+                       break;
+               default:
+                       break;
+               }
+       }
+out_free:
+       free_cld_upcall(cup);
+out_err:
+       if (ret)
+               dprintk("%s: Unable to get version from userspace: %d\n",
+                       __func__, ret);
+       return ret;
+}
+
+static int
 nfsd4_cld_tracking_init(struct net *net)
 {
        int status;
@@ -1351,6 +1586,11 @@ nfsd4_cld_tracking_init(struct net *net)
        status = __nfsd4_init_cld_pipe(net);
        if (status)
                goto err_shutdown;
+       nn->cld_net->cn_tfm = crypto_alloc_shash("sha256", 0, 0);
+       if (IS_ERR(nn->cld_net->cn_tfm)) {
+               status = PTR_ERR(nn->cld_net->cn_tfm);
+               goto err_remove;
+       }
 
        /*
         * rpc pipe upcalls take 30 seconds to time out, so we don't want to
@@ -1368,10 +1608,14 @@ nfsd4_cld_tracking_init(struct net *net)
                goto err_remove;
        }
 
+       status = nfsd4_cld_get_version(nn);
+       if (status == -EOPNOTSUPP)
+               pr_warn("NFSD: nfsdcld GetVersion upcall failed. Please upgrade nfsdcld.\n");
+
        status = nfsd4_cld_grace_start(nn);
        if (status) {
                if (status == -EOPNOTSUPP)
-                       printk(KERN_WARNING "NFSD: Please upgrade nfsdcld.\n");
+                       pr_warn("NFSD: nfsdcld GraceStart upcall failed. Please upgrade nfsdcld.\n");
                nfs4_release_reclaim(nn);
                goto err_remove;
        } else
@@ -1403,6 +1647,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v0 = {
        .remove         = nfsd4_cld_remove,
        .check          = nfsd4_cld_check_v0,
        .grace_done     = nfsd4_cld_grace_done_v0,
+       .version        = 1,
+       .msglen         = sizeof(struct cld_msg),
 };
 
 /* For newer nfsdcld's */
@@ -1413,6 +1659,20 @@ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
        .remove         = nfsd4_cld_remove,
        .check          = nfsd4_cld_check,
        .grace_done     = nfsd4_cld_grace_done,
+       .version        = 1,
+       .msglen         = sizeof(struct cld_msg),
+};
+
+/* v2 create/check ops include the principal, if available */
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2 = {
+       .init           = nfsd4_cld_tracking_init,
+       .exit           = nfsd4_cld_tracking_exit,
+       .create         = nfsd4_cld_create_v2,
+       .remove         = nfsd4_cld_remove,
+       .check          = nfsd4_cld_check_v2,
+       .grace_done     = nfsd4_cld_grace_done,
+       .version        = 2,
+       .msglen         = sizeof(struct cld_msg_v2),
 };
 
 /* upcall via usermodehelper */
@@ -1760,6 +2020,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
        .remove         = nfsd4_umh_cltrack_remove,
        .check          = nfsd4_umh_cltrack_check,
        .grace_done     = nfsd4_umh_cltrack_grace_done,
+       .version        = 1,
+       .msglen         = 0,
 };
 
 int
index 7857942..c65aeaa 100644 (file)
@@ -50,6 +50,7 @@
 
 #include "netns.h"
 #include "pnfs.h"
+#include "filecache.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
@@ -429,18 +430,18 @@ put_nfs4_file(struct nfs4_file *fi)
        }
 }
 
-static struct file *
+static struct nfsd_file *
 __nfs4_get_fd(struct nfs4_file *f, int oflag)
 {
        if (f->fi_fds[oflag])
-               return get_file(f->fi_fds[oflag]);
+               return nfsd_file_get(f->fi_fds[oflag]);
        return NULL;
 }
 
-static struct file *
+static struct nfsd_file *
 find_writeable_file_locked(struct nfs4_file *f)
 {
-       struct file *ret;
+       struct nfsd_file *ret;
 
        lockdep_assert_held(&f->fi_lock);
 
@@ -450,10 +451,10 @@ find_writeable_file_locked(struct nfs4_file *f)
        return ret;
 }
 
-static struct file *
+static struct nfsd_file *
 find_writeable_file(struct nfs4_file *f)
 {
-       struct file *ret;
+       struct nfsd_file *ret;
 
        spin_lock(&f->fi_lock);
        ret = find_writeable_file_locked(f);
@@ -462,9 +463,10 @@ find_writeable_file(struct nfs4_file *f)
        return ret;
 }
 
-static struct file *find_readable_file_locked(struct nfs4_file *f)
+static struct nfsd_file *
+find_readable_file_locked(struct nfs4_file *f)
 {
-       struct file *ret;
+       struct nfsd_file *ret;
 
        lockdep_assert_held(&f->fi_lock);
 
@@ -474,10 +476,10 @@ static struct file *find_readable_file_locked(struct nfs4_file *f)
        return ret;
 }
 
-static struct file *
+static struct nfsd_file *
 find_readable_file(struct nfs4_file *f)
 {
-       struct file *ret;
+       struct nfsd_file *ret;
 
        spin_lock(&f->fi_lock);
        ret = find_readable_file_locked(f);
@@ -486,10 +488,10 @@ find_readable_file(struct nfs4_file *f)
        return ret;
 }
 
-struct file *
+struct nfsd_file *
 find_any_file(struct nfs4_file *f)
 {
-       struct file *ret;
+       struct nfsd_file *ret;
 
        spin_lock(&f->fi_lock);
        ret = __nfs4_get_fd(f, O_RDWR);
@@ -590,17 +592,17 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
        might_lock(&fp->fi_lock);
 
        if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) {
-               struct file *f1 = NULL;
-               struct file *f2 = NULL;
+               struct nfsd_file *f1 = NULL;
+               struct nfsd_file *f2 = NULL;
 
                swap(f1, fp->fi_fds[oflag]);
                if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
                        swap(f2, fp->fi_fds[O_RDWR]);
                spin_unlock(&fp->fi_lock);
                if (f1)
-                       fput(f1);
+                       nfsd_file_put(f1);
                if (f2)
-                       fput(f2);
+                       nfsd_file_put(f2);
        }
 }
 
@@ -933,25 +935,25 @@ nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid)
 
 static void put_deleg_file(struct nfs4_file *fp)
 {
-       struct file *filp = NULL;
+       struct nfsd_file *nf = NULL;
 
        spin_lock(&fp->fi_lock);
        if (--fp->fi_delegees == 0)
-               swap(filp, fp->fi_deleg_file);
+               swap(nf, fp->fi_deleg_file);
        spin_unlock(&fp->fi_lock);
 
-       if (filp)
-               fput(filp);
+       if (nf)
+               nfsd_file_put(nf);
 }
 
 static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
 {
        struct nfs4_file *fp = dp->dl_stid.sc_file;
-       struct file *filp = fp->fi_deleg_file;
+       struct nfsd_file *nf = fp->fi_deleg_file;
 
        WARN_ON_ONCE(!fp->fi_delegees);
 
-       vfs_setlease(filp, F_UNLCK, NULL, (void **)&dp);
+       vfs_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&dp);
        put_deleg_file(fp);
 }
 
@@ -1289,11 +1291,14 @@ static void nfs4_free_lock_stateid(struct nfs4_stid *stid)
 {
        struct nfs4_ol_stateid *stp = openlockstateid(stid);
        struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
-       struct file *file;
+       struct nfsd_file *nf;
 
-       file = find_any_file(stp->st_stid.sc_file);
-       if (file)
-               filp_close(file, (fl_owner_t)lo);
+       nf = find_any_file(stp->st_stid.sc_file);
+       if (nf) {
+               get_file(nf->nf_file);
+               filp_close(nf->nf_file, (fl_owner_t)lo);
+               nfsd_file_put(nf);
+       }
        nfs4_free_ol_stateid(stid);
 }
 
@@ -1563,21 +1568,39 @@ static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
  * re-negotiate active sessions and reduce their slot usage to make
  * room for new connections. For now we just fail the create session.
  */
-static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
+static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
 {
        u32 slotsize = slot_bytes(ca);
        u32 num = ca->maxreqs;
        unsigned long avail, total_avail;
+       unsigned int scale_factor;
 
        spin_lock(&nfsd_drc_lock);
-       total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
+       if (nfsd_drc_max_mem > nfsd_drc_mem_used)
+               total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
+       else
+               /* We have handed out more space than we chose in
+                * set_max_drc() to allow.  That isn't really a
+                * problem as long as that doesn't make us think we
+                * have lots more due to integer overflow.
+                */
+               total_avail = 0;
        avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
        /*
-        * Never use more than a third of the remaining memory,
-        * unless it's the only way to give this client a slot:
+        * Never use more than a fraction of the remaining memory,
+        * unless it's the only way to give this client a slot.
+        * The chosen fraction is either 1/8 or 1/number of threads,
+        * whichever is smaller.  This ensures there are adequate
+        * slots to support multiple clients per thread.
+        * Give the client one slot even if that would require
+        * over-allocation--it is better than failure.
         */
-       avail = clamp_t(unsigned long, avail, slotsize, total_avail/3);
+       scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
+
+       avail = clamp_t(unsigned long, avail, slotsize,
+                       total_avail/scale_factor);
        num = min_t(int, num, avail / slotsize);
+       num = max_t(int, num, 1);
        nfsd_drc_mem_used += num * slotsize;
        spin_unlock(&nfsd_drc_lock);
 
@@ -2323,9 +2346,9 @@ static void states_stop(struct seq_file *s, void *v)
        spin_unlock(&clp->cl_lock);
 }
 
-static void nfs4_show_superblock(struct seq_file *s, struct file *f)
+static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f)
 {
-       struct inode *inode = file_inode(f);
+       struct inode *inode = f->nf_inode;
 
        seq_printf(s, "superblock: \"%02x:%02x:%ld\"",
                                        MAJOR(inode->i_sb->s_dev),
@@ -2343,7 +2366,7 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
 {
        struct nfs4_ol_stateid *ols;
        struct nfs4_file *nf;
-       struct file *file;
+       struct nfsd_file *file;
        struct nfs4_stateowner *oo;
        unsigned int access, deny;
 
@@ -2370,7 +2393,7 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
        seq_printf(s, ", ");
        nfs4_show_owner(s, oo);
        seq_printf(s, " }\n");
-       fput(file);
+       nfsd_file_put(file);
 
        return 0;
 }
@@ -2379,7 +2402,7 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
 {
        struct nfs4_ol_stateid *ols;
        struct nfs4_file *nf;
-       struct file *file;
+       struct nfsd_file *file;
        struct nfs4_stateowner *oo;
 
        ols = openlockstateid(st);
@@ -2401,7 +2424,7 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
        seq_printf(s, ", ");
        nfs4_show_owner(s, oo);
        seq_printf(s, " }\n");
-       fput(file);
+       nfsd_file_put(file);
 
        return 0;
 }
@@ -2410,7 +2433,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
 {
        struct nfs4_delegation *ds;
        struct nfs4_file *nf;
-       struct file *file;
+       struct nfsd_file *file;
 
        ds = delegstateid(st);
        nf = st->sc_file;
@@ -2433,7 +2456,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
 static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
 {
        struct nfs4_layout_stateid *ls;
-       struct file *file;
+       struct nfsd_file *file;
 
        ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
        file = ls->ls_file;
@@ -3169,10 +3192,10 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
         * performance.  When short on memory we therefore prefer to
         * decrease number of slots instead of their size.  Clients that
         * request larger slots than they need will get poor results:
+        * Note that we always allow at least one slot, because our
+        * accounting is soft and provides no guarantees either way.
         */
-       ca->maxreqs = nfsd4_get_drc_mem(ca);
-       if (!ca->maxreqs)
-               return nfserr_jukebox;
+       ca->maxreqs = nfsd4_get_drc_mem(ca, nn);
 
        return nfs_ok;
 }
@@ -4651,7 +4674,7 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
                struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
                struct nfsd4_open *open)
 {
-       struct file *filp = NULL;
+       struct nfsd_file *nf = NULL;
        __be32 status;
        int oflag = nfs4_access_to_omode(open->op_share_access);
        int access = nfs4_access_to_access(open->op_share_access);
@@ -4687,18 +4710,18 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
 
        if (!fp->fi_fds[oflag]) {
                spin_unlock(&fp->fi_lock);
-               status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp);
+               status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
                if (status)
                        goto out_put_access;
                spin_lock(&fp->fi_lock);
                if (!fp->fi_fds[oflag]) {
-                       fp->fi_fds[oflag] = filp;
-                       filp = NULL;
+                       fp->fi_fds[oflag] = nf;
+                       nf = NULL;
                }
        }
        spin_unlock(&fp->fi_lock);
-       if (filp)
-               fput(filp);
+       if (nf)
+               nfsd_file_put(nf);
 
        status = nfsd4_truncate(rqstp, cur_fh, open);
        if (status)
@@ -4767,7 +4790,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
        fl->fl_end = OFFSET_MAX;
        fl->fl_owner = (fl_owner_t)dp;
        fl->fl_pid = current->tgid;
-       fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file;
+       fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
        return fl;
 }
 
@@ -4777,7 +4800,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
 {
        int status = 0;
        struct nfs4_delegation *dp;
-       struct file *filp;
+       struct nfsd_file *nf;
        struct file_lock *fl;
 
        /*
@@ -4788,8 +4811,8 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
        if (fp->fi_had_conflict)
                return ERR_PTR(-EAGAIN);
 
-       filp = find_readable_file(fp);
-       if (!filp) {
+       nf = find_readable_file(fp);
+       if (!nf) {
                /* We should always have a readable file here */
                WARN_ON_ONCE(1);
                return ERR_PTR(-EBADF);
@@ -4799,17 +4822,17 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
        if (nfs4_delegation_exists(clp, fp))
                status = -EAGAIN;
        else if (!fp->fi_deleg_file) {
-               fp->fi_deleg_file = filp;
+               fp->fi_deleg_file = nf;
                /* increment early to prevent fi_deleg_file from being
                 * cleared */
                fp->fi_delegees = 1;
-               filp = NULL;
+               nf = NULL;
        } else
                fp->fi_delegees++;
        spin_unlock(&fp->fi_lock);
        spin_unlock(&state_lock);
-       if (filp)
-               fput(filp);
+       if (nf)
+               nfsd_file_put(nf);
        if (status)
                return ERR_PTR(status);
 
@@ -4822,7 +4845,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
        if (!fl)
                goto out_clnt_odstate;
 
-       status = vfs_setlease(fp->fi_deleg_file, fl->fl_type, &fl, NULL);
+       status = vfs_setlease(fp->fi_deleg_file->nf_file, fl->fl_type, &fl, NULL);
        if (fl)
                locks_free_lock(fl);
        if (status)
@@ -4842,7 +4865,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
 
        return dp;
 out_unlock:
-       vfs_setlease(fp->fi_deleg_file, F_UNLCK, NULL, (void **)&dp);
+       vfs_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp);
 out_clnt_odstate:
        put_clnt_odstate(dp->dl_clnt_odstate);
        nfs4_put_stid(&dp->dl_stid);
@@ -5513,7 +5536,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
        return nfs_ok;
 }
 
-static struct file *
+static struct nfsd_file *
 nfs4_find_file(struct nfs4_stid *s, int flags)
 {
        if (!s)
@@ -5523,7 +5546,7 @@ nfs4_find_file(struct nfs4_stid *s, int flags)
        case NFS4_DELEG_STID:
                if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
                        return NULL;
-               return get_file(s->sc_file->fi_deleg_file);
+               return nfsd_file_get(s->sc_file->fi_deleg_file);
        case NFS4_OPEN_STID:
        case NFS4_LOCK_STID:
                if (flags & RD_STATE)
@@ -5549,32 +5572,28 @@ nfs4_check_olstateid(struct nfs4_ol_stateid *ols, int flags)
 
 static __be32
 nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
-               struct file **filpp, bool *tmp_file, int flags)
+               struct nfsd_file **nfp, int flags)
 {
        int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE;
-       struct file *file;
+       struct nfsd_file *nf;
        __be32 status;
 
-       file = nfs4_find_file(s, flags);
-       if (file) {
+       nf = nfs4_find_file(s, flags);
+       if (nf) {
                status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
                                acc | NFSD_MAY_OWNER_OVERRIDE);
                if (status) {
-                       fput(file);
-                       return status;
+                       nfsd_file_put(nf);
+                       goto out;
                }
-
-               *filpp = file;
        } else {
-               status = nfsd_open(rqstp, fhp, S_IFREG, acc, filpp);
+               status = nfsd_file_acquire(rqstp, fhp, acc, &nf);
                if (status)
                        return status;
-
-               if (tmp_file)
-                       *tmp_file = true;
        }
-
-       return 0;
+       *nfp = nf;
+out:
+       return status;
 }
 
 /*
@@ -5583,7 +5602,7 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
 __be32
 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
                struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
-               stateid_t *stateid, int flags, struct file **filpp, bool *tmp_file)
+               stateid_t *stateid, int flags, struct nfsd_file **nfp)
 {
        struct inode *ino = d_inode(fhp->fh_dentry);
        struct net *net = SVC_NET(rqstp);
@@ -5591,10 +5610,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
        struct nfs4_stid *s = NULL;
        __be32 status;
 
-       if (filpp)
-               *filpp = NULL;
-       if (tmp_file)
-               *tmp_file = false;
+       if (nfp)
+               *nfp = NULL;
 
        if (grace_disallows_io(net, ino))
                return nfserr_grace;
@@ -5631,8 +5648,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
        status = nfs4_check_fh(fhp, s);
 
 done:
-       if (!status && filpp)
-               status = nfs4_check_file(rqstp, fhp, s, filpp, tmp_file, flags);
+       if (status == nfs_ok && nfp)
+               status = nfs4_check_file(rqstp, fhp, s, nfp, flags);
 out:
        if (s)
                nfs4_put_stid(s);
@@ -6392,7 +6409,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        struct nfs4_ol_stateid *lock_stp = NULL;
        struct nfs4_ol_stateid *open_stp = NULL;
        struct nfs4_file *fp;
-       struct file *filp = NULL;
+       struct nfsd_file *nf = NULL;
        struct nfsd4_blocked_lock *nbl = NULL;
        struct file_lock *file_lock = NULL;
        struct file_lock *conflock = NULL;
@@ -6474,8 +6491,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                        /* Fallthrough */
                case NFS4_READ_LT:
                        spin_lock(&fp->fi_lock);
-                       filp = find_readable_file_locked(fp);
-                       if (filp)
+                       nf = find_readable_file_locked(fp);
+                       if (nf)
                                get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
                        spin_unlock(&fp->fi_lock);
                        fl_type = F_RDLCK;
@@ -6486,8 +6503,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                        /* Fallthrough */
                case NFS4_WRITE_LT:
                        spin_lock(&fp->fi_lock);
-                       filp = find_writeable_file_locked(fp);
-                       if (filp)
+                       nf = find_writeable_file_locked(fp);
+                       if (nf)
                                get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
                        spin_unlock(&fp->fi_lock);
                        fl_type = F_WRLCK;
@@ -6497,7 +6514,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                goto out;
        }
 
-       if (!filp) {
+       if (!nf) {
                status = nfserr_openmode;
                goto out;
        }
@@ -6513,7 +6530,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        file_lock->fl_type = fl_type;
        file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
        file_lock->fl_pid = current->tgid;
-       file_lock->fl_file = filp;
+       file_lock->fl_file = nf->nf_file;
        file_lock->fl_flags = fl_flags;
        file_lock->fl_lmops = &nfsd_posix_mng_ops;
        file_lock->fl_start = lock->lk_offset;
@@ -6535,7 +6552,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                spin_unlock(&nn->blocked_locks_lock);
        }
 
-       err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
+       err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, conflock);
        switch (err) {
        case 0: /* success! */
                nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
@@ -6570,8 +6587,8 @@ out:
                }
                free_blocked_lock(nbl);
        }
-       if (filp)
-               fput(filp);
+       if (nf)
+               nfsd_file_put(nf);
        if (lock_stp) {
                /* Bump seqid manually if the 4.0 replay owner is openowner */
                if (cstate->replay_owner &&
@@ -6606,11 +6623,11 @@ out:
  */
 static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
 {
-       struct file *file;
-       __be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+       struct nfsd_file *nf;
+       __be32 err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
        if (!err) {
-               err = nfserrno(vfs_test_lock(file, lock));
-               fput(file);
+               err = nfserrno(vfs_test_lock(nf->nf_file, lock));
+               nfsd_file_put(nf);
        }
        return err;
 }
@@ -6698,7 +6715,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
        struct nfsd4_locku *locku = &u->locku;
        struct nfs4_ol_stateid *stp;
-       struct file *filp = NULL;
+       struct nfsd_file *nf = NULL;
        struct file_lock *file_lock = NULL;
        __be32 status;
        int err;
@@ -6716,8 +6733,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                                        &stp, nn);
        if (status)
                goto out;
-       filp = find_any_file(stp->st_stid.sc_file);
-       if (!filp) {
+       nf = find_any_file(stp->st_stid.sc_file);
+       if (!nf) {
                status = nfserr_lock_range;
                goto put_stateid;
        }
@@ -6725,13 +6742,13 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        if (!file_lock) {
                dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
                status = nfserr_jukebox;
-               goto fput;
+               goto put_file;
        }
 
        file_lock->fl_type = F_UNLCK;
        file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner));
        file_lock->fl_pid = current->tgid;
-       file_lock->fl_file = filp;
+       file_lock->fl_file = nf->nf_file;
        file_lock->fl_flags = FL_POSIX;
        file_lock->fl_lmops = &nfsd_posix_mng_ops;
        file_lock->fl_start = locku->lu_offset;
@@ -6740,14 +6757,14 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                                                locku->lu_length);
        nfs4_transform_lock_offset(file_lock);
 
-       err = vfs_lock_file(filp, F_SETLK, file_lock, NULL);
+       err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, NULL);
        if (err) {
                dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
                goto out_nfserr;
        }
        nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid);
-fput:
-       fput(filp);
+put_file:
+       nfsd_file_put(nf);
 put_stateid:
        mutex_unlock(&stp->st_mutex);
        nfs4_put_stid(&stp->st_stid);
@@ -6759,7 +6776,7 @@ out:
 
 out_nfserr:
        status = nfserrno(err);
-       goto fput;
+       goto put_file;
 }
 
 /*
@@ -6772,17 +6789,17 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
 {
        struct file_lock *fl;
        int status = false;
-       struct file *filp = find_any_file(fp);
+       struct nfsd_file *nf = find_any_file(fp);
        struct inode *inode;
        struct file_lock_context *flctx;
 
-       if (!filp) {
+       if (!nf) {
                /* Any valid lock stateid should have some sort of access */
                WARN_ON_ONCE(1);
                return status;
        }
 
-       inode = locks_inode(filp);
+       inode = locks_inode(nf->nf_file);
        flctx = inode->i_flctx;
 
        if (flctx && !list_empty_careful(&flctx->flc_posix)) {
@@ -6795,7 +6812,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
                }
                spin_unlock(&flctx->flc_lock);
        }
-       fput(filp);
+       nfsd_file_put(nf);
        return status;
 }
 
@@ -6888,7 +6905,8 @@ nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn)
  * will be freed in nfs4_remove_reclaim_record in the normal case).
  */
 struct nfs4_client_reclaim *
-nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn)
+nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash,
+               struct nfsd_net *nn)
 {
        unsigned int strhashval;
        struct nfs4_client_reclaim *crp;
@@ -6901,6 +6919,8 @@ nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn)
                list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]);
                crp->cr_name.data = name.data;
                crp->cr_name.len = name.len;
+               crp->cr_princhash.data = princhash.data;
+               crp->cr_princhash.len = princhash.len;
                crp->cr_clp = NULL;
                nn->reclaim_str_hashtbl_size++;
        }
@@ -6912,6 +6932,7 @@ nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn)
 {
        list_del(&crp->cr_strhash);
        kfree(crp->cr_name.data);
+       kfree(crp->cr_princhash.data);
        kfree(crp);
        nn->reclaim_str_hashtbl_size--;
 }
index 4428118..533d0fc 100644 (file)
@@ -49,6 +49,7 @@
 #include "cache.h"
 #include "netns.h"
 #include "pnfs.h"
+#include "filecache.h"
 
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 #include <linux/security.h>
@@ -203,6 +204,13 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
        return p;
 }
 
+static unsigned int compoundargs_bytes_left(struct nfsd4_compoundargs *argp)
+{
+       unsigned int this = (char *)argp->end - (char *)argp->p;
+
+       return this + argp->pagelen;
+}
+
 static int zero_clientid(clientid_t *clid)
 {
        return (clid->cl_boot == 0) && (clid->cl_id == 0);
@@ -211,10 +219,10 @@ static int zero_clientid(clientid_t *clid)
 /**
  * svcxdr_tmpalloc - allocate memory to be freed after compound processing
  * @argp: NFSv4 compound argument structure
- * @p: pointer to be freed (with kfree())
+ * @len: length of buffer to allocate
  *
- * Marks @p to be freed when processing the compound operation
- * described in @argp finishes.
+ * Allocates a buffer of size @len to be freed when processing the compound
+ * operation described in @argp finishes.
  */
 static void *
 svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len)
@@ -347,7 +355,12 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
                READ_BUF(4); len += 4;
                nace = be32_to_cpup(p++);
 
-               if (nace > NFS4_ACL_MAX)
+               if (nace > compoundargs_bytes_left(argp)/20)
+                       /*
+                        * Even with 4-byte names there wouldn't be
+                        * space for that many aces; something fishy is
+                        * going on:
+                        */
                        return nfserr_fbig;
 
                *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace));
@@ -1418,7 +1431,6 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
                            struct nfsd4_create_session *sess)
 {
        DECODE_HEAD;
-       u32 dummy;
 
        READ_BUF(16);
        COPYMEM(&sess->clientid, 8);
@@ -1427,7 +1439,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
 
        /* Fore channel attrs */
        READ_BUF(28);
-       dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
+       p++; /* headerpadsz is always 0 */
        sess->fore_channel.maxreq_sz = be32_to_cpup(p++);
        sess->fore_channel.maxresp_sz = be32_to_cpup(p++);
        sess->fore_channel.maxresp_cached = be32_to_cpup(p++);
@@ -1444,7 +1456,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
 
        /* Back channel attrs */
        READ_BUF(28);
-       dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
+       p++; /* headerpadsz is always 0 */
        sess->back_channel.maxreq_sz = be32_to_cpup(p++);
        sess->back_channel.maxresp_sz = be32_to_cpup(p++);
        sess->back_channel.maxresp_cached = be32_to_cpup(p++);
@@ -1736,7 +1748,6 @@ static __be32
 nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
 {
        DECODE_HEAD;
-       unsigned int tmp;
 
        status = nfsd4_decode_stateid(argp, &copy->cp_src_stateid);
        if (status)
@@ -1751,7 +1762,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
        p = xdr_decode_hyper(p, &copy->cp_count);
        p++; /* ca_consecutive: we always do consecutive copies */
        copy->cp_synchronous = be32_to_cpup(p++);
-       tmp = be32_to_cpup(p); /* Source server list not supported */
+       /* Source server list not supported; the value at p is not decoded */
 
        DECODE_TAIL;
 }
@@ -3217,9 +3228,8 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
        if (!p)
                return nfserr_resource;
        encode_cinfo(p, &create->cr_cinfo);
-       nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
+       return nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
                        create->cr_bmval[1], create->cr_bmval[2]);
-       return 0;
 }
 
 static __be32
@@ -3462,7 +3472,7 @@ static __be32 nfsd4_encode_splice_read(
 
        len = maxcount;
        nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
-                                 file, read->rd_offset, &maxcount);
+                                 file, read->rd_offset, &maxcount, &eof);
        read->rd_length = maxcount;
        if (nfserr) {
                /*
@@ -3474,9 +3484,6 @@ static __be32 nfsd4_encode_splice_read(
                return nfserr;
        }
 
-       eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
-                               d_inode(read->rd_fhp->fh_dentry)->i_size);
-
        *(p++) = htonl(eof);
        *(p++) = htonl(maxcount);
 
@@ -3547,15 +3554,13 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 
        len = maxcount;
        nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
-                           resp->rqstp->rq_vec, read->rd_vlen, &maxcount);
+                           resp->rqstp->rq_vec, read->rd_vlen, &maxcount,
+                           &eof);
        read->rd_length = maxcount;
        if (nfserr)
                return nfserr;
        xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
 
-       eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
-                               d_inode(read->rd_fhp->fh_dentry)->i_size);
-
        tmp = htonl(eof);
        write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4);
        tmp = htonl(maxcount);
@@ -3574,11 +3579,14 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 {
        unsigned long maxcount;
        struct xdr_stream *xdr = &resp->xdr;
-       struct file *file = read->rd_filp;
+       struct file *file;
        int starting_len = xdr->buf->len;
-       struct raparms *ra = NULL;
        __be32 *p;
 
+       if (nfserr)
+               return nfserr;
+       file = read->rd_nf->nf_file;
+
        p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
        if (!p) {
                WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
@@ -3596,18 +3604,12 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
                         (xdr->buf->buflen - xdr->buf->len));
        maxcount = min_t(unsigned long, maxcount, read->rd_length);
 
-       if (read->rd_tmp_file)
-               ra = nfsd_init_raparms(file);
-
        if (file->f_op->splice_read &&
            test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
                nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
        else
                nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
 
-       if (ra)
-               nfsd_put_raparams(file, ra);
-
        if (nfserr)
                xdr_truncate_encode(xdr, starting_len);
 
index 2c21517..11b42c5 100644 (file)
@@ -1476,6 +1476,7 @@ static __net_init int nfsd_init_net(struct net *net)
 
        atomic_set(&nn->ntf_refcnt, 0);
        init_waitqueue_head(&nn->ntf_wq);
+       seqlock_init(&nn->boot_lock);
 
        mnt =  vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
        if (IS_ERR(mnt)) {
index 0d20fd1..c83ddac 100644 (file)
@@ -172,6 +172,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
        struct nfsd_readargs *argp = rqstp->rq_argp;
        struct nfsd_readres *resp = rqstp->rq_resp;
        __be32  nfserr;
+       u32 eof;
 
        dprintk("nfsd: READ    %s %d bytes at %d\n",
                SVCFH_fmt(&argp->fh),
@@ -195,7 +196,8 @@ nfsd_proc_read(struct svc_rqst *rqstp)
        nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh),
                                  argp->offset,
                                  rqstp->rq_vec, argp->vlen,
-                                 &resp->count);
+                                 &resp->count,
+                                 &eof);
 
        if (nfserr) return nfserr;
        return fh_getattr(&resp->fh, &resp->stat);
index 18d94ea..fdf7ed4 100644 (file)
@@ -27,6 +27,7 @@
 #include "cache.h"
 #include "vfs.h"
 #include "netns.h"
+#include "filecache.h"
 
 #define NFSDDBG_FACILITY       NFSDDBG_SVC
 
@@ -313,22 +314,17 @@ static int nfsd_startup_generic(int nrservs)
        if (nfsd_users++)
                return 0;
 
-       /*
-        * Readahead param cache - will no-op if it already exists.
-        * (Note therefore results will be suboptimal if number of
-        * threads is modified after nfsd start.)
-        */
-       ret = nfsd_racache_init(2*nrservs);
+       ret = nfsd_file_cache_init();
        if (ret)
                goto dec_users;
 
        ret = nfs4_state_start();
        if (ret)
-               goto out_racache;
+               goto out_file_cache;
        return 0;
 
-out_racache:
-       nfsd_racache_shutdown();
+out_file_cache:
+       nfsd_file_cache_shutdown();
 dec_users:
        nfsd_users--;
        return ret;
@@ -340,7 +336,7 @@ static void nfsd_shutdown_generic(void)
                return;
 
        nfs4_state_shutdown();
-       nfsd_racache_shutdown();
+       nfsd_file_cache_shutdown();
 }
 
 static bool nfsd_needs_lockd(struct nfsd_net *nn)
@@ -348,6 +344,35 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn)
        return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST);
 }
 
+void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
+{
+       int seq = 0;
+
+       do {
+               read_seqbegin_or_lock(&nn->boot_lock, &seq);
+               /*
+                * The verifier is opaque to the client, so there is no need
+                * to byte-swap. Use __force to keep sparse happy. y2038
+                * time_t overflow is irrelevant in this usage.
+                */
+               verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
+               verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
+       } while (need_seqretry(&nn->boot_lock, seq));
+       done_seqretry(&nn->boot_lock, seq);
+}
+
+static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
+{
+       ktime_get_real_ts64(&nn->nfssvc_boot);
+}
+
+void nfsd_reset_boot_verifier(struct nfsd_net *nn)
+{
+       write_seqlock(&nn->boot_lock);
+       nfsd_reset_boot_verifier_locked(nn);
+       write_sequnlock(&nn->boot_lock);
+}
+
 static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cred)
 {
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -391,6 +416,7 @@ static void nfsd_shutdown_net(struct net *net)
 {
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
+       nfsd_file_cache_purge(net);
        nfs4_state_shutdown_net(net);
        if (nn->lockd_up) {
                lockd_down(net);
@@ -599,7 +625,7 @@ int nfsd_create_serv(struct net *net)
 #endif
        }
        atomic_inc(&nn->ntf_refcnt);
-       ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */
+       nfsd_reset_boot_verifier(nn);
        return 0;
 }
 
index 5dbd169..46f56af 100644 (file)
@@ -378,6 +378,7 @@ struct nfs4_client_reclaim {
        struct list_head        cr_strhash;     /* hash by cr_name */
        struct nfs4_client      *cr_clp;        /* pointer to associated clp */
        struct xdr_netobj       cr_name;        /* recovery dir name */
+       struct xdr_netobj       cr_princhash;
 };
 
 /* A reasonable value for REPLAY_ISIZE was estimated as follows:  
@@ -506,7 +507,7 @@ struct nfs4_file {
        };
        struct list_head        fi_clnt_odstate;
        /* One each for O_RDONLY, O_WRONLY, O_RDWR: */
-       struct file *           fi_fds[3];
+       struct nfsd_file        *fi_fds[3];
        /*
         * Each open or lock stateid contributes 0-4 to the counts
         * below depending on which bits are set in st_access_bitmap:
@@ -516,7 +517,7 @@ struct nfs4_file {
         */
        atomic_t                fi_access[2];
        u32                     fi_share_deny;
-       struct file             *fi_deleg_file;
+       struct nfsd_file        *fi_deleg_file;
        int                     fi_delegees;
        struct knfsd_fh         fi_fhandle;
        bool                    fi_had_conflict;
@@ -565,7 +566,7 @@ struct nfs4_layout_stateid {
        spinlock_t                      ls_lock;
        struct list_head                ls_layouts;
        u32                             ls_layout_type;
-       struct file                     *ls_file;
+       struct nfsd_file                *ls_file;
        struct nfsd4_callback           ls_recall;
        stateid_t                       ls_recall_sid;
        bool                            ls_recalled;
@@ -616,7 +617,7 @@ struct nfsd4_copy;
 
 extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
                struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
-               stateid_t *stateid, int flags, struct file **filp, bool *tmp_file);
+               stateid_t *stateid, int flags, struct nfsd_file **filp);
 __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
                     stateid_t *stateid, unsigned char typemask,
                     struct nfs4_stid **s, struct nfsd_net *nn);
@@ -645,7 +646,7 @@ extern void nfsd4_shutdown_callback(struct nfs4_client *);
 extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
 extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
 extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
-                                                       struct nfsd_net *nn);
+                               struct xdr_netobj princhash, struct nfsd_net *nn);
 extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
 
 struct nfs4_file *find_file(struct knfsd_fh *fh);
@@ -657,7 +658,7 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
 {
        refcount_inc(&fi->fi_ref);
 }
-struct file *find_any_file(struct nfs4_file *f);
+struct nfsd_file *find_any_file(struct nfs4_file *f);
 
 /* grace period management */
 void nfsd4_end_grace(struct nfsd_net *nn);
index 80933e4..ffc78a0 100644 (file)
@@ -126,6 +126,8 @@ DEFINE_NFSD_ERR_EVENT(read_err);
 DEFINE_NFSD_ERR_EVENT(write_err);
 
 #include "state.h"
+#include "filecache.h"
+#include "vfs.h"
 
 DECLARE_EVENT_CLASS(nfsd_stateid_class,
        TP_PROTO(stateid_t *stp),
@@ -164,6 +166,144 @@ DEFINE_STATEID_EVENT(layout_recall_done);
 DEFINE_STATEID_EVENT(layout_recall_fail);
 DEFINE_STATEID_EVENT(layout_recall_release);
 
+#define show_nf_flags(val)                                             \
+       __print_flags(val, "|",                                         \
+               { 1 << NFSD_FILE_HASHED,        "HASHED" },             \
+               { 1 << NFSD_FILE_PENDING,       "PENDING" },            \
+               { 1 << NFSD_FILE_BREAK_READ,    "BREAK_READ" },         \
+               { 1 << NFSD_FILE_BREAK_WRITE,   "BREAK_WRITE" },        \
+               { 1 << NFSD_FILE_REFERENCED,    "REFERENCED"})
+
+/* FIXME: This should probably be fleshed out in the future. */
+#define show_nf_may(val)                                               \
+       __print_flags(val, "|",                                         \
+               { NFSD_MAY_READ,                "READ" },               \
+               { NFSD_MAY_WRITE,               "WRITE" },              \
+               { NFSD_MAY_NOT_BREAK_LEASE,     "NOT_BREAK_LEASE" })
+
+DECLARE_EVENT_CLASS(nfsd_file_class,
+       TP_PROTO(struct nfsd_file *nf),
+       TP_ARGS(nf),
+       TP_STRUCT__entry(
+               __field(unsigned int, nf_hashval)
+               __field(void *, nf_inode)
+               __field(int, nf_ref)
+               __field(unsigned long, nf_flags)
+               __field(unsigned char, nf_may)
+               __field(struct file *, nf_file)
+       ),
+       TP_fast_assign(
+               __entry->nf_hashval = nf->nf_hashval;
+               __entry->nf_inode = nf->nf_inode;
+               __entry->nf_ref = atomic_read(&nf->nf_ref);
+               __entry->nf_flags = nf->nf_flags;
+               __entry->nf_may = nf->nf_may;
+               __entry->nf_file = nf->nf_file;
+       ),
+       TP_printk("hash=0x%x inode=0x%p ref=%d flags=%s may=%s file=%p",
+               __entry->nf_hashval,
+               __entry->nf_inode,
+               __entry->nf_ref,
+               show_nf_flags(__entry->nf_flags),
+               show_nf_may(__entry->nf_may),
+               __entry->nf_file)
+)
+
+#define DEFINE_NFSD_FILE_EVENT(name) \
+DEFINE_EVENT(nfsd_file_class, name, \
+       TP_PROTO(struct nfsd_file *nf), \
+       TP_ARGS(nf))
+
+DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked);
+
+TRACE_EVENT(nfsd_file_acquire,
+       TP_PROTO(struct svc_rqst *rqstp, unsigned int hash,
+                struct inode *inode, unsigned int may_flags,
+                struct nfsd_file *nf, __be32 status),
+
+       TP_ARGS(rqstp, hash, inode, may_flags, nf, status),
+
+       TP_STRUCT__entry(
+               __field(__be32, xid)
+               __field(unsigned int, hash)
+               __field(void *, inode)
+               __field(unsigned int, may_flags)
+               __field(int, nf_ref)
+               __field(unsigned long, nf_flags)
+               __field(unsigned char, nf_may)
+               __field(struct file *, nf_file)
+               __field(__be32, status)
+       ),
+
+       TP_fast_assign(
+               __entry->xid = rqstp->rq_xid;
+               __entry->hash = hash;
+               __entry->inode = inode;
+               __entry->may_flags = may_flags;
+               __entry->nf_ref = nf ? atomic_read(&nf->nf_ref) : 0;
+               __entry->nf_flags = nf ? nf->nf_flags : 0;
+               __entry->nf_may = nf ? nf->nf_may : 0;
+               __entry->nf_file = nf ? nf->nf_file : NULL;
+               __entry->status = status;
+       ),
+
+       TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u",
+                       be32_to_cpu(__entry->xid), __entry->hash, __entry->inode,
+                       show_nf_may(__entry->may_flags), __entry->nf_ref,
+                       show_nf_flags(__entry->nf_flags),
+                       show_nf_may(__entry->nf_may), __entry->nf_file,
+                       be32_to_cpu(__entry->status))
+);
+
+DECLARE_EVENT_CLASS(nfsd_file_search_class,
+       TP_PROTO(struct inode *inode, unsigned int hash, int found),
+       TP_ARGS(inode, hash, found),
+       TP_STRUCT__entry(
+               __field(struct inode *, inode)
+               __field(unsigned int, hash)
+               __field(int, found)
+       ),
+       TP_fast_assign(
+               __entry->inode = inode;
+               __entry->hash = hash;
+               __entry->found = found;
+       ),
+       TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash,
+                       __entry->inode, __entry->found)
+);
+
+#define DEFINE_NFSD_FILE_SEARCH_EVENT(name)                            \
+DEFINE_EVENT(nfsd_file_search_class, name,                             \
+       TP_PROTO(struct inode *inode, unsigned int hash, int found),    \
+       TP_ARGS(inode, hash, found))
+
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync);
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode);
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached);
+
+TRACE_EVENT(nfsd_file_fsnotify_handle_event,
+       TP_PROTO(struct inode *inode, u32 mask),
+       TP_ARGS(inode, mask),
+       TP_STRUCT__entry(
+               __field(struct inode *, inode)
+               __field(unsigned int, nlink)
+               __field(umode_t, mode)
+               __field(u32, mask)
+       ),
+       TP_fast_assign(
+               __entry->inode = inode;
+               __entry->nlink = inode->i_nlink;
+               __entry->mode = inode->i_mode;
+               __entry->mask = mask;
+       ),
+       TP_printk("inode=0x%p nlink=%u mode=0%ho mask=0x%x", __entry->inode,
+                       __entry->nlink, __entry->mode, __entry->mask)
+);
+
 #endif /* _NFSD_TRACE_H */
 
 #undef TRACE_INCLUDE_PATH
index c85783e..bd0a385 100644 (file)
 
 #include "nfsd.h"
 #include "vfs.h"
+#include "filecache.h"
 #include "trace.h"
 
 #define NFSDDBG_FACILITY               NFSDDBG_FILEOP
 
-
-/*
- * This is a cache of readahead params that help us choose the proper
- * readahead strategy. Initially, we set all readahead parameters to 0
- * and let the VFS handle things.
- * If you increase the number of cached files very much, you'll need to
- * add a hash table here.
- */
-struct raparms {
-       struct raparms          *p_next;
-       unsigned int            p_count;
-       ino_t                   p_ino;
-       dev_t                   p_dev;
-       int                     p_set;
-       struct file_ra_state    p_ra;
-       unsigned int            p_hindex;
-};
-
-struct raparm_hbucket {
-       struct raparms          *pb_head;
-       spinlock_t              pb_lock;
-} ____cacheline_aligned_in_smp;
-
-#define RAPARM_HASH_BITS       4
-#define RAPARM_HASH_SIZE       (1<<RAPARM_HASH_BITS)
-#define RAPARM_HASH_MASK       (RAPARM_HASH_SIZE-1)
-static struct raparm_hbucket   raparm_hash[RAPARM_HASH_SIZE];
-
 /* 
  * Called from nfsd_lookup and encode_dirent. Check if we have crossed 
  * a mount point.
@@ -699,7 +672,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
 }
 #endif /* CONFIG_NFSD_V3 */
 
-static int nfsd_open_break_lease(struct inode *inode, int access)
+int nfsd_open_break_lease(struct inode *inode, int access)
 {
        unsigned int mode;
 
@@ -715,8 +688,8 @@ static int nfsd_open_break_lease(struct inode *inode, int access)
  * and additional flags.
  * N.B. After this call fhp needs an fh_put
  */
-__be32
-nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+static __be32
+__nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
                        int may_flags, struct file **filp)
 {
        struct path     path;
@@ -726,25 +699,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
        __be32          err;
        int             host_err = 0;
 
-       validate_process_creds();
-
-       /*
-        * If we get here, then the client has already done an "open",
-        * and (hopefully) checked permission - so allow OWNER_OVERRIDE
-        * in case a chmod has now revoked permission.
-        *
-        * Arguably we should also allow the owner override for
-        * directories, but we never have and it doesn't seem to have
-        * caused anyone a problem.  If we were to change this, note
-        * also that our filldir callbacks would need a variant of
-        * lookup_one_len that doesn't check permissions.
-        */
-       if (type == S_IFREG)
-               may_flags |= NFSD_MAY_OWNER_OVERRIDE;
-       err = fh_verify(rqstp, fhp, type, may_flags);
-       if (err)
-               goto out;
-
        path.mnt = fhp->fh_export->ex_path.mnt;
        path.dentry = fhp->fh_dentry;
        inode = d_inode(path.dentry);
@@ -798,67 +752,46 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
 out_nfserr:
        err = nfserrno(host_err);
 out:
-       validate_process_creds();
        return err;
 }
 
-struct raparms *
-nfsd_init_raparms(struct file *file)
+__be32
+nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+               int may_flags, struct file **filp)
 {
-       struct inode *inode = file_inode(file);
-       dev_t dev = inode->i_sb->s_dev;
-       ino_t ino = inode->i_ino;
-       struct raparms  *ra, **rap, **frap = NULL;
-       int depth = 0;
-       unsigned int hash;
-       struct raparm_hbucket *rab;
-
-       hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
-       rab = &raparm_hash[hash];
-
-       spin_lock(&rab->pb_lock);
-       for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
-               if (ra->p_ino == ino && ra->p_dev == dev)
-                       goto found;
-               depth++;
-               if (ra->p_count == 0)
-                       frap = rap;
-       }
-       depth = nfsdstats.ra_size;
-       if (!frap) {    
-               spin_unlock(&rab->pb_lock);
-               return NULL;
-       }
-       rap = frap;
-       ra = *frap;
-       ra->p_dev = dev;
-       ra->p_ino = ino;
-       ra->p_set = 0;
-       ra->p_hindex = hash;
-found:
-       if (rap != &rab->pb_head) {
-               *rap = ra->p_next;
-               ra->p_next   = rab->pb_head;
-               rab->pb_head = ra;
-       }
-       ra->p_count++;
-       nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
-       spin_unlock(&rab->pb_lock);
+       __be32 err;
 
-       if (ra->p_set)
-               file->f_ra = ra->p_ra;
-       return ra;
+       validate_process_creds();
+       /*
+        * If we get here, then the client has already done an "open",
+        * and (hopefully) checked permission - so allow OWNER_OVERRIDE
+        * in case a chmod has now revoked permission.
+        *
+        * Arguably we should also allow the owner override for
+        * directories, but we never have and it doesn't seem to have
+        * caused anyone a problem.  If we were to change this, note
+        * also that our filldir callbacks would need a variant of
+        * lookup_one_len that doesn't check permissions.
+        */
+       if (type == S_IFREG)
+               may_flags |= NFSD_MAY_OWNER_OVERRIDE;
+       err = fh_verify(rqstp, fhp, type, may_flags);
+       if (!err)
+               err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+       validate_process_creds();
+       return err;
 }
 
-void nfsd_put_raparams(struct file *file, struct raparms *ra)
+__be32
+nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+               int may_flags, struct file **filp)
 {
-       struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
+       __be32 err;
 
-       spin_lock(&rab->pb_lock);
-       ra->p_ra = file->f_ra;
-       ra->p_set = 1;
-       ra->p_count--;
-       spin_unlock(&rab->pb_lock);
+       validate_process_creds();
+       err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+       validate_process_creds();
+       return err;
 }
 
 /*
@@ -901,12 +834,23 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
        return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
 }
 
+static u32 nfsd_eof_on_read(struct file *file, loff_t offset, ssize_t len,
+               size_t expected)
+{
+       if (expected != 0 && len == 0)
+               return 1;
+       if (offset+len >= i_size_read(file_inode(file)))
+               return 1;
+       return 0;
+}
+
 static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
                               struct file *file, loff_t offset,
-                              unsigned long *count, int host_err)
+                              unsigned long *count, u32 *eof, ssize_t host_err)
 {
        if (host_err >= 0) {
                nfsdstats.io_read += host_err;
+               *eof = nfsd_eof_on_read(file, offset, host_err, *count);
                *count = host_err;
                fsnotify_access(file);
                trace_nfsd_read_io_done(rqstp, fhp, offset, *count);
@@ -918,7 +862,8 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 }
 
 __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
-                       struct file *file, loff_t offset, unsigned long *count)
+                       struct file *file, loff_t offset, unsigned long *count,
+                       u32 *eof)
 {
        struct splice_desc sd = {
                .len            = 0,
@@ -926,25 +871,27 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
                .pos            = offset,
                .u.data         = rqstp,
        };
-       int host_err;
+       ssize_t host_err;
 
        trace_nfsd_read_splice(rqstp, fhp, offset, *count);
        rqstp->rq_next_page = rqstp->rq_respages + 1;
        host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
-       return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
+       return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
 }
 
 __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
                  struct file *file, loff_t offset,
-                 struct kvec *vec, int vlen, unsigned long *count)
+                 struct kvec *vec, int vlen, unsigned long *count,
+                 u32 *eof)
 {
        struct iov_iter iter;
-       int host_err;
+       loff_t ppos = offset;
+       ssize_t host_err;
 
        trace_nfsd_read_vector(rqstp, fhp, offset, *count);
        iov_iter_kvec(&iter, READ, vec, vlen, *count);
-       host_err = vfs_iter_read(file, &iter, &offset, 0);
-       return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
+       host_err = vfs_iter_read(file, &iter, &ppos, 0);
+       return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
 }
 
 /*
@@ -1025,8 +972,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
        nfsdstats.io_write += *cnt;
        fsnotify_modify(file);
 
-       if (stable && use_wgather)
+       if (stable && use_wgather) {
                host_err = wait_for_concurrent_writes(file);
+               if (host_err < 0)
+                       nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+                                                nfsd_net_id));
+       }
 
 out_nfserr:
        if (host_err >= 0) {
@@ -1047,27 +998,25 @@ out_nfserr:
  * N.B. After this call fhp needs an fh_put
  */
 __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
-       loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+       loff_t offset, struct kvec *vec, int vlen, unsigned long *count,
+       u32 *eof)
 {
+       struct nfsd_file        *nf;
        struct file *file;
-       struct raparms  *ra;
        __be32 err;
 
        trace_nfsd_read_start(rqstp, fhp, offset, *count);
-       err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+       err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
        if (err)
                return err;
 
-       ra = nfsd_init_raparms(file);
-
+       file = nf->nf_file;
        if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
-               err = nfsd_splice_read(rqstp, fhp, file, offset, count);
+               err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
        else
-               err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count);
+               err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof);
 
-       if (ra)
-               nfsd_put_raparams(file, ra);
-       fput(file);
+       nfsd_file_put(nf);
 
        trace_nfsd_read_done(rqstp, fhp, offset, *count);
 
@@ -1083,17 +1032,18 @@ __be32
 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
           struct kvec *vec, int vlen, unsigned long *cnt, int stable)
 {
-       struct file *file = NULL;
-       __be32 err = 0;
+       struct nfsd_file *nf;
+       __be32 err;
 
        trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
 
-       err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+       err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf);
        if (err)
                goto out;
 
-       err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
-       fput(file);
+       err = nfsd_vfs_write(rqstp, fhp, nf->nf_file, offset, vec,
+                       vlen, cnt, stable);
+       nfsd_file_put(nf);
 out:
        trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
        return err;
@@ -1113,9 +1063,9 @@ __be32
 nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
                loff_t offset, unsigned long count)
 {
-       struct file     *file;
-       loff_t          end = LLONG_MAX;
-       __be32          err = nfserr_inval;
+       struct nfsd_file        *nf;
+       loff_t                  end = LLONG_MAX;
+       __be32                  err = nfserr_inval;
 
        if (offset < 0)
                goto out;
@@ -1125,20 +1075,27 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
                        goto out;
        }
 
-       err = nfsd_open(rqstp, fhp, S_IFREG,
-                       NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
+       err = nfsd_file_acquire(rqstp, fhp,
+                       NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
        if (err)
                goto out;
        if (EX_ISSYNC(fhp->fh_export)) {
-               int err2 = vfs_fsync_range(file, offset, end, 0);
+               int err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
 
-               if (err2 != -EINVAL)
-                       err = nfserrno(err2);
-               else
+               switch (err2) {
+               case 0:
+                       break;
+               case -EINVAL:
                        err = nfserr_notsupp;
+                       break;
+               default:
+                       err = nfserrno(err2);
+                       nfsd_reset_boot_verifier(net_generic(nf->nf_net,
+                                                nfsd_net_id));
+               }
        }
 
-       fput(file);
+       nfsd_file_put(nf);
 out:
        return err;
 }
@@ -1659,6 +1616,26 @@ out_nfserr:
        goto out_unlock;
 }
 
+static void
+nfsd_close_cached_files(struct dentry *dentry)
+{
+       struct inode *inode = d_inode(dentry);
+
+       if (inode && S_ISREG(inode->i_mode))
+               nfsd_file_close_inode_sync(inode);
+}
+
+static bool
+nfsd_has_cached_files(struct dentry *dentry)
+{
+       bool            ret = false;
+       struct inode *inode = d_inode(dentry);
+
+       if (inode && S_ISREG(inode->i_mode))
+               ret = nfsd_file_is_cached(inode);
+       return ret;
+}
+
 /*
  * Rename a file
  * N.B. After this call _both_ ffhp and tfhp need an fh_put
@@ -1671,6 +1648,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        struct inode    *fdir, *tdir;
        __be32          err;
        int             host_err;
+       bool            has_cached = false;
 
        err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
        if (err)
@@ -1689,6 +1667,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
                goto out;
 
+retry:
        host_err = fh_want_write(ffhp);
        if (host_err) {
                err = nfserrno(host_err);
@@ -1728,11 +1707,16 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
                goto out_dput_new;
 
-       host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
-       if (!host_err) {
-               host_err = commit_metadata(tfhp);
-               if (!host_err)
-                       host_err = commit_metadata(ffhp);
+       if (nfsd_has_cached_files(ndentry)) {
+               has_cached = true;
+               goto out_dput_old;
+       } else {
+               host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
+               if (!host_err) {
+                       host_err = commit_metadata(tfhp);
+                       if (!host_err)
+                               host_err = commit_metadata(ffhp);
+               }
        }
  out_dput_new:
        dput(ndentry);
@@ -1745,12 +1729,26 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
         * as that would do the wrong thing if the two directories
         * were the same, so again we do it by hand.
         */
-       fill_post_wcc(ffhp);
-       fill_post_wcc(tfhp);
+       if (!has_cached) {
+               fill_post_wcc(ffhp);
+               fill_post_wcc(tfhp);
+       }
        unlock_rename(tdentry, fdentry);
        ffhp->fh_locked = tfhp->fh_locked = false;
        fh_drop_write(ffhp);
 
+       /*
+        * If the target dentry has cached open files, then we need to try to
+        * close them prior to doing the rename. Flushing delayed fput
+        * shouldn't be done with locks held however, so we delay it until this
+        * point and then reattempt the whole shebang.
+        */
+       if (has_cached) {
+               has_cached = false;
+               nfsd_close_cached_files(ndentry);
+               dput(ndentry);
+               goto retry;
+       }
 out:
        return err;
 }
@@ -1797,10 +1795,13 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        if (!type)
                type = d_inode(rdentry)->i_mode & S_IFMT;
 
-       if (type != S_IFDIR)
+       if (type != S_IFDIR) {
+               nfsd_close_cached_files(rdentry);
                host_err = vfs_unlink(dirp, rdentry, NULL);
-       else
+       } else {
                host_err = vfs_rmdir(dirp, rdentry);
+       }
+
        if (!host_err)
                host_err = commit_metadata(fhp);
        dput(rdentry);
@@ -2074,63 +2075,3 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
 
        return err? nfserrno(err) : 0;
 }
-
-void
-nfsd_racache_shutdown(void)
-{
-       struct raparms *raparm, *last_raparm;
-       unsigned int i;
-
-       dprintk("nfsd: freeing readahead buffers.\n");
-
-       for (i = 0; i < RAPARM_HASH_SIZE; i++) {
-               raparm = raparm_hash[i].pb_head;
-               while(raparm) {
-                       last_raparm = raparm;
-                       raparm = raparm->p_next;
-                       kfree(last_raparm);
-               }
-               raparm_hash[i].pb_head = NULL;
-       }
-}
-/*
- * Initialize readahead param cache
- */
-int
-nfsd_racache_init(int cache_size)
-{
-       int     i;
-       int     j = 0;
-       int     nperbucket;
-       struct raparms **raparm = NULL;
-
-
-       if (raparm_hash[0].pb_head)
-               return 0;
-       nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
-       nperbucket = max(2, nperbucket);
-       cache_size = nperbucket * RAPARM_HASH_SIZE;
-
-       dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
-
-       for (i = 0; i < RAPARM_HASH_SIZE; i++) {
-               spin_lock_init(&raparm_hash[i].pb_lock);
-
-               raparm = &raparm_hash[i].pb_head;
-               for (j = 0; j < nperbucket; j++) {
-                       *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
-                       if (!*raparm)
-                               goto out_nomem;
-                       raparm = &(*raparm)->p_next;
-               }
-               *raparm = NULL;
-       }
-
-       nfsdstats.ra_size = cache_size;
-       return 0;
-
-out_nomem:
-       dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
-       nfsd_racache_shutdown();
-       return -ENOMEM;
-}
index db35124..a13fd9d 100644 (file)
@@ -40,8 +40,6 @@
 typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
 
 /* nfsd/vfs.c */
-int            nfsd_racache_init(int);
-void           nfsd_racache_shutdown(void);
 int            nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
                                struct svc_export **expp);
 __be32         nfsd_lookup(struct svc_rqst *, struct svc_fh *,
@@ -75,18 +73,23 @@ __be32              do_nfsd_create(struct svc_rqst *, struct svc_fh *,
 __be32         nfsd_commit(struct svc_rqst *, struct svc_fh *,
                                loff_t, unsigned long);
 #endif /* CONFIG_NFSD_V3 */
+int            nfsd_open_break_lease(struct inode *, int);
 __be32         nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
                                int, struct file **);
-struct raparms;
+__be32         nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t,
+                               int, struct file **);
 __be32         nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
                                struct file *file, loff_t offset,
-                               unsigned long *count);
+                               unsigned long *count,
+                               u32 *eof);
 __be32         nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
                                struct file *file, loff_t offset,
                                struct kvec *vec, int vlen,
-                               unsigned long *count);
+                               unsigned long *count,
+                               u32 *eof);
 __be32                 nfsd_read(struct svc_rqst *, struct svc_fh *,
-                               loff_t, struct kvec *, int, unsigned long *);
+                               loff_t, struct kvec *, int, unsigned long *,
+                               u32 *eof);
 __be32                 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
                                struct kvec *, int, unsigned long *, int);
 __be32         nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
@@ -115,9 +118,6 @@ __be32              nfsd_statfs(struct svc_rqst *, struct svc_fh *,
 __be32         nfsd_permission(struct svc_rqst *, struct svc_export *,
                                struct dentry *, int);
 
-struct raparms *nfsd_init_raparms(struct file *file);
-void           nfsd_put_raparams(struct file *file, struct raparms *ra);
-
 static inline int fh_want_write(struct svc_fh *fh)
 {
        int ret;
@@ -152,23 +152,4 @@ static inline int nfsd_create_is_exclusive(int createmode)
               || createmode == NFS4_CREATE_EXCLUSIVE4_1;
 }
 
-static inline bool nfsd_eof_on_read(long requested, long read,
-                               loff_t offset, loff_t size)
-{
-       /* We assume a short read means eof: */
-       if (requested > read)
-               return true;
-       /*
-        * A non-short read might also reach end of file.  The spec
-        * still requires us to set eof in that case.
-        *
-        * Further operations may have modified the file size since
-        * the read, so the following check is not atomic with the read.
-        * We've only seen that cause a problem for a client in the case
-        * where the read returned a count of 0 without setting eof.
-        * That case was fixed by the addition of the above check.
-        */
-       return (offset + read >= size);
-}
-
 #endif /* LINUX_NFSD_VFS_H */
index 2cb29e9..99ff9f4 100644 (file)
@@ -151,7 +151,7 @@ struct nfsd3_readres {
        __be32                  status;
        struct svc_fh           fh;
        unsigned long           count;
-       int                     eof;
+       __u32                   eof;
 };
 
 struct nfsd3_writeres {
index d64c870..f4737d6 100644 (file)
@@ -273,15 +273,14 @@ struct nfsd4_open_downgrade {
 
 
 struct nfsd4_read {
-       stateid_t       rd_stateid;         /* request */
-       u64             rd_offset;          /* request */
-       u32             rd_length;          /* request */
-       int             rd_vlen;
-       struct file     *rd_filp;
-       bool            rd_tmp_file;
+       stateid_t               rd_stateid;         /* request */
+       u64                     rd_offset;          /* request */
+       u32                     rd_length;          /* request */
+       int                     rd_vlen;
+       struct nfsd_file        *rd_nf;
        
-       struct svc_rqst *rd_rqstp;          /* response */
-       struct svc_fh rd_fhp;             /* response */
+       struct svc_rqst         *rd_rqstp;          /* response */
+       struct svc_fh           *rd_fhp;             /* response */
 };
 
 struct nfsd4_readdir {
@@ -538,8 +537,8 @@ struct nfsd4_copy {
 
        struct nfs4_client      *cp_clp;
 
-       struct file             *file_src;
-       struct file             *file_dst;
+       struct nfsd_file        *nf_src;
+       struct nfsd_file        *nf_dst;
 
        stateid_t               cp_stateid;
 
index c03758c..7a42c2e 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/sched/signal.h>
 #include <linux/dnotify.h>
 #include <linux/init.h>
+#include <linux/security.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/fdtable.h>
@@ -279,6 +280,17 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
                goto out_err;
        }
 
+       /*
+        * convert the userspace DN_* "arg" to the internal FS_*
+        * defined in fsnotify
+        */
+       mask = convert_arg(arg);
+
+       error = security_path_notify(&filp->f_path, mask,
+                       FSNOTIFY_OBJ_TYPE_INODE);
+       if (error)
+               goto out_err;
+
        /* expect most fcntl to add new rather than augment old */
        dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL);
        if (!dn) {
@@ -293,9 +305,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
                goto out_err;
        }
 
-       /* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */
-       mask = convert_arg(arg);
-
        /* set up the new_fsn_mark and new_dn_mark */
        new_fsn_mark = &new_dn_mark->fsn_mark;
        fsnotify_init_mark(new_fsn_mark, dnotify_group);
index 91006f4..8508ab5 100644 (file)
@@ -528,7 +528,8 @@ static const struct file_operations fanotify_fops = {
 };
 
 static int fanotify_find_path(int dfd, const char __user *filename,
-                             struct path *path, unsigned int flags)
+                             struct path *path, unsigned int flags, __u64 mask,
+                             unsigned int obj_type)
 {
        int ret;
 
@@ -567,8 +568,15 @@ static int fanotify_find_path(int dfd, const char __user *filename,
 
        /* you can only watch an inode if you have read permissions on it */
        ret = inode_permission(path->dentry->d_inode, MAY_READ);
+       if (ret) {
+               path_put(path);
+               goto out;
+       }
+
+       ret = security_path_notify(path, mask, obj_type);
        if (ret)
                path_put(path);
+
 out:
        return ret;
 }
@@ -947,6 +955,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
        __kernel_fsid_t __fsid, *fsid = NULL;
        u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
        unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
+       unsigned int obj_type;
        int ret;
 
        pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
@@ -961,8 +970,13 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 
        switch (mark_type) {
        case FAN_MARK_INODE:
+               obj_type = FSNOTIFY_OBJ_TYPE_INODE;
+               break;
        case FAN_MARK_MOUNT:
+               obj_type = FSNOTIFY_OBJ_TYPE_VFSMOUNT;
+               break;
        case FAN_MARK_FILESYSTEM:
+               obj_type = FSNOTIFY_OBJ_TYPE_SB;
                break;
        default:
                return -EINVAL;
@@ -1030,7 +1044,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
                goto fput_and_out;
        }
 
-       ret = fanotify_find_path(dfd, pathname, &path, flags);
+       ret = fanotify_find_path(dfd, pathname, &path, flags,
+                       (mask & ALL_FSNOTIFY_EVENTS), obj_type);
        if (ret)
                goto fput_and_out;
 
index 5a00121..f346282 100644 (file)
@@ -54,8 +54,6 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
 {
        fsnotify_destroy_marks(&sb->s_fsnotify_marks);
 }
-/* Wait until all marks queued for destruction are destroyed */
-extern void fsnotify_wait_marks_destroyed(void);
 
 /*
  * update the dentry->d_flags of all of inode's children to indicate if inode cares
index 0391190..133f723 100644 (file)
@@ -108,6 +108,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
        if (refcount_dec_and_test(&group->refcnt))
                fsnotify_final_destroy_group(group);
 }
+EXPORT_SYMBOL_GPL(fsnotify_put_group);
 
 /*
  * Create a new fsnotify_group and hold a reference for the group returned.
@@ -137,6 +138,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 
        return group;
 }
+EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
 
 int fsnotify_fasync(int fd, struct file *file, int on)
 {
index 0b81517..107537a 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/poll.h>
 #include <linux/wait.h>
 #include <linux/memcontrol.h>
+#include <linux/security.h>
 
 #include "inotify.h"
 #include "../fdinfo.h"
@@ -331,7 +332,8 @@ static const struct file_operations inotify_fops = {
 /*
  * find_inode - resolve a user-given path to a specific inode
  */
-static int inotify_find_inode(const char __user *dirname, struct path *path, unsigned flags)
+static int inotify_find_inode(const char __user *dirname, struct path *path,
+                                               unsigned int flags, __u64 mask)
 {
        int error;
 
@@ -340,8 +342,15 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
                return error;
        /* you can only watch an inode if you have read permissions on it */
        error = inode_permission(path->dentry->d_inode, MAY_READ);
+       if (error) {
+               path_put(path);
+               return error;
+       }
+       error = security_path_notify(path, mask,
+                               FSNOTIFY_OBJ_TYPE_INODE);
        if (error)
                path_put(path);
+
        return error;
 }
 
@@ -733,7 +742,8 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
        if (mask & IN_ONLYDIR)
                flags |= LOOKUP_DIRECTORY;
 
-       ret = inotify_find_inode(pathname, &path, flags);
+       ret = inotify_find_inode(pathname, &path, flags,
+                       (mask & IN_ALL_EVENTS));
        if (ret)
                goto fput_and_out;
 
index 99ddd12..1d96216 100644 (file)
@@ -276,6 +276,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
        queue_delayed_work(system_unbound_wq, &reaper_work,
                           FSNOTIFY_REAPER_DELAY);
 }
+EXPORT_SYMBOL_GPL(fsnotify_put_mark);
 
 /*
  * Get mark reference when we found the mark via lockless traversal of object
@@ -430,6 +431,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
        mutex_unlock(&group->mark_mutex);
        fsnotify_free_mark(mark);
 }
+EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
 
 /*
  * Sorting function for lists of fsnotify marks.
@@ -685,6 +687,7 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
        mutex_unlock(&group->mark_mutex);
        return ret;
 }
+EXPORT_SYMBOL_GPL(fsnotify_add_mark);
 
 /*
  * Given a list of marks, find the mark associated with given group. If found
@@ -711,6 +714,7 @@ struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
        spin_unlock(&conn->lock);
        return NULL;
 }
+EXPORT_SYMBOL_GPL(fsnotify_find_mark);
 
 /* Clear any marks in a group with given type mask */
 void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
@@ -809,6 +813,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
        mark->group = group;
        WRITE_ONCE(mark->connector, NULL);
 }
+EXPORT_SYMBOL_GPL(fsnotify_init_mark);
 
 /*
  * Destroy all marks in destroy_list, waits for SRCU period to finish before
@@ -837,3 +842,4 @@ void fsnotify_wait_marks_destroyed(void)
 {
        flush_delayed_work(&reaper_work);
 }
+EXPORT_SYMBOL_GPL(fsnotify_wait_marks_destroyed);
index 20c841a..3aac5c9 100644 (file)
@@ -71,7 +71,7 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
        }
        /* Read, map, and pin the page. */
        page = ntfs_map_page(mft_vi->i_mapping, index);
-       if (likely(!IS_ERR(page))) {
+       if (!IS_ERR(page)) {
                /* Catch multi sector transfer fixup errors. */
                if (likely(ntfs_is_mft_recordp((le32*)(page_address(page) +
                                ofs)))) {
@@ -154,7 +154,7 @@ MFT_RECORD *map_mft_record(ntfs_inode *ni)
        mutex_lock(&ni->mrec_lock);
 
        m = map_mft_record_page(ni);
-       if (likely(!IS_ERR(m)))
+       if (!IS_ERR(m))
                return m;
 
        mutex_unlock(&ni->mrec_lock);
@@ -271,7 +271,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
                m = map_mft_record(ni);
                /* map_mft_record() has incremented this on success. */
                atomic_dec(&ni->count);
-               if (likely(!IS_ERR(m))) {
+               if (!IS_ERR(m)) {
                        /* Verify the sequence number. */
                        if (likely(le16_to_cpu(m->sequence_number) == seq_no)) {
                                ntfs_debug("Done 1.");
@@ -1303,7 +1303,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
        read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
        rl = ntfs_attr_find_vcn_nolock(mftbmp_ni,
                        (ll - 1) >> vol->cluster_size_bits, NULL);
-       if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
+       if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) {
                up_write(&mftbmp_ni->runlist.lock);
                ntfs_error(vol->sb, "Failed to determine last allocated "
                                "cluster of mft bitmap attribute.");
@@ -1734,7 +1734,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
        read_unlock_irqrestore(&mft_ni->size_lock, flags);
        rl = ntfs_attr_find_vcn_nolock(mft_ni,
                        (ll - 1) >> vol->cluster_size_bits, NULL);
-       if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
+       if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) {
                up_write(&mft_ni->runlist.lock);
                ntfs_error(vol->sb, "Failed to determine last allocated "
                                "cluster of mft data attribute.");
@@ -1776,7 +1776,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
        do {
                rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE,
                                true);
-               if (likely(!IS_ERR(rl2)))
+               if (!IS_ERR(rl2))
                        break;
                if (PTR_ERR(rl2) != -ENOSPC || nr == min_nr) {
                        ntfs_error(vol->sb, "Failed to allocate the minimal "
index 2d3cc9e..4e6a44b 100644 (file)
@@ -115,7 +115,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
                dent_ino = MREF(mref);
                ntfs_debug("Found inode 0x%lx. Calling ntfs_iget.", dent_ino);
                dent_inode = ntfs_iget(vol->sb, dent_ino);
-               if (likely(!IS_ERR(dent_inode))) {
+               if (!IS_ERR(dent_inode)) {
                        /* Consistency check. */
                        if (is_bad_inode(dent_inode) || MSEQNO(mref) ==
                                        NTFS_I(dent_inode)->seq_no ||
index 508744a..97932fb 100644 (file)
@@ -951,7 +951,7 @@ mpa_err:
        }
        /* Now combine the new and old runlists checking for overlaps. */
        old_rl = ntfs_runlists_merge(old_rl, rl);
-       if (likely(!IS_ERR(old_rl)))
+       if (!IS_ERR(old_rl))
                return old_rl;
        ntfs_free(rl);
        ntfs_error(vol->sb, "Failed to merge runlists.");
index 29621d4..7dc3bc6 100644 (file)
@@ -1475,7 +1475,7 @@ not_enabled:
        kfree(name);
        /* Get the inode. */
        tmp_ino = ntfs_iget(vol->sb, MREF(mref));
-       if (unlikely(IS_ERR(tmp_ino) || is_bad_inode(tmp_ino))) {
+       if (IS_ERR(tmp_ino) || unlikely(is_bad_inode(tmp_ino))) {
                if (!IS_ERR(tmp_ino))
                        iput(tmp_ino);
                ntfs_error(vol->sb, "Failed to load $UsnJrnl.");
index 0c335b5..f9baefc 100644 (file)
@@ -5993,6 +5993,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
        struct buffer_head *data_alloc_bh = NULL;
        struct ocfs2_dinode *di;
        struct ocfs2_truncate_log *tl;
+       struct ocfs2_journal *journal = osb->journal;
 
        BUG_ON(inode_trylock(tl_inode));
 
@@ -6013,6 +6014,20 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
                goto out;
        }
 
+       /* Appending truncate log(TA) and flushing truncate log(TF) are
+        * two separate transactions. They can both be committed but not
+        * checkpointed. If a crash occurs then, both transactions will be
+        * replayed with several clusters already released to global bitmap.
+        * Then truncate log will be replayed resulting in cluster double free.
+        */
+       jbd2_journal_lock_updates(journal->j_journal);
+       status = jbd2_journal_flush(journal->j_journal);
+       jbd2_journal_unlock_updates(journal->j_journal);
+       if (status < 0) {
+               mlog_errno(status);
+               goto out;
+       }
+
        data_alloc_inode = ocfs2_get_system_file_inode(osb,
                                                       GLOBAL_BITMAP_SYSTEM_INODE,
                                                       OCFS2_INVALID_SLOT);
@@ -6792,6 +6807,8 @@ void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
                              struct page *page, int zero, u64 *phys)
 {
        int ret, partial = 0;
+       loff_t start_byte = ((loff_t)page->index << PAGE_SHIFT) + from;
+       loff_t length = to - from;
 
        ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0);
        if (ret)
@@ -6811,7 +6828,8 @@ void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
        if (ret < 0)
                mlog_errno(ret);
        else if (ocfs2_should_order_data(inode)) {
-               ret = ocfs2_jbd2_file_inode(handle, inode);
+               ret = ocfs2_jbd2_inode_add_write(handle, inode,
+                                                start_byte, length);
                if (ret < 0)
                        mlog_errno(ret);
        }
index a4c905d..8de1c9d 100644 (file)
@@ -942,7 +942,8 @@ static void ocfs2_write_failure(struct inode *inode,
 
                if (tmppage && page_has_buffers(tmppage)) {
                        if (ocfs2_should_order_data(inode))
-                               ocfs2_jbd2_file_inode(wc->w_handle, inode);
+                               ocfs2_jbd2_inode_add_write(wc->w_handle, inode,
+                                                          user_pos, user_len);
 
                        block_commit_write(tmppage, from, to);
                }
@@ -2023,8 +2024,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
                }
 
                if (page_has_buffers(tmppage)) {
-                       if (handle && ocfs2_should_order_data(inode))
-                               ocfs2_jbd2_file_inode(handle, inode);
+                       if (handle && ocfs2_should_order_data(inode)) {
+                               loff_t start_byte =
+                                       ((loff_t)tmppage->index << PAGE_SHIFT) +
+                                       from;
+                               loff_t length = to - from;
+                               ocfs2_jbd2_inode_add_write(handle, inode,
+                                                          start_byte, length);
+                       }
                        block_commit_write(tmppage, from, to);
                }
        }
index 429e6a8..eaf042f 100644 (file)
@@ -231,14 +231,6 @@ static int blockcheck_u64_get(void *data, u64 *val)
 }
 DEFINE_SIMPLE_ATTRIBUTE(blockcheck_fops, blockcheck_u64_get, NULL, "%llu\n");
 
-static struct dentry *blockcheck_debugfs_create(const char *name,
-                                               struct dentry *parent,
-                                               u64 *value)
-{
-       return debugfs_create_file(name, S_IFREG | S_IRUSR, parent, value,
-                                  &blockcheck_fops);
-}
-
 static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
 {
        if (stats) {
@@ -250,16 +242,20 @@ static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
 static void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
                                           struct dentry *parent)
 {
-       stats->b_debug_dir = debugfs_create_dir("blockcheck", parent);
+       struct dentry *dir;
+
+       dir = debugfs_create_dir("blockcheck", parent);
+       stats->b_debug_dir = dir;
+
+       debugfs_create_file("blocks_checked", S_IFREG | S_IRUSR, dir,
+                           &stats->b_check_count, &blockcheck_fops);
 
-       blockcheck_debugfs_create("blocks_checked", stats->b_debug_dir,
-                                 &stats->b_check_count);
+       debugfs_create_file("checksums_failed", S_IFREG | S_IRUSR, dir,
+                           &stats->b_failure_count, &blockcheck_fops);
 
-       blockcheck_debugfs_create("checksums_failed", stats->b_debug_dir,
-                                 &stats->b_failure_count);
+       debugfs_create_file("ecc_recoveries", S_IFREG | S_IRUSR, dir,
+                           &stats->b_recover_count, &blockcheck_fops);
 
-       blockcheck_debugfs_create("ecc_recoveries", stats->b_debug_dir,
-                                 &stats->b_recover_count);
 }
 #else
 static inline void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
index f1b6133..a368350 100644 (file)
@@ -225,10 +225,6 @@ struct o2hb_region {
        unsigned int            hr_region_num;
 
        struct dentry           *hr_debug_dir;
-       struct dentry           *hr_debug_livenodes;
-       struct dentry           *hr_debug_regnum;
-       struct dentry           *hr_debug_elapsed_time;
-       struct dentry           *hr_debug_pinned;
        struct o2hb_debug_buf   *hr_db_livenodes;
        struct o2hb_debug_buf   *hr_db_regnum;
        struct o2hb_debug_buf   *hr_db_elapsed_time;
@@ -1394,21 +1390,20 @@ void o2hb_exit(void)
        kfree(o2hb_db_failedregions);
 }
 
-static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir,
-                                       struct o2hb_debug_buf **db, int db_len,
-                                       int type, int size, int len, void *data)
+static void o2hb_debug_create(const char *name, struct dentry *dir,
+                             struct o2hb_debug_buf **db, int db_len, int type,
+                             int size, int len, void *data)
 {
        *db = kmalloc(db_len, GFP_KERNEL);
        if (!*db)
-               return NULL;
+               return;
 
        (*db)->db_type = type;
        (*db)->db_size = size;
        (*db)->db_len = len;
        (*db)->db_data = data;
 
-       return debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db,
-                                  &o2hb_debug_fops);
+       debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db, &o2hb_debug_fops);
 }
 
 static void o2hb_debug_init(void)
@@ -1525,11 +1520,7 @@ static void o2hb_region_release(struct config_item *item)
 
        kfree(reg->hr_slots);
 
-       debugfs_remove(reg->hr_debug_livenodes);
-       debugfs_remove(reg->hr_debug_regnum);
-       debugfs_remove(reg->hr_debug_elapsed_time);
-       debugfs_remove(reg->hr_debug_pinned);
-       debugfs_remove(reg->hr_debug_dir);
+       debugfs_remove_recursive(reg->hr_debug_dir);
        kfree(reg->hr_db_livenodes);
        kfree(reg->hr_db_regnum);
        kfree(reg->hr_db_elapsed_time);
@@ -1988,69 +1979,33 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group
                : NULL;
 }
 
-static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
+static void o2hb_debug_region_init(struct o2hb_region *reg,
+                                  struct dentry *parent)
 {
-       int ret = -ENOMEM;
+       struct dentry *dir;
 
-       reg->hr_debug_dir =
-               debugfs_create_dir(config_item_name(&reg->hr_item), dir);
-       if (!reg->hr_debug_dir) {
-               mlog_errno(ret);
-               goto bail;
-       }
+       dir = debugfs_create_dir(config_item_name(&reg->hr_item), parent);
+       reg->hr_debug_dir = dir;
 
-       reg->hr_debug_livenodes =
-                       o2hb_debug_create(O2HB_DEBUG_LIVENODES,
-                                         reg->hr_debug_dir,
-                                         &(reg->hr_db_livenodes),
-                                         sizeof(*(reg->hr_db_livenodes)),
-                                         O2HB_DB_TYPE_REGION_LIVENODES,
-                                         sizeof(reg->hr_live_node_bitmap),
-                                         O2NM_MAX_NODES, reg);
-       if (!reg->hr_debug_livenodes) {
-               mlog_errno(ret);
-               goto bail;
-       }
+       o2hb_debug_create(O2HB_DEBUG_LIVENODES, dir, &(reg->hr_db_livenodes),
+                         sizeof(*(reg->hr_db_livenodes)),
+                         O2HB_DB_TYPE_REGION_LIVENODES,
+                         sizeof(reg->hr_live_node_bitmap), O2NM_MAX_NODES,
+                         reg);
 
-       reg->hr_debug_regnum =
-                       o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER,
-                                         reg->hr_debug_dir,
-                                         &(reg->hr_db_regnum),
-                                         sizeof(*(reg->hr_db_regnum)),
-                                         O2HB_DB_TYPE_REGION_NUMBER,
-                                         0, O2NM_MAX_NODES, reg);
-       if (!reg->hr_debug_regnum) {
-               mlog_errno(ret);
-               goto bail;
-       }
+       o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER, dir, &(reg->hr_db_regnum),
+                         sizeof(*(reg->hr_db_regnum)),
+                         O2HB_DB_TYPE_REGION_NUMBER, 0, O2NM_MAX_NODES, reg);
 
-       reg->hr_debug_elapsed_time =
-                       o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME,
-                                         reg->hr_debug_dir,
-                                         &(reg->hr_db_elapsed_time),
-                                         sizeof(*(reg->hr_db_elapsed_time)),
-                                         O2HB_DB_TYPE_REGION_ELAPSED_TIME,
-                                         0, 0, reg);
-       if (!reg->hr_debug_elapsed_time) {
-               mlog_errno(ret);
-               goto bail;
-       }
+       o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME, dir,
+                         &(reg->hr_db_elapsed_time),
+                         sizeof(*(reg->hr_db_elapsed_time)),
+                         O2HB_DB_TYPE_REGION_ELAPSED_TIME, 0, 0, reg);
 
-       reg->hr_debug_pinned =
-                       o2hb_debug_create(O2HB_DEBUG_REGION_PINNED,
-                                         reg->hr_debug_dir,
-                                         &(reg->hr_db_pinned),
-                                         sizeof(*(reg->hr_db_pinned)),
-                                         O2HB_DB_TYPE_REGION_PINNED,
-                                         0, 0, reg);
-       if (!reg->hr_debug_pinned) {
-               mlog_errno(ret);
-               goto bail;
-       }
+       o2hb_debug_create(O2HB_DEBUG_REGION_PINNED, dir, &(reg->hr_db_pinned),
+                         sizeof(*(reg->hr_db_pinned)),
+                         O2HB_DB_TYPE_REGION_PINNED, 0, 0, reg);
 
-       ret = 0;
-bail:
-       return ret;
 }
 
 static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group,
@@ -2106,11 +2061,7 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
        if (ret)
                goto unregister_handler;
 
-       ret = o2hb_debug_region_init(reg, o2hb_debug_dir);
-       if (ret) {
-               config_item_put(&reg->hr_item);
-               goto unregister_handler;
-       }
+       o2hb_debug_region_init(reg, o2hb_debug_dir);
 
        return &reg->hr_item;
 
index 784426d..bdef72c 100644 (file)
@@ -3636,7 +3636,7 @@ static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash,
        int i, j, num_used;
        u32 major_hash;
        struct ocfs2_dx_leaf *orig_dx_leaf, *new_dx_leaf;
-       struct ocfs2_dx_entry_list *orig_list, *new_list, *tmp_list;
+       struct ocfs2_dx_entry_list *orig_list, *tmp_list;
        struct ocfs2_dx_entry *dx_entry;
 
        tmp_list = &tmp_dx_leaf->dl_list;
@@ -3645,7 +3645,6 @@ static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash,
                orig_dx_leaf = (struct ocfs2_dx_leaf *) orig_dx_leaves[i]->b_data;
                orig_list = &orig_dx_leaf->dl_list;
                new_dx_leaf = (struct ocfs2_dx_leaf *) new_dx_leaves[i]->b_data;
-               new_list = &new_dx_leaf->dl_list;
 
                num_used = le16_to_cpu(orig_list->de_num_used);
 
index 69a429b..aaf2454 100644 (file)
@@ -142,7 +142,6 @@ struct dlm_ctxt
        atomic_t res_tot_count;
        atomic_t res_cur_count;
 
-       struct dlm_debug_ctxt *dlm_debug_ctxt;
        struct dentry *dlm_debugfs_subroot;
 
        /* NOTE: Next three are protected by dlm_domain_lock */
index a4b58ba..4d0b452 100644 (file)
@@ -853,67 +853,34 @@ static const struct file_operations debug_state_fops = {
 /* files in subroot */
 void dlm_debug_init(struct dlm_ctxt *dlm)
 {
-       struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
-
        /* for dumping dlm_ctxt */
-       dc->debug_state_dentry = debugfs_create_file(DLM_DEBUGFS_DLM_STATE,
-                                                    S_IFREG|S_IRUSR,
-                                                    dlm->dlm_debugfs_subroot,
-                                                    dlm, &debug_state_fops);
+       debugfs_create_file(DLM_DEBUGFS_DLM_STATE, S_IFREG|S_IRUSR,
+                           dlm->dlm_debugfs_subroot, dlm, &debug_state_fops);
 
        /* for dumping lockres */
-       dc->debug_lockres_dentry =
-                       debugfs_create_file(DLM_DEBUGFS_LOCKING_STATE,
-                                           S_IFREG|S_IRUSR,
-                                           dlm->dlm_debugfs_subroot,
-                                           dlm, &debug_lockres_fops);
+       debugfs_create_file(DLM_DEBUGFS_LOCKING_STATE, S_IFREG|S_IRUSR,
+                           dlm->dlm_debugfs_subroot, dlm, &debug_lockres_fops);
 
        /* for dumping mles */
-       dc->debug_mle_dentry = debugfs_create_file(DLM_DEBUGFS_MLE_STATE,
-                                                  S_IFREG|S_IRUSR,
-                                                  dlm->dlm_debugfs_subroot,
-                                                  dlm, &debug_mle_fops);
+       debugfs_create_file(DLM_DEBUGFS_MLE_STATE, S_IFREG|S_IRUSR,
+                           dlm->dlm_debugfs_subroot, dlm, &debug_mle_fops);
 
        /* for dumping lockres on the purge list */
-       dc->debug_purgelist_dentry =
-                       debugfs_create_file(DLM_DEBUGFS_PURGE_LIST,
-                                           S_IFREG|S_IRUSR,
-                                           dlm->dlm_debugfs_subroot,
-                                           dlm, &debug_purgelist_fops);
-}
-
-void dlm_debug_shutdown(struct dlm_ctxt *dlm)
-{
-       struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
-
-       if (dc) {
-               debugfs_remove(dc->debug_purgelist_dentry);
-               debugfs_remove(dc->debug_mle_dentry);
-               debugfs_remove(dc->debug_lockres_dentry);
-               debugfs_remove(dc->debug_state_dentry);
-               kfree(dc);
-               dc = NULL;
-       }
+       debugfs_create_file(DLM_DEBUGFS_PURGE_LIST, S_IFREG|S_IRUSR,
+                           dlm->dlm_debugfs_subroot, dlm,
+                           &debug_purgelist_fops);
 }
 
 /* subroot - domain dir */
-int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
+void dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
 {
-       dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt),
-                                     GFP_KERNEL);
-       if (!dlm->dlm_debug_ctxt) {
-               mlog_errno(-ENOMEM);
-               return -ENOMEM;
-       }
-
        dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name,
                                                      dlm_debugfs_root);
-       return 0;
 }
 
 void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
 {
-       debugfs_remove(dlm->dlm_debugfs_subroot);
+       debugfs_remove_recursive(dlm->dlm_debugfs_subroot);
 }
 
 /* debugfs root */
index 7d0c7c9..f8fd868 100644 (file)
@@ -14,13 +14,6 @@ void dlm_print_one_mle(struct dlm_master_list_entry *mle);
 
 #ifdef CONFIG_DEBUG_FS
 
-struct dlm_debug_ctxt {
-       struct dentry *debug_state_dentry;
-       struct dentry *debug_lockres_dentry;
-       struct dentry *debug_mle_dentry;
-       struct dentry *debug_purgelist_dentry;
-};
-
 struct debug_lockres {
        int dl_len;
        char *dl_buf;
@@ -29,9 +22,8 @@ struct debug_lockres {
 };
 
 void dlm_debug_init(struct dlm_ctxt *dlm);
-void dlm_debug_shutdown(struct dlm_ctxt *dlm);
 
-int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm);
+void dlm_create_debugfs_subroot(struct dlm_ctxt *dlm);
 void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm);
 
 void dlm_create_debugfs_root(void);
@@ -42,12 +34,8 @@ void dlm_destroy_debugfs_root(void);
 static inline void dlm_debug_init(struct dlm_ctxt *dlm)
 {
 }
-static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm)
-{
-}
-static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
+static inline void dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
 {
-       return 0;
 }
 static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
 {
index 7338b5d..ee6f459 100644 (file)
@@ -387,7 +387,6 @@ static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm)
 static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm)
 {
        dlm_unregister_domain_handlers(dlm);
-       dlm_debug_shutdown(dlm);
        dlm_complete_thread(dlm);
        dlm_complete_recovery_thread(dlm);
        dlm_destroy_dlm_worker(dlm);
@@ -1938,7 +1937,6 @@ bail:
 
        if (status) {
                dlm_unregister_domain_handlers(dlm);
-               dlm_debug_shutdown(dlm);
                dlm_complete_thread(dlm);
                dlm_complete_recovery_thread(dlm);
                dlm_destroy_dlm_worker(dlm);
@@ -1992,9 +1990,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
        dlm->key = key;
        dlm->node_num = o2nm_this_node();
 
-       ret = dlm_create_debugfs_subroot(dlm);
-       if (ret < 0)
-               goto leave;
+       dlm_create_debugfs_subroot(dlm);
 
        spin_lock_init(&dlm->spinlock);
        spin_lock_init(&dlm->master_lock);
@@ -2056,6 +2052,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
        mlog(0, "context init: refcount %u\n",
                  kref_read(&dlm->dlm_refs));
 
+       ret = 0;
 leave:
        if (ret < 0 && dlm) {
                if (dlm->master_hash)
index e786577..3883633 100644 (file)
@@ -90,7 +90,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
        enum dlm_status status;
        int actions = 0;
        int in_use;
-        u8 owner;
+       u8 owner;
+       int recovery_wait = 0;
 
        mlog(0, "master_node = %d, valblk = %d\n", master_node,
             flags & LKM_VALBLK);
@@ -193,9 +194,12 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
                }
                if (flags & LKM_CANCEL)
                        lock->cancel_pending = 0;
-               else
-                       lock->unlock_pending = 0;
-
+               else {
+                       if (!lock->unlock_pending)
+                               recovery_wait = 1;
+                       else
+                               lock->unlock_pending = 0;
+               }
        }
 
        /* get an extra ref on lock.  if we are just switching
@@ -229,6 +233,17 @@ leave:
        spin_unlock(&res->spinlock);
        wake_up(&res->wq);
 
+       if (recovery_wait) {
+               spin_lock(&res->spinlock);
+               /* Unlock request will directly succeed after owner dies,
+                * and the lock is already removed from grant list. We have to
+                * wait for RECOVERING done or we miss the chance to purge it
+                * since the removal is much faster than RECOVERING proc.
+                */
+               __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_RECOVERING);
+               spin_unlock(&res->spinlock);
+       }
+
        /* let the caller's final dlm_lock_put handle the actual kfree */
        if (actions & DLM_UNLOCK_FREE_LOCK) {
                /* this should always be coupled with list removal */
index 1420723..6e774c5 100644 (file)
@@ -2508,9 +2508,7 @@ bail:
                        ocfs2_inode_unlock(inode, ex);
        }
 
-       if (local_bh)
-               brelse(local_bh);
-
+       brelse(local_bh);
        return status;
 }
 
@@ -2593,8 +2591,7 @@ int ocfs2_inode_lock_atime(struct inode *inode,
                *level = 1;
                if (ocfs2_should_update_atime(inode, vfsmnt))
                        ocfs2_update_inode_atime(inode, bh);
-               if (bh)
-                       brelse(bh);
+               brelse(bh);
        } else
                *level = 0;
 
@@ -3012,8 +3009,6 @@ struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
 
        kref_init(&dlm_debug->d_refcnt);
        INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
-       dlm_debug->d_locking_state = NULL;
-       dlm_debug->d_locking_filter = NULL;
        dlm_debug->d_filter_secs = 0;
 out:
        return dlm_debug;
@@ -3282,27 +3277,19 @@ static void ocfs2_dlm_init_debug(struct ocfs2_super *osb)
 {
        struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
 
-       dlm_debug->d_locking_state = debugfs_create_file("locking_state",
-                                                        S_IFREG|S_IRUSR,
-                                                        osb->osb_debug_root,
-                                                        osb,
-                                                        &ocfs2_dlm_debug_fops);
+       debugfs_create_file("locking_state", S_IFREG|S_IRUSR,
+                           osb->osb_debug_root, osb, &ocfs2_dlm_debug_fops);
 
-       dlm_debug->d_locking_filter = debugfs_create_u32("locking_filter",
-                                               0600,
-                                               osb->osb_debug_root,
-                                               &dlm_debug->d_filter_secs);
+       debugfs_create_u32("locking_filter", 0600, osb->osb_debug_root,
+                          &dlm_debug->d_filter_secs);
 }
 
 static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
 {
        struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
 
-       if (dlm_debug) {
-               debugfs_remove(dlm_debug->d_locking_state);
-               debugfs_remove(dlm_debug->d_locking_filter);
+       if (dlm_debug)
                ocfs2_put_dlm_debug(dlm_debug);
-       }
 }
 
 int ocfs2_dlm_init(struct ocfs2_super *osb)
index e66a249..e3e2d1b 100644 (file)
@@ -590,8 +590,7 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
                        *extent_flags = rec->e_flags;
        }
 out:
-       if (eb_bh)
-               brelse(eb_bh);
+       brelse(eb_bh);
        return ret;
 }
 
index 4435df3..2e982db 100644 (file)
@@ -706,7 +706,9 @@ leave:
  * Thus, we need to explicitly order the zeroed pages.
  */
 static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode,
-                                               struct buffer_head *di_bh)
+                                                     struct buffer_head *di_bh,
+                                                     loff_t start_byte,
+                                                     loff_t length)
 {
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        handle_t *handle = NULL;
@@ -722,7 +724,7 @@ static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode,
                goto out;
        }
 
-       ret = ocfs2_jbd2_file_inode(handle, inode);
+       ret = ocfs2_jbd2_inode_add_write(handle, inode, start_byte, length);
        if (ret < 0) {
                mlog_errno(ret);
                goto out;
@@ -761,7 +763,9 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
        BUG_ON(abs_to > (((u64)index + 1) << PAGE_SHIFT));
        BUG_ON(abs_from & (inode->i_blkbits - 1));
 
-       handle = ocfs2_zero_start_ordered_transaction(inode, di_bh);
+       handle = ocfs2_zero_start_ordered_transaction(inode, di_bh,
+                                                     abs_from,
+                                                     abs_to - abs_from);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
                goto out;
@@ -2126,7 +2130,6 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = d_inode(dentry);
        struct buffer_head *di_bh = NULL;
-       loff_t end;
 
        /*
         * We start with a read level meta lock and only jump to an ex
@@ -2190,8 +2193,6 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
                        }
                }
 
-               end = pos + count;
-
                ret = ocfs2_check_range_for_refcount(inode, pos, count);
                if (ret == 1) {
                        ocfs2_inode_unlock(inode, meta_level);
index 7ad9d65..7c9dfd5 100644 (file)
@@ -534,7 +534,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
         */
        mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) !=
                        !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE),
-                       "Inode %llu: system file state is ambigous\n",
+                       "Inode %llu: system file state is ambiguous\n",
                        (unsigned long long)args->fi_blkno);
 
        if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
index c0fe6ed..3103ba7 100644 (file)
@@ -144,7 +144,6 @@ static inline void ocfs2_ci_set_new(struct ocfs2_super *osb,
 void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_start(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
-void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
 
 void ocfs2_complete_recovery(struct work_struct *work);
 void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
@@ -232,8 +231,8 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
  *                          ocfs2_journal_access_*() unless you intend to
  *                          manage the checksum by hand.
  *  ocfs2_journal_dirty    - Mark a journalled buffer as having dirty data.
- *  ocfs2_jbd2_file_inode  - Mark an inode so that its data goes out before
- *                           the current handle commits.
+ *  ocfs2_jbd2_inode_add_write  - Mark an inode with range so that its data goes
+ *                                out before the current handle commits.
  */
 
 /* You must always start_trans with a number of buffs > 0, but it's
@@ -441,7 +440,7 @@ static inline int ocfs2_mknod_credits(struct super_block *sb, int is_dir,
  * previous dirblock update in the free list */
 static inline int ocfs2_link_credits(struct super_block *sb)
 {
-       return 2*OCFS2_INODE_UPDATE_CREDITS + 4 +
+       return 2 * OCFS2_INODE_UPDATE_CREDITS + 4 +
               ocfs2_quota_trans_credits(sb);
 }
 
@@ -575,37 +574,12 @@ static inline int ocfs2_calc_bg_discontig_credits(struct super_block *sb)
        return ocfs2_extent_recs_per_gd(sb);
 }
 
-static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
-                                               unsigned int clusters_to_del,
-                                               struct ocfs2_dinode *fe,
-                                               struct ocfs2_extent_list *last_el)
+static inline int ocfs2_jbd2_inode_add_write(handle_t *handle, struct inode *inode,
+                                            loff_t start_byte, loff_t length)
 {
-       /* for dinode + all headers in this pass + update to next leaf */
-       u16 next_free = le16_to_cpu(last_el->l_next_free_rec);
-       u16 tree_depth = le16_to_cpu(fe->id2.i_list.l_tree_depth);
-       int credits = 1 + tree_depth + 1;
-       int i;
-
-       i = next_free - 1;
-       BUG_ON(i < 0);
-
-       /* We may be deleting metadata blocks, so metadata alloc dinode +
-          one desc. block for each possible delete. */
-       if (tree_depth && next_free == 1 &&
-           ocfs2_rec_clusters(last_el, &last_el->l_recs[i]) == clusters_to_del)
-               credits += 1 + tree_depth;
-
-       /* update to the truncate log. */
-       credits += OCFS2_TRUNCATE_LOG_UPDATE;
-
-       credits += ocfs2_quota_trans_credits(sb);
-
-       return credits;
-}
-
-static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
-{
-       return jbd2_journal_inode_add_write(handle, &OCFS2_I(inode)->ip_jinode);
+       return jbd2_journal_inode_ranged_write(handle,
+                                              &OCFS2_I(inode)->ip_jinode,
+                                              start_byte, length);
 }
 
 static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
index 6f8e1c4..8ea51cf 100644 (file)
@@ -2486,7 +2486,6 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
        struct inode *inode = NULL;
        struct inode *orphan_dir = NULL;
        struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
-       struct ocfs2_dinode *di = NULL;
        handle_t *handle = NULL;
        char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
        struct buffer_head *parent_di_bh = NULL;
@@ -2552,7 +2551,6 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
                goto leave;
        }
 
-       di = (struct ocfs2_dinode *)new_di_bh->b_data;
        status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name,
                                  &orphan_insert, orphan_dir, false);
        if (status < 0) {
index fddbbd6..9150cfa 100644 (file)
@@ -223,8 +223,6 @@ struct ocfs2_orphan_scan {
 
 struct ocfs2_dlm_debug {
        struct kref d_refcnt;
-       struct dentry *d_locking_state;
-       struct dentry *d_locking_filter;
        u32 d_filter_secs;
        struct list_head d_lockres_tracking;
 };
@@ -401,7 +399,6 @@ struct ocfs2_super
        struct ocfs2_dlm_debug *osb_dlm_debug;
 
        struct dentry *osb_debug_root;
-       struct dentry *osb_ctxt;
 
        wait_queue_head_t recovery_event;
 
index 8b2f395..c81e86c 100644 (file)
@@ -1080,10 +1080,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
        osb->osb_debug_root = debugfs_create_dir(osb->uuid_str,
                                                 ocfs2_debugfs_root);
 
-       osb->osb_ctxt = debugfs_create_file("fs_state", S_IFREG|S_IRUSR,
-                                           osb->osb_debug_root,
-                                           osb,
-                                           &ocfs2_osb_debug_fops);
+       debugfs_create_file("fs_state", S_IFREG|S_IRUSR, osb->osb_debug_root,
+                           osb, &ocfs2_osb_debug_fops);
 
        if (ocfs2_meta_ecc(osb))
                ocfs2_blockcheck_stats_debugfs_install( &osb->osb_ecc_stats,
@@ -1861,8 +1859,6 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 
        kset_unregister(osb->osb_dev_kset);
 
-       debugfs_remove(osb->osb_ctxt);
-
        /* Orphan scan should be stopped as early as possible */
        ocfs2_orphan_scan_stop(osb);
 
@@ -1918,7 +1914,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
                ocfs2_dlm_shutdown(osb, hangup_needed);
 
        ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats);
-       debugfs_remove(osb->osb_debug_root);
+       debugfs_remove_recursive(osb->osb_debug_root);
 
        if (hangup_needed)
                ocfs2_cluster_hangup(osb->uuid_str, strlen(osb->uuid_str));
index a59abe3..b62f5c0 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -776,7 +776,7 @@ static int do_dentry_open(struct file *f,
                f->f_mode |= FMODE_ATOMIC_POS;
 
        f->f_op = fops_get(inode->i_fop);
-       if (unlikely(WARN_ON(!f->f_op))) {
+       if (WARN_ON(!f->f_op)) {
                error = -ENODEV;
                goto cleanup_all;
        }
@@ -818,6 +818,14 @@ static int do_dentry_open(struct file *f,
                if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
                        return -EINVAL;
        }
+
+       /*
+        * XXX: Huge page cache doesn't support writing yet. Drop all page
+        * cache for this file before processing writes.
+        */
+       if ((f->f_mode & FMODE_WRITE) && filemap_nr_thps(inode->i_mapping))
+               truncate_pagecache(inode, 0);
+
        return 0;
 
 cleanup_all:
index f583448..e2ed8e0 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/ioport.h>
 #include <linux/memory.h>
 #include <linux/sched/task.h>
+#include <linux/security.h>
 #include <asm/sections.h>
 #include "internal.h"
 
@@ -545,9 +546,14 @@ out:
 
 static int open_kcore(struct inode *inode, struct file *filp)
 {
+       int ret = security_locked_down(LOCKDOWN_KCORE);
+
        if (!capable(CAP_SYS_RAWIO))
                return -EPERM;
 
+       if (ret)
+               return ret;
+
        filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
        if (!filp->private_data)
                return -ENOMEM;
index 465ea01..ac92473 100644 (file)
@@ -8,7 +8,6 @@
 #include <linux/mmzone.h>
 #include <linux/proc_fs.h>
 #include <linux/percpu.h>
-#include <linux/quicklist.h>
 #include <linux/seq_file.h>
 #include <linux/swap.h>
 #include <linux/vmstat.h>
@@ -106,9 +105,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                   global_zone_page_state(NR_KERNEL_STACK_KB));
        show_val_kb(m, "PageTables:     ",
                    global_zone_page_state(NR_PAGETABLE));
-#ifdef CONFIG_QUICKLIST
-       show_val_kb(m, "Quicklists:     ", quicklist_total_size());
-#endif
 
        show_val_kb(m, "NFS_Unstable:   ",
                    global_node_page_state(NR_UNSTABLE_NFS));
@@ -136,6 +132,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                    global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR);
        show_val_kb(m, "ShmemPmdMapped: ",
                    global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR);
+       show_val_kb(m, "FileHugePages: ",
+                   global_node_page_state(NR_FILE_THPS) * HPAGE_PMD_NR);
+       show_val_kb(m, "FilePmdMapped: ",
+                   global_node_page_state(NR_FILE_PMDMAPPED) * HPAGE_PMD_NR);
 #endif
 
 #ifdef CONFIG_CMA
index 731642e..9442631 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/vmacache.h>
 #include <linux/hugetlb.h>
 #include <linux/huge_mm.h>
@@ -417,6 +417,7 @@ struct mem_size_stats {
        unsigned long lazyfree;
        unsigned long anonymous_thp;
        unsigned long shmem_thp;
+       unsigned long file_thp;
        unsigned long swap;
        unsigned long shared_hugetlb;
        unsigned long private_hugetlb;
@@ -461,7 +462,7 @@ static void smaps_page_accumulate(struct mem_size_stats *mss,
 static void smaps_account(struct mem_size_stats *mss, struct page *page,
                bool compound, bool young, bool dirty, bool locked)
 {
-       int i, nr = compound ? 1 << compound_order(page) : 1;
+       int i, nr = compound ? compound_nr(page) : 1;
        unsigned long size = nr * PAGE_SIZE;
 
        /*
@@ -513,7 +514,9 @@ static int smaps_pte_hole(unsigned long addr, unsigned long end,
 
        return 0;
 }
-#endif
+#else
+#define smaps_pte_hole         NULL
+#endif /* CONFIG_SHMEM */
 
 static void smaps_pte_entry(pte_t *pte, unsigned long addr,
                struct mm_walk *walk)
@@ -586,7 +589,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
        else if (is_zone_device_page(page))
                /* pass */;
        else
-               VM_BUG_ON_PAGE(1, page);
+               mss->file_thp += HPAGE_PMD_SIZE;
        smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
 }
 #else
@@ -729,21 +732,24 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
        }
        return 0;
 }
+#else
+#define smaps_hugetlb_range    NULL
 #endif /* HUGETLB_PAGE */
 
+static const struct mm_walk_ops smaps_walk_ops = {
+       .pmd_entry              = smaps_pte_range,
+       .hugetlb_entry          = smaps_hugetlb_range,
+};
+
+static const struct mm_walk_ops smaps_shmem_walk_ops = {
+       .pmd_entry              = smaps_pte_range,
+       .hugetlb_entry          = smaps_hugetlb_range,
+       .pte_hole               = smaps_pte_hole,
+};
+
 static void smap_gather_stats(struct vm_area_struct *vma,
                             struct mem_size_stats *mss)
 {
-       struct mm_walk smaps_walk = {
-               .pmd_entry = smaps_pte_range,
-#ifdef CONFIG_HUGETLB_PAGE
-               .hugetlb_entry = smaps_hugetlb_range,
-#endif
-               .mm = vma->vm_mm,
-       };
-
-       smaps_walk.private = mss;
-
 #ifdef CONFIG_SHMEM
        /* In case of smaps_rollup, reset the value from previous vma */
        mss->check_shmem_swap = false;
@@ -765,12 +771,13 @@ static void smap_gather_stats(struct vm_area_struct *vma,
                        mss->swap += shmem_swapped;
                } else {
                        mss->check_shmem_swap = true;
-                       smaps_walk.pte_hole = smaps_pte_hole;
+                       walk_page_vma(vma, &smaps_shmem_walk_ops, mss);
+                       return;
                }
        }
 #endif
        /* mmap_sem is held in m_start */
-       walk_page_vma(vma, &smaps_walk);
+       walk_page_vma(vma, &smaps_walk_ops, mss);
 }
 
 #define SEQ_PUT_DEC(str, val) \
@@ -803,6 +810,7 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
        SEQ_PUT_DEC(" kB\nLazyFree:       ", mss->lazyfree);
        SEQ_PUT_DEC(" kB\nAnonHugePages:  ", mss->anonymous_thp);
        SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+       SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp);
        SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
        seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
                                  mss->private_hugetlb >> 10, 7);
@@ -1118,6 +1126,11 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
        return 0;
 }
 
+static const struct mm_walk_ops clear_refs_walk_ops = {
+       .pmd_entry              = clear_refs_pte_range,
+       .test_walk              = clear_refs_test_walk,
+};
+
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                                size_t count, loff_t *ppos)
 {
@@ -1151,12 +1164,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                struct clear_refs_private cp = {
                        .type = type,
                };
-               struct mm_walk clear_refs_walk = {
-                       .pmd_entry = clear_refs_pte_range,
-                       .test_walk = clear_refs_test_walk,
-                       .mm = mm,
-                       .private = &cp,
-               };
 
                if (type == CLEAR_REFS_MM_HIWATER_RSS) {
                        if (down_write_killable(&mm->mmap_sem)) {
@@ -1217,7 +1224,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                                                0, NULL, mm, 0, -1UL);
                        mmu_notifier_invalidate_range_start(&range);
                }
-               walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
+               walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops,
+                               &cp);
                if (type == CLEAR_REFS_SOFT_DIRTY)
                        mmu_notifier_invalidate_range_end(&range);
                tlb_finish_mmu(&tlb, 0, -1);
@@ -1489,8 +1497,16 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
 
        return err;
 }
+#else
+#define pagemap_hugetlb_range  NULL
 #endif /* HUGETLB_PAGE */
 
+static const struct mm_walk_ops pagemap_ops = {
+       .pmd_entry      = pagemap_pmd_range,
+       .pte_hole       = pagemap_pte_hole,
+       .hugetlb_entry  = pagemap_hugetlb_range,
+};
+
 /*
  * /proc/pid/pagemap - an array mapping virtual pages to pfns
  *
@@ -1522,7 +1538,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 {
        struct mm_struct *mm = file->private_data;
        struct pagemapread pm;
-       struct mm_walk pagemap_walk = {};
        unsigned long src;
        unsigned long svpfn;
        unsigned long start_vaddr;
@@ -1550,14 +1565,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
        if (!pm.buffer)
                goto out_mm;
 
-       pagemap_walk.pmd_entry = pagemap_pmd_range;
-       pagemap_walk.pte_hole = pagemap_pte_hole;
-#ifdef CONFIG_HUGETLB_PAGE
-       pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
-#endif
-       pagemap_walk.mm = mm;
-       pagemap_walk.private = &pm;
-
        src = *ppos;
        svpfn = src / PM_ENTRY_BYTES;
        start_vaddr = svpfn << PAGE_SHIFT;
@@ -1586,7 +1593,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
                ret = down_read_killable(&mm->mmap_sem);
                if (ret)
                        goto out_free;
-               ret = walk_page_range(start_vaddr, end, &pagemap_walk);
+               ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
                up_read(&mm->mmap_sem);
                start_vaddr = end;
 
@@ -1798,6 +1805,11 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
 }
 #endif
 
+static const struct mm_walk_ops show_numa_ops = {
+       .hugetlb_entry = gather_hugetlb_stats,
+       .pmd_entry = gather_pte_stats,
+};
+
 /*
  * Display pages allocated per node and memory policy via /proc.
  */
@@ -1809,12 +1821,6 @@ static int show_numa_map(struct seq_file *m, void *v)
        struct numa_maps *md = &numa_priv->md;
        struct file *file = vma->vm_file;
        struct mm_struct *mm = vma->vm_mm;
-       struct mm_walk walk = {
-               .hugetlb_entry = gather_hugetlb_stats,
-               .pmd_entry = gather_pte_stats,
-               .private = md,
-               .mm = mm,
-       };
        struct mempolicy *pol;
        char buffer[64];
        int nid;
@@ -1848,7 +1854,7 @@ static int show_numa_map(struct seq_file *m, void *v)
                seq_puts(m, " huge");
 
        /* mmap_sem is held by m_start */
-       walk_page_vma(vma, &walk);
+       walk_page_vma(vma, &show_numa_ops, md);
 
        if (!md->pages)
                goto out;
index e16fb8f..273ee82 100644 (file)
@@ -88,7 +88,7 @@ static inline void mangle(struct seq_file *m, const char *s)
 static void show_type(struct seq_file *m, struct super_block *sb)
 {
        mangle(m, sb->s_type->name);
-       if (sb->s_subtype && sb->s_subtype[0]) {
+       if (sb->s_subtype) {
                seq_putc(m, '.');
                mangle(m, sb->s_subtype);
        }
index be9c471..6e826b4 100644 (file)
@@ -2731,7 +2731,7 @@ static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di)
 
        if (check_blim) {
                if (!dm->dqb_bsoftlimit ||
-                   dm->dqb_curspace + dm->dqb_rsvspace < dm->dqb_bsoftlimit) {
+                   dm->dqb_curspace + dm->dqb_rsvspace <= dm->dqb_bsoftlimit) {
                        dm->dqb_btime = 0;
                        clear_bit(DQ_BLKS_B, &dquot->dq_flags);
                } else if (!(di->d_fieldmask & QC_SPC_TIMER))
@@ -2740,7 +2740,7 @@ static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di)
        }
        if (check_ilim) {
                if (!dm->dqb_isoftlimit ||
-                   dm->dqb_curinodes < dm->dqb_isoftlimit) {
+                   dm->dqb_curinodes <= dm->dqb_isoftlimit) {
                        dm->dqb_itime = 0;
                        clear_bit(DQ_INODES_B, &dquot->dq_flags);
                } else if (!(di->d_fieldmask & QC_INO_TIMER))
index 9c02d96..4075e41 100644 (file)
@@ -239,10 +239,8 @@ static int balance_leaf_when_delete_left(struct tree_balance *tb)
 static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
 {
        struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
-       int item_pos = PATH_LAST_POSITION(tb->tb_path);
        struct buffer_info bi;
        int n;
-       struct item_head *ih;
 
        RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
               "vs- 12000: level: wrong FR %z", tb->FR[0]);
@@ -251,7 +249,6 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
        RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
               "PAP-12010: tree can not be empty");
 
-       ih = item_head(tbS0, item_pos);
        buffer_info_init_tbS0(tb, &bi);
 
        /* Delete or truncate the item */
@@ -298,7 +295,6 @@ static unsigned int balance_leaf_insert_left(struct tree_balance *tb,
        if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
                /* part of new item falls into L[0] */
                int new_item_len, shift;
-               int version;
 
                ret = leaf_shift_left(tb, tb->lnum[0] - 1, -1);
 
@@ -317,8 +313,6 @@ static unsigned int balance_leaf_insert_left(struct tree_balance *tb,
                leaf_insert_into_buf(&bi, n + tb->item_pos - ret, ih, body,
                             min_t(int, tb->zeroes_num, ih_item_len(ih)));
 
-               version = ih_version(ih);
-
                /*
                 * Calculate key component, item length and body to
                 * insert into S[0]
@@ -632,7 +626,6 @@ static void balance_leaf_insert_right(struct tree_balance *tb,
        struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
        int n = B_NR_ITEMS(tbS0);
        struct buffer_info bi;
-       int ret;
 
        /* new item or part of it doesn't fall into R[0] */
        if (n - tb->rnum[0] >= tb->item_pos) {
@@ -646,13 +639,11 @@ static void balance_leaf_insert_right(struct tree_balance *tb,
        if (tb->item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) {
                loff_t old_key_comp, old_len, r_zeroes_number;
                const char *r_body;
-               int version, shift;
+               int shift;
                loff_t offset;
 
                leaf_shift_right(tb, tb->rnum[0] - 1, -1);
 
-               version = ih_version(ih);
-
                /* Remember key component and item length */
                old_key_comp = le_ih_k_offset(ih);
                old_len = ih_item_len(ih);
@@ -698,7 +689,7 @@ static void balance_leaf_insert_right(struct tree_balance *tb,
                /* whole new item falls into R[0] */
 
                /* Shift rnum[0]-1 items to R[0] */
-               ret = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
+               leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
 
                /* Insert new item into R[0] */
                buffer_info_init_right(tb, &bi);
@@ -950,14 +941,12 @@ static void balance_leaf_new_nodes_insert(struct tree_balance *tb,
        if (tb->item_pos == n - tb->snum[i] + 1 && tb->sbytes[i] != -1) {
                int old_key_comp, old_len, r_zeroes_number;
                const char *r_body;
-               int version;
 
                /* Move snum[i]-1 items from S[0] to S_new[i] */
                leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i] - 1, -1,
                                tb->S_new[i]);
 
                /* Remember key component and item length */
-               version = ih_version(ih);
                old_key_comp = le_ih_k_offset(ih);
                old_len = ih_item_len(ih);
 
index 6b0ddb2..1170922 100644 (file)
@@ -376,7 +376,6 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
                       int to, int to_bytes, short *snum012, int flow)
 {
        int i;
-       int cur_free;
        int units;
        struct virtual_node *vn = tb->tb_vn;
        int total_node_size, max_node_size, current_item_size;
@@ -438,7 +437,6 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
        /* leaf level */
        needed_nodes = 1;
        total_node_size = 0;
-       cur_free = max_node_size;
 
        /* start from 'from'-th item */
        start_item = from;
@@ -1734,14 +1732,12 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
         * and Fh is its father.
         */
        struct buffer_head *Sh, *Fh;
-       int maxsize, ret;
+       int ret;
        int lfree, rfree /* free space in L and R */ ;
 
        Sh = PATH_H_PBUFFER(tb->tb_path, h);
        Fh = PATH_H_PPARENT(tb->tb_path, h);
 
-       maxsize = MAX_CHILD_SIZE(Sh);
-
        /*
         * using tb->insert_size[h], which is negative in this case,
         * create_virtual_node calculates:
index 4517a13..4b3e3e7 100644 (file)
@@ -891,7 +891,6 @@ static int flush_older_commits(struct super_block *s,
        struct list_head *entry;
        unsigned int trans_id = jl->j_trans_id;
        unsigned int other_trans_id;
-       unsigned int first_trans_id;
 
 find_first:
        /*
@@ -914,8 +913,6 @@ find_first:
                return 0;
        }
 
-       first_trans_id = first_jl->j_trans_id;
-
        entry = &first_jl->j_list;
        while (1) {
                other_jl = JOURNAL_LIST_ENTRY(entry);
@@ -1351,7 +1348,7 @@ static int flush_journal_list(struct super_block *s,
                              struct reiserfs_journal_list *jl, int flushall)
 {
        struct reiserfs_journal_list *pjl;
-       struct reiserfs_journal_cnode *cn, *last;
+       struct reiserfs_journal_cnode *cn;
        int count;
        int was_jwait = 0;
        int was_dirty = 0;
@@ -1509,7 +1506,6 @@ static int flush_journal_list(struct super_block *s,
                                         b_blocknr, __func__);
                }
 free_cnode:
-               last = cn;
                cn = cn->next;
                if (saved_bh) {
                        /*
@@ -1792,7 +1788,6 @@ static int flush_used_journal_lists(struct super_block *s,
 {
        unsigned long len = 0;
        unsigned long cur_len;
-       int ret;
        int i;
        int limit = 256;
        struct reiserfs_journal_list *tjl;
@@ -1829,9 +1824,9 @@ static int flush_used_journal_lists(struct super_block *s,
         * transactions, but only bother if we've actually spanned
         * across multiple lists
         */
-       if (flush_jl != jl) {
-               ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
-       }
+       if (flush_jl != jl)
+               kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
+
        flush_journal_list(s, flush_jl, 1);
        put_journal_list(s, flush_jl);
        put_journal_list(s, jl);
@@ -1911,7 +1906,6 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
                              struct super_block *sb, int error)
 {
        struct reiserfs_transaction_handle myth;
-       int flushed = 0;
        struct reiserfs_journal *journal = SB_JOURNAL(sb);
 
        /*
@@ -1933,7 +1927,6 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
                                                     1);
                        journal_mark_dirty(&myth, SB_BUFFER_WITH_SB(sb));
                        do_journal_end(&myth, FLUSH_ALL);
-                       flushed = 1;
                }
        }
 
@@ -3444,9 +3437,8 @@ static int remove_from_transaction(struct super_block *sb,
        if (cn == journal->j_last) {
                journal->j_last = cn->prev;
        }
-       if (bh)
-               remove_journal_hash(sb, journal->j_hash_table, NULL,
-                                   bh->b_blocknr, 0);
+       remove_journal_hash(sb, journal->j_hash_table, NULL,
+                           bh->b_blocknr, 0);
        clear_buffer_journaled(bh);     /* don't log this one */
 
        if (!already_cleaned) {
@@ -3988,7 +3980,6 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, int flags)
        struct buffer_head *c_bh;       /* commit bh */
        struct buffer_head *d_bh;       /* desc bh */
        int cur_write_start = 0;        /* start index of current log write */
-       int old_start;
        int i;
        int flush;
        int wait_on_commit;
@@ -4245,7 +4236,6 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, int flags)
        journal->j_num_work_lists++;
 
        /* reset journal values for the next transaction */
-       old_start = journal->j_start;
        journal->j_start =
            (journal->j_start + journal->j_len +
             2) % SB_ONDISK_JOURNAL_SIZE(sb);
index f5cebd7..7f86856 100644 (file)
@@ -1322,7 +1322,7 @@ void leaf_paste_entries(struct buffer_info *bi,
        char *item;
        struct reiserfs_de_head *deh;
        char *insert_point;
-       int i, old_entry_num;
+       int i;
        struct buffer_head *bh = bi->bi_bh;
 
        if (new_entry_count == 0)
@@ -1362,7 +1362,6 @@ void leaf_paste_entries(struct buffer_info *bi,
                put_deh_location(&deh[i],
                                 deh_location(&deh[i]) + paste_size);
 
-       old_entry_num = ih_entry_count(ih);
        put_ih_entry_count(ih, ih_entry_count(ih) + new_entry_count);
 
        /* prepare space for pasted records */
index 415d66c..34baf5c 100644 (file)
@@ -183,13 +183,12 @@ int reiserfs_convert_objectid_map_v1(struct super_block *s)
        int new_size = (s->s_blocksize - SB_SIZE) / sizeof(__u32) / 2 * 2;
        int old_max = sb_oid_maxsize(disk_sb);
        struct reiserfs_super_block_v1 *disk_sb_v1;
-       __le32 *objectid_map, *new_objectid_map;
+       __le32 *objectid_map;
        int i;
 
        disk_sb_v1 =
            (struct reiserfs_super_block_v1 *)(SB_BUFFER_WITH_SB(s)->b_data);
        objectid_map = (__le32 *) (disk_sb_v1 + 1);
-       new_objectid_map = (__le32 *) (disk_sb + 1);
 
        if (cur_size > new_size) {
                /*
index 9fed1c0..500f200 100644 (file)
@@ -746,9 +746,6 @@ static void check_leaf_block_head(struct buffer_head *bh)
 
 static void check_internal_block_head(struct buffer_head *bh)
 {
-       struct block_head *blkh;
-
-       blkh = B_BLK_HEAD(bh);
        if (!(B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL(bh) <= MAX_HEIGHT))
                reiserfs_panic(NULL, "vs-6025", "invalid level %z", bh);
 
index 0037aea..da9ebe3 100644 (file)
@@ -593,7 +593,6 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key,
        struct buffer_head *bh;
        struct path_element *last_element;
        int node_level, retval;
-       int right_neighbor_of_leaf_node;
        int fs_gen;
        struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
        b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
@@ -614,8 +613,6 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key,
 
        pathrelse(search_path);
 
-       right_neighbor_of_leaf_node = 0;
-
        /*
         * With each iteration of this loop we search through the items in the
         * current node, and calculate the next current node(next path element)
@@ -701,7 +698,6 @@ io_error:
                         */
                        block_number = SB_ROOT_BLOCK(sb);
                        expected_level = -1;
-                       right_neighbor_of_leaf_node = 0;
 
                        /* repeat search from the root */
                        continue;
index 8020974..f627b7c 100644 (file)
@@ -1555,11 +1555,6 @@ int vfs_get_tree(struct fs_context *fc)
        sb = fc->root->d_sb;
        WARN_ON(!sb->s_bdi);
 
-       if (fc->subtype && !sb->s_subtype) {
-               sb->s_subtype = fc->subtype;
-               fc->subtype = NULL;
-       }
-
        /*
         * Write barrier is for super_cache_count(). We place it before setting
         * SB_BORN as the data dependency between the two functions is the
index eeeae04..9fc14e3 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/parser.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
+#include <linux/security.h>
 
 #define TRACEFS_DEFAULT_MODE   0700
 
@@ -27,6 +28,25 @@ static struct vfsmount *tracefs_mount;
 static int tracefs_mount_count;
 static bool tracefs_registered;
 
+static int default_open_file(struct inode *inode, struct file *filp)
+{
+       struct dentry *dentry = filp->f_path.dentry;
+       struct file_operations *real_fops;
+       int ret;
+
+       if (!dentry)
+               return -EINVAL;
+
+       ret = security_locked_down(LOCKDOWN_TRACEFS);
+       if (ret)
+               return ret;
+
+       real_fops = dentry->d_fsdata;
+       if (!real_fops->open)
+               return 0;
+       return real_fops->open(inode, filp);
+}
+
 static ssize_t default_read_file(struct file *file, char __user *buf,
                                 size_t count, loff_t *ppos)
 {
@@ -221,6 +241,12 @@ static int tracefs_apply_options(struct super_block *sb)
        return 0;
 }
 
+static void tracefs_destroy_inode(struct inode *inode)
+{
+       if (S_ISREG(inode->i_mode))
+               kfree(inode->i_fop);
+}
+
 static int tracefs_remount(struct super_block *sb, int *flags, char *data)
 {
        int err;
@@ -257,6 +283,7 @@ static int tracefs_show_options(struct seq_file *m, struct dentry *root)
 static const struct super_operations tracefs_super_operations = {
        .statfs         = simple_statfs,
        .remount_fs     = tracefs_remount,
+       .destroy_inode  = tracefs_destroy_inode,
        .show_options   = tracefs_show_options,
 };
 
@@ -387,6 +414,7 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
                                   struct dentry *parent, void *data,
                                   const struct file_operations *fops)
 {
+       struct file_operations *proxy_fops;
        struct dentry *dentry;
        struct inode *inode;
 
@@ -402,8 +430,20 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
        if (unlikely(!inode))
                return failed_creating(dentry);
 
+       proxy_fops = kzalloc(sizeof(struct file_operations), GFP_KERNEL);
+       if (unlikely(!proxy_fops)) {
+               iput(inode);
+               return failed_creating(dentry);
+       }
+
+       if (!fops)
+               fops = &tracefs_file_operations;
+
+       dentry->d_fsdata = (void *)fops;
+       memcpy(proxy_fops, fops, sizeof(*proxy_fops));
+       proxy_fops->open = default_open_file;
        inode->i_mode = mode;
-       inode->i_fop = fops ? fops : &tracefs_file_operations;
+       inode->i_fop = proxy_fops;
        inode->i_private = data;
        d_instantiate(dentry, inode);
        fsnotify_create(dentry->d_parent->d_inode, dentry);
index d9af2de..8cdbd53 100644 (file)
@@ -479,8 +479,10 @@ int __ubifs_node_verify_hmac(const struct ubifs_info *c, const void *node,
                return -ENOMEM;
 
        err = ubifs_node_calc_hmac(c, node, len, ofs_hmac, hmac);
-       if (err)
+       if (err) {
+               kfree(hmac);
                return err;
+       }
 
        err = crypto_memneq(hmac, node + ofs_hmac, hmac_len);
 
index a5f10d7..e4b5278 100644 (file)
@@ -2817,7 +2817,6 @@ void dbg_debugfs_init_fs(struct ubifs_info *c)
                     c->vi.ubi_num, c->vi.vol_id);
        if (n == UBIFS_DFS_DIR_LEN) {
                /* The array size is too small */
-               fname = UBIFS_DFS_DIR_NAME;
                return;
        }
 
index 5e1e8ec..7d4547e 100644 (file)
@@ -2267,8 +2267,10 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
                }
        } else {
                err = ubifs_fill_super(sb, data, flags & SB_SILENT ? 1 : 0);
-               if (err)
+               if (err) {
+                       kfree(c);
                        goto out_deact;
+               }
                /* We do not support atime */
                sb->s_flags |= SB_ACTIVE;
                if (IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT))
index 6f293f6..49cb34c 100644 (file)
@@ -284,6 +284,7 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr,
        err = ubifs_node_check_hash(c, idx, zzbr->hash);
        if (err) {
                ubifs_bad_hash(c, idx, zzbr->hash, lnum, offs);
+               kfree(idx);
                return err;
        }
 
index ec85aea..02f03fa 100644 (file)
@@ -325,6 +325,17 @@ got_block:
        newblock = bit + (block_group << (sb->s_blocksize_bits + 3)) -
                (sizeof(struct spaceBitmapDesc) << 3);
 
+       if (newblock >= sbi->s_partmaps[partition].s_partition_len) {
+               /*
+                * Ran off the end of the bitmap, and bits following are
+                * non-compliant (not all zero)
+                */
+               udf_err(sb, "bitmap for partition %d corrupted (block %u marked"
+                       " as free, partition length is %u)\n", partition,
+                       newblock, sbi->s_partmaps[partition].s_partition_len);
+               goto error_return;
+       }
+
        if (!udf_clear_bit(bit, bh->b_data)) {
                udf_debug("bit already cleared for block %d\n", bit);
                goto repeat;
index 9f24bd1..fb7f2c7 100644 (file)
@@ -88,6 +88,20 @@ struct regid {
 #define ENTITYID_FLAGS_DIRTY           0x00
 #define ENTITYID_FLAGS_PROTECTED       0x01
 
+/* OSTA UDF 2.1.5.2 */
+#define UDF_ID_COMPLIANT "*OSTA UDF Compliant"
+
+/* OSTA UDF 2.1.5.3 */
+struct domainEntityIDSuffix {
+       uint16_t        revision;
+       uint8_t         flags;
+       uint8_t         reserved[5];
+};
+
+/* OSTA UDF 2.1.5.3 */
+#define ENTITYIDSUFFIX_FLAGS_HARDWRITEPROTECT 0
+#define ENTITYIDSUFFIX_FLAGS_SOFTWRITEPROTECT 1
+
 /* Volume Structure Descriptor (ECMA 167r3 2/9.1) */
 #define VSD_STD_ID_LEN                 5
 struct volStructDesc {
index cd31e4f..628941a 100644 (file)
@@ -280,6 +280,9 @@ static int udf_setattr(struct dentry *dentry, struct iattr *attr)
                        return error;
        }
 
+       if (attr->ia_valid & ATTR_MODE)
+               udf_update_extra_perms(inode, attr->ia_mode);
+
        setattr_copy(inode, attr);
        mark_inode_dirty(inode);
        return 0;
index f8e5872..0adb407 100644 (file)
@@ -118,6 +118,9 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode)
        iinfo->i_lenAlloc = 0;
        iinfo->i_use = 0;
        iinfo->i_checkpoint = 1;
+       iinfo->i_extraPerms = FE_PERM_U_CHATTR;
+       udf_update_extra_perms(inode, mode);
+
        if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB))
                iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
        else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
index 9bb1831..ea80036 100644 (file)
 
 #define EXTENT_MERGE_SIZE 5
 
+#define FE_MAPPED_PERMS        (FE_PERM_U_READ | FE_PERM_U_WRITE | FE_PERM_U_EXEC | \
+                        FE_PERM_G_READ | FE_PERM_G_WRITE | FE_PERM_G_EXEC | \
+                        FE_PERM_O_READ | FE_PERM_O_WRITE | FE_PERM_O_EXEC)
+
+#define FE_DELETE_PERMS        (FE_PERM_U_DELETE | FE_PERM_G_DELETE | \
+                        FE_PERM_O_DELETE)
+
 static umode_t udf_convert_permissions(struct fileEntry *);
 static int udf_update_inode(struct inode *, int);
 static int udf_sync_inode(struct inode *inode);
@@ -1458,6 +1465,8 @@ reread:
        else
                inode->i_mode = udf_convert_permissions(fe);
        inode->i_mode &= ~sbi->s_umask;
+       iinfo->i_extraPerms = le32_to_cpu(fe->permissions) & ~FE_MAPPED_PERMS;
+
        read_unlock(&sbi->s_cred_lock);
 
        link_count = le16_to_cpu(fe->fileLinkCount);
@@ -1485,6 +1494,8 @@ reread:
                iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr);
                iinfo->i_lenAlloc = le32_to_cpu(fe->lengthAllocDescs);
                iinfo->i_checkpoint = le32_to_cpu(fe->checkpoint);
+               iinfo->i_streamdir = 0;
+               iinfo->i_lenStreams = 0;
        } else {
                inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) <<
                    (inode->i_sb->s_blocksize_bits - 9);
@@ -1498,6 +1509,16 @@ reread:
                iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr);
                iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs);
                iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint);
+
+               /* Named streams */
+               iinfo->i_streamdir = (efe->streamDirectoryICB.extLength != 0);
+               iinfo->i_locStreamdir =
+                       lelb_to_cpu(efe->streamDirectoryICB.extLocation);
+               iinfo->i_lenStreams = le64_to_cpu(efe->objectSize);
+               if (iinfo->i_lenStreams >= inode->i_size)
+                       iinfo->i_lenStreams -= inode->i_size;
+               else
+                       iinfo->i_lenStreams = 0;
        }
        inode->i_generation = iinfo->i_unique;
 
@@ -1619,6 +1640,23 @@ static umode_t udf_convert_permissions(struct fileEntry *fe)
        return mode;
 }
 
+void udf_update_extra_perms(struct inode *inode, umode_t mode)
+{
+       struct udf_inode_info *iinfo = UDF_I(inode);
+
+       /*
+        * UDF 2.01 sec. 3.3.3.3 Note 2: in Unix, delete permission tracks
+        * write, so rebuild the three delete bits from @mode's write bits.
+        */
+       iinfo->i_extraPerms &= ~FE_DELETE_PERMS;        /* clear U/G/O delete only; other extra bits are kept */
+       if (mode & 0200)        /* owner write */
+               iinfo->i_extraPerms |= FE_PERM_U_DELETE;
+       if (mode & 0020)        /* group write */
+               iinfo->i_extraPerms |= FE_PERM_G_DELETE;
+       if (mode & 0002)        /* other write */
+               iinfo->i_extraPerms |= FE_PERM_O_DELETE;
+}
+
 int udf_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
        return udf_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
@@ -1691,10 +1729,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
                   ((inode->i_mode & 0070) << 2) |
                   ((inode->i_mode & 0700) << 4);
 
-       udfperms |= (le32_to_cpu(fe->permissions) &
-                   (FE_PERM_O_DELETE | FE_PERM_O_CHATTR |
-                    FE_PERM_G_DELETE | FE_PERM_G_CHATTR |
-                    FE_PERM_U_DELETE | FE_PERM_U_CHATTR));
+       udfperms |= iinfo->i_extraPerms;
        fe->permissions = cpu_to_le32(udfperms);
 
        if (S_ISDIR(inode->i_mode) && inode->i_nlink > 0)
@@ -1760,9 +1795,19 @@ static int udf_update_inode(struct inode *inode, int do_sync)
                       iinfo->i_ext.i_data,
                       inode->i_sb->s_blocksize -
                                        sizeof(struct extendedFileEntry));
-               efe->objectSize = cpu_to_le64(inode->i_size);
+               efe->objectSize =
+                       cpu_to_le64(inode->i_size + iinfo->i_lenStreams);
                efe->logicalBlocksRecorded = cpu_to_le64(lb_recorded);
 
+               if (iinfo->i_streamdir) {
+                       struct long_ad *icb_lad = &efe->streamDirectoryICB;
+
+                       icb_lad->extLocation =
+                               cpu_to_lelb(iinfo->i_locStreamdir);
+                       icb_lad->extLength =
+                               cpu_to_le32(inode->i_sb->s_blocksize);
+               }
+
                udf_adjust_time(iinfo, inode->i_atime);
                udf_adjust_time(iinfo, inode->i_mtime);
                udf_adjust_time(iinfo, inode->i_ctime);
index a143461..8c28e93 100644 (file)
@@ -92,10 +92,6 @@ static void udf_put_super(struct super_block *);
 static int udf_sync_fs(struct super_block *, int);
 static int udf_remount_fs(struct super_block *, int *, char *);
 static void udf_load_logicalvolint(struct super_block *, struct kernel_extent_ad);
-static int udf_find_fileset(struct super_block *, struct kernel_lb_addr *,
-                           struct kernel_lb_addr *);
-static void udf_load_fileset(struct super_block *, struct buffer_head *,
-                            struct kernel_lb_addr *);
 static void udf_open_lvid(struct super_block *);
 static void udf_close_lvid(struct super_block *);
 static unsigned int udf_count_free(struct super_block *);
@@ -151,9 +147,11 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
 
        ei->i_unique = 0;
        ei->i_lenExtents = 0;
+       ei->i_lenStreams = 0;
        ei->i_next_alloc_block = 0;
        ei->i_next_alloc_goal = 0;
        ei->i_strat4096 = 0;
+       ei->i_streamdir = 0;
        init_rwsem(&ei->i_data_sem);
        ei->cached_extent.lstart = -1;
        spin_lock_init(&ei->i_extent_cache_lock);
@@ -271,8 +269,7 @@ static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
        int nr_groups = bitmap->s_nr_groups;
 
        for (i = 0; i < nr_groups; i++)
-               if (bitmap->s_block_bitmap[i])
-                       brelse(bitmap->s_block_bitmap[i]);
+               brelse(bitmap->s_block_bitmap[i]);
 
        kvfree(bitmap);
 }
@@ -646,16 +643,67 @@ out_unlock:
        return error;
 }
 
-/* Check Volume Structure Descriptors (ECMA 167 2/9.1) */
-/* We also check any "CD-ROM Volume Descriptor Set" (ECMA 167 2/8.3.1) */
-static loff_t udf_check_vsd(struct super_block *sb)
+/*
+ * Classify one Volume Structure Descriptor by its stdIdent. Returns -1 at the
+ * end of the volume recognition area (TEA01 or an unknown id), 0 for a known
+ * but uninteresting descriptor, 1 for an NSR02 or NSR03 descriptor.
+ */
+static int identify_vsd(const struct volStructDesc *vsd)
+{
+       int ret = 0;    /* "known but uninteresting" unless changed below */
+
+       if (!memcmp(vsd->stdIdent, VSD_STD_ID_CD001, VSD_STD_ID_LEN)) {
+               switch (vsd->structType) {      /* only selects a debug message; ret stays 0 */
+               case 0:
+                       udf_debug("ISO9660 Boot Record found\n");
+                       break;
+               case 1:
+                       udf_debug("ISO9660 Primary Volume Descriptor found\n");
+                       break;
+               case 2:
+                       udf_debug("ISO9660 Supplementary Volume Descriptor found\n");
+                       break;
+               case 3:
+                       udf_debug("ISO9660 Volume Partition Descriptor found\n");
+                       break;
+               case 255:
+                       udf_debug("ISO9660 Volume Descriptor Set Terminator found\n");
+                       break;
+               default:
+                       udf_debug("ISO9660 VRS (%u) found\n", vsd->structType);
+                       break;
+               }
+       } else if (!memcmp(vsd->stdIdent, VSD_STD_ID_BEA01, VSD_STD_ID_LEN))
+               ; /* ret = 0 */
+       else if (!memcmp(vsd->stdIdent, VSD_STD_ID_NSR02, VSD_STD_ID_LEN))
+               ret = 1;        /* NSR02 and NSR03 are reported identically */
+       else if (!memcmp(vsd->stdIdent, VSD_STD_ID_NSR03, VSD_STD_ID_LEN))
+               ret = 1;
+       else if (!memcmp(vsd->stdIdent, VSD_STD_ID_BOOT2, VSD_STD_ID_LEN))
+               ; /* ret = 0 */
+       else if (!memcmp(vsd->stdIdent, VSD_STD_ID_CDW02, VSD_STD_ID_LEN))
+               ; /* ret = 0 */
+       else {
+               /* TEA01 or invalid id : end of volume recognition area */
+               ret = -1;
+       }
+
+       return ret;
+}
+
+/*
+ * Check Volume Structure Descriptors (ECMA 167 2/9.1)
+ * We also check any "CD-ROM Volume Descriptor Set" (ECMA 167 2/8.3.1)
+ * @return   1 if NSR02 or NSR03 found,
+ *         -1 if first sector read error, 0 otherwise
+ */
+static int udf_check_vsd(struct super_block *sb)
 {
        struct volStructDesc *vsd = NULL;
        loff_t sector = VSD_FIRST_SECTOR_OFFSET;
        int sectorsize;
        struct buffer_head *bh = NULL;
-       int nsr02 = 0;
-       int nsr03 = 0;
+       int nsr = 0;
        struct udf_sb_info *sbi;
 
        sbi = UDF_SB(sb);
@@ -679,71 +727,36 @@ static loff_t udf_check_vsd(struct super_block *sb)
         * activity. This actually happened with uninitialised SSD partitions
         * (all 0xFF) before the check for the limit and all valid IDs were
         * added */
-       for (; !nsr02 && !nsr03 && sector < VSD_MAX_SECTOR_OFFSET;
-            sector += sectorsize) {
+       for (; !nsr && sector < VSD_MAX_SECTOR_OFFSET; sector += sectorsize) {
                /* Read a block */
                bh = udf_tread(sb, sector >> sb->s_blocksize_bits);
                if (!bh)
                        break;
 
-               /* Look for ISO  descriptors */
                vsd = (struct volStructDesc *)(bh->b_data +
                                              (sector & (sb->s_blocksize - 1)));
-
-               if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001,
-                                   VSD_STD_ID_LEN)) {
-                       switch (vsd->structType) {
-                       case 0:
-                               udf_debug("ISO9660 Boot Record found\n");
-                               break;
-                       case 1:
-                               udf_debug("ISO9660 Primary Volume Descriptor found\n");
-                               break;
-                       case 2:
-                               udf_debug("ISO9660 Supplementary Volume Descriptor found\n");
-                               break;
-                       case 3:
-                               udf_debug("ISO9660 Volume Partition Descriptor found\n");
-                               break;
-                       case 255:
-                               udf_debug("ISO9660 Volume Descriptor Set Terminator found\n");
-                               break;
-                       default:
-                               udf_debug("ISO9660 VRS (%u) found\n",
-                                         vsd->structType);
-                               break;
-                       }
-               } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_BEA01,
-                                   VSD_STD_ID_LEN))
-                       ; /* nothing */
-               else if (!strncmp(vsd->stdIdent, VSD_STD_ID_TEA01,
-                                   VSD_STD_ID_LEN)) {
-                       brelse(bh);
-                       break;
-               } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_NSR02,
-                                   VSD_STD_ID_LEN))
-                       nsr02 = sector;
-               else if (!strncmp(vsd->stdIdent, VSD_STD_ID_NSR03,
-                                   VSD_STD_ID_LEN))
-                       nsr03 = sector;
-               else if (!strncmp(vsd->stdIdent, VSD_STD_ID_BOOT2,
-                                   VSD_STD_ID_LEN))
-                       ; /* nothing */
-               else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CDW02,
-                                   VSD_STD_ID_LEN))
-                       ; /* nothing */
-               else {
-                       /* invalid id : end of volume recognition area */
+               nsr = identify_vsd(vsd);
+               /* Found NSR or end? */
+               if (nsr) {
                        brelse(bh);
                        break;
                }
+               /*
+                * Special handling for improperly formatted VRS (e.g., Win10)
+                * where components are separated by 2048 bytes even though
+                * sectors are 4K
+                */
+               if (sb->s_blocksize == 4096) {
+                       nsr = identify_vsd(vsd + 1);
+                       /* Ignore unknown IDs... */
+                       if (nsr < 0)
+                               nsr = 0;
+               }
                brelse(bh);
        }
 
-       if (nsr03)
-               return nsr03;
-       else if (nsr02)
-               return nsr02;
+       if (nsr > 0)
+               return 1;
        else if (!bh && sector - (sbi->s_session << sb->s_blocksize_bits) ==
                        VSD_FIRST_SECTOR_OFFSET)
                return -1;
@@ -751,34 +764,82 @@ static loff_t udf_check_vsd(struct super_block *sb)
                return 0;
 }
 
+static int udf_verify_domain_identifier(struct super_block *sb,
+                                       struct regid *ident, char *dname)
+{      /* 0: acceptable (possibly flagged RW-incompatible); -EACCES: rw mount refused */
+       struct domainEntityIDSuffix *suffix;    /* view of ident->identSuffix, set below */
+
+       if (memcmp(ident->ident, UDF_ID_COMPLIANT, strlen(UDF_ID_COMPLIANT))) { /* prefix compare, NUL not included */
+               udf_warn(sb, "Not OSTA UDF compliant %s descriptor.\n", dname);
+               goto force_ro;
+       }
+       if (ident->flags & (1 << ENTITYID_FLAGS_DIRTY)) {
+               udf_warn(sb, "Possibly not OSTA UDF compliant %s descriptor.\n",
+                        dname);
+               goto force_ro;
+       }
+       suffix = (struct domainEntityIDSuffix *)ident->identSuffix;
+       if (suffix->flags & (1 << ENTITYIDSUFFIX_FLAGS_HARDWRITEPROTECT) ||
+           suffix->flags & (1 << ENTITYIDSUFFIX_FLAGS_SOFTWRITEPROTECT)) {
+               if (!sb_rdonly(sb)) {   /* warn only when a read-write mount was requested */
+                       udf_warn(sb, "Descriptor for %s marked write protected."
+                                " Forcing read only mount.\n", dname);
+               }
+               goto force_ro;  /* NOTE(review): for a rw mount force_ro returns -EACCES, it does not remount ro here */
+       }
+       return 0;
+
+force_ro:      /* shared exit for every "must not be written" case above */
+       if (!sb_rdonly(sb))
+               return -EACCES; /* read-write mount: refuse */
+       UDF_SET_FLAG(sb, UDF_FLAG_RW_INCOMPAT); /* read-only mount: proceed, but record the incompatibility */
+       return 0;
+}
+
+static int udf_load_fileset(struct super_block *sb, struct fileSetDesc *fset,
+                           struct kernel_lb_addr *root)
+{      /* Check the FSD domain identifier, then record root ICB location and tag serial number */
+       int ret;
+
+       ret = udf_verify_domain_identifier(sb, &fset->domainIdent, "file set");
+       if (ret < 0)
+               return ret;     /* *root and s_serial_number are left untouched on failure */
+
+       *root = lelb_to_cpu(fset->rootDirectoryICB.extLocation);        /* out-parameter: root directory ICB location */
+       UDF_SB(sb)->s_serial_number = le16_to_cpu(fset->descTag.tagSerialNum);
+
+       udf_debug("Rootdir at block=%u, partition=%u\n",
+                 root->logicalBlockNum, root->partitionReferenceNum);
+       return 0;
+}
+
 static int udf_find_fileset(struct super_block *sb,
                            struct kernel_lb_addr *fileset,
                            struct kernel_lb_addr *root)
 {
        struct buffer_head *bh = NULL;
        uint16_t ident;
+       int ret;        /* result of udf_load_fileset(), returned after bh is released */
 
-       if (fileset->logicalBlockNum != 0xFFFFFFFF ||
-           fileset->partitionReferenceNum != 0xFFFF) {
-               bh = udf_read_ptagged(sb, fileset, 0, &ident);
-
-               if (!bh) {
-                       return 1;
-               } else if (ident != TAG_IDENT_FSD) {
-                       brelse(bh);
-                       return 1;
-               }
-
-               udf_debug("Fileset at block=%u, partition=%u\n",
-                         fileset->logicalBlockNum,
-                         fileset->partitionReferenceNum);
+       if (fileset->logicalBlockNum == 0xFFFFFFFF &&
+           fileset->partitionReferenceNum == 0xFFFF)
+               return -EINVAL; /* all-ones address -- presumably "not set" by the caller; confirm */
 
-               UDF_SB(sb)->s_partition = fileset->partitionReferenceNum;
-               udf_load_fileset(sb, bh, root);
+       bh = udf_read_ptagged(sb, fileset, 0, &ident);
+       if (!bh)
+               return -EIO;    /* udf_read_ptagged() returned no buffer */
+       if (ident != TAG_IDENT_FSD) {   /* tagged block, but not TAG_IDENT_FSD */
                brelse(bh);
-               return 0;
+               return -EINVAL;
        }
-       return 1;
+
+       udf_debug("Fileset at block=%u, partition=%u\n",
+                 fileset->logicalBlockNum, fileset->partitionReferenceNum);
+
+       UDF_SB(sb)->s_partition = fileset->partitionReferenceNum;
+       ret = udf_load_fileset(sb, (struct fileSetDesc *)bh->b_data, root);
+       brelse(bh);     /* the descriptor pointed into bh->b_data; release only after it was parsed */
+       return ret;
 }
 
 /*
@@ -794,9 +855,7 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
        struct buffer_head *bh;
        uint16_t ident;
        int ret = -ENOMEM;
-#ifdef UDFFS_DEBUG
        struct timestamp *ts;
-#endif
 
        outstr = kmalloc(128, GFP_NOFS);
        if (!outstr)
@@ -817,13 +876,10 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
 
        udf_disk_stamp_to_time(&UDF_SB(sb)->s_record_time,
                              pvoldesc->recordingDateAndTime);
-#ifdef UDFFS_DEBUG
        ts = &pvoldesc->recordingDateAndTime;
        udf_debug("recording time %04u/%02u/%02u %02u:%02u (%x)\n",
                  le16_to_cpu(ts->year), ts->month, ts->day, ts->hour,
                  ts->minute, le16_to_cpu(ts->typeAndTimezone));
-#endif
-
 
        ret = udf_dstrCS0toChar(sb, outstr, 31, pvoldesc->volIdent, 32);
        if (ret < 0) {
@@ -939,21 +995,6 @@ static int udf_load_metadata_files(struct super_block *sb, int partition,
        return 0;
 }
 
-static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh,
-                            struct kernel_lb_addr *root)
-{
-       struct fileSetDesc *fset;
-
-       fset = (struct fileSetDesc *)bh->b_data;
-
-       *root = lelb_to_cpu(fset->rootDirectoryICB.extLocation);
-
-       UDF_SB(sb)->s_serial_number = le16_to_cpu(fset->descTag.tagSerialNum);
-
-       udf_debug("Rootdir at block=%u, partition=%u\n",
-                 root->logicalBlockNum, root->partitionReferenceNum);
-}
-
 int udf_compute_nr_groups(struct super_block *sb, u32 partition)
 {
        struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition];
@@ -1238,9 +1279,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
         * PHYSICAL partitions are already set up
         */
        type1_idx = i;
-#ifdef UDFFS_DEBUG
        map = NULL; /* suppress 'maybe used uninitialized' warning */
-#endif
        for (i = 0; i < sbi->s_partitions; i++) {
                map = &sbi->s_partmaps[i];
 
@@ -1364,6 +1403,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
                goto out_bh;
        }
 
+       ret = udf_verify_domain_identifier(sb, &lvd->domainIdent,
+                                          "logical volume");
+       if (ret)
+               goto out_bh;
        ret = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps));
        if (ret)
                goto out_bh;
@@ -1915,7 +1958,7 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
                        int silent, struct kernel_lb_addr *fileset)
 {
        struct udf_sb_info *sbi = UDF_SB(sb);
-       loff_t nsr_off;
+       int nsr = 0;
        int ret;
 
        if (!sb_set_blocksize(sb, uopt->blocksize)) {
@@ -1926,13 +1969,13 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
        sbi->s_last_block = uopt->lastblock;
        if (!uopt->novrs) {
                /* Check that it is NSR02 compliant */
-               nsr_off = udf_check_vsd(sb);
-               if (!nsr_off) {
+               nsr = udf_check_vsd(sb);
+               if (!nsr) {
                        if (!silent)
                                udf_warn(sb, "No VRS found\n");
                        return -EINVAL;
                }
-               if (nsr_off == -1)
+               if (nsr == -1)
                        udf_debug("Failed to read sector at offset %d. "
                                  "Assuming open disc. Skipping validity "
                                  "check\n", VSD_FIRST_SECTOR_OFFSET);
@@ -2216,9 +2259,9 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
                UDF_SET_FLAG(sb, UDF_FLAG_RW_INCOMPAT);
        }
 
-       if (udf_find_fileset(sb, &fileset, &rootdir)) {
+       ret = udf_find_fileset(sb, &fileset, &rootdir);
+       if (ret < 0) {
                udf_warn(sb, "No fileset found\n");
-               ret = -EINVAL;
                goto error_out;
        }
 
index 2ef0e21..4245d1f 100644 (file)
@@ -38,16 +38,20 @@ struct udf_inode_info {
        __u32                   i_next_alloc_block;
        __u32                   i_next_alloc_goal;
        __u32                   i_checkpoint;
+       __u32                   i_extraPerms;
        unsigned                i_alloc_type : 3;
        unsigned                i_efe : 1;      /* extendedFileEntry */
        unsigned                i_use : 1;      /* unallocSpaceEntry */
        unsigned                i_strat4096 : 1;
-       unsigned                reserved : 26;
+       unsigned                i_streamdir : 1;
+       unsigned                reserved : 25;
        union {
                struct short_ad *i_sad;
                struct long_ad          *i_lad;
                __u8            *i_data;
        } i_ext;
+       struct kernel_lb_addr   i_locStreamdir;
+       __u64                   i_lenStreams;
        struct rw_semaphore     i_data_sem;
        struct udf_ext_cache cached_extent;
        /* Spinlock for protecting extent cache */
index d89ef71..9dd0814 100644 (file)
@@ -31,16 +31,8 @@ extern __printf(3, 4) void _udf_warn(struct super_block *sb,
 #define udf_info(fmt, ...)                                     \
        pr_info("INFO " fmt, ##__VA_ARGS__)
 
-#undef UDFFS_DEBUG
-
-#ifdef UDFFS_DEBUG
-#define udf_debug(fmt, ...)                                    \
-       printk(KERN_DEBUG pr_fmt("%s:%d:%s: " fmt),             \
-              __FILE__, __LINE__, __func__, ##__VA_ARGS__)
-#else
 #define udf_debug(fmt, ...)                                    \
-       no_printk(fmt, ##__VA_ARGS__)
-#endif
+       pr_debug("%s:%d:%s: " fmt, __FILE__, __LINE__, __func__, ##__VA_ARGS__)
 
 #define udf_fixed_to_variable(x) ( ( ( (x) >> 5 ) * 39 ) + ( (x) & 0x0000001F ) )
 #define udf_variable_to_fixed(x) ( ( ( (x) / 39 ) << 5 ) + ( (x) % 39 ) )
@@ -178,6 +170,7 @@ extern int8_t udf_next_aext(struct inode *, struct extent_position *,
                            struct kernel_lb_addr *, uint32_t *, int);
 extern int8_t udf_current_aext(struct inode *, struct extent_position *,
                               struct kernel_lb_addr *, uint32_t *, int);
+extern void udf_update_extra_perms(struct inode *inode, umode_t mode);
 
 /* misc.c */
 extern struct buffer_head *udf_tgetblk(struct super_block *sb,
index 71ca4d0..2a878b7 100644 (file)
@@ -154,7 +154,7 @@ static int utf8_parse_version(const char *version, unsigned int *maj,
 {
        substring_t args[3];
        char version_string[12];
-       const struct match_token token[] = {
+       static const struct match_token token[] = {
                {1, "%d.%d.%d"},
                {0, NULL}
        };
index 6c1a36b..6fe8af7 100644 (file)
@@ -35,7 +35,7 @@ unsigned int total_tests;
 #define test_f(cond, fmt, ...) _test(cond, __func__, __LINE__, fmt, ##__VA_ARGS__)
 #define test(cond) _test(cond, __func__, __LINE__, "")
 
-const static struct {
+static const struct {
        /* UTF-8 strings in this vector _must_ be NULL-terminated. */
        unsigned char str[10];
        unsigned char dec[10];
@@ -89,7 +89,7 @@ const static struct {
 
 };
 
-const static struct {
+static const struct {
        /* UTF-8 strings in this vector _must_ be NULL-terminated. */
        unsigned char str[30];
        unsigned char ncf[30];
index fe6d804..f9fd186 100644 (file)
@@ -1272,21 +1272,23 @@ static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
 }
 
 static __always_inline int validate_range(struct mm_struct *mm,
-                                         __u64 start, __u64 len)
+                                         __u64 *start, __u64 len)
 {
        __u64 task_size = mm->task_size;
 
-       if (start & ~PAGE_MASK)
+       *start = untagged_addr(*start); /* stored back through the pointer: the caller's copy becomes the untagged value */
+
+       if (*start & ~PAGE_MASK)
                return -EINVAL;
        if (len & ~PAGE_MASK)
                return -EINVAL;
        if (!len)
                return -EINVAL;
-       if (start < mmap_min_addr)
+       if (*start < mmap_min_addr)
                return -EINVAL;
-       if (start >= task_size)
+       if (*start >= task_size)
                return -EINVAL;
-       if (len > task_size - start)
+       if (len > task_size - *start)   /* no underflow: *start < task_size was checked just above */
                return -EINVAL;
        return 0;
 }
@@ -1336,7 +1338,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
                goto out;
        }
 
-       ret = validate_range(mm, uffdio_register.range.start,
+       ret = validate_range(mm, &uffdio_register.range.start,
                             uffdio_register.range.len);
        if (ret)
                goto out;
@@ -1525,7 +1527,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
        if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
                goto out;
 
-       ret = validate_range(mm, uffdio_unregister.start,
+       ret = validate_range(mm, &uffdio_unregister.start,
                             uffdio_unregister.len);
        if (ret)
                goto out;
@@ -1676,7 +1678,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx *ctx,
        if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake)))
                goto out;
 
-       ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len);
+       ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len);
        if (ret)
                goto out;
 
@@ -1716,7 +1718,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
                           sizeof(uffdio_copy)-sizeof(__s64)))
                goto out;
 
-       ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len);
+       ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len);
        if (ret)
                goto out;
        /*
@@ -1772,7 +1774,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
                           sizeof(uffdio_zeropage)-sizeof(__s64)))
                goto out;
 
-       ret = validate_range(ctx->mm, uffdio_zeropage.range.start,
+       ret = validate_range(ctx->mm, &uffdio_zeropage.range.start,
                             uffdio_zeropage.range.len);
        if (ret)
                goto out;
index 58fa85c..d6ed5d2 100644 (file)
@@ -81,9 +81,10 @@ typedef struct xfs_alloc_arg {
 /*
  * Defines for datatype
  */
-#define XFS_ALLOC_INITIAL_USER_DATA    (1 << 0)/* special case start of file */
-#define XFS_ALLOC_USERDATA_ZERO                (1 << 1)/* zero extent on allocation */
-#define XFS_ALLOC_NOBUSY               (1 << 2)/* Busy extents not allowed */
+#define XFS_ALLOC_USERDATA             (1 << 0)/* allocation is for user data*/
+#define XFS_ALLOC_INITIAL_USER_DATA    (1 << 1)/* special case start of file */
+#define XFS_ALLOC_USERDATA_ZERO                (1 << 2)/* zero extent on allocation */
+#define XFS_ALLOC_NOBUSY               (1 << 3)/* Busy extents not allowed */
 
 static inline bool
 xfs_alloc_is_userdata(int datatype)
index 054b4ce..4edc25a 100644 (file)
@@ -4042,8 +4042,12 @@ xfs_bmapi_allocate(
         */
        if (!(bma->flags & XFS_BMAPI_METADATA)) {
                bma->datatype = XFS_ALLOC_NOBUSY;
-               if (whichfork == XFS_DATA_FORK && bma->offset == 0)
-                       bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
+               if (whichfork == XFS_DATA_FORK) {
+                       if (bma->offset == 0)
+                               bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
+                       else
+                               bma->datatype |= XFS_ALLOC_USERDATA;
+               }
                if (bma->flags & XFS_BMAPI_ZERO)
                        bma->datatype |= XFS_ALLOC_USERDATA_ZERO;
        }
@@ -5621,6 +5625,11 @@ xfs_bmse_merge(
        if (error)
                return error;
 
+       /* change to extent format if required after extent removal */
+       error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
+       if (error)
+               return error;
+
 done:
        xfs_iext_remove(ip, icur, 0);
        xfs_iext_prev(XFS_IFORK_PTR(ip, whichfork), icur);
index a08dd8f..ac6cdca 100644 (file)
@@ -928,7 +928,7 @@ xfs_log_sb(
 
        xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
-       xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb));
+       xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1);
 }
 
 /*
index a43d181..5533e48 100644 (file)
@@ -97,7 +97,6 @@ xchk_allocbt_rec(
        xfs_agnumber_t          agno = bs->cur->bc_private.a.agno;
        xfs_agblock_t           bno;
        xfs_extlen_t            len;
-       int                     error = 0;
 
        bno = be32_to_cpu(rec->alloc.ar_startblock);
        len = be32_to_cpu(rec->alloc.ar_blockcount);
@@ -109,7 +108,7 @@ xchk_allocbt_rec(
 
        xchk_allocbt_xref(bs->sc, bno, len);
 
-       return error;
+       return 0;
 }
 
 /* Scrub the freespace btrees for some AG. */
index 120ef99..21c2436 100644 (file)
@@ -2097,7 +2097,7 @@ xfs_verify_magic(
        int                     idx;
 
        idx = xfs_sb_version_hascrc(&mp->m_sb);
-       if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx])))
+       if (WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx]))
                return false;
        return dmagic == bp->b_ops->magic[idx];
 }
@@ -2115,7 +2115,7 @@ xfs_verify_magic16(
        int                     idx;
 
        idx = xfs_sb_version_hascrc(&mp->m_sb);
-       if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx])))
+       if (WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx]))
                return false;
        return dmagic == bp->b_ops->magic16[idx];
 }
index d952d59..1ffb179 100644 (file)
@@ -370,21 +370,23 @@ static int
 xfs_dio_write_end_io(
        struct kiocb            *iocb,
        ssize_t                 size,
+       int                     error,
        unsigned                flags)
 {
        struct inode            *inode = file_inode(iocb->ki_filp);
        struct xfs_inode        *ip = XFS_I(inode);
        loff_t                  offset = iocb->ki_pos;
        unsigned int            nofs_flag;
-       int                     error = 0;
 
        trace_xfs_end_io_direct_write(ip, offset, size);
 
        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
                return -EIO;
 
-       if (size <= 0)
-               return size;
+       if (error)
+               return error;
+       if (!size)
+               return 0;
 
        /*
         * Capture amount written on completion as we can't reliably account
@@ -441,6 +443,10 @@ out:
        return error;
 }
 
+static const struct iomap_dio_ops xfs_dio_write_ops = { /* ops table handed to iomap_dio_rw() for direct writes */
+       .end_io         = xfs_dio_write_end_io, /* the only hook set here */
+};
+
 /*
  * xfs_file_dio_aio_write - handle direct IO writes
  *
@@ -541,7 +547,7 @@ xfs_file_dio_aio_write(
        }
 
        trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
-       ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
+       ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, &xfs_dio_write_ops);
 
        /*
         * If unaligned, this is the only IO in-flight. If it has not yet
index ddd0bf7..f1bc88f 100644 (file)
@@ -63,19 +63,6 @@ static const struct sysfs_ops xfs_sysfs_ops = {
        .store = xfs_sysfs_object_store,
 };
 
-/*
- * xfs_mount kobject. The mp kobject also serves as the per-mount parent object
- * that is identified by the fsname under sysfs.
- */
-
-static inline struct xfs_mount *
-to_mp(struct kobject *kobject)
-{
-       struct xfs_kobj *kobj = to_kobj(kobject);
-
-       return container_of(kobj, struct xfs_mount, m_kobj);
-}
-
 static struct attribute *xfs_mp_attrs[] = {
        NULL,
 };
index 7259d87..ffba794 100644 (file)
@@ -312,7 +312,6 @@ header-test-                        += linux/mfd/as3711.h
 header-test-                   += linux/mfd/as3722.h
 header-test-                   += linux/mfd/da903x.h
 header-test-                   += linux/mfd/da9055/pdata.h
-header-test-                   += linux/mfd/da9063/pdata.h
 header-test-                   += linux/mfd/db8500-prcmu.h
 header-test-                   += linux/mfd/dbx500-prcmu.h
 header-test-                   += linux/mfd/dln2.h
@@ -881,12 +880,6 @@ header-test-                       += net/xdp.h
 header-test-                   += net/xdp_priv.h
 header-test-                   += pcmcia/cistpl.h
 header-test-                   += pcmcia/ds.h
-header-test-                   += rdma/ib.h
-header-test-                   += rdma/iw_portmap.h
-header-test-                   += rdma/opa_port_info.h
-header-test-                   += rdma/rdmavt_cq.h
-header-test-                   += rdma/restrack.h
-header-test-                   += rdma/signature.h
 header-test-                   += rdma/tid_rdma_defs.h
 header-test-                   += scsi/fc/fc_encaps.h
 header-test-                   += scsi/fc/fc_fc2.h
index 7357a3c..384b5c8 100644 (file)
@@ -10,6 +10,7 @@
 #define BUGFLAG_WARNING                (1 << 0)
 #define BUGFLAG_ONCE           (1 << 1)
 #define BUGFLAG_DONE           (1 << 2)
+#define BUGFLAG_NO_CUT_HERE    (1 << 3)        /* CUT_HERE already sent */
 #define BUGFLAG_TAINT(taint)   ((taint) << 8)
 #define BUG_GET_TAINT(bug)     ((bug)->flags >> 8)
 #endif
@@ -61,18 +62,6 @@ struct bug_entry {
 #define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)
 #endif
 
-#ifdef __WARN_FLAGS
-#define __WARN_TAINT(taint)            __WARN_FLAGS(BUGFLAG_TAINT(taint))
-#define __WARN_ONCE_TAINT(taint)       __WARN_FLAGS(BUGFLAG_ONCE|BUGFLAG_TAINT(taint))
-
-#define WARN_ON_ONCE(condition) ({                             \
-       int __ret_warn_on = !!(condition);                      \
-       if (unlikely(__ret_warn_on))                            \
-               __WARN_ONCE_TAINT(TAINT_WARN);                  \
-       unlikely(__ret_warn_on);                                \
-})
-#endif
-
 /*
  * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report
  * significant kernel issues that need prompt attention if they should ever
@@ -89,27 +78,27 @@ struct bug_entry {
  *
  * Use the versions with printk format strings to provide better diagnostics.
  */
-#ifndef __WARN_TAINT
-extern __printf(3, 4)
-void warn_slowpath_fmt(const char *file, const int line,
-                      const char *fmt, ...);
+#ifndef __WARN_FLAGS
 extern __printf(4, 5)
-void warn_slowpath_fmt_taint(const char *file, const int line, unsigned taint,
-                            const char *fmt, ...);
-extern void warn_slowpath_null(const char *file, const int line);
-#define WANT_WARN_ON_SLOWPATH
-#define __WARN()               warn_slowpath_null(__FILE__, __LINE__)
-#define __WARN_printf(arg...)  warn_slowpath_fmt(__FILE__, __LINE__, arg)
-#define __WARN_printf_taint(taint, arg...)                             \
-       warn_slowpath_fmt_taint(__FILE__, __LINE__, taint, arg)
+void warn_slowpath_fmt(const char *file, const int line, unsigned taint,
+                      const char *fmt, ...);
+#define __WARN()               __WARN_printf(TAINT_WARN, NULL)
+#define __WARN_printf(taint, arg...)                                   \
+       warn_slowpath_fmt(__FILE__, __LINE__, taint, arg)
 #else
 extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
-#define __WARN() do { \
-       printk(KERN_WARNING CUT_HERE); __WARN_TAINT(TAINT_WARN); \
-} while (0)
-#define __WARN_printf(arg...)  __WARN_printf_taint(TAINT_WARN, arg)
-#define __WARN_printf_taint(taint, arg...)                             \
-       do { __warn_printk(arg); __WARN_TAINT(taint); } while (0)
+#define __WARN()               __WARN_FLAGS(BUGFLAG_TAINT(TAINT_WARN))
+#define __WARN_printf(taint, arg...) do {                              \
+               __warn_printk(arg);                                     \
+               __WARN_FLAGS(BUGFLAG_NO_CUT_HERE | BUGFLAG_TAINT(taint));\
+       } while (0)
+#define WARN_ON_ONCE(condition) ({                             \
+       int __ret_warn_on = !!(condition);                      \
+       if (unlikely(__ret_warn_on))                            \
+               __WARN_FLAGS(BUGFLAG_ONCE |                     \
+                            BUGFLAG_TAINT(TAINT_WARN));        \
+       unlikely(__ret_warn_on);                                \
+})
 #endif
 
 /* used internally by panic.c */
@@ -132,7 +121,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 #define WARN(condition, format...) ({                                  \
        int __ret_warn_on = !!(condition);                              \
        if (unlikely(__ret_warn_on))                                    \
-               __WARN_printf(format);                                  \
+               __WARN_printf(TAINT_WARN, format);                      \
        unlikely(__ret_warn_on);                                        \
 })
 #endif
@@ -140,7 +129,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 #define WARN_TAINT(condition, taint, format...) ({                     \
        int __ret_warn_on = !!(condition);                              \
        if (unlikely(__ret_warn_on))                                    \
-               __WARN_printf_taint(taint, format);                     \
+               __WARN_printf(taint, format);                           \
        unlikely(__ret_warn_on);                                        \
 })
 
index 294d6ae..fa57797 100644 (file)
@@ -4,26 +4,24 @@
 #ifndef KSYM_FUNC
 #define KSYM_FUNC(x) x
 #endif
-#ifdef CONFIG_64BIT
-#ifndef KSYM_ALIGN
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define KSYM_ALIGN 4
+#elif defined(CONFIG_64BIT)
 #define KSYM_ALIGN 8
-#endif
 #else
-#ifndef KSYM_ALIGN
 #define KSYM_ALIGN 4
 #endif
-#endif
 #ifndef KCRC_ALIGN
 #define KCRC_ALIGN 4
 #endif
 
 .macro __put, val, name
 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
-       .long   \val - ., \name - .
+       .long   \val - ., \name - ., 0
 #elif defined(CONFIG_64BIT)
-       .quad   \val, \name
+       .quad   \val, \name, 0
 #else
-       .long   \val, \name
+       .long   \val, \name, 0
 #endif
 .endm
 
@@ -57,7 +55,6 @@ __kcrctab_\name:
 #endif
 #endif
 .endm
-#undef __put
 
 #if defined(CONFIG_TRIM_UNUSED_KSYMS)
 
index 8476175..73f7421 100644 (file)
@@ -49,7 +49,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
  * @mm: the mm_struct of the current context
  * @gfp: GFP flags to use for the allocation
  *
- * Allocates a page and runs the pgtable_page_ctor().
+ * Allocates a page and runs the pgtable_pte_page_ctor().
  *
  * This function is intended for architectures that need
  * anything beyond simple page allocation or must have custom GFP flags.
@@ -63,7 +63,7 @@ static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp)
        pte = alloc_page(gfp);
        if (!pte)
                return NULL;
-       if (!pgtable_page_ctor(pte)) {
+       if (!pgtable_pte_page_ctor(pte)) {
                __free_page(pte);
                return NULL;
        }
@@ -76,7 +76,7 @@ static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp)
  * pte_alloc_one - allocate a page for PTE-level user page table
  * @mm: the mm_struct of the current context
  *
- * Allocates a page and runs the pgtable_page_ctor().
+ * Allocates a page and runs the pgtable_pte_page_ctor().
  *
  * Return: `struct page` initialized as page table or %NULL on error
  */
@@ -98,15 +98,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
  */
 static inline void pte_free(struct mm_struct *mm, struct page *pte_page)
 {
-       pgtable_page_dtor(pte_page);
+       pgtable_pte_page_dtor(pte_page);
        __free_page(pte_page);
 }
 
-#else /* CONFIG_MMU */
-
-/* This is enough for a nommu architecture */
-#define check_pgt_cache()          do { } while (0)
-
 #endif /* CONFIG_MMU */
 
 #endif /* __ASM_GENERIC_PGALLOC_H */
index 75d9d68..8186918 100644 (file)
@@ -1002,9 +1002,8 @@ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
  * need this). If THP is not enabled, the pmd can't go away under the
  * code even if MADV_DONTNEED runs, but if THP is enabled we need to
  * run a pmd_trans_unstable before walking the ptes after
- * split_huge_page_pmd returns (because it may have run when the pmd
- * become null, but then a page fault can map in a THP and not a
- * regular page).
+ * split_huge_pmd returns (because it may have run when the pmd become
+ * null, but then a page fault can map in a THP and not a regular page).
  */
 static inline int pmd_trans_unstable(pmd_t *pmd)
 {
@@ -1126,7 +1125,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 static inline void init_espfix_bsp(void) { }
 #endif
 
-extern void __init pgd_cache_init(void);
+extern void __init pgtable_cache_init(void);
 
 #ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
 static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
index cd28f63..dae6460 100644 (file)
                        __start_lsm_info = .;                           \
                        KEEP(*(.lsm_info.init))                         \
                        __end_lsm_info = .;
+#define EARLY_LSM_TABLE()      . = ALIGN(8);                           \
+                       __start_early_lsm_info = .;                     \
+                       KEEP(*(.early_lsm_info.init))                   \
+                       __end_early_lsm_info = .;
 #else
 #define LSM_TABLE()
+#define EARLY_LSM_TABLE()
 #endif
 
 #define ___OF_TABLE(cfg, name) _OF_TABLE_##cfg(name)
        ACPI_PROBE_TABLE(timer)                                         \
        THERMAL_TABLE(governor)                                         \
        EARLYCON_TABLE()                                                \
-       LSM_TABLE()
+       LSM_TABLE()                                                     \
+       EARLY_LSM_TABLE()
 
 #define INIT_TEXT                                                      \
        *(.init.text .init.text.*)                                      \
index 96071be..38ec7f5 100644 (file)
@@ -9,6 +9,7 @@
 #define _CRYPTO_PKCS7_H
 
 #include <linux/verification.h>
+#include <linux/hash_info.h>
 #include <crypto/public_key.h>
 
 struct key;
@@ -40,4 +41,7 @@ extern int pkcs7_verify(struct pkcs7_message *pkcs7,
 extern int pkcs7_supply_detached_data(struct pkcs7_message *pkcs7,
                                      const void *data, size_t datalen);
 
+extern int pkcs7_get_digest(struct pkcs7_message *pkcs7, const u8 **buf,
+                           u32 *len, enum hash_algo *hash_algo);
+
 #endif /* _CRYPTO_PKCS7_H */
index 7d14c11..408b6f4 100644 (file)
@@ -285,12 +285,12 @@ struct drm_crtc_state {
        u32 target_vblank;
 
        /**
-        * @pageflip_flags:
+        * @async_flip:
         *
-        * DRM_MODE_PAGE_FLIP_* flags, as passed to the page flip ioctl.
-        * Zero in any other case.
+        * This is set when DRM_MODE_PAGE_FLIP_ASYNC is set in the legacy
+        * PAGE_FLIP IOCTL. It's not wired up for the atomic IOCTL itself yet.
         */
-       u32 pageflip_flags;
+       bool async_flip;
 
        /**
         * @vrr_enabled:
@@ -1108,7 +1108,7 @@ struct drm_crtc {
        /**
         * @self_refresh_data: Holds the state for the self refresh helpers
         *
-        * Initialized via drm_self_refresh_helper_register().
+        * Initialized via drm_self_refresh_helper_init().
         */
        struct drm_self_refresh_data *self_refresh_data;
 };
index 397a583..5b79d25 100644 (file)
@@ -12,9 +12,9 @@ struct drm_atomic_state;
 struct drm_crtc;
 
 void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state);
+void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state,
+                                             unsigned int commit_time_ms);
 
-int drm_self_refresh_helper_init(struct drm_crtc *crtc,
-                                unsigned int entry_delay_ms);
-
+int drm_self_refresh_helper_init(struct drm_crtc *crtc);
 void drm_self_refresh_helper_cleanup(struct drm_crtc *crtc);
 #endif
index 7138384..babd08a 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* TI sysc interconnect target module defines */
 
 /* Generic sysc found on omap2 and later, also known as type1 */
diff --git a/include/dt-bindings/clock/ast2600-clock.h b/include/dt-bindings/clock/ast2600-clock.h
new file mode 100644 (file)
index 0000000..38074a5
--- /dev/null
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later OR MIT */
+#ifndef DT_BINDINGS_AST2600_CLOCK_H
+#define DT_BINDINGS_AST2600_CLOCK_H
+
+#define ASPEED_CLK_GATE_ECLK           0
+#define ASPEED_CLK_GATE_GCLK           1
+
+#define ASPEED_CLK_GATE_MCLK           2
+
+#define ASPEED_CLK_GATE_VCLK           3
+#define ASPEED_CLK_GATE_BCLK           4
+#define ASPEED_CLK_GATE_DCLK           5
+
+#define ASPEED_CLK_GATE_LCLK           6
+#define ASPEED_CLK_GATE_LHCCLK         7
+
+#define ASPEED_CLK_GATE_D1CLK          8
+#define ASPEED_CLK_GATE_YCLK           9
+
+#define ASPEED_CLK_GATE_REF0CLK                10
+#define ASPEED_CLK_GATE_REF1CLK                11
+
+#define ASPEED_CLK_GATE_ESPICLK                12
+
+#define ASPEED_CLK_GATE_USBUHCICLK     13
+#define ASPEED_CLK_GATE_USBPORT1CLK    14
+#define ASPEED_CLK_GATE_USBPORT2CLK    15
+
+#define ASPEED_CLK_GATE_RSACLK         16
+#define ASPEED_CLK_GATE_RVASCLK                17
+
+#define ASPEED_CLK_GATE_MAC1CLK                18
+#define ASPEED_CLK_GATE_MAC2CLK                19
+#define ASPEED_CLK_GATE_MAC3CLK                20
+#define ASPEED_CLK_GATE_MAC4CLK                21
+
+#define ASPEED_CLK_GATE_UART1CLK       22
+#define ASPEED_CLK_GATE_UART2CLK       23
+#define ASPEED_CLK_GATE_UART3CLK       24
+#define ASPEED_CLK_GATE_UART4CLK       25
+#define ASPEED_CLK_GATE_UART5CLK       26
+#define ASPEED_CLK_GATE_UART6CLK       27
+#define ASPEED_CLK_GATE_UART7CLK       28
+#define ASPEED_CLK_GATE_UART8CLK       29
+#define ASPEED_CLK_GATE_UART9CLK       30
+#define ASPEED_CLK_GATE_UART10CLK      31
+#define ASPEED_CLK_GATE_UART11CLK      32
+#define ASPEED_CLK_GATE_UART12CLK      33
+#define ASPEED_CLK_GATE_UART13CLK      34
+
+#define ASPEED_CLK_GATE_SDCLK          35
+#define ASPEED_CLK_GATE_EMMCCLK                36
+
+#define ASPEED_CLK_GATE_I3C0CLK                37
+#define ASPEED_CLK_GATE_I3C1CLK                38
+#define ASPEED_CLK_GATE_I3C2CLK                39
+#define ASPEED_CLK_GATE_I3C3CLK                40
+#define ASPEED_CLK_GATE_I3C4CLK                41
+#define ASPEED_CLK_GATE_I3C5CLK                42
+#define ASPEED_CLK_GATE_I3C6CLK                43
+#define ASPEED_CLK_GATE_I3C7CLK                44
+
+#define ASPEED_CLK_GATE_FSICLK         45
+
+#define ASPEED_CLK_HPLL                        46
+#define ASPEED_CLK_MPLL                        47
+#define ASPEED_CLK_DPLL                        48
+#define ASPEED_CLK_EPLL                        49
+#define ASPEED_CLK_APLL                        50
+#define ASPEED_CLK_AHB                 51
+#define ASPEED_CLK_APB1                        52
+#define ASPEED_CLK_APB2                        53
+#define ASPEED_CLK_BCLK                        54
+#define ASPEED_CLK_D1CLK               55
+#define ASPEED_CLK_VCLK                        56
+#define ASPEED_CLK_LHCLK               57
+#define ASPEED_CLK_UART                        58
+#define ASPEED_CLK_UARTX               59
+#define ASPEED_CLK_SDIO                        60
+#define ASPEED_CLK_EMMC                        61
+#define ASPEED_CLK_ECLK                        62
+#define ASPEED_CLK_ECLK_MUX            63
+#define ASPEED_CLK_MAC12               64
+#define ASPEED_CLK_MAC34               65
+#define ASPEED_CLK_USBPHY_40M          66
+
+/* Only list resets here that are not part of a gate */
+#define ASPEED_RESET_ADC               55
+#define ASPEED_RESET_JTAG_MASTER2      54
+#define ASPEED_RESET_I3C_DMA           39
+#define ASPEED_RESET_PWM               37
+#define ASPEED_RESET_PECI              36
+#define ASPEED_RESET_MII               35
+#define ASPEED_RESET_I2C               34
+#define ASPEED_RESET_H2X               31
+#define ASPEED_RESET_GP_MCU            30
+#define ASPEED_RESET_DP_MCU            29
+#define ASPEED_RESET_DP                        28
+#define ASPEED_RESET_RC_XDMA           27
+#define ASPEED_RESET_GRAPHICS          26
+#define ASPEED_RESET_DEV_XDMA          25
+#define ASPEED_RESET_DEV_MCTP          24
+#define ASPEED_RESET_RC_MCTP           23
+#define ASPEED_RESET_JTAG_MASTER       22
+#define ASPEED_RESET_PCIE_DEV_O                21
+#define ASPEED_RESET_PCIE_DEV_OEN      20
+#define ASPEED_RESET_PCIE_RC_O         19
+#define ASPEED_RESET_PCIE_RC_OEN       18
+#define ASPEED_RESET_PCI_DP            5
+#define ASPEED_RESET_AHB               1
+#define ASPEED_RESET_SDRAM             0
+
+#endif
index 2cec01f..b60c034 100644 (file)
@@ -58,3 +58,5 @@
 #define BCM2835_CLOCK_DSI1E            48
 #define BCM2835_CLOCK_DSI0P            49
 #define BCM2835_CLOCK_DSI1P            50
+
+#define BCM2711_CLOCK_EMMC2            51
index 5255b1c..d7b2016 100644 (file)
 #define IMX8MN_CLK_ARM                         191
 #define IMX8MN_CLK_NAND_USDHC_BUS_RAWNAND_CLK  192
 #define IMX8MN_CLK_GPU_CORE_ROOT               193
+#define IMX8MN_CLK_GIC                         194
 
-#define IMX8MN_CLK_END                         194
+#define IMX8MN_CLK_END                         195
 
 #endif
diff --git a/include/dt-bindings/clock/ingenic,tcu.h b/include/dt-bindings/clock/ingenic,tcu.h
new file mode 100644 (file)
index 0000000..d569650
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This header provides clock numbers for the ingenic,tcu DT binding.
+ */
+
+#ifndef __DT_BINDINGS_CLOCK_INGENIC_TCU_H__
+#define __DT_BINDINGS_CLOCK_INGENIC_TCU_H__
+
+#define TCU_CLK_TIMER0 0
+#define TCU_CLK_TIMER1 1
+#define TCU_CLK_TIMER2 2
+#define TCU_CLK_TIMER3 3
+#define TCU_CLK_TIMER4 4
+#define TCU_CLK_TIMER5 5
+#define TCU_CLK_TIMER6 6
+#define TCU_CLK_TIMER7 7
+#define TCU_CLK_WDT    8
+#define TCU_CLK_OST    9
+
+#endif /* __DT_BINDINGS_CLOCK_INGENIC_TCU_H__ */
index 6ed83f9..e82d770 100644 (file)
@@ -34,5 +34,6 @@
 #define JZ4740_CLK_ADC         19
 #define JZ4740_CLK_I2C         20
 #define JZ4740_CLK_AIC         21
+#define JZ4740_CLK_TCU         22
 
 #endif /* __DT_BINDINGS_CLOCK_JZ4740_CGU_H__ */
diff --git a/include/dt-bindings/clock/mt6779-clk.h b/include/dt-bindings/clock/mt6779-clk.h
new file mode 100644 (file)
index 0000000..b083139
--- /dev/null
@@ -0,0 +1,436 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ * Author: Wendell Lin <wendell.lin@mediatek.com>
+ */
+
+#ifndef _DT_BINDINGS_CLK_MT6779_H
+#define _DT_BINDINGS_CLK_MT6779_H
+
+/* TOPCKGEN */
+#define CLK_TOP_AXI                    1
+#define CLK_TOP_MM                     2
+#define CLK_TOP_CAM                    3
+#define CLK_TOP_MFG                    4
+#define CLK_TOP_CAMTG                  5
+#define CLK_TOP_UART                   6
+#define CLK_TOP_SPI                    7
+#define CLK_TOP_MSDC50_0_HCLK          8
+#define CLK_TOP_MSDC50_0               9
+#define CLK_TOP_MSDC30_1               10
+#define CLK_TOP_MSDC30_2               11
+#define CLK_TOP_AUD                    12
+#define CLK_TOP_AUD_INTBUS             13
+#define CLK_TOP_FPWRAP_ULPOSC          14
+#define CLK_TOP_SCP                    15
+#define CLK_TOP_ATB                    16
+#define CLK_TOP_SSPM                   17
+#define CLK_TOP_DPI0                   18
+#define CLK_TOP_SCAM                   19
+#define CLK_TOP_AUD_1                  20
+#define CLK_TOP_AUD_2                  21
+#define CLK_TOP_DISP_PWM               22
+#define CLK_TOP_SSUSB_TOP_XHCI         23
+#define CLK_TOP_USB_TOP                        24
+#define CLK_TOP_SPM                    25
+#define CLK_TOP_I2C                    26
+#define CLK_TOP_F52M_MFG               27
+#define CLK_TOP_SENINF                 28
+#define CLK_TOP_DXCC                   29
+#define CLK_TOP_CAMTG2                 30
+#define CLK_TOP_AUD_ENG1               31
+#define CLK_TOP_AUD_ENG2               32
+#define CLK_TOP_FAES_UFSFDE            33
+#define CLK_TOP_FUFS                   34
+#define CLK_TOP_IMG                    35
+#define CLK_TOP_DSP                    36
+#define CLK_TOP_DSP1                   37
+#define CLK_TOP_DSP2                   38
+#define CLK_TOP_IPU_IF                 39
+#define CLK_TOP_CAMTG3                 40
+#define CLK_TOP_CAMTG4                 41
+#define CLK_TOP_PMICSPI                        42
+#define CLK_TOP_MAINPLL_CK             43
+#define CLK_TOP_MAINPLL_D2             44
+#define CLK_TOP_MAINPLL_D3             45
+#define CLK_TOP_MAINPLL_D5             46
+#define CLK_TOP_MAINPLL_D7             47
+#define CLK_TOP_MAINPLL_D2_D2          48
+#define CLK_TOP_MAINPLL_D2_D4          49
+#define CLK_TOP_MAINPLL_D2_D8          50
+#define CLK_TOP_MAINPLL_D2_D16         51
+#define CLK_TOP_MAINPLL_D3_D2          52
+#define CLK_TOP_MAINPLL_D3_D4          53
+#define CLK_TOP_MAINPLL_D3_D8          54
+#define CLK_TOP_MAINPLL_D5_D2          55
+#define CLK_TOP_MAINPLL_D5_D4          56
+#define CLK_TOP_MAINPLL_D7_D2          57
+#define CLK_TOP_MAINPLL_D7_D4          58
+#define CLK_TOP_UNIVPLL_CK             59
+#define CLK_TOP_UNIVPLL_D2             60
+#define CLK_TOP_UNIVPLL_D3             61
+#define CLK_TOP_UNIVPLL_D5             62
+#define CLK_TOP_UNIVPLL_D7             63
+#define CLK_TOP_UNIVPLL_D2_D2          64
+#define CLK_TOP_UNIVPLL_D2_D4          65
+#define CLK_TOP_UNIVPLL_D2_D8          66
+#define CLK_TOP_UNIVPLL_D3_D2          67
+#define CLK_TOP_UNIVPLL_D3_D4          68
+#define CLK_TOP_UNIVPLL_D3_D8          69
+#define CLK_TOP_UNIVPLL_D5_D2          70
+#define CLK_TOP_UNIVPLL_D5_D4          71
+#define CLK_TOP_UNIVPLL_D5_D8          72
+#define CLK_TOP_APLL1_CK               73
+#define CLK_TOP_APLL1_D2               74
+#define CLK_TOP_APLL1_D4               75
+#define CLK_TOP_APLL1_D8               76
+#define CLK_TOP_APLL2_CK               77
+#define CLK_TOP_APLL2_D2               78
+#define CLK_TOP_APLL2_D4               79
+#define CLK_TOP_APLL2_D8               80
+#define CLK_TOP_TVDPLL_CK              81
+#define CLK_TOP_TVDPLL_D2              82
+#define CLK_TOP_TVDPLL_D4              83
+#define CLK_TOP_TVDPLL_D8              84
+#define CLK_TOP_TVDPLL_D16             85
+#define CLK_TOP_MSDCPLL_CK             86
+#define CLK_TOP_MSDCPLL_D2             87
+#define CLK_TOP_MSDCPLL_D4             88
+#define CLK_TOP_MSDCPLL_D8             89
+#define CLK_TOP_MSDCPLL_D16            90
+#define CLK_TOP_AD_OSC_CK              91
+#define CLK_TOP_OSC_D2                 92
+#define CLK_TOP_OSC_D4                 93
+#define CLK_TOP_OSC_D8                 94
+#define CLK_TOP_OSC_D16                        95
+#define CLK_TOP_F26M_CK_D2             96
+#define CLK_TOP_MFGPLL_CK              97
+#define CLK_TOP_UNIVP_192M_CK          98
+#define CLK_TOP_UNIVP_192M_D2          99
+#define CLK_TOP_UNIVP_192M_D4          100
+#define CLK_TOP_UNIVP_192M_D8          101
+#define CLK_TOP_UNIVP_192M_D16         102
+#define CLK_TOP_UNIVP_192M_D32         103
+#define CLK_TOP_MMPLL_CK               104
+#define CLK_TOP_MMPLL_D4               105
+#define CLK_TOP_MMPLL_D4_D2            106
+#define CLK_TOP_MMPLL_D4_D4            107
+#define CLK_TOP_MMPLL_D5               108
+#define CLK_TOP_MMPLL_D5_D2            109
+#define CLK_TOP_MMPLL_D5_D4            110
+#define CLK_TOP_MMPLL_D6               111
+#define CLK_TOP_MMPLL_D7               112
+#define CLK_TOP_CLK26M                 113
+#define CLK_TOP_CLK13M                 114
+#define CLK_TOP_ADSP                   115
+#define CLK_TOP_DPMAIF                 116
+#define CLK_TOP_VENC                   117
+#define CLK_TOP_VDEC                   118
+#define CLK_TOP_CAMTM                  119
+#define CLK_TOP_PWM                    120
+#define CLK_TOP_ADSPPLL_CK             121
+#define CLK_TOP_I2S0_M_SEL             122
+#define CLK_TOP_I2S1_M_SEL             123
+#define CLK_TOP_I2S2_M_SEL             124
+#define CLK_TOP_I2S3_M_SEL             125
+#define CLK_TOP_I2S4_M_SEL             126
+#define CLK_TOP_I2S5_M_SEL             127
+#define CLK_TOP_APLL12_DIV0            128
+#define CLK_TOP_APLL12_DIV1            129
+#define CLK_TOP_APLL12_DIV2            130
+#define CLK_TOP_APLL12_DIV3            131
+#define CLK_TOP_APLL12_DIV4            132
+#define CLK_TOP_APLL12_DIVB            133
+#define CLK_TOP_APLL12_DIV5            134
+#define CLK_TOP_IPE                    135
+#define CLK_TOP_DPE                    136
+#define CLK_TOP_CCU                    137
+#define CLK_TOP_DSP3                   138
+#define CLK_TOP_SENINF1                        139
+#define CLK_TOP_SENINF2                        140
+#define CLK_TOP_AUD_H                  141
+#define CLK_TOP_CAMTG5                 142
+#define CLK_TOP_TVDPLL_MAINPLL_D2_CK   143
+#define CLK_TOP_AD_OSC2_CK             144
+#define CLK_TOP_OSC2_D2                        145
+#define CLK_TOP_OSC2_D3                        146
+#define CLK_TOP_FMEM_466M_CK           147
+#define CLK_TOP_ADSPPLL_D4             148
+#define CLK_TOP_ADSPPLL_D5             149
+#define CLK_TOP_ADSPPLL_D6             150
+#define CLK_TOP_OSC_D10                        151
+#define CLK_TOP_UNIVPLL_D3_D16         152
+#define CLK_TOP_NR_CLK                 153
+
+/* APMIXED */
+#define CLK_APMIXED_ARMPLL_LL          1
+#define CLK_APMIXED_ARMPLL_BL          2
+#define CLK_APMIXED_ARMPLL_BB          3
+#define CLK_APMIXED_CCIPLL             4
+#define CLK_APMIXED_MAINPLL            5
+#define CLK_APMIXED_UNIV2PLL           6
+#define CLK_APMIXED_MSDCPLL            7
+#define CLK_APMIXED_ADSPPLL            8
+#define CLK_APMIXED_MMPLL              9
+#define CLK_APMIXED_MFGPLL             10
+#define CLK_APMIXED_TVDPLL             11
+#define CLK_APMIXED_APLL1              12
+#define CLK_APMIXED_APLL2              13
+#define CLK_APMIXED_SSUSB26M           14
+#define CLK_APMIXED_APPLL26M           15
+#define CLK_APMIXED_MIPIC0_26M         16
+#define CLK_APMIXED_MDPLLGP26M         17
+#define CLK_APMIXED_MM_F26M            18
+#define CLK_APMIXED_UFS26M             19
+#define CLK_APMIXED_MIPIC1_26M         20
+#define CLK_APMIXED_MEMPLL26M          21
+#define CLK_APMIXED_CLKSQ_LVPLL_26M    22
+#define CLK_APMIXED_MIPID0_26M         23
+#define CLK_APMIXED_MIPID1_26M         24
+#define CLK_APMIXED_NR_CLK             25
+
+/* CAMSYS */
+#define CLK_CAM_LARB10                 1
+#define CLK_CAM_DFP_VAD                        2
+#define CLK_CAM_LARB11                 3
+#define CLK_CAM_LARB9                  4
+#define CLK_CAM_CAM                    5
+#define CLK_CAM_CAMTG                  6
+#define CLK_CAM_SENINF                 7
+#define CLK_CAM_CAMSV0                 8
+#define CLK_CAM_CAMSV1                 9
+#define CLK_CAM_CAMSV2                 10
+#define CLK_CAM_CAMSV3                 11
+#define CLK_CAM_CCU                    12
+#define CLK_CAM_FAKE_ENG               13
+#define CLK_CAM_NR_CLK                 14
+
+/* INFRA */
+#define CLK_INFRA_PMIC_TMR             1
+#define CLK_INFRA_PMIC_AP              2
+#define CLK_INFRA_PMIC_MD              3
+#define CLK_INFRA_PMIC_CONN            4
+#define CLK_INFRA_SCPSYS               5
+#define CLK_INFRA_SEJ                  6
+#define CLK_INFRA_APXGPT               7
+#define CLK_INFRA_ICUSB                        8
+#define CLK_INFRA_GCE                  9
+#define CLK_INFRA_THERM                        10
+#define CLK_INFRA_I2C0                 11
+#define CLK_INFRA_I2C1                 12
+#define CLK_INFRA_I2C2                 13
+#define CLK_INFRA_I2C3                 14
+#define CLK_INFRA_PWM_HCLK             15
+#define CLK_INFRA_PWM1                 16
+#define CLK_INFRA_PWM2                 17
+#define CLK_INFRA_PWM3                 18
+#define CLK_INFRA_PWM4                 19
+#define CLK_INFRA_PWM                  20
+#define CLK_INFRA_UART0                        21
+#define CLK_INFRA_UART1                        22
+#define CLK_INFRA_UART2                        23
+#define CLK_INFRA_UART3                        24
+#define CLK_INFRA_GCE_26M              25
+#define CLK_INFRA_CQ_DMA_FPC           26
+#define CLK_INFRA_BTIF                 27
+#define CLK_INFRA_SPI0                 28
+#define CLK_INFRA_MSDC0                        29
+#define CLK_INFRA_MSDC1                        30
+#define CLK_INFRA_MSDC2                        31
+#define CLK_INFRA_MSDC0_SCK            32
+#define CLK_INFRA_DVFSRC               33
+#define CLK_INFRA_GCPU                 34
+#define CLK_INFRA_TRNG                 35
+#define CLK_INFRA_AUXADC               36
+#define CLK_INFRA_CPUM                 37
+#define CLK_INFRA_CCIF1_AP             38
+#define CLK_INFRA_CCIF1_MD             39
+#define CLK_INFRA_AUXADC_MD            40
+#define CLK_INFRA_MSDC1_SCK            41
+#define CLK_INFRA_MSDC2_SCK            42
+#define CLK_INFRA_AP_DMA               43
+#define CLK_INFRA_XIU                  44
+#define CLK_INFRA_DEVICE_APC           45
+#define CLK_INFRA_CCIF_AP              46
+#define CLK_INFRA_DEBUGSYS             47
+#define CLK_INFRA_AUD                  48
+#define CLK_INFRA_CCIF_MD              49
+#define CLK_INFRA_DXCC_SEC_CORE                50
+#define CLK_INFRA_DXCC_AO              51
+#define CLK_INFRA_DRAMC_F26M           52
+#define CLK_INFRA_IRTX                 53
+#define CLK_INFRA_DISP_PWM             54
+#define CLK_INFRA_DPMAIF_CK            55
+#define CLK_INFRA_AUD_26M_BCLK         56
+#define CLK_INFRA_SPI1                 57
+#define CLK_INFRA_I2C4                 58
+#define CLK_INFRA_MODEM_TEMP_SHARE     59
+#define CLK_INFRA_SPI2                 60
+#define CLK_INFRA_SPI3                 61
+#define CLK_INFRA_UNIPRO_SCK           62
+#define CLK_INFRA_UNIPRO_TICK          63
+#define CLK_INFRA_UFS_MP_SAP_BCLK      64
+#define CLK_INFRA_MD32_BCLK            65
+#define CLK_INFRA_SSPM                 66
+#define CLK_INFRA_UNIPRO_MBIST         67
+#define CLK_INFRA_SSPM_BUS_HCLK                68
+#define CLK_INFRA_I2C5                 69
+#define CLK_INFRA_I2C5_ARBITER         70
+#define CLK_INFRA_I2C5_IMM             71
+#define CLK_INFRA_I2C1_ARBITER         72
+#define CLK_INFRA_I2C1_IMM             73
+#define CLK_INFRA_I2C2_ARBITER         74
+#define CLK_INFRA_I2C2_IMM             75
+#define CLK_INFRA_SPI4                 76
+#define CLK_INFRA_SPI5                 77
+#define CLK_INFRA_CQ_DMA               78
+#define CLK_INFRA_UFS                  79
+#define CLK_INFRA_AES_UFSFDE           80
+#define CLK_INFRA_UFS_TICK             81
+#define CLK_INFRA_MSDC0_SELF           82
+#define CLK_INFRA_MSDC1_SELF           83
+#define CLK_INFRA_MSDC2_SELF           84
+#define CLK_INFRA_SSPM_26M_SELF                85
+#define CLK_INFRA_SSPM_32K_SELF                86
+#define CLK_INFRA_UFS_AXI              87
+#define CLK_INFRA_I2C6                 88
+#define CLK_INFRA_AP_MSDC0             89
+#define CLK_INFRA_MD_MSDC0             90
+#define CLK_INFRA_USB                  91
+#define CLK_INFRA_DEVMPU_BCLK          92
+#define CLK_INFRA_CCIF2_AP             93
+#define CLK_INFRA_CCIF2_MD             94
+#define CLK_INFRA_CCIF3_AP             95
+#define CLK_INFRA_CCIF3_MD             96
+#define CLK_INFRA_SEJ_F13M             97
+#define CLK_INFRA_AES_BCLK             98
+#define CLK_INFRA_I2C7                 99
+#define CLK_INFRA_I2C8                 100
+#define CLK_INFRA_FBIST2FPC            101
+#define CLK_INFRA_CCIF4_AP             102
+#define CLK_INFRA_CCIF4_MD             103
+#define CLK_INFRA_FADSP                        104
+#define CLK_INFRA_SSUSB_XHCI           105
+#define CLK_INFRA_SPI6                 106
+#define CLK_INFRA_SPI7                 107
+#define CLK_INFRA_NR_CLK               108
+
+/* MFGCFG */
+#define CLK_MFGCFG_BG3D                        1
+#define CLK_MFGCFG_NR_CLK              2
+
+/* IMG */
+#define CLK_IMG_WPE_A                  1
+#define CLK_IMG_MFB                    2
+#define CLK_IMG_DIP                    3
+#define CLK_IMG_LARB6                  4
+#define CLK_IMG_LARB5                  5
+#define CLK_IMG_NR_CLK                 6
+
+/* IPE */
+#define CLK_IPE_LARB7                  1
+#define CLK_IPE_LARB8                  2
+#define CLK_IPE_SMI_SUBCOM             3
+#define CLK_IPE_FD                     4
+#define CLK_IPE_FE                     5
+#define CLK_IPE_RSC                    6
+#define CLK_IPE_DPE                    7
+#define CLK_IPE_NR_CLK                 8
+
+/* MM_CONFIG */
+#define CLK_MM_SMI_COMMON              1
+#define CLK_MM_SMI_LARB0               2
+#define CLK_MM_SMI_LARB1               3
+#define CLK_MM_GALS_COMM0              4
+#define CLK_MM_GALS_COMM1              5
+#define CLK_MM_GALS_CCU2MM             6
+#define CLK_MM_GALS_IPU12MM            7
+#define CLK_MM_GALS_IMG2MM             8
+#define CLK_MM_GALS_CAM2MM             9
+#define CLK_MM_GALS_IPU2MM             10
+#define CLK_MM_MDP_DL_TXCK             11
+#define CLK_MM_IPU_DL_TXCK             12
+#define CLK_MM_MDP_RDMA0               13
+#define CLK_MM_MDP_RDMA1               14
+#define CLK_MM_MDP_RSZ0                        15
+#define CLK_MM_MDP_RSZ1                        16
+#define CLK_MM_MDP_TDSHP               17
+#define CLK_MM_MDP_WROT0               18
+#define CLK_MM_FAKE_ENG                        19
+#define CLK_MM_DISP_OVL0               20
+#define CLK_MM_DISP_OVL0_2L            21
+#define CLK_MM_DISP_OVL1_2L            22
+#define CLK_MM_DISP_RDMA0              23
+#define CLK_MM_DISP_RDMA1              24
+#define CLK_MM_DISP_WDMA0              25
+#define CLK_MM_DISP_COLOR0             26
+#define CLK_MM_DISP_CCORR0             27
+#define CLK_MM_DISP_AAL0               28
+#define CLK_MM_DISP_GAMMA0             29
+#define CLK_MM_DISP_DITHER0            30
+#define CLK_MM_DISP_SPLIT              31
+#define CLK_MM_DSI0_MM_CK              32
+#define CLK_MM_DSI0_IF_CK              33
+#define CLK_MM_DPI_MM_CK               34
+#define CLK_MM_DPI_IF_CK               35
+#define CLK_MM_FAKE_ENG2               36
+#define CLK_MM_MDP_DL_RX_CK            37
+#define CLK_MM_IPU_DL_RX_CK            38
+#define CLK_MM_26M                     39
+#define CLK_MM_MM_R2Y                  40
+#define CLK_MM_DISP_RSZ                        41
+#define CLK_MM_MDP_WDMA0               42
+#define CLK_MM_MDP_AAL                 43
+#define CLK_MM_MDP_HDR                 44
+#define CLK_MM_DBI_MM_CK               45
+#define CLK_MM_DBI_IF_CK               46
+#define CLK_MM_MDP_WROT1               47
+#define CLK_MM_DISP_POSTMASK0          48
+#define CLK_MM_DISP_HRT_BW             49
+#define CLK_MM_DISP_OVL_FBDC           50
+#define CLK_MM_NR_CLK                  51
+
+/* VDEC_GCON */
+#define CLK_VDEC_VDEC                  1
+#define CLK_VDEC_LARB1                 2
+#define CLK_VDEC_GCON_NR_CLK           3
+
+/* VENC_GCON */
+#define CLK_VENC_GCON_LARB             1
+#define CLK_VENC_GCON_VENC             2
+#define CLK_VENC_GCON_JPGENC           3
+#define CLK_VENC_GCON_GALS             4
+#define CLK_VENC_GCON_NR_CLK           5
+
+/* AUD */
+#define CLK_AUD_AFE                    1
+#define CLK_AUD_22M                    2
+#define CLK_AUD_24M                    3
+#define CLK_AUD_APLL2_TUNER            4
+#define CLK_AUD_APLL_TUNER             5
+#define CLK_AUD_TDM                    6
+#define CLK_AUD_ADC                    7
+#define CLK_AUD_DAC                    8
+#define CLK_AUD_DAC_PREDIS             9
+#define CLK_AUD_TML                    10
+#define CLK_AUD_NLE                    11
+#define CLK_AUD_I2S1_BCLK_SW           12
+#define CLK_AUD_I2S2_BCLK_SW           13
+#define CLK_AUD_I2S3_BCLK_SW           14
+#define CLK_AUD_I2S4_BCLK_SW           15
+#define CLK_AUD_I2S5_BCLK_SW           16
+#define CLK_AUD_CONN_I2S_ASRC          17
+#define CLK_AUD_GENERAL1_ASRC          18
+#define CLK_AUD_GENERAL2_ASRC          19
+#define CLK_AUD_DAC_HIRES              20
+#define CLK_AUD_PDN_ADDA6_ADC          21
+#define CLK_AUD_ADC_HIRES              22
+#define CLK_AUD_ADC_HIRES_TML          23
+#define CLK_AUD_ADDA6_ADC_HIRES                24
+#define CLK_AUD_3RD_DAC                        25
+#define CLK_AUD_3RD_DAC_PREDIS         26
+#define CLK_AUD_3RD_DAC_TML            27
+#define CLK_AUD_3RD_DAC_HIRES          28
+#define CLK_AUD_NR_CLK                 29
+
+#endif /* _DT_BINDINGS_CLK_MT6779_H */
index 0046506..a7b470b 100644 (file)
 #define CLK_INFRA_FBIST2FPC            100
 #define CLK_INFRA_NR_CLK               101
 
+/* PERICFG */
+#define CLK_PERI_AXI                   0
+#define CLK_PERI_NR_CLK                        1
+
 /* MFGCFG */
 #define CLK_MFG_BG3D                   0
 #define CLK_MFG_NR_CLK                 1
index f328395..e541193 100644 (file)
@@ -89,6 +89,9 @@
 /* dss clocks */
 #define OMAP5_DSS_CORE_CLKCTRL OMAP5_CLKCTRL_INDEX(0x20)
 
+/* gpu clocks */
+#define OMAP5_GPU_CLKCTRL      OMAP5_CLKCTRL_INDEX(0x20)
+
 /* l3init clocks */
 #define OMAP5_MMC1_CLKCTRL     OMAP5_CLKCTRL_INDEX(0x28)
 #define OMAP5_MMC2_CLKCTRL     OMAP5_CLKCTRL_INDEX(0x30)
index 2cd62c9..bc30515 100644 (file)
 #define GCC_MDP_TBU_CLK                                        138
 #define GCC_QDSS_DAP_CLK                               139
 #define GCC_DCC_XO_CLK                                 140
+#define GCC_WCSS_Q6_AHB_CLK                            141
+#define GCC_WCSS_Q6_AXIM_CLK                           142
 #define GCC_CDSP_CFG_AHB_CLK                           143
 #define GCC_BIMC_CDSP_CLK                              144
 #define GCC_CDSP_TBU_CLK                               145
 #define GCC_PCIE_0_CORE_STICKY_ARES                    19
 #define GCC_PCIE_0_SLEEP_ARES                          20
 #define GCC_PCIE_0_PIPE_ARES                           21
+#define GCC_WDSP_RESTART                               22
 
 #endif
diff --git a/include/dt-bindings/clock/qcom,gcc-sm8150.h b/include/dt-bindings/clock/qcom,gcc-sm8150.h
new file mode 100644 (file)
index 0000000..90d60ef
--- /dev/null
@@ -0,0 +1,243 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_GCC_SM8150_H
+#define _DT_BINDINGS_CLK_QCOM_GCC_SM8150_H
+
+/* GCC clocks */
+#define GCC_AGGRE_NOC_PCIE_TBU_CLK                             0
+#define GCC_AGGRE_UFS_CARD_AXI_CLK                             1
+#define GCC_AGGRE_UFS_CARD_AXI_HW_CTL_CLK                      2
+#define GCC_AGGRE_UFS_PHY_AXI_CLK                              3
+#define GCC_AGGRE_UFS_PHY_AXI_HW_CTL_CLK                       4
+#define GCC_AGGRE_USB3_PRIM_AXI_CLK                            5
+#define GCC_AGGRE_USB3_SEC_AXI_CLK                             6
+#define GCC_BOOT_ROM_AHB_CLK                                   7
+#define GCC_CAMERA_AHB_CLK                                     8
+#define GCC_CAMERA_HF_AXI_CLK                                  9
+#define GCC_CAMERA_SF_AXI_CLK                                  10
+#define GCC_CAMERA_XO_CLK                                      11
+#define GCC_CFG_NOC_USB3_PRIM_AXI_CLK                          12
+#define GCC_CFG_NOC_USB3_SEC_AXI_CLK                           13
+#define GCC_CPUSS_AHB_CLK                                      14
+#define GCC_CPUSS_AHB_CLK_SRC                                  15
+#define GCC_CPUSS_DVM_BUS_CLK                                  16
+#define GCC_CPUSS_GNOC_CLK                                     17
+#define GCC_CPUSS_RBCPR_CLK                                    18
+#define GCC_DDRSS_GPU_AXI_CLK                                  19
+#define GCC_DISP_AHB_CLK                                       20
+#define GCC_DISP_HF_AXI_CLK                                    21
+#define GCC_DISP_SF_AXI_CLK                                    22
+#define GCC_DISP_XO_CLK                                                23
+#define GCC_EMAC_AXI_CLK                                       24
+#define GCC_EMAC_PTP_CLK                                       25
+#define GCC_EMAC_PTP_CLK_SRC                                   26
+#define GCC_EMAC_RGMII_CLK                                     27
+#define GCC_EMAC_RGMII_CLK_SRC                                 28
+#define GCC_EMAC_SLV_AHB_CLK                                   29
+#define GCC_GP1_CLK                                            30
+#define GCC_GP1_CLK_SRC                                                31
+#define GCC_GP2_CLK                                            32
+#define GCC_GP2_CLK_SRC                                                33
+#define GCC_GP3_CLK                                            34
+#define GCC_GP3_CLK_SRC                                                35
+#define GCC_GPU_CFG_AHB_CLK                                    36
+#define GCC_GPU_GPLL0_CLK_SRC                                  37
+#define GCC_GPU_GPLL0_DIV_CLK_SRC                              38
+#define GCC_GPU_IREF_CLK                                       39
+#define GCC_GPU_MEMNOC_GFX_CLK                                 40
+#define GCC_GPU_SNOC_DVM_GFX_CLK                               41
+#define GCC_NPU_AT_CLK                                         42
+#define GCC_NPU_AXI_CLK                                                43
+#define GCC_NPU_CFG_AHB_CLK                                    44
+#define GCC_NPU_GPLL0_CLK_SRC                                  45
+#define GCC_NPU_GPLL0_DIV_CLK_SRC                              46
+#define GCC_NPU_TRIG_CLK                                       47
+#define GCC_PCIE0_PHY_REFGEN_CLK                               48
+#define GCC_PCIE1_PHY_REFGEN_CLK                               49
+#define GCC_PCIE_0_AUX_CLK                                     50
+#define GCC_PCIE_0_AUX_CLK_SRC                                 51
+#define GCC_PCIE_0_CFG_AHB_CLK                                 52
+#define GCC_PCIE_0_CLKREF_CLK                                  53
+#define GCC_PCIE_0_MSTR_AXI_CLK                                        54
+#define GCC_PCIE_0_PIPE_CLK                                    55
+#define GCC_PCIE_0_SLV_AXI_CLK                                 56
+#define GCC_PCIE_0_SLV_Q2A_AXI_CLK                             57
+#define GCC_PCIE_1_AUX_CLK                                     58
+#define GCC_PCIE_1_AUX_CLK_SRC                                 59
+#define GCC_PCIE_1_CFG_AHB_CLK                                 60
+#define GCC_PCIE_1_CLKREF_CLK                                  61
+#define GCC_PCIE_1_MSTR_AXI_CLK                                        62
+#define GCC_PCIE_1_PIPE_CLK                                    63
+#define GCC_PCIE_1_SLV_AXI_CLK                                 64
+#define GCC_PCIE_1_SLV_Q2A_AXI_CLK                             65
+#define GCC_PCIE_PHY_AUX_CLK                                   66
+#define GCC_PCIE_PHY_REFGEN_CLK_SRC                            67
+#define GCC_PDM2_CLK                                           68
+#define GCC_PDM2_CLK_SRC                                       69
+#define GCC_PDM_AHB_CLK                                                70
+#define GCC_PDM_XO4_CLK                                                71
+#define GCC_PRNG_AHB_CLK                                       72
+#define GCC_QMIP_CAMERA_NRT_AHB_CLK                            73
+#define GCC_QMIP_CAMERA_RT_AHB_CLK                             74
+#define GCC_QMIP_DISP_AHB_CLK                                  75
+#define GCC_QMIP_VIDEO_CVP_AHB_CLK                             76
+#define GCC_QMIP_VIDEO_VCODEC_AHB_CLK                          77
+#define GCC_QSPI_CNOC_PERIPH_AHB_CLK                           78
+#define GCC_QSPI_CORE_CLK                                      79
+#define GCC_QSPI_CORE_CLK_SRC                                  80
+#define GCC_QUPV3_WRAP0_S0_CLK                                 81
+#define GCC_QUPV3_WRAP0_S0_CLK_SRC                             82
+#define GCC_QUPV3_WRAP0_S1_CLK                                 83
+#define GCC_QUPV3_WRAP0_S1_CLK_SRC                             84
+#define GCC_QUPV3_WRAP0_S2_CLK                                 85
+#define GCC_QUPV3_WRAP0_S2_CLK_SRC                             86
+#define GCC_QUPV3_WRAP0_S3_CLK                                 87
+#define GCC_QUPV3_WRAP0_S3_CLK_SRC                             88
+#define GCC_QUPV3_WRAP0_S4_CLK                                 89
+#define GCC_QUPV3_WRAP0_S4_CLK_SRC                             90
+#define GCC_QUPV3_WRAP0_S5_CLK                                 91
+#define GCC_QUPV3_WRAP0_S5_CLK_SRC                             92
+#define GCC_QUPV3_WRAP0_S6_CLK                                 93
+#define GCC_QUPV3_WRAP0_S6_CLK_SRC                             94
+#define GCC_QUPV3_WRAP0_S7_CLK                                 95
+#define GCC_QUPV3_WRAP0_S7_CLK_SRC                             96
+#define GCC_QUPV3_WRAP1_S0_CLK                                 97
+#define GCC_QUPV3_WRAP1_S0_CLK_SRC                             98
+#define GCC_QUPV3_WRAP1_S1_CLK                                 99
+#define GCC_QUPV3_WRAP1_S1_CLK_SRC                             100
+#define GCC_QUPV3_WRAP1_S2_CLK                                 101
+#define GCC_QUPV3_WRAP1_S2_CLK_SRC                             102
+#define GCC_QUPV3_WRAP1_S3_CLK                                 103
+#define GCC_QUPV3_WRAP1_S3_CLK_SRC                             104
+#define GCC_QUPV3_WRAP1_S4_CLK                                 105
+#define GCC_QUPV3_WRAP1_S4_CLK_SRC                             106
+#define GCC_QUPV3_WRAP1_S5_CLK                                 107
+#define GCC_QUPV3_WRAP1_S5_CLK_SRC                             108
+#define GCC_QUPV3_WRAP2_S0_CLK                                 109
+#define GCC_QUPV3_WRAP2_S0_CLK_SRC                             110
+#define GCC_QUPV3_WRAP2_S1_CLK                                 111
+#define GCC_QUPV3_WRAP2_S1_CLK_SRC                             112
+#define GCC_QUPV3_WRAP2_S2_CLK                                 113
+#define GCC_QUPV3_WRAP2_S2_CLK_SRC                             114
+#define GCC_QUPV3_WRAP2_S3_CLK                                 115
+#define GCC_QUPV3_WRAP2_S3_CLK_SRC                             116
+#define GCC_QUPV3_WRAP2_S4_CLK                                 117
+#define GCC_QUPV3_WRAP2_S4_CLK_SRC                             118
+#define GCC_QUPV3_WRAP2_S5_CLK                                 119
+#define GCC_QUPV3_WRAP2_S5_CLK_SRC                             120
+#define GCC_QUPV3_WRAP_0_M_AHB_CLK                             121
+#define GCC_QUPV3_WRAP_0_S_AHB_CLK                             122
+#define GCC_QUPV3_WRAP_1_M_AHB_CLK                             123
+#define GCC_QUPV3_WRAP_1_S_AHB_CLK                             124
+#define GCC_QUPV3_WRAP_2_M_AHB_CLK                             125
+#define GCC_QUPV3_WRAP_2_S_AHB_CLK                             126
+#define GCC_SDCC2_AHB_CLK                                      127
+#define GCC_SDCC2_APPS_CLK                                     128
+#define GCC_SDCC2_APPS_CLK_SRC                                 129
+#define GCC_SDCC4_AHB_CLK                                      130
+#define GCC_SDCC4_APPS_CLK                                     131
+#define GCC_SDCC4_APPS_CLK_SRC                                 132
+#define GCC_SYS_NOC_CPUSS_AHB_CLK                              133
+#define GCC_TSIF_AHB_CLK                                       134
+#define GCC_TSIF_INACTIVITY_TIMERS_CLK                         135
+#define GCC_TSIF_REF_CLK                                       136
+#define GCC_TSIF_REF_CLK_SRC                                   137
+#define GCC_UFS_CARD_AHB_CLK                                   138
+#define GCC_UFS_CARD_AXI_CLK                                   139
+#define GCC_UFS_CARD_AXI_CLK_SRC                               140
+#define GCC_UFS_CARD_AXI_HW_CTL_CLK                            141
+#define GCC_UFS_CARD_CLKREF_CLK                                        142
+#define GCC_UFS_CARD_ICE_CORE_CLK                              143
+#define GCC_UFS_CARD_ICE_CORE_CLK_SRC                          144
+#define GCC_UFS_CARD_ICE_CORE_HW_CTL_CLK                       145
+#define GCC_UFS_CARD_PHY_AUX_CLK                               146
+#define GCC_UFS_CARD_PHY_AUX_CLK_SRC                           147
+#define GCC_UFS_CARD_PHY_AUX_HW_CTL_CLK                                148
+#define GCC_UFS_CARD_RX_SYMBOL_0_CLK                           149
+#define GCC_UFS_CARD_RX_SYMBOL_1_CLK                           150
+#define GCC_UFS_CARD_TX_SYMBOL_0_CLK                           151
+#define GCC_UFS_CARD_UNIPRO_CORE_CLK                           152
+#define GCC_UFS_CARD_UNIPRO_CORE_CLK_SRC                       153
+#define GCC_UFS_CARD_UNIPRO_CORE_HW_CTL_CLK                    154
+#define GCC_UFS_MEM_CLKREF_CLK                                 155
+#define GCC_UFS_PHY_AHB_CLK                                    156
+#define GCC_UFS_PHY_AXI_CLK                                    157
+#define GCC_UFS_PHY_AXI_CLK_SRC                                        158
+#define GCC_UFS_PHY_AXI_HW_CTL_CLK                             159
+#define GCC_UFS_PHY_ICE_CORE_CLK                               160
+#define GCC_UFS_PHY_ICE_CORE_CLK_SRC                           161
+#define GCC_UFS_PHY_ICE_CORE_HW_CTL_CLK                                162
+#define GCC_UFS_PHY_PHY_AUX_CLK                                        163
+#define GCC_UFS_PHY_PHY_AUX_CLK_SRC                            164
+#define GCC_UFS_PHY_PHY_AUX_HW_CTL_CLK                         165
+#define GCC_UFS_PHY_RX_SYMBOL_0_CLK                            166
+#define GCC_UFS_PHY_RX_SYMBOL_1_CLK                            167
+#define GCC_UFS_PHY_TX_SYMBOL_0_CLK                            168
+#define GCC_UFS_PHY_UNIPRO_CORE_CLK                            169
+#define GCC_UFS_PHY_UNIPRO_CORE_CLK_SRC                                170
+#define GCC_UFS_PHY_UNIPRO_CORE_HW_CTL_CLK                     171
+#define GCC_USB30_PRIM_MASTER_CLK                              172
+#define GCC_USB30_PRIM_MASTER_CLK_SRC                          173
+#define GCC_USB30_PRIM_MOCK_UTMI_CLK                           174
+#define GCC_USB30_PRIM_MOCK_UTMI_CLK_SRC                       175
+#define GCC_USB30_PRIM_SLEEP_CLK                               176
+#define GCC_USB30_SEC_MASTER_CLK                               177
+#define GCC_USB30_SEC_MASTER_CLK_SRC                           178
+#define GCC_USB30_SEC_MOCK_UTMI_CLK                            179
+#define GCC_USB30_SEC_MOCK_UTMI_CLK_SRC                                180
+#define GCC_USB30_SEC_SLEEP_CLK                                        181
+#define GCC_USB3_PRIM_CLKREF_CLK                               182
+#define GCC_USB3_PRIM_PHY_AUX_CLK                              183
+#define GCC_USB3_PRIM_PHY_AUX_CLK_SRC                          184
+#define GCC_USB3_PRIM_PHY_COM_AUX_CLK                          185
+#define GCC_USB3_PRIM_PHY_PIPE_CLK                             186
+#define GCC_USB3_SEC_CLKREF_CLK                                        187
+#define GCC_USB3_SEC_PHY_AUX_CLK                               188
+#define GCC_USB3_SEC_PHY_AUX_CLK_SRC                           189
+#define GCC_USB3_SEC_PHY_COM_AUX_CLK                           190
+#define GCC_USB3_SEC_PHY_PIPE_CLK                              191
+#define GCC_VIDEO_AHB_CLK                                      192
+#define GCC_VIDEO_AXI0_CLK                                     193
+#define GCC_VIDEO_AXI1_CLK                                     194
+#define GCC_VIDEO_AXIC_CLK                                     195
+#define GCC_VIDEO_XO_CLK                                       196
+#define GPLL0                                                  197
+#define GPLL0_OUT_EVEN                                         198
+#define GPLL7                                                  199
+#define GPLL9                                                  200
+
+/* GCC resets */
+#define GCC_EMAC_BCR                                           0
+#define GCC_GPU_BCR                                            1
+#define GCC_MMSS_BCR                                           2
+#define GCC_NPU_BCR                                            3
+#define GCC_PCIE_0_BCR                                         4
+#define GCC_PCIE_0_PHY_BCR                                     5
+#define GCC_PCIE_1_BCR                                         6
+#define GCC_PCIE_1_PHY_BCR                                     7
+#define GCC_PCIE_PHY_BCR                                       8
+#define GCC_PDM_BCR                                            9
+#define GCC_PRNG_BCR                                           10
+#define GCC_QSPI_BCR                                           11
+#define GCC_QUPV3_WRAPPER_0_BCR                                        12
+#define GCC_QUPV3_WRAPPER_1_BCR                                        13
+#define GCC_QUPV3_WRAPPER_2_BCR                                        14
+#define GCC_QUSB2PHY_PRIM_BCR                                  15
+#define GCC_QUSB2PHY_SEC_BCR                                   16
+#define GCC_USB3_PHY_PRIM_BCR                                  17
+#define GCC_USB3_DP_PHY_PRIM_BCR                               18
+#define GCC_USB3_PHY_SEC_BCR                                   19
+#define GCC_USB3PHY_PHY_SEC_BCR                                        20
+#define GCC_SDCC2_BCR                                          21
+#define GCC_SDCC4_BCR                                          22
+#define GCC_TSIF_BCR                                           23
+#define GCC_UFS_CARD_BCR                                       24
+#define GCC_UFS_PHY_BCR                                                25
+#define GCC_USB30_PRIM_BCR                                     26
+#define GCC_USB30_SEC_BCR                                      27
+#define GCC_USB_PHY_CFG_AHB2PHY_BCR                            28
+
+#endif
diff --git a/include/dt-bindings/clock/rk3308-cru.h b/include/dt-bindings/clock/rk3308-cru.h
new file mode 100644 (file)
index 0000000..d97840f
--- /dev/null
@@ -0,0 +1,387 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019 Rockchip Electronics Co. Ltd.
+ * Author: Finley Xiao <finley.xiao@rock-chips.com>
+ */
+
+#ifndef _DT_BINDINGS_CLK_ROCKCHIP_RK3308_H
+#define _DT_BINDINGS_CLK_ROCKCHIP_RK3308_H
+
+/* core clocks */
+#define PLL_APLL               1
+#define PLL_DPLL               2
+#define PLL_VPLL0              3
+#define PLL_VPLL1              4
+#define ARMCLK                 5
+
+/* sclk (special clocks) */
+#define USB480M                        14
+#define SCLK_RTC32K            15
+#define SCLK_PVTM_CORE         16
+#define SCLK_UART0             17
+#define SCLK_UART1             18
+#define SCLK_UART2             19
+#define SCLK_UART3             20
+#define SCLK_UART4             21
+#define SCLK_I2C0              22
+#define SCLK_I2C1              23
+#define SCLK_I2C2              24
+#define SCLK_I2C3              25
+#define SCLK_PWM0              26
+#define SCLK_SPI0              27
+#define SCLK_SPI1              28
+#define SCLK_SPI2              29
+#define SCLK_TIMER0            30
+#define SCLK_TIMER1            31
+#define SCLK_TIMER2            32
+#define SCLK_TIMER3            33
+#define SCLK_TIMER4            34
+#define SCLK_TIMER5            35
+#define SCLK_TSADC             36
+#define SCLK_SARADC            37
+#define SCLK_OTP               38
+#define SCLK_OTP_USR           39
+#define SCLK_CPU_BOOST         40
+#define SCLK_CRYPTO            41
+#define SCLK_CRYPTO_APK                42
+#define SCLK_NANDC_DIV         43
+#define SCLK_NANDC_DIV50       44
+#define SCLK_NANDC             45
+#define SCLK_SDMMC_DIV         46
+#define SCLK_SDMMC_DIV50       47
+#define SCLK_SDMMC             48
+#define SCLK_SDMMC_DRV         49
+#define SCLK_SDMMC_SAMPLE      50
+#define SCLK_SDIO_DIV          51
+#define SCLK_SDIO_DIV50                52
+#define SCLK_SDIO              53
+#define SCLK_SDIO_DRV          54
+#define SCLK_SDIO_SAMPLE       55
+#define SCLK_EMMC_DIV          56
+#define SCLK_EMMC_DIV50                57
+#define SCLK_EMMC              58
+#define SCLK_EMMC_DRV          59
+#define SCLK_EMMC_SAMPLE       60
+#define SCLK_SFC               61
+#define SCLK_OTG_ADP           62
+#define SCLK_MAC_SRC           63
+#define SCLK_MAC               64
+#define SCLK_MAC_REF           65
+#define SCLK_MAC_RX_TX         66
+#define SCLK_MAC_RMII          67
+#define SCLK_DDR_MON_TIMER     68
+#define SCLK_DDR_MON           69
+#define SCLK_DDRCLK            70
+#define SCLK_PMU               71
+#define SCLK_USBPHY_REF                72
+#define SCLK_WIFI              73
+#define SCLK_PVTM_PMU          74
+#define SCLK_PDM               75
+#define SCLK_I2S0_8CH_TX       76
+#define SCLK_I2S0_8CH_TX_OUT   77
+#define SCLK_I2S0_8CH_RX       78
+#define SCLK_I2S0_8CH_RX_OUT   79
+#define SCLK_I2S1_8CH_TX       80
+#define SCLK_I2S1_8CH_TX_OUT   81
+#define SCLK_I2S1_8CH_RX       82
+#define SCLK_I2S1_8CH_RX_OUT   83
+#define SCLK_I2S2_8CH_TX       84
+#define SCLK_I2S2_8CH_TX_OUT   85
+#define SCLK_I2S2_8CH_RX       86
+#define SCLK_I2S2_8CH_RX_OUT   87
+#define SCLK_I2S3_8CH_TX       88
+#define SCLK_I2S3_8CH_TX_OUT   89
+#define SCLK_I2S3_8CH_RX       90
+#define SCLK_I2S3_8CH_RX_OUT   91
+#define SCLK_I2S0_2CH          92
+#define SCLK_I2S0_2CH_OUT      93
+#define SCLK_I2S1_2CH          94
+#define SCLK_I2S1_2CH_OUT      95
+#define SCLK_SPDIF_TX_DIV      96
+#define SCLK_SPDIF_TX_DIV50    97
+#define SCLK_SPDIF_TX          98
+#define SCLK_SPDIF_RX_DIV      99
+#define SCLK_SPDIF_RX_DIV50    100
+#define SCLK_SPDIF_RX          101
+#define SCLK_I2S0_8CH_TX_MUX   102
+#define SCLK_I2S0_8CH_RX_MUX   103
+#define SCLK_I2S1_8CH_TX_MUX   104
+#define SCLK_I2S1_8CH_RX_MUX   105
+#define SCLK_I2S2_8CH_TX_MUX   106
+#define SCLK_I2S2_8CH_RX_MUX   107
+#define SCLK_I2S3_8CH_TX_MUX   108
+#define SCLK_I2S3_8CH_RX_MUX   109
+#define SCLK_I2S0_8CH_TX_SRC   110
+#define SCLK_I2S0_8CH_RX_SRC   111
+#define SCLK_I2S1_8CH_TX_SRC   112
+#define SCLK_I2S1_8CH_RX_SRC   113
+#define SCLK_I2S2_8CH_TX_SRC   114
+#define SCLK_I2S2_8CH_RX_SRC   115
+#define SCLK_I2S3_8CH_TX_SRC   116
+#define SCLK_I2S3_8CH_RX_SRC   117
+#define SCLK_I2S0_2CH_SRC      118
+#define SCLK_I2S1_2CH_SRC      119
+#define SCLK_PWM1              120
+#define SCLK_PWM2              121
+#define SCLK_OWIRE             122
+
+/* dclk */
+#define DCLK_VOP               125
+
+/* aclk */
+#define ACLK_BUS_SRC           130
+#define ACLK_BUS               131
+#define ACLK_PERI_SRC          132
+#define ACLK_PERI              133
+#define ACLK_MAC               134
+#define ACLK_CRYPTO            135
+#define ACLK_VOP               136
+#define ACLK_GIC               137
+#define ACLK_DMAC0             138
+#define ACLK_DMAC1             139
+
+/* hclk */
+#define HCLK_BUS               150
+#define HCLK_PERI              151
+#define HCLK_AUDIO             152
+#define HCLK_NANDC             153
+#define HCLK_SDMMC             154
+#define HCLK_SDIO              155
+#define HCLK_EMMC              156
+#define HCLK_SFC               157
+#define HCLK_OTG               158
+#define HCLK_HOST              159
+#define HCLK_HOST_ARB          160
+#define HCLK_PDM               161
+#define HCLK_SPDIFTX           162
+#define HCLK_SPDIFRX           163
+#define HCLK_I2S0_8CH          164
+#define HCLK_I2S1_8CH          165
+#define HCLK_I2S2_8CH          166
+#define HCLK_I2S3_8CH          167
+#define HCLK_I2S0_2CH          168
+#define HCLK_I2S1_2CH          169
+#define HCLK_VAD               170
+#define HCLK_CRYPTO            171
+#define HCLK_VOP               172
+
+/* pclk */
+#define PCLK_BUS               190
+#define PCLK_DDR               191
+#define PCLK_PERI              192
+#define PCLK_PMU               193
+#define PCLK_AUDIO             194
+#define PCLK_MAC               195
+#define PCLK_ACODEC            196
+#define PCLK_UART0             197
+#define PCLK_UART1             198
+#define PCLK_UART2             199
+#define PCLK_UART3             200
+#define PCLK_UART4             201
+#define PCLK_I2C0              202
+#define PCLK_I2C1              203
+#define PCLK_I2C2              204
+#define PCLK_I2C3              205
+#define PCLK_PWM0              206
+#define PCLK_SPI0              207
+#define PCLK_SPI1              208
+#define PCLK_SPI2              209
+#define PCLK_SARADC            210
+#define PCLK_TSADC             211
+#define PCLK_TIMER             212
+#define PCLK_OTP_NS            213
+#define PCLK_WDT               214
+#define PCLK_GPIO0             215
+#define PCLK_GPIO1             216
+#define PCLK_GPIO2             217
+#define PCLK_GPIO3             218
+#define PCLK_GPIO4             219
+#define PCLK_SGRF              220
+#define PCLK_GRF               221
+#define PCLK_USBSD_DET         222
+#define PCLK_DDR_UPCTL         223
+#define PCLK_DDR_MON           224
+#define PCLK_DDRPHY            225
+#define PCLK_DDR_STDBY         226
+#define PCLK_USB_GRF           227
+#define PCLK_CRU               228
+#define PCLK_OTP_PHY           229
+#define PCLK_CPU_BOOST         230
+#define PCLK_PWM1              231
+#define PCLK_PWM2              232
+#define PCLK_CAN               233
+#define PCLK_OWIRE             234
+
+#define CLK_NR_CLKS            (PCLK_OWIRE + 1)
+
+/* soft-reset indices */
+
+/* cru_softrst_con0 */
+#define SRST_CORE0_PO          0
+#define SRST_CORE1_PO          1
+#define SRST_CORE2_PO          2
+#define SRST_CORE3_PO          3
+#define SRST_CORE0             4
+#define SRST_CORE1             5
+#define SRST_CORE2             6
+#define SRST_CORE3             7
+#define SRST_CORE0_DBG         8
+#define SRST_CORE1_DBG         9
+#define SRST_CORE2_DBG         10
+#define SRST_CORE3_DBG         11
+#define SRST_TOPDBG            12
+#define SRST_CORE_NOC          13
+#define SRST_STRC_A            14
+#define SRST_L2C               15
+
+/* cru_softrst_con1 */
+#define SRST_DAP               16
+#define SRST_CORE_PVTM         17
+#define SRST_CORE_PRF          18
+#define SRST_CORE_GRF          19
+#define SRST_DDRUPCTL          20
+#define SRST_DDRUPCTL_P                22
+#define SRST_MSCH              23
+#define SRST_DDRMON_P          25
+#define SRST_DDRSTDBY_P                26
+#define SRST_DDRSTDBY          27
+#define SRST_DDRPHY            28
+#define SRST_DDRPHY_DIV                29
+#define SRST_DDRPHY_P          30
+
+/* cru_softrst_con2 */
+#define SRST_BUS_NIU_H         32
+#define SRST_USB_NIU_P         33
+#define SRST_CRYPTO_A          34
+#define SRST_CRYPTO_H          35
+#define SRST_CRYPTO            36
+#define SRST_CRYPTO_APK                37
+#define SRST_VOP_A             38
+#define SRST_VOP_H             39
+#define SRST_VOP_D             40
+#define SRST_INTMEM_A          41
+#define SRST_ROM_H             42
+#define SRST_GIC_A             43
+#define SRST_UART0_P           44
+#define SRST_UART0             45
+#define SRST_UART1_P           46
+#define SRST_UART1             47
+
+/* cru_softrst_con3 */
+#define SRST_UART2_P           48
+#define SRST_UART2             49
+#define SRST_UART3_P           50
+#define SRST_UART3             51
+#define SRST_UART4_P           52
+#define SRST_UART4             53
+#define SRST_I2C0_P            54
+#define SRST_I2C0              55
+#define SRST_I2C1_P            56
+#define SRST_I2C1              57
+#define SRST_I2C2_P            58
+#define SRST_I2C2              59
+#define SRST_I2C3_P            60
+#define SRST_I2C3              61
+#define SRST_PWM0_P            62
+#define SRST_PWM0              63
+
+/* cru_softrst_con4 */
+#define SRST_SPI0_P            64
+#define SRST_SPI0              65
+#define SRST_SPI1_P            66
+#define SRST_SPI1              67
+#define SRST_SPI2_P            68
+#define SRST_SPI2              69
+#define SRST_SARADC_P          70
+#define SRST_TSADC_P           71
+#define SRST_TSADC             72
+#define SRST_TIMER0_P          73
+#define SRST_TIMER0            74
+#define SRST_TIMER1            75
+#define SRST_TIMER2            76
+#define SRST_TIMER3            77
+#define SRST_TIMER4            78
+#define SRST_TIMER5            79
+
+/* cru_softrst_con5 */
+#define SRST_OTP_NS_P          80
+#define SRST_OTP_NS_SBPI       81
+#define SRST_OTP_NS_USR                82
+#define SRST_OTP_PHY_P         83
+#define SRST_OTP_PHY           84
+#define SRST_GPIO0_P           86
+#define SRST_GPIO1_P           87
+#define SRST_GPIO2_P           88
+#define SRST_GPIO3_P           89
+#define SRST_GPIO4_P           90
+#define SRST_GRF_P             91
+#define SRST_USBSD_DET_P       92
+#define SRST_PMU               93
+#define SRST_PMU_PVTM          94
+#define SRST_USB_GRF_P         95
+
+/* cru_softrst_con6 */
+#define SRST_CPU_BOOST         96
+#define SRST_CPU_BOOST_P       97
+#define SRST_PWM1_P            98
+#define SRST_PWM1              99
+#define SRST_PWM2_P            100
+#define SRST_PWM2              101
+#define SRST_PERI_NIU_A                104
+#define SRST_PERI_NIU_H                105
+#define SRST_PERI_NIU_p                106 /* NOTE(review): lowercase 'p' differs from the other SRST_*_P names in this list; confirm it is intentional */
+#define SRST_USB2OTG_H         107
+#define SRST_USB2OTG           108
+#define SRST_USB2OTG_ADP       109
+#define SRST_USB2HOST_H                110
+#define SRST_USB2HOST_ARB_H    111
+
+/* cru_softrst_con7 */
+#define SRST_USB2HOST_AUX_H    112
+#define SRST_USB2HOST_EHCI     113
+#define SRST_USB2HOST          114
+#define SRST_USBPHYPOR         115
+#define SRST_UTMI0             116
+#define SRST_UTMI1             117
+#define SRST_SDIO_H            118
+#define SRST_EMMC_H            119
+#define SRST_SFC_H             120
+#define SRST_SFC               121
+#define SRST_SD_H              122
+#define SRST_NANDC_H           123
+#define SRST_NANDC_N           124
+#define SRST_MAC_A             125
+#define SRST_CAN_P             126
+#define SRST_OWIRE_P           127
+
+/* cru_softrst_con8 */
+#define SRST_AUDIO_NIU_H       128
+#define SRST_AUDIO_NIU_P       129
+#define SRST_PDM_H             130
+#define SRST_PDM_M             131
+#define SRST_SPDIFTX_H         132
+#define SRST_SPDIFTX_M         133
+#define SRST_SPDIFRX_H         134
+#define SRST_SPDIFRX_M         135
+#define SRST_I2S0_8CH_H                136
+#define SRST_I2S0_8CH_TX_M     137
+#define SRST_I2S0_8CH_RX_M     138
+#define SRST_I2S1_8CH_H                139
+#define SRST_I2S1_8CH_TX_M     140
+#define SRST_I2S1_8CH_RX_M     141
+#define SRST_I2S2_8CH_H                142
+#define SRST_I2S2_8CH_TX_M     143
+
+/* cru_softrst_con9 */
+#define SRST_I2S2_8CH_RX_M     144
+#define SRST_I2S3_8CH_H                145
+#define SRST_I2S3_8CH_TX_M     146
+#define SRST_I2S3_8CH_RX_M     147
+#define SRST_I2S0_2CH_H                148
+#define SRST_I2S0_2CH_M                149
+#define SRST_I2S1_2CH_H                150
+#define SRST_I2S1_2CH_M                151
+#define SRST_VAD_H             152
+#define SRST_ACODEC_P          153
+
+#endif
index c0d5d55..014ac61 100644 (file)
 
 #define CLK_MIPI_CSI           73
 
+/* Clocks not available on V3s */
+#define CLK_BUS_I2S0           75
+#define CLK_I2S0               76
+
 #endif /* _DT_BINDINGS_CLK_SUN8I_V3S_H_ */
index 45e11b6..499de62 100644 (file)
@@ -32,4 +32,7 @@
 #define AM65X_IOPAD(pa, val, muxmode)          (((pa) & 0x1fff)) ((val) | (muxmode))
 #define AM65X_WKUP_IOPAD(pa, val, muxmode)     (((pa) & 0x1fff)) ((val) | (muxmode))
 
+#define J721E_IOPAD(pa, val, muxmode)          (((pa) & 0x1fff)) ((val) | (muxmode))
+#define J721E_WKUP_IOPAD(pa, val, muxmode)     (((pa) & 0x1fff)) ((val) | (muxmode))
+
 #endif
diff --git a/include/dt-bindings/reset-controller/mt8183-resets.h b/include/dt-bindings/reset-controller/mt8183-resets.h
new file mode 100644 (file)
index 0000000..8804e34
--- /dev/null
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ * Author: Yong Liang <yong.liang@mediatek.com>
+ */
+
+#ifndef _DT_BINDINGS_RESET_CONTROLLER_MT8183
+#define _DT_BINDINGS_RESET_CONTROLLER_MT8183
+
+/* INFRACFG AO resets */
+#define MT8183_INFRACFG_AO_THERM_SW_RST                                0
+#define MT8183_INFRACFG_AO_USB_TOP_SW_RST                      1
+#define MT8183_INFRACFG_AO_MM_IOMMU_SW_RST                     3
+#define MT8183_INFRACFG_AO_MSDC3_SW_RST                                4
+#define MT8183_INFRACFG_AO_MSDC2_SW_RST                                5
+#define MT8183_INFRACFG_AO_MSDC1_SW_RST                                6
+#define MT8183_INFRACFG_AO_MSDC0_SW_RST                                7
+#define MT8183_INFRACFG_AO_APDMA_SW_RST                                9
+#define MT8183_INFRACFG_AO_MIMP_D_SW_RST                       10
+#define MT8183_INFRACFG_AO_BTIF_SW_RST                         12
+#define MT8183_INFRACFG_AO_DISP_PWM_SW_RST                     14
+#define MT8183_INFRACFG_AO_AUXADC_SW_RST                       15
+
+#define MT8183_INFRACFG_AO_IRTX_SW_RST                         32
+#define MT8183_INFRACFG_AO_SPI0_SW_RST                         33
+#define MT8183_INFRACFG_AO_I2C0_SW_RST                         34
+#define MT8183_INFRACFG_AO_I2C1_SW_RST                         35
+#define MT8183_INFRACFG_AO_I2C2_SW_RST                         36
+#define MT8183_INFRACFG_AO_I2C3_SW_RST                         37
+#define MT8183_INFRACFG_AO_UART0_SW_RST                                38
+#define MT8183_INFRACFG_AO_UART1_SW_RST                                39
+#define MT8183_INFRACFG_AO_UART2_SW_RST                                40
+#define MT8183_INFRACFG_AO_PWM_SW_RST                          41
+#define MT8183_INFRACFG_AO_SPI1_SW_RST                         42
+#define MT8183_INFRACFG_AO_I2C4_SW_RST                         43
+#define MT8183_INFRACFG_AO_DVFSP_SW_RST                                44
+#define MT8183_INFRACFG_AO_SPI2_SW_RST                         45
+#define MT8183_INFRACFG_AO_SPI3_SW_RST                         46
+#define MT8183_INFRACFG_AO_UFSHCI_SW_RST                       47
+
+#define MT8183_INFRACFG_AO_PMIC_WRAP_SW_RST                    64
+#define MT8183_INFRACFG_AO_SPM_SW_RST                          65
+#define MT8183_INFRACFG_AO_USBSIF_SW_RST                       66
+#define MT8183_INFRACFG_AO_KP_SW_RST                           68
+#define MT8183_INFRACFG_AO_APXGPT_SW_RST                       69
+#define MT8183_INFRACFG_AO_CLDMA_AO_SW_RST                     70
+#define MT8183_INFRACFG_AO_UNIPRO_UFS_SW_RST                   71
+#define MT8183_INFRACFG_AO_DX_CC_SW_RST                                72
+#define MT8183_INFRACFG_AO_UFSPHY_SW_RST                       73
+
+#define MT8183_INFRACFG_AO_DX_CC_SEC_SW_RST                    96
+#define MT8183_INFRACFG_AO_GCE_SW_RST                          97
+#define MT8183_INFRACFG_AO_CLDMA_SW_RST                                98
+#define MT8183_INFRACFG_AO_TRNG_SW_RST                         99
+#define MT8183_INFRACFG_AO_AP_MD_CCIF_1_SW_RST                 103
+#define MT8183_INFRACFG_AO_AP_MD_CCIF_SW_RST                   104
+#define MT8183_INFRACFG_AO_I2C1_IMM_SW_RST                     105
+#define MT8183_INFRACFG_AO_I2C1_ARB_SW_RST                     106
+#define MT8183_INFRACFG_AO_I2C2_IMM_SW_RST                     107
+#define MT8183_INFRACFG_AO_I2C2_ARB_SW_RST                     108
+#define MT8183_INFRACFG_AO_I2C5_SW_RST                         109
+#define MT8183_INFRACFG_AO_I2C5_IMM_SW_RST                     110
+#define MT8183_INFRACFG_AO_I2C5_ARB_SW_RST                     111
+#define MT8183_INFRACFG_AO_SPI4_SW_RST                         112
+#define MT8183_INFRACFG_AO_SPI5_SW_RST                         113
+#define MT8183_INFRACFG_AO_INFRA2MFGAXI_CBIP_CLAS_SW_RST       114
+#define MT8183_INFRACFG_AO_MFGAXI2INFRA_M0_CBIP_GLAS_OUT_SW_RST        115
+#define MT8183_INFRACFG_AO_MFGAXI2INFRA_M1_CBIP_GLAS_OUT_SW_RST        116
+#define MT8183_INFRACFG_AO_UFS_AES_SW_RST                      117
+#define MT8183_INFRACFG_AO_CCU_I2C_IRQ_SW_RST                  118
+#define MT8183_INFRACFG_AO_CCU_I2C_DMA_SW_RST                  119
+#define MT8183_INFRACFG_AO_I2C6_SW_RST                         120
+#define MT8183_INFRACFG_AO_CCU_GALS_SW_RST                     121
+#define MT8183_INFRACFG_AO_IPU_GALS_SW_RST                     122
+#define MT8183_INFRACFG_AO_CONN2AP_GALS_SW_RST                 123
+#define MT8183_INFRACFG_AO_AP_MD_CCIF2_SW_RST                  124
+#define MT8183_INFRACFG_AO_AP_MD_CCIF3_SW_RST                  125
+#define MT8183_INFRACFG_AO_I2C7_SW_RST                         126
+#define MT8183_INFRACFG_AO_I2C8_SW_RST                         127
+
+#endif  /* _DT_BINDINGS_RESET_CONTROLLER_MT8183 */
index b58ef21..b679017 100644 (file)
@@ -75,4 +75,7 @@
 #define RST_BUS_UART1          50
 #define RST_BUS_UART2          51
 
+/* Reset lines not available on V3s */
+#define RST_BUS_I2S0           52
+
 #endif /* _DT_BINDINGS_RST_SUN8I_H3_H_ */
index 978cc23..8b4e516 100644 (file)
@@ -643,6 +643,12 @@ bool acpi_gtdt_c3stop(int type);
 int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count);
 #endif
 
+#ifndef ACPI_HAVE_ARCH_SET_ROOT_POINTER
+static inline void acpi_arch_set_root_pointer(u64 addr)
+{ /* empty fallback, built only when the macro above is undefined; @addr is ignored */
+}
+#endif /* ACPI_HAVE_ARCH_SET_ROOT_POINTER */
+
 #ifndef ACPI_HAVE_ARCH_GET_ROOT_POINTER
 static inline u64 acpi_arch_get_root_pointer(void)
 {
index 0b58974..c7d6b2e 100644 (file)
@@ -46,6 +46,12 @@ enum backlight_notification {
        BACKLIGHT_UNREGISTERED,
 };
 
+enum backlight_scale { /* brightness scale type, held in backlight_properties.scale */
+       BACKLIGHT_SCALE_UNKNOWN = 0, /* explicitly pinned to 0 */
+       BACKLIGHT_SCALE_LINEAR,
+       BACKLIGHT_SCALE_NON_LINEAR,
+};
+
 struct backlight_device;
 struct fb_info;
 
@@ -80,6 +86,8 @@ struct backlight_properties {
        enum backlight_type type;
        /* Flags used to signal drivers of state changes */
        unsigned int state;
+       /* Type of the brightness scale (linear, non-linear, ...) */
+       enum backlight_scale scale;
 
 #define BL_CORE_SUSPENDED      (1 << 0)        /* backlight is suspended */
 #define BL_CORE_FBBLANK                (1 << 1)        /* backlight is under an fb blank event */
index d9db32f..f3ea78b 100644 (file)
@@ -1524,10 +1524,14 @@ struct blk_integrity_iter {
 };
 
 typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
+typedef void (integrity_prepare_fn) (struct request *);
+typedef void (integrity_complete_fn) (struct request *, unsigned int);
 
 struct blk_integrity_profile {
        integrity_processing_fn         *generate_fn;
        integrity_processing_fn         *verify_fn;
+       integrity_prepare_fn            *prepare_fn;
+       integrity_complete_fn           *complete_fn;
        const char                      *name;
 };
 
index 82156da..b9dbda1 100644 (file)
@@ -293,6 +293,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private);
 struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client);
 u64 ceph_client_gid(struct ceph_client *client);
 extern void ceph_destroy_client(struct ceph_client *client);
+extern void ceph_reset_client_addr(struct ceph_client *client);
 extern int __ceph_open_session(struct ceph_client *client,
                               unsigned long started);
 extern int ceph_open_session(struct ceph_client *client);
index 23895d1..c4458dc 100644 (file)
@@ -337,6 +337,7 @@ extern void ceph_msgr_flush(void);
 extern void ceph_messenger_init(struct ceph_messenger *msgr,
                                struct ceph_entity_addr *myaddr);
 extern void ceph_messenger_fini(struct ceph_messenger *msgr);
+extern void ceph_messenger_reset_nonce(struct ceph_messenger *msgr);
 
 extern void ceph_con_init(struct ceph_connection *con, void *private,
                        const struct ceph_connection_operations *ops,
index b4d134d..dbb8a69 100644 (file)
@@ -109,6 +109,7 @@ extern int ceph_monmap_contains(struct ceph_monmap *m,
 
 extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
 extern void ceph_monc_stop(struct ceph_mon_client *monc);
+extern void ceph_monc_reopen_session(struct ceph_mon_client *monc);
 
 enum {
        CEPH_SUB_MONMAP = 0,
index ad7fe5d..eaffbdd 100644 (file)
@@ -381,6 +381,7 @@ extern void ceph_osdc_cleanup(void);
 extern int ceph_osdc_init(struct ceph_osd_client *osdc,
                          struct ceph_client *client);
 extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
+extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc);
 
 extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
                                   struct ceph_msg *msg);
@@ -388,6 +389,7 @@ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
                                 struct ceph_msg *msg);
 void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
 void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err);
+void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc);
 
 #define osd_req_op_data(oreq, whch, typ, fld)                          \
 ({                                                                     \
index dce5521..2fdfe80 100644 (file)
@@ -299,7 +299,8 @@ struct clk_init_data {
  * into the clk API
  *
  * @init: pointer to struct clk_init_data that contains the init data shared
- * with the common clock framework.
+ * with the common clock framework. This pointer will be set to NULL once
+ * a clk_register() variant is called on this clk_hw pointer.
  */
 struct clk_hw {
        struct clk_core *core;
index 853a8f1..18b7b95 100644 (file)
@@ -239,7 +239,8 @@ static inline int clk_prepare(struct clk *clk)
        return 0;
 }
 
-static inline int __must_check clk_bulk_prepare(int num_clks, struct clk_bulk_data *clks)
+static inline int __must_check
+clk_bulk_prepare(int num_clks, const struct clk_bulk_data *clks)
 {
        might_sleep();
        return 0;
@@ -263,7 +264,8 @@ static inline void clk_unprepare(struct clk *clk)
 {
        might_sleep();
 }
-static inline void clk_bulk_unprepare(int num_clks, struct clk_bulk_data *clks)
+static inline void clk_bulk_unprepare(int num_clks,
+                                     const struct clk_bulk_data *clks)
 {
        might_sleep();
 }
@@ -820,7 +822,8 @@ static inline int clk_enable(struct clk *clk)
        return 0;
 }
 
-static inline int __must_check clk_bulk_enable(int num_clks, struct clk_bulk_data *clks)
+static inline int __must_check clk_bulk_enable(int num_clks,
+                                              const struct clk_bulk_data *clks)
 {
        return 0;
 }
@@ -829,7 +832,7 @@ static inline void clk_disable(struct clk *clk) {}
 
 
 static inline void clk_bulk_disable(int num_clks,
-                                   struct clk_bulk_data *clks) {}
+                                   const struct clk_bulk_data *clks) {}
 
 static inline unsigned long clk_get_rate(struct clk *clk)
 {
@@ -918,8 +921,8 @@ static inline void clk_disable_unprepare(struct clk *clk)
        clk_unprepare(clk);
 }
 
-static inline int __must_check clk_bulk_prepare_enable(int num_clks,
-                                       struct clk_bulk_data *clks)
+static inline int __must_check
+clk_bulk_prepare_enable(int num_clks, const struct clk_bulk_data *clks)
 {
        int ret;
 
@@ -934,7 +937,7 @@ static inline int __must_check clk_bulk_prepare_enable(int num_clks,
 }
 
 static inline void clk_bulk_disable_unprepare(int num_clks,
-                                             struct clk_bulk_data *clks)
+                                             const struct clk_bulk_data *clks)
 {
        clk_bulk_disable(num_clks, clks);
        clk_bulk_unprepare(num_clks, clks);
index 85f8cf9..eae9652 100644 (file)
@@ -4,6 +4,9 @@
  * Sylwester Nawrocki <s.nawrocki@samsung.com>
  */
 
+#ifndef __CLK_CONF_H
+#define __CLK_CONF_H
+
 #include <linux/types.h>
 
 struct device_node;
@@ -17,3 +20,5 @@ static inline int of_clk_set_defaults(struct device_node *node,
        return 0;
 }
 #endif
+
+#endif /* __CLK_CONF_H */
index 9569e7c..4b898cd 100644 (file)
@@ -129,11 +129,8 @@ static inline bool compaction_failed(enum compact_result result)
        return false;
 }
 
-/*
- * Compaction  has backed off for some reason. It might be throttling or
- * lock contention. Retrying is still worthwhile.
- */
-static inline bool compaction_withdrawn(enum compact_result result)
+/* Compaction needs reclaim to be performed first, so it can continue. */
+static inline bool compaction_needs_reclaim(enum compact_result result)
 {
        /*
         * Compaction backed off due to watermark checks for order-0
@@ -142,6 +139,16 @@ static inline bool compaction_withdrawn(enum compact_result result)
        if (result == COMPACT_SKIPPED)
                return true;
 
+       return false;
+}
+
+/*
+ * Compaction has backed off for some reason after doing some work or none
+ * at all. It might be throttling or lock contention. Retrying might still
+ * be worthwhile, but with a higher priority if allowed.
+ */
+static inline bool compaction_withdrawn(enum compact_result result)
+{
        /*
         * If compaction is deferred for high-order allocations, it is
         * because sync compaction recently failed. If this is the case
@@ -207,6 +214,11 @@ static inline bool compaction_failed(enum compact_result result)
        return false;
 }
 
+static inline bool compaction_needs_reclaim(enum compact_result result)
+{
+       return false; /* stub: @result is ignored. NOTE(review): presumably the !CONFIG_COMPACTION variant; confirm */
+}
+
 static inline bool compaction_withdrawn(enum compact_result result)
 {
        return true;
index b056a40..72393a8 100644 (file)
@@ -146,8 +146,17 @@ struct ftrace_likely_data {
        __inline_maybe_unused notrace
 #endif
 
+/*
+ * gcc provides both __inline__ and __inline as alternate spellings of
+ * the inline keyword, though the latter is undocumented. New kernel
+ * code should only use the inline spelling, but some existing code
+ * uses __inline__. Since we #define inline above, to ensure
+ * __inline__ has the same semantics, we need this #define.
+ *
+ * However, the spelling __inline is strictly reserved for referring
+ * to the bare keyword.
+ */
 #define __inline__ inline
-#define __inline   inline
 
 /*
  * GCC does not warn about unused static inline functions for -Wunused-function.
@@ -197,6 +206,12 @@ struct ftrace_likely_data {
 #define asm_volatile_goto(x...) asm goto(x)
 #endif
 
+#ifdef CONFIG_CC_HAS_ASM_INLINE
+#define asm_inline asm __inline /* needs __inline to remain the bare keyword, not a macro */
+#else
+#define asm_inline asm /* fallback: plain asm statement */
+#endif /* CONFIG_CC_HAS_ASM_INLINE */
+
 #ifndef __no_fgcse
 # define __no_fgcse
 #endif
index 88dc0c6..d0633eb 100644 (file)
@@ -201,12 +201,14 @@ enum cpuhp_smt_control {
 extern enum cpuhp_smt_control cpu_smt_control;
 extern void cpu_smt_disable(bool force);
 extern void cpu_smt_check_topology(void);
+extern bool cpu_smt_possible(void);
 extern int cpuhp_smt_enable(void);
 extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval);
 #else
 # define cpu_smt_control               (CPU_SMT_NOT_IMPLEMENTED)
 static inline void cpu_smt_disable(bool force) { }
 static inline void cpu_smt_check_topology(void) { }
+static inline bool cpu_smt_possible(void) { return false; }
 static inline int cpuhp_smt_enable(void) { return 0; }
 static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; }
 #endif
index b5a5a1e..78a73eb 100644 (file)
@@ -200,8 +200,8 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node)
        for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
 #define for_each_cpu_wrap(cpu, mask, start)    \
        for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start))
-#define for_each_cpu_and(cpu, mask, and)       \
-       for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and)
+#define for_each_cpu_and(cpu, mask1, mask2)    \
+       for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask1, (void)mask2)
 #else
 /**
  * cpumask_first - get the first cpu in a cpumask
@@ -290,20 +290,20 @@ extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool
 /**
  * for_each_cpu_and - iterate over every cpu in both masks
  * @cpu: the (optionally unsigned) integer iterator
- * @mask: the first cpumask pointer
- * @and: the second cpumask pointer
+ * @mask1: the first cpumask pointer
+ * @mask2: the second cpumask pointer
  *
  * This saves a temporary CPU mask in many places.  It is equivalent to:
  *     struct cpumask tmp;
- *     cpumask_and(&tmp, &mask, &and);
+ *     cpumask_and(&tmp, &mask1, &mask2);
  *     for_each_cpu(cpu, &tmp)
  *             ...
  *
  * After the loop, cpu is >= nr_cpu_ids.
  */
-#define for_each_cpu_and(cpu, mask, and)                               \
+#define for_each_cpu_and(cpu, mask1, mask2)                            \
        for ((cpu) = -1;                                                \
-               (cpu) = cpumask_next_and((cpu), (mask), (and)),         \
+               (cpu) = cpumask_next_and((cpu), (mask1), (mask2)),      \
                (cpu) < nr_cpu_ids;)
 #endif /* SMP */
 
index f7a30e0..18639c0 100644 (file)
@@ -386,7 +386,6 @@ static inline void put_cred(const struct cred *_cred)
 #define current_fsgid()        (current_cred_xxx(fsgid))
 #define current_cap()          (current_cred_xxx(cap_effective))
 #define current_user()         (current_cred_xxx(user))
-#define current_security()     (current_cred_xxx(security))
 
 extern struct user_namespace init_user_ns;
 #ifdef CONFIG_USER_NS
index 7d8c112..95f55b7 100644 (file)
@@ -18,6 +18,8 @@ extern struct module __this_module;
 #define THIS_MODULE ((struct module *)0)
 #endif
 
+#define NS_SEPARATOR "."
+
 #ifdef CONFIG_MODVERSIONS
 /* Mark the CRC weak since genksyms apparently decides not to
  * generate a checksums for some symbols */
@@ -26,13 +28,13 @@ extern struct module __this_module;
        asm("   .section \"___kcrctab" sec "+" #sym "\", \"a\"  \n"     \
            "   .weak   __crc_" #sym "                          \n"     \
            "   .long   __crc_" #sym " - .                      \n"     \
-           "   .previous                                       \n");
+           "   .previous                                       \n")
 #else
 #define __CRC_SYMBOL(sym, sec)                                         \
        asm("   .section \"___kcrctab" sec "+" #sym "\", \"a\"  \n"     \
            "   .weak   __crc_" #sym "                          \n"     \
            "   .long   __crc_" #sym "                          \n"     \
-           "   .previous                                       \n");
+           "   .previous                                       \n")
 #endif
 #else
 #define __CRC_SYMBOL(sym, sec)
@@ -46,44 +48,77 @@ extern struct module __this_module;
  * absolute relocations that require runtime processing on relocatable
  * kernels.
  */
+#define __KSYMTAB_ENTRY_NS(sym, sec, ns)                               \
+       __ADDRESSABLE(sym)                                              \
+       asm("   .section \"___ksymtab" sec "+" #sym "\", \"a\"  \n"     \
+           "   .balign 4                                       \n"     \
+           "__ksymtab_" #sym NS_SEPARATOR #ns ":               \n"     \
+           "   .long   " #sym "- .                             \n"     \
+           "   .long   __kstrtab_" #sym "- .                   \n"     \
+           "   .long   __kstrtab_ns_" #sym "- .                \n"     \
+           "   .previous                                       \n")
+
 #define __KSYMTAB_ENTRY(sym, sec)                                      \
        __ADDRESSABLE(sym)                                              \
        asm("   .section \"___ksymtab" sec "+" #sym "\", \"a\"  \n"     \
-           "   .balign 8                                       \n"     \
+           "   .balign 4                                       \n"     \
            "__ksymtab_" #sym ":                                \n"     \
            "   .long   " #sym "- .                             \n"     \
            "   .long   __kstrtab_" #sym "- .                   \n"     \
+           "   .long   0                                       \n"     \
            "   .previous                                       \n")
 
 struct kernel_symbol {
        int value_offset;
        int name_offset;
+       int namespace_offset;
 };
 #else
+#define __KSYMTAB_ENTRY_NS(sym, sec, ns)                               \
+       static const struct kernel_symbol __ksymtab_##sym##__##ns       \
+       asm("__ksymtab_" #sym NS_SEPARATOR #ns)                         \
+       __attribute__((section("___ksymtab" sec "+" #sym), used))       \
+       __aligned(sizeof(void *))                                       \
+       = { (unsigned long)&sym, __kstrtab_##sym, __kstrtab_ns_##sym }
+
 #define __KSYMTAB_ENTRY(sym, sec)                                      \
        static const struct kernel_symbol __ksymtab_##sym               \
+       asm("__ksymtab_" #sym)                                          \
        __attribute__((section("___ksymtab" sec "+" #sym), used))       \
-       = { (unsigned long)&sym, __kstrtab_##sym }
+       __aligned(sizeof(void *))                                       \
+       = { (unsigned long)&sym, __kstrtab_##sym, NULL }
 
 struct kernel_symbol {
        unsigned long value;
        const char *name;
+       const char *namespace;
 };
 #endif
 
 #ifdef __GENKSYMS__
 
-#define ___EXPORT_SYMBOL(sym, sec)     __GENKSYMS_EXPORT_SYMBOL(sym)
+#define ___EXPORT_SYMBOL(sym,sec)      __GENKSYMS_EXPORT_SYMBOL(sym)
+#define ___EXPORT_SYMBOL_NS(sym,sec,ns)        __GENKSYMS_EXPORT_SYMBOL(sym)
 
 #else
 
-/* For every exported symbol, place a struct in the __ksymtab section */
-#define ___EXPORT_SYMBOL(sym, sec)                                     \
+#define ___export_symbol_common(sym, sec)                              \
        extern typeof(sym) sym;                                         \
-       __CRC_SYMBOL(sym, sec)                                          \
+       __CRC_SYMBOL(sym, sec);                                         \
        static const char __kstrtab_##sym[]                             \
        __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
-       = #sym;                                                         \
+       = #sym                                                          \
+
+/* For every exported symbol, place a struct in the __ksymtab section */
+#define ___EXPORT_SYMBOL_NS(sym, sec, ns)                              \
+       ___export_symbol_common(sym, sec);                              \
+       static const char __kstrtab_ns_##sym[]                          \
+       __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
+       = #ns;                                                          \
+       __KSYMTAB_ENTRY_NS(sym, sec, ns)
+
+#define ___EXPORT_SYMBOL(sym, sec)                                     \
+       ___export_symbol_common(sym, sec);                              \
        __KSYMTAB_ENTRY(sym, sec)
 
 #endif
@@ -95,6 +130,7 @@ struct kernel_symbol {
  * be reused in other execution contexts such as the UEFI stub or the
  * decompressor.
  */
+#define __EXPORT_SYMBOL_NS(sym, sec, ns)
 #define __EXPORT_SYMBOL(sym, sec)
 
 #elif defined(CONFIG_TRIM_UNUSED_KSYMS)
@@ -121,15 +157,35 @@ struct kernel_symbol {
 #define __cond_export_sym_1(sym, sec) ___EXPORT_SYMBOL(sym, sec)
 #define __cond_export_sym_0(sym, sec) /* nothing */
 
+#define __EXPORT_SYMBOL_NS(sym, sec, ns)                               \
+       __ksym_marker(sym);                                             \
+       __cond_export_ns_sym(sym, sec, ns, __is_defined(__KSYM_##sym))
+#define __cond_export_ns_sym(sym, sec, ns, conf)                       \
+       ___cond_export_ns_sym(sym, sec, ns, conf)
+#define ___cond_export_ns_sym(sym, sec, ns, enabled)                   \
+       __cond_export_ns_sym_##enabled(sym, sec, ns)
+#define __cond_export_ns_sym_1(sym, sec, ns) ___EXPORT_SYMBOL_NS(sym, sec, ns)
+#define __cond_export_ns_sym_0(sym, sec, ns) /* nothing */
+
 #else
 
-#define __EXPORT_SYMBOL(sym, sec)      ___EXPORT_SYMBOL(sym, sec)
+#define __EXPORT_SYMBOL_NS(sym,sec,ns) ___EXPORT_SYMBOL_NS(sym,sec,ns)
+#define __EXPORT_SYMBOL(sym,sec)       ___EXPORT_SYMBOL(sym,sec)
 
 #endif /* CONFIG_MODULES */
 
+#ifdef DEFAULT_SYMBOL_NAMESPACE
+#undef __EXPORT_SYMBOL
+#define __EXPORT_SYMBOL(sym, sec)                              \
+       __EXPORT_SYMBOL_NS(sym, sec, DEFAULT_SYMBOL_NAMESPACE)
+#endif
+
 #define EXPORT_SYMBOL(sym)             __EXPORT_SYMBOL(sym, "")
 #define EXPORT_SYMBOL_GPL(sym)         __EXPORT_SYMBOL(sym, "_gpl")
 #define EXPORT_SYMBOL_GPL_FUTURE(sym)  __EXPORT_SYMBOL(sym, "_gpl_future")
+#define EXPORT_SYMBOL_NS(sym, ns)      __EXPORT_SYMBOL_NS(sym, "", ns)
+#define EXPORT_SYMBOL_NS_GPL(sym, ns)  __EXPORT_SYMBOL_NS(sym, "_gpl", ns)
+
 #ifdef CONFIG_UNUSED_SYMBOLS
 #define EXPORT_UNUSED_SYMBOL(sym)      __EXPORT_SYMBOL(sym, "_unused")
 #define EXPORT_UNUSED_SYMBOL_GPL(sym)  __EXPORT_SYMBOL(sym, "_unused_gpl")
index 6555990..2847389 100644 (file)
 
 #define F2FS_MAX_QUOTAS                3
 
+#define F2FS_ENC_UTF8_12_1     1
+#define F2FS_ENC_STRICT_MODE_FL        (1 << 0)
+#define f2fs_has_strict_mode(sbi) \
+       ((sbi)->s_encoding_flags & F2FS_ENC_STRICT_MODE_FL) /* non-zero when the strict-mode flag is set; (sbi) is parenthesized so arguments like *pp bind correctly */
+
 #define F2FS_IO_SIZE(sbi)      (1 << F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */
 #define F2FS_IO_SIZE_KB(sbi)   (1 << (F2FS_OPTION(sbi).write_io_size_bits + 2)) /* KB */
 #define F2FS_IO_SIZE_BYTES(sbi)        (1 << (F2FS_OPTION(sbi).write_io_size_bits + 12)) /* B */
 #define F2FS_IO_SIZE_BITS(sbi) (F2FS_OPTION(sbi).write_io_size_bits) /* power of 2 */
 #define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1)
+#define F2FS_IO_ALIGNED(sbi)   (F2FS_IO_SIZE(sbi) > 1)
 
 /* This flag is used by node and meta inodes, and by recovery */
 #define GFP_F2FS_ZERO          (GFP_NOFS | __GFP_ZERO)
@@ -109,7 +115,9 @@ struct f2fs_super_block {
        struct f2fs_device devs[MAX_DEVICES];   /* device list */
        __le32 qf_ino[F2FS_MAX_QUOTAS]; /* quota inode numbers */
        __u8 hot_ext_count;             /* # of hot file extension */
-       __u8 reserved[310];             /* valid reserved region */
+       __le16  s_encoding;             /* Filename charset encoding */
+       __le16  s_encoding_flags;       /* Filename charset encoding flags */
+       __u8 reserved[306];             /* valid reserved region */
        __le32 crc;                     /* checksum of superblock */
 } __packed;
 
index 866268c..e0d909d 100644 (file)
@@ -429,6 +429,7 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
  * @i_pages: Cached pages.
  * @gfp_mask: Memory allocation flags to use for allocating pages.
  * @i_mmap_writable: Number of VM_SHARED mappings.
+ * @nr_thps: Number of THPs in the pagecache (non-shmem only).
  * @i_mmap: Tree of private and shared mappings.
  * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
  * @nrpages: Number of page entries, protected by the i_pages lock.
@@ -446,6 +447,10 @@ struct address_space {
        struct xarray           i_pages;
        gfp_t                   gfp_mask;
        atomic_t                i_mmap_writable;
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+       /* number of thp, only for non-shmem files */
+       atomic_t                nr_thps;
+#endif
        struct rb_root_cached   i_mmap;
        struct rw_semaphore     i_mmap_rwsem;
        unsigned long           nrpages;
@@ -1163,6 +1168,11 @@ extern void lease_get_mtime(struct inode *, struct timespec64 *time);
 extern int generic_setlease(struct file *, long, struct file_lock **, void **priv);
 extern int vfs_setlease(struct file *, long, struct file_lock **, void **);
 extern int lease_modify(struct file_lock *, int, struct list_head *);
+
+struct notifier_block;
+extern int lease_register_notifier(struct notifier_block *);
+extern void lease_unregister_notifier(struct notifier_block *);
+
 struct files_struct;
 extern void show_fd_locks(struct seq_file *f,
                         struct file *filp, struct files_struct *files);
@@ -2798,6 +2808,33 @@ static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
        return errseq_sample(&mapping->wb_err);
 }
 
+static inline int filemap_nr_thps(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+       return atomic_read(&mapping->nr_thps);
+#else
+       return 0;
+#endif
+}
+
+static inline void filemap_nr_thps_inc(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+       atomic_inc(&mapping->nr_thps);
+#else
+       WARN_ON_ONCE(1);
+#endif
+}
+
+static inline void filemap_nr_thps_dec(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+       atomic_dec(&mapping->nr_thps);
+#else
+       WARN_ON_ONCE(1);
+#endif
+}
+
 extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
                           int datasync);
 extern int vfs_fsync(struct file *file, int datasync);
index 0424df7..e5c14e2 100644 (file)
@@ -95,7 +95,6 @@ struct fs_context {
        const struct cred       *cred;          /* The mounter's credentials */
        struct fc_log           *log;           /* Logging buffer */
        const char              *source;        /* The source name (eg. dev path) */
-       const char              *subtype;       /* The subtype to set on the superblock */
        void                    *security;      /* Linux S&M options */
        void                    *s_fs_info;     /* Proposed s_fs_info */
        unsigned int            sb_flags;       /* Proposed superblock flags (SB_*) */
index 2de3b2d..1915bdb 100644 (file)
@@ -475,6 +475,8 @@ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 extern void fsnotify_detach_mark(struct fsnotify_mark *mark);
 /* free mark */
 extern void fsnotify_free_mark(struct fsnotify_mark *mark);
+/* Wait until all marks queued for destruction are destroyed */
+extern void fsnotify_wait_marks_destroyed(void);
 /* run all the marks in a group, and clear all of the marks attached to given object type */
 extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type);
 /* run all the marks in a group, and clear all of the vfsmount marks */
index d770ab1..cd41f20 100644 (file)
@@ -1154,29 +1154,32 @@ int hid_pidff_init(struct hid_device *hid);
 #define hid_pidff_init NULL
 #endif
 
-#define dbg_hid(format, arg...)                                                \
+#define dbg_hid(fmt, ...)                                              \
 do {                                                                   \
        if (hid_debug)                                                  \
-               printk(KERN_DEBUG "%s: " format, __FILE__, ##arg);      \
+               printk(KERN_DEBUG "%s: " fmt, __FILE__, ##__VA_ARGS__); \
 } while (0)
 
-#define hid_printk(level, hid, fmt, arg...)            \
-       dev_printk(level, &(hid)->dev, fmt, ##arg)
-#define hid_emerg(hid, fmt, arg...)                    \
-       dev_emerg(&(hid)->dev, fmt, ##arg)
-#define hid_crit(hid, fmt, arg...)                     \
-       dev_crit(&(hid)->dev, fmt, ##arg)
-#define hid_alert(hid, fmt, arg...)                    \
-       dev_alert(&(hid)->dev, fmt, ##arg)
-#define hid_err(hid, fmt, arg...)                      \
-       dev_err(&(hid)->dev, fmt, ##arg)
-#define hid_notice(hid, fmt, arg...)                   \
-       dev_notice(&(hid)->dev, fmt, ##arg)
-#define hid_warn(hid, fmt, arg...)                     \
-       dev_warn(&(hid)->dev, fmt, ##arg)
-#define hid_info(hid, fmt, arg...)                     \
-       dev_info(&(hid)->dev, fmt, ##arg)
-#define hid_dbg(hid, fmt, arg...)                      \
-       dev_dbg(&(hid)->dev, fmt, ##arg)
+#define hid_err(hid, fmt, ...)                         \
+       dev_err(&(hid)->dev, fmt, ##__VA_ARGS__)
+#define hid_notice(hid, fmt, ...)                      \
+       dev_notice(&(hid)->dev, fmt, ##__VA_ARGS__)
+#define hid_warn(hid, fmt, ...)                                \
+       dev_warn(&(hid)->dev, fmt, ##__VA_ARGS__)
+#define hid_info(hid, fmt, ...)                                \
+       dev_info(&(hid)->dev, fmt, ##__VA_ARGS__)
+#define hid_dbg(hid, fmt, ...)                         \
+       dev_dbg(&(hid)->dev, fmt, ##__VA_ARGS__)
+
+#define hid_err_once(hid, fmt, ...)                    \
+       dev_err_once(&(hid)->dev, fmt, ##__VA_ARGS__)
+#define hid_notice_once(hid, fmt, ...)                 \
+       dev_notice_once(&(hid)->dev, fmt, ##__VA_ARGS__)
+#define hid_warn_once(hid, fmt, ...)                   \
+       dev_warn_once(&(hid)->dev, fmt, ##__VA_ARGS__)
+#define hid_info_once(hid, fmt, ...)                   \
+       dev_info_once(&(hid)->dev, fmt, ##__VA_ARGS__)
+#define hid_dbg_once(hid, fmt, ...)                    \
+       dev_dbg_once(&(hid)->dev, fmt, ##__VA_ARGS__)
 
 #endif
index 7ef56dc..3fec513 100644 (file)
  * @notifiers: count of active mmu notifiers
  */
 struct hmm {
-       struct mm_struct        *mm;
-       struct kref             kref;
+       struct mmu_notifier     mmu_notifier;
        spinlock_t              ranges_lock;
        struct list_head        ranges;
        struct list_head        mirrors;
-       struct mmu_notifier     mmu_notifier;
        struct rw_semaphore     mirrors_sem;
        wait_queue_head_t       wq;
-       struct rcu_head         rcu;
        long                    notifiers;
 };
 
@@ -158,13 +155,11 @@ enum hmm_pfn_value_e {
  * @values: pfn value for some special case (none, special, error, ...)
  * @default_flags: default flags for the range (write, read, ... see hmm doc)
  * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter
- * @page_shift: device virtual address shift value (should be >= PAGE_SHIFT)
  * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT)
  * @valid: pfns array did not change since it has been fill by an HMM function
  */
 struct hmm_range {
        struct hmm              *hmm;
-       struct vm_area_struct   *vma;
        struct list_head        list;
        unsigned long           start;
        unsigned long           end;
@@ -173,32 +168,11 @@ struct hmm_range {
        const uint64_t          *values;
        uint64_t                default_flags;
        uint64_t                pfn_flags_mask;
-       uint8_t                 page_shift;
        uint8_t                 pfn_shift;
        bool                    valid;
 };
 
 /*
- * hmm_range_page_shift() - return the page shift for the range
- * @range: range being queried
- * Return: page shift (page size = 1 << page shift) for the range
- */
-static inline unsigned hmm_range_page_shift(const struct hmm_range *range)
-{
-       return range->page_shift;
-}
-
-/*
- * hmm_range_page_size() - return the page size for the range
- * @range: range being queried
- * Return: page size for the range in bytes
- */
-static inline unsigned long hmm_range_page_size(const struct hmm_range *range)
-{
-       return 1UL << hmm_range_page_shift(range);
-}
-
-/*
  * hmm_range_wait_until_valid() - wait for range to be valid
  * @range: range affected by invalidation to wait on
  * @timeout: time out for wait in ms (ie abort wait after that period of time)
@@ -291,40 +265,6 @@ static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range,
 }
 
 /*
- * Old API:
- * hmm_pfn_to_page()
- * hmm_pfn_to_pfn()
- * hmm_pfn_from_page()
- * hmm_pfn_from_pfn()
- *
- * This are the OLD API please use new API, it is here to avoid cross-tree
- * merge painfullness ie we convert things to new API in stages.
- */
-static inline struct page *hmm_pfn_to_page(const struct hmm_range *range,
-                                          uint64_t pfn)
-{
-       return hmm_device_entry_to_page(range, pfn);
-}
-
-static inline unsigned long hmm_pfn_to_pfn(const struct hmm_range *range,
-                                          uint64_t pfn)
-{
-       return hmm_device_entry_to_pfn(range, pfn);
-}
-
-static inline uint64_t hmm_pfn_from_page(const struct hmm_range *range,
-                                        struct page *page)
-{
-       return hmm_device_entry_from_page(range, page);
-}
-
-static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range,
-                                       unsigned long pfn)
-{
-       return hmm_device_entry_from_pfn(range, pfn);
-}
-
-/*
  * Mirroring: how to synchronize device page table with CPU page table.
  *
  * A device driver that is participating in HMM mirroring must always
@@ -375,29 +315,6 @@ static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range,
 struct hmm_mirror;
 
 /*
- * enum hmm_update_event - type of update
- * @HMM_UPDATE_INVALIDATE: invalidate range (no indication as to why)
- */
-enum hmm_update_event {
-       HMM_UPDATE_INVALIDATE,
-};
-
-/*
- * struct hmm_update - HMM update information for callback
- *
- * @start: virtual start address of the range to update
- * @end: virtual end address of the range to update
- * @event: event triggering the update (what is happening)
- * @blockable: can the callback block/sleep ?
- */
-struct hmm_update {
-       unsigned long start;
-       unsigned long end;
-       enum hmm_update_event event;
-       bool blockable;
-};
-
-/*
  * struct hmm_mirror_ops - HMM mirror device operations callback
  *
  * @update: callback to update range on a device
@@ -417,9 +334,9 @@ struct hmm_mirror_ops {
        /* sync_cpu_device_pagetables() - synchronize page tables
         *
         * @mirror: pointer to struct hmm_mirror
-        * @update: update information (see struct hmm_update)
-        * Return: -EAGAIN if update.blockable false and callback need to
-        *          block, 0 otherwise.
+        * @update: update information (see struct mmu_notifier_range)
+        * Return: -EAGAIN if mmu_notifier_range_blockable(update) is false
+        * and callback needs to block, 0 otherwise.
         *
         * This callback ultimately originates from mmu_notifiers when the CPU
         * page table is updated. The device driver must update its page table
@@ -430,8 +347,9 @@ struct hmm_mirror_ops {
         * page tables are completely updated (TLBs flushed, etc); this is a
         * synchronous call.
         */
-       int (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
-                                         const struct hmm_update *update);
+       int (*sync_cpu_device_pagetables)(
+               struct hmm_mirror *mirror,
+               const struct mmu_notifier_range *update);
 };
 
 /*
@@ -457,20 +375,24 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror);
 /*
  * Please see Documentation/vm/hmm.rst for how to use the range API.
  */
-int hmm_range_register(struct hmm_range *range,
-                      struct hmm_mirror *mirror,
-                      unsigned long start,
-                      unsigned long end,
-                      unsigned page_shift);
+int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror);
 void hmm_range_unregister(struct hmm_range *range);
-long hmm_range_snapshot(struct hmm_range *range);
-long hmm_range_fault(struct hmm_range *range, bool block);
+
+/*
+ * Retry fault if non-blocking, drop mmap_sem and return -EAGAIN in that case.
+ */
+#define HMM_FAULT_ALLOW_RETRY          (1 << 0)
+
+/* Don't fault in missing PTEs, just snapshot the current state. */
+#define HMM_FAULT_SNAPSHOT             (1 << 1)
+
+long hmm_range_fault(struct hmm_range *range, unsigned int flags);
+
 long hmm_range_dma_map(struct hmm_range *range,
                       struct device *device,
                       dma_addr_t *daddrs,
-                      bool block);
+                      unsigned int flags);
 long hmm_range_dma_unmap(struct hmm_range *range,
-                        struct vm_area_struct *vma,
                         struct device *device,
                         dma_addr_t *daddrs,
                         bool dirty);
@@ -484,13 +406,6 @@ long hmm_range_dma_unmap(struct hmm_range *range,
  */
 #define HMM_RANGE_DEFAULT_TIMEOUT 1000
 
-/* Below are for HMM internal use only! Not to be used by device driver! */
-static inline void hmm_mm_init(struct mm_struct *mm)
-{
-       mm->hmm = NULL;
-}
-#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */
-static inline void hmm_mm_init(struct mm_struct *mm) {}
 #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
 
 #endif /* LINUX_HMM_H */
index 45ede62..61c9ffd 100644 (file)
@@ -267,6 +267,15 @@ static inline bool thp_migration_supported(void)
        return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
 }
 
+static inline struct list_head *page_deferred_list(struct page *page)
+{
+       /*
+        * Return the deferred list (global or memcg) slot kept in the second
+        * tail page, page[2]; tail-page ->lru is occupied by compound_head.
+        */
+       return &page[2].deferred_list;
+}
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
index edfca42..53fc34f 100644 (file)
@@ -454,7 +454,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 static inline struct hstate *page_hstate(struct page *page)
 {
        VM_BUG_ON_PAGE(!PageHuge(page), page);
-       return size_to_hstate(PAGE_SIZE << compound_order(page));
+       return size_to_hstate(page_size(page));
 }
 
 static inline unsigned hstate_index_to_shift(unsigned index)
index 2afe6fd..b4a0170 100644 (file)
@@ -245,7 +245,10 @@ struct vmbus_channel_offer {
                } pipe;
        } u;
        /*
-        * The sub_channel_index is defined in win8.
+        * The sub_channel_index is defined in Win8: a value of zero means a
+        * primary channel and a non-zero value means a sub-channel.
+        *
+        * Before Win8, the field is reserved, meaning it's always zero.
         */
        u16 sub_channel_index;
        u16 reserved3;
@@ -423,6 +426,9 @@ enum vmbus_channel_message_type {
        CHANNELMSG_COUNT
 };
 
+/* Hyper-V supports about 2048 channels, and the RELIDs start with 1. */
+#define INVALID_RELID  U32_MAX
+
 struct vmbus_channel_message_header {
        enum vmbus_channel_message_type msgtype;
        u32 padding;
@@ -934,6 +940,11 @@ static inline bool is_hvsock_channel(const struct vmbus_channel *c)
                  VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER);
 }
 
+static inline bool is_sub_channel(const struct vmbus_channel *c)
+{
+       return c->offermsg.offer.sub_channel_index != 0;
+}
+
 static inline void set_channel_affinity_state(struct vmbus_channel *c,
                                              enum hv_numa_policy policy)
 {
@@ -1149,6 +1160,9 @@ struct hv_driver {
        int (*remove)(struct hv_device *);
        void (*shutdown)(struct hv_device *);
 
+       int (*suspend)(struct hv_device *);
+       int (*resume)(struct hv_device *);
+
 };
 
 /* Base device object */
index c0a78c0..1361637 100644 (file)
@@ -473,7 +473,7 @@ extern struct i2c_client *
 devm_i2c_new_dummy_device(struct device *dev, struct i2c_adapter *adap, u16 address);
 
 extern struct i2c_client *
-i2c_new_secondary_device(struct i2c_client *client,
+i2c_new_ancillary_device(struct i2c_client *client,
                                const char *name,
                                u16 default_addr);
 
index a20ad39..1c37f17 100644 (file)
@@ -131,4 +131,13 @@ static inline int ima_inode_removexattr(struct dentry *dentry,
        return 0;
 }
 #endif /* CONFIG_IMA_APPRAISE */
+
+#if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING)
+extern bool ima_appraise_signature(enum kernel_read_file_id func);
+#else
+static inline bool ima_appraise_signature(enum kernel_read_file_id func)
+{
+       return false;
+}
+#endif /* CONFIG_IMA_APPRAISE && CONFIG_INTEGRITY_TRUSTED_KEYRING */
 #endif /* _LINUX_IMA_H */
index 8554761..aaa8a07 100644 (file)
                                                                              \
 /* Callbacks for augmented rbtree insert and remove */                       \
                                                                              \
-static inline ITTYPE ITPREFIX ## _compute_subtree_last(ITSTRUCT *node)       \
-{                                                                            \
-       ITTYPE max = ITLAST(node), subtree_last;                              \
-       if (node->ITRB.rb_left) {                                             \
-               subtree_last = rb_entry(node->ITRB.rb_left,                   \
-                                       ITSTRUCT, ITRB)->ITSUBTREE;           \
-               if (max < subtree_last)                                       \
-                       max = subtree_last;                                   \
-       }                                                                     \
-       if (node->ITRB.rb_right) {                                            \
-               subtree_last = rb_entry(node->ITRB.rb_right,                  \
-                                       ITSTRUCT, ITRB)->ITSUBTREE;           \
-               if (max < subtree_last)                                       \
-                       max = subtree_last;                                   \
-       }                                                                     \
-       return max;                                                           \
-}                                                                            \
-                                                                             \
-RB_DECLARE_CALLBACKS(static, ITPREFIX ## _augment, ITSTRUCT, ITRB,           \
-                    ITTYPE, ITSUBTREE, ITPREFIX ## _compute_subtree_last)    \
+RB_DECLARE_CALLBACKS_MAX(static, ITPREFIX ## _augment,                       \
+                        ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, ITLAST)           \
                                                                              \
 /* Insert / remove interval nodes from the tree */                           \
                                                                              \
index bc499ce..7aa5d61 100644 (file)
@@ -188,10 +188,14 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
  */
 #define IOMAP_DIO_UNWRITTEN    (1 << 0)        /* covers unwritten extent(s) */
 #define IOMAP_DIO_COW          (1 << 1)        /* covers COW extent(s) */
-typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret,
-               unsigned flags);
+
+struct iomap_dio_ops {
+       int (*end_io)(struct kiocb *iocb, ssize_t size, int error,
+                     unsigned flags);
+};
+
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
-               const struct iomap_ops *ops, iomap_dio_end_io_t end_io);
+               const struct iomap_ops *ops, const struct iomap_dio_ops *dops);
 int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
 
 #ifdef CONFIG_SWAP
index 5b6a712..7bddddf 100644 (file)
@@ -297,6 +297,8 @@ static inline bool resource_overlaps(struct resource *r1, struct resource *r2)
 
 struct resource *devm_request_free_mem_region(struct device *dev,
                struct resource *base, unsigned long size);
+struct resource *request_free_mem_region(struct resource *base,
+               unsigned long size, const char *name);
 
 #endif /* __ASSEMBLY__ */
 #endif /* _LINUX_IOPORT_H */
index df03825..603fbc4 100644 (file)
@@ -1410,8 +1410,6 @@ extern int           jbd2_journal_clear_err  (journal_t *);
 extern int        jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
 extern int        jbd2_journal_force_commit(journal_t *);
 extern int        jbd2_journal_force_commit_nested(journal_t *);
-extern int        jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode);
-extern int        jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode);
 extern int        jbd2_journal_inode_ranged_write(handle_t *handle,
                        struct jbd2_inode *inode, loff_t start_byte,
                        loff_t length);
index 4fa360a..d83d403 100644 (file)
@@ -217,7 +217,9 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
  * might_sleep - annotation for functions that can sleep
  *
  * this macro will print a stack trace if it is executed in an atomic
- * context (spinlock, irq-handler, ...).
+ * context (spinlock, irq-handler, ...). Additional sections where blocking is
+ * not allowed can be annotated with non_block_start() and non_block_end()
+ * pairs.
  *
  * This is a useful debugging help to be able to catch problems early and not
  * be bitten later when the calling function happens to sleep when it is not
@@ -233,6 +235,23 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
 # define cant_sleep() \
        do { __cant_sleep(__FILE__, __LINE__, 0); } while (0)
 # define sched_annotate_sleep()        (current->task_state_change = 0)
+/**
+ * non_block_start - annotate the start of section where sleeping is prohibited
+ *
+ * This is on behalf of the oom reaper, specifically when it is calling the mmu
+ * notifiers. The problem is that if the notifier were to block on, for example,
+ * mutex_lock() and if the process which holds that mutex were to perform a
+ * sleeping memory allocation, the oom reaper is now blocked on completion of
+ * that memory allocation. Other blocking calls like wait_event() pose similar
+ * issues.
+ */
+# define non_block_start() (current->non_block_count++)
+/**
+ * non_block_end - annotate the end of section where sleeping is prohibited
+ *
+ * Closes a section opened by non_block_start().
+ */
+# define non_block_end() WARN_ON(current->non_block_count-- == 0)
 #else
   static inline void ___might_sleep(const char *file, int line,
                                   int preempt_offset) { }
@@ -241,6 +260,8 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
 # define might_sleep() do { might_resched(); } while (0)
 # define cant_sleep() do { } while (0)
 # define sched_annotate_sleep() do { } while (0)
+# define non_block_start() do { } while (0)
+# define non_block_end() do { } while (0)
 #endif
 
 #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)
index f0b8092..1776eb2 100644 (file)
@@ -125,7 +125,7 @@ typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf,
                             unsigned long cmdline_len);
 typedef int (kexec_cleanup_t)(void *loader_data);
 
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC_SIG
 typedef int (kexec_verify_sig_t)(const char *kernel_buf,
                                 unsigned long kernel_len);
 #endif
@@ -134,7 +134,7 @@ struct kexec_file_ops {
        kexec_probe_t *probe;
        kexec_load_t *load;
        kexec_cleanup_t *cleanup;
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC_SIG
        kexec_verify_sig_t *verify_sig;
 #endif
 };
@@ -183,6 +183,8 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
                                   bool get_value);
 void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name);
 
+int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+                                        unsigned long buf_len);
 void * __weak arch_kexec_kernel_image_load(struct kimage *image);
 int __weak arch_kexec_apply_relocations_add(struct purgatory_info *pi,
                                            Elf_Shdr *section,
index fbf144a..b072aeb 100644 (file)
@@ -326,8 +326,10 @@ extern atomic_t                    kgdb_active;
        (raw_smp_processor_id() == atomic_read(&kgdb_active))
 extern bool dbg_is_early;
 extern void __init dbg_late_init(void);
+extern void kgdb_panic(const char *msg);
 #else /* ! CONFIG_KGDB */
 #define in_dbg_master() (0)
 #define dbg_late_init()
+static inline void kgdb_panic(const char *msg) {}
 #endif /* ! CONFIG_KGDB */
 #endif /* _KGDB_H_ */
index 082d1d2..bc45ea1 100644 (file)
@@ -15,6 +15,14 @@ extern int __khugepaged_enter(struct mm_struct *mm);
 extern void __khugepaged_exit(struct mm_struct *mm);
 extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
                                      unsigned long vm_flags);
+#ifdef CONFIG_SHMEM
+extern void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr);
+#else
+static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
+                                          unsigned long addr)
+{
+}
+#endif
 
 #define khugepaged_enabled()                                          \
        (transparent_hugepage_flags &                                  \
@@ -73,6 +81,10 @@ static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 {
        return 0;
 }
+static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
+                                          unsigned long addr)
+{
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_KHUGEPAGED_H */
index 7a64b3d..b6eddf9 100644 (file)
@@ -160,8 +160,11 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
 
 }
 
-enum nvdimm_security_state {
-       NVDIMM_SECURITY_ERROR = -1,
+/*
+ * Note that separate bits for locked + unlocked are defined so that
+ * 'flags == 0' corresponds to an error / not-supported state.
+ */
+enum nvdimm_security_bits {
        NVDIMM_SECURITY_DISABLED,
        NVDIMM_SECURITY_UNLOCKED,
        NVDIMM_SECURITY_LOCKED,
@@ -182,7 +185,7 @@ enum nvdimm_passphrase_type {
 };
 
 struct nvdimm_security_ops {
-       enum nvdimm_security_state (*state)(struct nvdimm *nvdimm,
+       unsigned long (*get_flags)(struct nvdimm *nvdimm,
                        enum nvdimm_passphrase_type pass_type);
        int (*freeze)(struct nvdimm *nvdimm);
        int (*change_key)(struct nvdimm *nvdimm,
index df1318d..a376324 100644 (file)
  *     Check for permission to change root directory.
  *     @path contains the path structure.
  *     Return 0 if permission is granted.
+ * @path_notify:
+ *     Check permissions before setting a watch on events as defined by @mask,
+ *     on an object at @path, whose type is defined by @obj_type.
  * @inode_readlink:
  *     Check the permission to read the symbolic link.
  *     @dentry contains the dentry structure for the file link.
  * @bpf_prog_free_security:
  *     Clean up the security information stored inside bpf prog.
  *
+ * @locked_down:
+ *     Determine whether a kernel feature that potentially enables arbitrary
+ *     code execution in kernel space should be permitted.
+ *
+ *     @what: kernel feature being accessed
  */
 union security_list_options {
        int (*binder_set_context_mgr)(struct task_struct *mgr);
@@ -1535,7 +1543,9 @@ union security_list_options {
        int (*path_chown)(const struct path *path, kuid_t uid, kgid_t gid);
        int (*path_chroot)(const struct path *path);
 #endif
-
+       /* Needed for inode based security check */
+       int (*path_notify)(const struct path *path, u64 mask,
+                               unsigned int obj_type);
        int (*inode_alloc_security)(struct inode *inode);
        void (*inode_free_security)(struct inode *inode);
        int (*inode_init_security)(struct inode *inode, struct inode *dir,
@@ -1807,6 +1817,7 @@ union security_list_options {
        int (*bpf_prog_alloc_security)(struct bpf_prog_aux *aux);
        void (*bpf_prog_free_security)(struct bpf_prog_aux *aux);
 #endif /* CONFIG_BPF_SYSCALL */
+       int (*locked_down)(enum lockdown_reason what);
 };
 
 struct security_hook_heads {
@@ -1860,6 +1871,8 @@ struct security_hook_heads {
        struct hlist_head path_chown;
        struct hlist_head path_chroot;
 #endif
+       /* Needed for inode based modules as well */
+       struct hlist_head path_notify;
        struct hlist_head inode_alloc_security;
        struct hlist_head inode_free_security;
        struct hlist_head inode_init_security;
@@ -2046,6 +2059,7 @@ struct security_hook_heads {
        struct hlist_head bpf_prog_alloc_security;
        struct hlist_head bpf_prog_free_security;
 #endif /* CONFIG_BPF_SYSCALL */
+       struct hlist_head locked_down;
 } __randomize_layout;
 
 /*
@@ -2104,12 +2118,18 @@ struct lsm_info {
 };
 
 extern struct lsm_info __start_lsm_info[], __end_lsm_info[];
+extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[];
 
 #define DEFINE_LSM(lsm)                                                        \
        static struct lsm_info __lsm_##lsm                              \
                __used __section(.lsm_info.init)                        \
                __aligned(sizeof(unsigned long))
 
+#define DEFINE_EARLY_LSM(lsm)                                          \
+       static struct lsm_info __early_lsm_##lsm                        \
+               __used __section(.early_lsm_info.init)                  \
+               __aligned(sizeof(unsigned long))
+
 #ifdef CONFIG_SECURITY_SELINUX_DISABLE
 /*
  * Assuring the safety of deleting a security module is up to
index ad8f1a3..9b60863 100644 (file)
@@ -128,9 +128,8 @@ struct mem_cgroup_per_node {
 
        struct mem_cgroup_reclaim_iter  iter[DEF_PRIORITY + 1];
 
-#ifdef CONFIG_MEMCG_KMEM
        struct memcg_shrinker_map __rcu *shrinker_map;
-#endif
+
        struct rb_node          tree_node;      /* RB tree node */
        unsigned long           usage_in_excess;/* Set to the value by which */
                                                /* the soft limit is exceeded*/
@@ -331,6 +330,10 @@ struct mem_cgroup {
        struct list_head event_list;
        spinlock_t event_list_lock;
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       struct deferred_split deferred_split_queue;
+#endif
+
        struct mem_cgroup_per_node *nodeinfo[0];
        /* WARNING: nodeinfo must be the last member here */
 };
@@ -1311,6 +1314,11 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
        } while ((memcg = parent_mem_cgroup(memcg)));
        return false;
 }
+
+extern int memcg_expand_shrinker_maps(int new_id);
+
+extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
+                                  int nid, int shrinker_id);
 #else
 #define mem_cgroup_sockets_enabled 0
 static inline void mem_cgroup_sk_alloc(struct sock *sk) { };
@@ -1319,6 +1327,11 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
        return false;
 }
+
+static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
+                                         int nid, int shrinker_id)
+{
+}
 #endif
 
 struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
@@ -1390,10 +1403,6 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
        return memcg ? memcg->kmemcg_id : -1;
 }
 
-extern int memcg_expand_shrinker_maps(int new_id);
-
-extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
-                                  int nid, int shrinker_id);
 #else
 
 static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
@@ -1435,8 +1444,6 @@ static inline void memcg_put_cache_ids(void)
 {
 }
 
-static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
-                                         int nid, int shrinker_id) { }
 #endif /* CONFIG_MEMCG_KMEM */
 
 #endif /* _LINUX_MEMCONTROL_H */
index 02e633f..0ebb105 100644 (file)
@@ -25,7 +25,6 @@
 
 struct memory_block {
        unsigned long start_section_nr;
-       unsigned long end_section_nr;
        unsigned long state;            /* serialized by the dev->lock */
        int section_count;              /* serialized by mem_sysfs_mutex */
        int online_type;                /* for passing data to online routine */
@@ -80,9 +79,9 @@ struct mem_section;
 #define IPC_CALLBACK_PRI        10
 
 #ifndef CONFIG_MEMORY_HOTPLUG_SPARSE
-static inline int memory_dev_init(void)
+static inline void memory_dev_init(void)
 {
-       return 0;
+       return;
 }
 static inline int register_memory_notifier(struct notifier_block *nb)
 {
@@ -113,7 +112,7 @@ extern int register_memory_isolate_notifier(struct notifier_block *nb);
 extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
 int create_memory_block_devices(unsigned long start, unsigned long size);
 void remove_memory_block_devices(unsigned long start, unsigned long size);
-extern int memory_dev_init(void);
+extern void memory_dev_init(void);
 extern int memory_notify(unsigned long val, void *v);
 extern int memory_isolate_notify(unsigned long val, void *v);
 extern struct memory_block *find_memory_block(struct mem_section *);
index f8a5b2a..bef51e3 100644 (file)
@@ -109,10 +109,8 @@ struct dev_pagemap {
        struct percpu_ref *ref;
        struct percpu_ref internal_ref;
        struct completion done;
-       struct device *dev;
        enum memory_type type;
        unsigned int flags;
-       u64 pci_p2pdma_bus_offset;
        const struct dev_pagemap_ops *ops;
 };
 
@@ -124,6 +122,8 @@ static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap)
 }
 
 #ifdef CONFIG_ZONE_DEVICE
+void *memremap_pages(struct dev_pagemap *pgmap, int nid);
+void memunmap_pages(struct dev_pagemap *pgmap);
 void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
 void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
 struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
diff --git a/include/linux/mfd/da9063/pdata.h b/include/linux/mfd/da9063/pdata.h
deleted file mode 100644 (file)
index 085edbf..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Platform configuration options for DA9063
- *
- * Copyright 2012 Dialog Semiconductor Ltd.
- *
- * Author: Michal Hajduk, Dialog Semiconductor
- * Author: Krystian Garbaciak, Dialog Semiconductor
- */
-
-#ifndef __MFD_DA9063_PDATA_H__
-#define __MFD_DA9063_PDATA_H__
-
-/*
- * RGB LED configuration
- */
-/* LED IDs for flags in struct led_info. */
-enum {
-       DA9063_GPIO11_LED,
-       DA9063_GPIO14_LED,
-       DA9063_GPIO15_LED,
-
-       DA9063_LED_NUM
-};
-#define DA9063_LED_ID_MASK             0x3
-
-/* LED polarity for flags in struct led_info. */
-#define DA9063_LED_HIGH_LEVEL_ACTIVE   0x0
-#define DA9063_LED_LOW_LEVEL_ACTIVE    0x4
-
-
-/*
- * General PMIC configuration
- */
-/* HWMON ADC channels configuration */
-#define DA9063_FLG_FORCE_IN0_MANUAL_MODE       0x0010
-#define DA9063_FLG_FORCE_IN0_AUTO_MODE         0x0020
-#define DA9063_FLG_FORCE_IN1_MANUAL_MODE       0x0040
-#define DA9063_FLG_FORCE_IN1_AUTO_MODE         0x0080
-#define DA9063_FLG_FORCE_IN2_MANUAL_MODE       0x0100
-#define DA9063_FLG_FORCE_IN2_AUTO_MODE         0x0200
-#define DA9063_FLG_FORCE_IN3_MANUAL_MODE       0x0400
-#define DA9063_FLG_FORCE_IN3_AUTO_MODE         0x0800
-
-/* Disable register caching. */
-#define DA9063_FLG_NO_CACHE                    0x0008
-
-struct da9063;
-
-/* DA9063 platform data */
-struct da9063_pdata {
-       int                             (*init)(struct da9063 *da9063);
-       int                             irq_base;
-       bool                            key_power;
-       unsigned                        flags;
-       struct da9063_regulators_pdata  *regulators_pdata;
-       struct led_platform_data        *leds_pdata;
-};
-
-#endif /* __MFD_DA9063_PDATA_H__ */
diff --git a/include/linux/mfd/intel_soc_pmic_mrfld.h b/include/linux/mfd/intel_soc_pmic_mrfld.h
new file mode 100644 (file)
index 0000000..4daecd6
--- /dev/null
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Header file for Intel Merrifield Basin Cove PMIC
+ *
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ */
+
+#ifndef __INTEL_SOC_PMIC_MRFLD_H__
+#define __INTEL_SOC_PMIC_MRFLD_H__
+
+#include <linux/bits.h>
+
+#define BCOVE_ID               0x00
+
+#define BCOVE_ID_MINREV0       GENMASK(2, 0)
+#define BCOVE_ID_MAJREV0       GENMASK(5, 3)
+#define BCOVE_ID_VENDID0       GENMASK(7, 6)
+
+#define BCOVE_MINOR(x)         (unsigned int)(((x) & BCOVE_ID_MINREV0) >> 0)
+#define BCOVE_MAJOR(x)         (unsigned int)(((x) & BCOVE_ID_MAJREV0) >> 3)
+#define BCOVE_VENDOR(x)                (unsigned int)(((x) & BCOVE_ID_VENDID0) >> 6)
+
+#define BCOVE_IRQLVL1          0x01
+
+#define BCOVE_PBIRQ            0x02
+#define BCOVE_TMUIRQ           0x03
+#define BCOVE_THRMIRQ          0x04
+#define BCOVE_BCUIRQ           0x05
+#define BCOVE_ADCIRQ           0x06
+#define BCOVE_CHGRIRQ0         0x07
+#define BCOVE_CHGRIRQ1         0x08
+#define BCOVE_GPIOIRQ          0x09
+#define BCOVE_CRITIRQ          0x0B
+
+#define BCOVE_MIRQLVL1         0x0C
+
+#define BCOVE_MPBIRQ           0x0D
+#define BCOVE_MTMUIRQ          0x0E
+#define BCOVE_MTHRMIRQ         0x0F
+#define BCOVE_MBCUIRQ          0x10
+#define BCOVE_MADCIRQ          0x11
+#define BCOVE_MCHGRIRQ0                0x12
+#define BCOVE_MCHGRIRQ1                0x13
+#define BCOVE_MGPIOIRQ         0x14
+#define BCOVE_MCRITIRQ         0x16
+
+#define BCOVE_SCHGRIRQ0                0x4E
+#define BCOVE_SCHGRIRQ1                0x4F
+
+/* Level 1 IRQs */
+#define BCOVE_LVL1_PWRBTN      BIT(0)  /* power button */
+#define BCOVE_LVL1_TMU         BIT(1)  /* time management unit */
+#define BCOVE_LVL1_THRM                BIT(2)  /* thermal */
+#define BCOVE_LVL1_BCU         BIT(3)  /* burst control unit */
+#define BCOVE_LVL1_ADC         BIT(4)  /* ADC */
+#define BCOVE_LVL1_CHGR                BIT(5)  /* charger */
+#define BCOVE_LVL1_GPIO                BIT(6)  /* GPIO */
+#define BCOVE_LVL1_CRIT                BIT(7)  /* critical event */
+
+/* Level 2 IRQs: power button */
+#define BCOVE_PBIRQ_PBTN       BIT(0)
+#define BCOVE_PBIRQ_UBTN       BIT(1)
+
+/* Level 2 IRQs: ADC */
+#define BCOVE_ADCIRQ_BATTEMP   BIT(2)
+#define BCOVE_ADCIRQ_SYSTEMP   BIT(3)
+#define BCOVE_ADCIRQ_BATTID    BIT(4)
+#define BCOVE_ADCIRQ_VIBATT    BIT(5)
+#define BCOVE_ADCIRQ_CCTICK    BIT(7)
+
+/* Level 2 IRQs: charger */
+#define BCOVE_CHGRIRQ_BAT0ALRT BIT(4)
+#define BCOVE_CHGRIRQ_BAT1ALRT BIT(5)
+#define BCOVE_CHGRIRQ_BATCRIT  BIT(6)
+
+#define BCOVE_CHGRIRQ_VBUSDET  BIT(0)
+#define BCOVE_CHGRIRQ_DCDET    BIT(1)
+#define BCOVE_CHGRIRQ_BATTDET  BIT(2)
+#define BCOVE_CHGRIRQ_USBIDDET BIT(3)
+
+#endif /* __INTEL_SOC_PMIC_MRFLD_H__ */
index 25a95e7..fc88d31 100644 (file)
@@ -7,6 +7,14 @@
 #ifndef __MFD_MT6397_CORE_H__
 #define __MFD_MT6397_CORE_H__
 
+#include <linux/mutex.h>
+
+enum chip_id {
+       MT6323_CHIP_ID = 0x23,
+       MT6391_CHIP_ID = 0x91,
+       MT6397_CHIP_ID = 0x97,
+};
+
 enum mt6397_irq_numbers {
        MT6397_IRQ_SPKL_AB = 0,
        MT6397_IRQ_SPKR_AB,
@@ -54,6 +62,9 @@ struct mt6397_chip {
        u16 irq_masks_cache[2];
        u16 int_con[2];
        u16 int_status[2];
+       u16 chip_id;
 };
 
+int mt6397_irq_init(struct mt6397_chip *chip);
+
 #endif /* __MFD_MT6397_CORE_H__ */
index 8cfda05..112dc66 100644 (file)
 struct device_node;
 
 #ifdef CONFIG_MFD_SYSCON
+extern struct regmap *device_node_to_regmap(struct device_node *np);
 extern struct regmap *syscon_node_to_regmap(struct device_node *np);
 extern struct regmap *syscon_regmap_lookup_by_compatible(const char *s);
 extern struct regmap *syscon_regmap_lookup_by_phandle(
                                        struct device_node *np,
                                        const char *property);
 #else
+static inline struct regmap *device_node_to_regmap(struct device_node *np)
+{
+       return ERR_PTR(-ENOTSUPP);
+}
+
 static inline struct regmap *syscon_node_to_regmap(struct device_node *np)
 {
        return ERR_PTR(-ENOTSUPP);
index 7f04754..7212006 100644 (file)
@@ -166,8 +166,6 @@ static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 #define MIGRATE_PFN_MIGRATE    (1UL << 1)
 #define MIGRATE_PFN_LOCKED     (1UL << 2)
 #define MIGRATE_PFN_WRITE      (1UL << 3)
-#define MIGRATE_PFN_DEVICE     (1UL << 4)
-#define MIGRATE_PFN_ERROR      (1UL << 5)
 #define MIGRATE_PFN_SHIFT      6
 
 static inline struct page *migrate_pfn_to_page(unsigned long mpfn)
@@ -182,107 +180,27 @@ static inline unsigned long migrate_pfn(unsigned long pfn)
        return (pfn << MIGRATE_PFN_SHIFT) | MIGRATE_PFN_VALID;
 }
 
-/*
- * struct migrate_vma_ops - migrate operation callback
- *
- * @alloc_and_copy: alloc destination memory and copy source memory to it
- * @finalize_and_map: allow caller to map the successfully migrated pages
- *
- *
- * The alloc_and_copy() callback happens once all source pages have been locked,
- * unmapped and checked (checked whether pinned or not). All pages that can be
- * migrated will have an entry in the src array set with the pfn value of the
- * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set (other
- * flags might be set but should be ignored by the callback).
- *
- * The alloc_and_copy() callback can then allocate destination memory and copy
- * source memory to it for all those entries (ie with MIGRATE_PFN_VALID and
- * MIGRATE_PFN_MIGRATE flag set). Once these are allocated and copied, the
- * callback must update each corresponding entry in the dst array with the pfn
- * value of the destination page and with the MIGRATE_PFN_VALID and
- * MIGRATE_PFN_LOCKED flags set (destination pages must have their struct pages
- * locked, via lock_page()).
- *
- * At this point the alloc_and_copy() callback is done and returns.
- *
- * Note that the callback does not have to migrate all the pages that are
- * marked with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration
- * from device memory to system memory (ie the MIGRATE_PFN_DEVICE flag is also
- * set in the src array entry). If the device driver cannot migrate a device
- * page back to system memory, then it must set the corresponding dst array
- * entry to MIGRATE_PFN_ERROR. This will trigger a SIGBUS if CPU tries to
- * access any of the virtual addresses originally backed by this page. Because
- * a SIGBUS is such a severe result for the userspace process, the device
- * driver should avoid setting MIGRATE_PFN_ERROR unless it is really in an
- * unrecoverable state.
- *
- * For empty entry inside CPU page table (pte_none() or pmd_none() is true) we
- * do set MIGRATE_PFN_MIGRATE flag inside the corresponding source array thus
- * allowing device driver to allocate device memory for those unback virtual
- * address. For this the device driver simply have to allocate device memory
- * and properly set the destination entry like for regular migration. Note that
- * this can still fails and thus inside the device driver must check if the
- * migration was successful for those entry inside the finalize_and_map()
- * callback just like for regular migration.
- *
- * THE alloc_and_copy() CALLBACK MUST NOT CHANGE ANY OF THE SRC ARRAY ENTRIES
- * OR BAD THINGS WILL HAPPEN !
- *
- *
- * The finalize_and_map() callback happens after struct page migration from
- * source to destination (destination struct pages are the struct pages for the
- * memory allocated by the alloc_and_copy() callback).  Migration can fail, and
- * thus the finalize_and_map() allows the driver to inspect which pages were
- * successfully migrated, and which were not. Successfully migrated pages will
- * have the MIGRATE_PFN_MIGRATE flag set for their src array entry.
- *
- * It is safe to update device page table from within the finalize_and_map()
- * callback because both destination and source page are still locked, and the
- * mmap_sem is held in read mode (hence no one can unmap the range being
- * migrated).
- *
- * Once callback is done cleaning up things and updating its page table (if it
- * chose to do so, this is not an obligation) then it returns. At this point,
- * the HMM core will finish up the final steps, and the migration is complete.
- *
- * THE finalize_and_map() CALLBACK MUST NOT CHANGE ANY OF THE SRC OR DST ARRAY
- * ENTRIES OR BAD THINGS WILL HAPPEN !
- */
-struct migrate_vma_ops {
-       void (*alloc_and_copy)(struct vm_area_struct *vma,
-                              const unsigned long *src,
-                              unsigned long *dst,
-                              unsigned long start,
-                              unsigned long end,
-                              void *private);
-       void (*finalize_and_map)(struct vm_area_struct *vma,
-                                const unsigned long *src,
-                                const unsigned long *dst,
-                                unsigned long start,
-                                unsigned long end,
-                                void *private);
+struct migrate_vma {
+       struct vm_area_struct   *vma;
+       /*
+        * Both src and dst array must be big enough for
+        * (end - start) >> PAGE_SHIFT entries.
+        *
+        * The src array must not be modified by the caller after
+        * migrate_vma_setup(), and the caller must not change the dst
+        * array after migrate_vma_pages() returns.
+        */
+       unsigned long           *dst;
+       unsigned long           *src;
+       unsigned long           cpages;
+       unsigned long           npages;
+       unsigned long           start;
+       unsigned long           end;
 };
 
-#if defined(CONFIG_MIGRATE_VMA_HELPER)
-int migrate_vma(const struct migrate_vma_ops *ops,
-               struct vm_area_struct *vma,
-               unsigned long start,
-               unsigned long end,
-               unsigned long *src,
-               unsigned long *dst,
-               void *private);
-#else
-static inline int migrate_vma(const struct migrate_vma_ops *ops,
-                             struct vm_area_struct *vma,
-                             unsigned long start,
-                             unsigned long end,
-                             unsigned long *src,
-                             unsigned long *dst,
-                             void *private)
-{
-       return -EINVAL;
-}
-#endif /* IS_ENABLED(CONFIG_MIGRATE_VMA_HELPER) */
+int migrate_vma_setup(struct migrate_vma *args);
+void migrate_vma_pages(struct migrate_vma *migrate);
+void migrate_vma_finalize(struct migrate_vma *migrate);
 
 #endif /* CONFIG_MIGRATION */
 
index f3773e8..cc1c230 100644 (file)
@@ -328,6 +328,7 @@ enum mlx5_event {
        MLX5_EVENT_TYPE_GPIO_EVENT         = 0x15,
        MLX5_EVENT_TYPE_PORT_MODULE_EVENT  = 0x16,
        MLX5_EVENT_TYPE_TEMP_WARN_EVENT    = 0x17,
+       MLX5_EVENT_TYPE_XRQ_ERROR          = 0x18,
        MLX5_EVENT_TYPE_REMOTE_CONFIG      = 0x19,
        MLX5_EVENT_TYPE_GENERAL_EVENT      = 0x22,
        MLX5_EVENT_TYPE_MONITOR_COUNTER    = 0x24,
@@ -345,6 +346,7 @@ enum mlx5_event {
        MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED = 0xe,
 
        MLX5_EVENT_TYPE_DCT_DRAINED        = 0x1c,
+       MLX5_EVENT_TYPE_DCT_KEY_VIOLATION  = 0x1d,
 
        MLX5_EVENT_TYPE_FPGA_ERROR         = 0x20,
        MLX5_EVENT_TYPE_FPGA_QP_ERROR      = 0x21,
@@ -584,6 +586,12 @@ struct mlx5_eqe_cq_err {
        u8      syndrome;
 };
 
+struct mlx5_eqe_xrq_err {
+       __be32  reserved1[5];
+       __be32  type_xrqn;
+       __be32  reserved2;
+};
+
 struct mlx5_eqe_port_state {
        u8      reserved0[8];
        u8      port;
@@ -698,6 +706,7 @@ union ev_data {
        struct mlx5_eqe_pps             pps;
        struct mlx5_eqe_dct             dct;
        struct mlx5_eqe_temp_warning    temp_warning;
+       struct mlx5_eqe_xrq_err         xrq_err;
 } __packed;
 
 struct mlx5_eqe {
index 0334ca9..cc29227 100644 (file)
@@ -805,6 +805,24 @@ static inline void set_compound_order(struct page *page, unsigned int order)
        page[1].compound_order = order;
 }
 
+/* Returns the number of pages in this potentially compound page. */
+static inline unsigned long compound_nr(struct page *page)
+{
+       return 1UL << compound_order(page);
+}
+
+/* Returns the number of bytes in this potentially compound page. */
+static inline unsigned long page_size(struct page *page)
+{
+       return PAGE_SIZE << compound_order(page);
+}
+
+/* Returns the number of bits needed for the number of bytes in a page. */
+static inline unsigned int page_shift(struct page *page)
+{
+       return PAGE_SHIFT + compound_order(page);
+}
+
 void free_compound_page(struct page *page);
 
 #ifdef CONFIG_MMU
@@ -1057,8 +1075,9 @@ static inline void put_user_page(struct page *page)
        put_page(page);
 }
 
-void put_user_pages_dirty(struct page **pages, unsigned long npages);
-void put_user_pages_dirty_lock(struct page **pages, unsigned long npages);
+void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
+                              bool make_dirty);
+
 void put_user_pages(struct page **pages, unsigned long npages);
 
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
@@ -1405,7 +1424,11 @@ extern void pagefault_out_of_memory(void);
 
 extern void show_free_areas(unsigned int flags, nodemask_t *nodemask);
 
+#ifdef CONFIG_MMU
 extern bool can_do_mlock(void);
+#else
+static inline bool can_do_mlock(void) { return false; }
+#endif
 extern int user_shm_lock(size_t, struct user_struct *);
 extern void user_shm_unlock(size_t, struct user_struct *);
 
@@ -1430,54 +1453,8 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address,
 void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
                unsigned long start, unsigned long end);
 
-/**
- * mm_walk - callbacks for walk_page_range
- * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
- *            this handler should only handle pud_trans_huge() puds.
- *            the pmd_entry or pte_entry callbacks will be used for
- *            regular PUDs.
- * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
- *            this handler is required to be able to handle
- *            pmd_trans_huge() pmds.  They may simply choose to
- *            split_huge_page() instead of handling it explicitly.
- * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
- * @pte_hole: if set, called for each hole at all levels
- * @hugetlb_entry: if set, called for each hugetlb entry
- * @test_walk: caller specific callback function to determine whether
- *             we walk over the current vma or not. Returning 0
- *             value means "do page table walk over the current vma,"
- *             and a negative one means "abort current page table walk
- *             right now." 1 means "skip the current vma."
- * @mm:        mm_struct representing the target process of page table walk
- * @vma:       vma currently walked (NULL if walking outside vmas)
- * @private:   private data for callbacks' usage
- *
- * (see the comment on walk_page_range() for more details)
- */
-struct mm_walk {
-       int (*pud_entry)(pud_t *pud, unsigned long addr,
-                        unsigned long next, struct mm_walk *walk);
-       int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
-                        unsigned long next, struct mm_walk *walk);
-       int (*pte_entry)(pte_t *pte, unsigned long addr,
-                        unsigned long next, struct mm_walk *walk);
-       int (*pte_hole)(unsigned long addr, unsigned long next,
-                       struct mm_walk *walk);
-       int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
-                            unsigned long addr, unsigned long next,
-                            struct mm_walk *walk);
-       int (*test_walk)(unsigned long addr, unsigned long next,
-                       struct mm_walk *walk);
-       struct mm_struct *mm;
-       struct vm_area_struct *vma;
-       void *private;
-};
-
 struct mmu_notifier_range;
 
-int walk_page_range(unsigned long addr, unsigned long end,
-               struct mm_walk *walk);
-int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk);
 void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
                unsigned long end, unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
@@ -1972,7 +1949,7 @@ static inline void pgtable_init(void)
        pgtable_cache_init();
 }
 
-static inline bool pgtable_page_ctor(struct page *page)
+static inline bool pgtable_pte_page_ctor(struct page *page)
 {
        if (!ptlock_init(page))
                return false;
@@ -1981,7 +1958,7 @@ static inline bool pgtable_page_ctor(struct page *page)
        return true;
 }
 
-static inline void pgtable_page_dtor(struct page *page)
+static inline void pgtable_pte_page_dtor(struct page *page)
 {
        ptlock_free(page);
        __ClearPageTable(page);
@@ -2351,6 +2328,8 @@ extern int install_special_mapping(struct mm_struct *mm,
                                   unsigned long addr, unsigned long len,
                                   unsigned long flags, struct page **pages);
 
+unsigned long randomize_stack_top(unsigned long stack_top);
+
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
@@ -2614,6 +2593,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 #define FOLL_COW       0x4000  /* internal GUP flag */
 #define FOLL_ANON      0x8000  /* don't do file mappings */
 #define FOLL_LONGTERM  0x10000 /* mapping lifetime is indefinite: see below */
+#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */
 
 /*
  * NOTE on FOLL_LONGTERM:
@@ -2891,5 +2871,12 @@ void __init setup_nr_node_ids(void);
 static inline void setup_nr_node_ids(void) {}
 #endif
 
+extern int memcmp_pages(struct page *page1, struct page *page2);
+
+static inline int pages_identical(struct page *page1, struct page *page2)
+{
+       return !memcmp_pages(page1, page2);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
index 6a7a108..2222fa7 100644 (file)
@@ -25,7 +25,6 @@
 
 struct address_space;
 struct mem_cgroup;
-struct hmm;
 
 /*
  * Each physical page in the system has a struct page associated with
@@ -139,6 +138,7 @@ struct page {
                struct {        /* Second tail page of compound page */
                        unsigned long _compound_pad_1;  /* compound_head */
                        unsigned long _compound_pad_2;
+                       /* For both global and memcg */
                        struct list_head deferred_list;
                };
                struct {        /* Page table pages */
@@ -383,6 +383,16 @@ struct mm_struct {
                unsigned long highest_vm_end;   /* highest vma end address */
                pgd_t * pgd;
 
+#ifdef CONFIG_MEMBARRIER
+               /**
+                * @membarrier_state: Flags controlling membarrier behavior.
+                *
+                * This field is close to @pgd to hopefully fit in the same
+                * cache-line, which needs to be touched by switch_mm().
+                */
+               atomic_t membarrier_state;
+#endif
+
                /**
                 * @mm_users: The number of users including userspace.
                 *
@@ -452,9 +462,7 @@ struct mm_struct {
                unsigned long flags; /* Must use atomic bitops to access */
 
                struct core_state *core_state; /* coredumping support */
-#ifdef CONFIG_MEMBARRIER
-               atomic_t membarrier_state;
-#endif
+
 #ifdef CONFIG_AIO
                spinlock_t                      ioctx_lock;
                struct kioctx_table __rcu       *ioctx_table;
@@ -511,11 +519,6 @@ struct mm_struct {
                atomic_long_t hugetlb_usage;
 #endif
                struct work_struct async_put_work;
-
-#ifdef CONFIG_HMM_MIRROR
-               /* HMM needs to track a few things per mm */
-               struct hmm *hmm;
-#endif
        } __randomize_layout;
 
        /*
index d7016dc..c1bc673 100644 (file)
@@ -36,6 +36,10 @@ struct vmacache {
        struct vm_area_struct *vmas[VMACACHE_SIZE];
 };
 
+/*
+ * When updating this, please also update the resident_page_types[] array in
+ * kernel/fork.c
+ */
 enum {
        MM_FILEPAGES,   /* Resident file mapping pages */
        MM_ANONPAGES,   /* Resident anonymous pages */
index b6c004b..1bd8e6a 100644 (file)
@@ -42,6 +42,10 @@ enum mmu_notifier_event {
 
 #ifdef CONFIG_MMU_NOTIFIER
 
+#ifdef CONFIG_LOCKDEP
+extern struct lockdep_map __mmu_notifier_invalidate_range_start_map;
+#endif
+
 /*
  * The mmu notifier_mm structure is allocated and installed in
  * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
@@ -211,6 +215,19 @@ struct mmu_notifier_ops {
         */
        void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
                                 unsigned long start, unsigned long end);
+
+       /*
+        * These callbacks are used with the get/put interface to manage the
+        * lifetime of the mmu_notifier memory. alloc_notifier() returns a new
+        * notifier for use with the mm.
+        *
+        * free_notifier() is only called after the mmu_notifier has been
+        * fully put, calls to any ops callback are prevented and no ops
+        * callbacks are currently running. It is called from a SRCU callback
+        * and cannot sleep.
+        */
+       struct mmu_notifier *(*alloc_notifier)(struct mm_struct *mm);
+       void (*free_notifier)(struct mmu_notifier *mn);
 };
 
 /*
@@ -227,6 +244,9 @@ struct mmu_notifier_ops {
 struct mmu_notifier {
        struct hlist_node hlist;
        const struct mmu_notifier_ops *ops;
+       struct mm_struct *mm;
+       struct rcu_head rcu;
+       unsigned int users;
 };
 
 static inline int mm_has_notifiers(struct mm_struct *mm)
@@ -234,14 +254,27 @@ static inline int mm_has_notifiers(struct mm_struct *mm)
        return unlikely(mm->mmu_notifier_mm);
 }
 
+struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops,
+                                            struct mm_struct *mm);
+static inline struct mmu_notifier *
+mmu_notifier_get(const struct mmu_notifier_ops *ops, struct mm_struct *mm)
+{
+       struct mmu_notifier *ret;
+
+       down_write(&mm->mmap_sem);
+       ret = mmu_notifier_get_locked(ops, mm);
+       up_write(&mm->mmap_sem);
+       return ret;
+}
+void mmu_notifier_put(struct mmu_notifier *mn);
+void mmu_notifier_synchronize(void);
+
 extern int mmu_notifier_register(struct mmu_notifier *mn,
                                 struct mm_struct *mm);
 extern int __mmu_notifier_register(struct mmu_notifier *mn,
                                   struct mm_struct *mm);
 extern void mmu_notifier_unregister(struct mmu_notifier *mn,
                                    struct mm_struct *mm);
-extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
-                                              struct mm_struct *mm);
 extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
 extern void __mmu_notifier_release(struct mm_struct *mm);
 extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
@@ -310,25 +343,36 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm,
 static inline void
 mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
 {
+       might_sleep();
+
+       lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
        if (mm_has_notifiers(range->mm)) {
                range->flags |= MMU_NOTIFIER_RANGE_BLOCKABLE;
                __mmu_notifier_invalidate_range_start(range);
        }
+       lock_map_release(&__mmu_notifier_invalidate_range_start_map);
 }
 
 static inline int
 mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range)
 {
+       int ret = 0;
+
+       lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
        if (mm_has_notifiers(range->mm)) {
                range->flags &= ~MMU_NOTIFIER_RANGE_BLOCKABLE;
-               return __mmu_notifier_invalidate_range_start(range);
+               ret = __mmu_notifier_invalidate_range_start(range);
        }
-       return 0;
+       lock_map_release(&__mmu_notifier_invalidate_range_start_map);
+       return ret;
 }
 
 static inline void
 mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range)
 {
+       if (mmu_notifier_range_blockable(range))
+               might_sleep();
+
        if (mm_has_notifiers(range->mm))
                __mmu_notifier_invalidate_range_end(range, false);
 }
@@ -482,9 +526,6 @@ static inline void mmu_notifier_range_init(struct mmu_notifier_range *range,
        set_pte_at(___mm, ___address, __ptep, ___pte);                  \
 })
 
-extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
-                                  void (*func)(struct rcu_head *rcu));
-
 #else /* CONFIG_MMU_NOTIFIER */
 
 struct mmu_notifier_range {
@@ -581,6 +622,10 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 #define pudp_huge_clear_flush_notify pudp_huge_clear_flush
 #define set_pte_at_notify set_pte_at
 
+static inline void mmu_notifier_synchronize(void)
+{
+}
+
 #endif /* CONFIG_MMU_NOTIFIER */
 
 #endif /* _LINUX_MMU_NOTIFIER_H */
index 3f38c30..bda2028 100644 (file)
@@ -235,6 +235,8 @@ enum node_stat_item {
        NR_SHMEM,               /* shmem pages (included tmpfs/GEM pages) */
        NR_SHMEM_THPS,
        NR_SHMEM_PMDMAPPED,
+       NR_FILE_THPS,
+       NR_FILE_PMDMAPPED,
        NR_ANON_THPS,
        NR_UNSTABLE_NFS,        /* NFS unstable pages */
        NR_VMSCAN_WRITE,
@@ -677,6 +679,14 @@ struct zonelist {
 extern struct page *mem_map;
 #endif
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct deferred_split {
+       spinlock_t split_queue_lock;
+       struct list_head split_queue;
+       unsigned long split_queue_len;
+};
+#endif
+
 /*
  * On NUMA machines, each NUMA node would have a pg_data_t to describe
  * it's memory layout. On UMA machines there is a single pglist_data which
@@ -756,9 +766,7 @@ typedef struct pglist_data {
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       spinlock_t split_queue_lock;
-       struct list_head split_queue;
-       unsigned long split_queue_len;
+       struct deferred_split deferred_split_queue;
 #endif
 
        /* Fields commonly accessed by the page reclaim scanner */
index 1455812..6d20895 100644 (file)
@@ -26,9 +26,6 @@
 #include <linux/percpu.h>
 #include <asm/module.h>
 
-/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */
-#define MODULE_SIG_STRING "~Module signature appended~\n"
-
 /* Not Yet Implemented */
 #define MODULE_SUPPORTED_DEVICE(name)
 
@@ -276,6 +273,8 @@ extern typeof(name) __mod_##type##__##name##_device_table           \
  * files require multiple MODULE_FIRMWARE() specifiers */
 #define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware)
 
+#define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns)
+
 struct notifier_block;
 
 #ifdef CONFIG_MODULES
diff --git a/include/linux/module_signature.h b/include/linux/module_signature.h
new file mode 100644 (file)
index 0000000..7eb4b00
--- /dev/null
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Module signature handling.
+ *
+ * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef _LINUX_MODULE_SIGNATURE_H
+#define _LINUX_MODULE_SIGNATURE_H
+
+#include <linux/types.h>
+
+/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */
+#define MODULE_SIG_STRING "~Module signature appended~\n"
+
+enum pkey_id_type {
+       PKEY_ID_PGP,            /* OpenPGP generated key ID */
+       PKEY_ID_X509,           /* X.509 arbitrary subjectKeyIdentifier */
+       PKEY_ID_PKCS7,          /* Signature in PKCS#7 message */
+};
+
+/*
+ * Module signature information block.
+ *
+ * The constituents of the signature section are, in order:
+ *
+ *     - Signer's name
+ *     - Key identifier
+ *     - Signature data
+ *     - Information block
+ */
+struct module_signature {
+       u8      algo;           /* Public-key crypto algorithm [0] */
+       u8      hash;           /* Digest algorithm [0] */
+       u8      id_type;        /* Key identifier type [PKEY_ID_PKCS7] */
+       u8      signer_len;     /* Length of signer's name [0] */
+       u8      key_id_len;     /* Length of key identifier [0] */
+       u8      __pad[3];
+       __be32  sig_len;        /* Length of signature data */
+};
+
+int mod_check_sig(const struct module_signature *ms, size_t file_len,
+                 const char *name);
+
+#endif /* _LINUX_MODULE_SIGNATURE_H */
index 4ca8c1c..249e8d9 100644 (file)
@@ -189,6 +189,9 @@ struct module;      /* only needed for owner field in mtd_info */
  */
 struct mtd_debug_info {
        struct dentry *dfs_dir;
+
+       const char *partname;
+       const char *partid;
 };
 
 struct mtd_info {
index cebc38b..0c74838 100644 (file)
@@ -346,7 +346,7 @@ static inline unsigned int nanddev_ntargets(const struct nand_device *nand)
 }
 
 /**
- * nanddev_neraseblocks() - Get the total number of erasablocks
+ * nanddev_neraseblocks() - Get the total number of eraseblocks
  * @nand: NAND device
  *
  * Return: the total number of eraseblocks exposed by @nand.
index 01306eb..d2c3cf2 100644 (file)
@@ -5,6 +5,9 @@
  * Copyright (C) 2008 Dmitry Baryshkov
  */
 
+#ifndef _MTD_SHARPSL_H
+#define _MTD_SHARPSL_H
+
 #include <linux/mtd/rawnand.h>
 #include <linux/mtd/nand_ecc.h>
 #include <linux/mtd/partitions.h>
@@ -16,3 +19,5 @@ struct sharpsl_nand_platform_data {
        unsigned int            nr_partitions;
        const char *const       *part_parsers;
 };
+
+#endif /* _MTD_SHARPSL_H */
index 9f57cdf..fc0b4b1 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/bitops.h>
 #include <linux/mtd/cfi.h>
 #include <linux/mtd/mtd.h>
+#include <linux/spi/spi-mem.h>
 
 /*
  * Manufacturer IDs
@@ -224,7 +225,6 @@ static inline u8 spi_nor_get_protocol_width(enum spi_nor_protocol proto)
        return spi_nor_get_protocol_data_nbits(proto);
 }
 
-#define SPI_NOR_MAX_CMD_SIZE   8
 enum spi_nor_ops {
        SPI_NOR_OPS_READ = 0,
        SPI_NOR_OPS_WRITE,
@@ -237,12 +237,12 @@ enum spi_nor_option_flags {
        SNOR_F_USE_FSR          = BIT(0),
        SNOR_F_HAS_SR_TB        = BIT(1),
        SNOR_F_NO_OP_CHIP_ERASE = BIT(2),
-       SNOR_F_S3AN_ADDR_DEFAULT = BIT(3),
-       SNOR_F_READY_XSR_RDY    = BIT(4),
-       SNOR_F_USE_CLSR         = BIT(5),
-       SNOR_F_BROKEN_RESET     = BIT(6),
-       SNOR_F_4B_OPCODES       = BIT(7),
-       SNOR_F_HAS_4BAIT        = BIT(8),
+       SNOR_F_READY_XSR_RDY    = BIT(3),
+       SNOR_F_USE_CLSR         = BIT(4),
+       SNOR_F_BROKEN_RESET     = BIT(5),
+       SNOR_F_4B_OPCODES       = BIT(6),
+       SNOR_F_HAS_4BAIT        = BIT(7),
+       SNOR_F_HAS_LOCK         = BIT(8),
 };
 
 /**
@@ -334,6 +334,195 @@ struct spi_nor_erase_map {
 };
 
 /**
+ * struct spi_nor_hwcaps - Structure for describing the hardware capabilities
+ * supported by the SPI controller (bus master).
+ * @mask:              the bitmask listing all the supported hw capabilities
+ */
+struct spi_nor_hwcaps {
+       u32     mask;
+};
+
+/*
+ * (Fast) Read capabilities.
+ * MUST be ordered by priority: the higher bit position, the higher priority.
+ * For performance reasons, Octal SPI protocols are preferred first, then Quad
+ * SPI protocols, then Dual SPI protocols, then Fast Read and lastly
+ * (Slow) Read.
+ */
+#define SNOR_HWCAPS_READ_MASK          GENMASK(14, 0)
+#define SNOR_HWCAPS_READ               BIT(0)
+#define SNOR_HWCAPS_READ_FAST          BIT(1)
+#define SNOR_HWCAPS_READ_1_1_1_DTR     BIT(2)
+
+#define SNOR_HWCAPS_READ_DUAL          GENMASK(6, 3)
+#define SNOR_HWCAPS_READ_1_1_2         BIT(3)
+#define SNOR_HWCAPS_READ_1_2_2         BIT(4)
+#define SNOR_HWCAPS_READ_2_2_2         BIT(5)
+#define SNOR_HWCAPS_READ_1_2_2_DTR     BIT(6)
+
+#define SNOR_HWCAPS_READ_QUAD          GENMASK(10, 7)
+#define SNOR_HWCAPS_READ_1_1_4         BIT(7)
+#define SNOR_HWCAPS_READ_1_4_4         BIT(8)
+#define SNOR_HWCAPS_READ_4_4_4         BIT(9)
+#define SNOR_HWCAPS_READ_1_4_4_DTR     BIT(10)
+
+#define SNOR_HWCAPS_READ_OCTAL         GENMASK(14, 11)
+#define SNOR_HWCAPS_READ_1_1_8         BIT(11)
+#define SNOR_HWCAPS_READ_1_8_8         BIT(12)
+#define SNOR_HWCAPS_READ_8_8_8         BIT(13)
+#define SNOR_HWCAPS_READ_1_8_8_DTR     BIT(14)
+
+/*
+ * Page Program capabilities.
+ * MUST be ordered by priority: the higher bit position, the higher priority.
+ * Like (Fast) Read capabilities, Octal/Quad SPI protocols are preferred to the
+ * legacy SPI 1-1-1 protocol.
+ * Note that Dual Page Programs are not supported because there is no existing
+ * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory
+ * implements such commands.
+ */
+#define SNOR_HWCAPS_PP_MASK    GENMASK(22, 16)
+#define SNOR_HWCAPS_PP         BIT(16)
+
+#define SNOR_HWCAPS_PP_QUAD    GENMASK(19, 17)
+#define SNOR_HWCAPS_PP_1_1_4   BIT(17)
+#define SNOR_HWCAPS_PP_1_4_4   BIT(18)
+#define SNOR_HWCAPS_PP_4_4_4   BIT(19)
+
+#define SNOR_HWCAPS_PP_OCTAL   GENMASK(22, 20)
+#define SNOR_HWCAPS_PP_1_1_8   BIT(20)
+#define SNOR_HWCAPS_PP_1_8_8   BIT(21)
+#define SNOR_HWCAPS_PP_8_8_8   BIT(22)
+
+#define SNOR_HWCAPS_X_X_X      (SNOR_HWCAPS_READ_2_2_2 |       \
+                                SNOR_HWCAPS_READ_4_4_4 |       \
+                                SNOR_HWCAPS_READ_8_8_8 |       \
+                                SNOR_HWCAPS_PP_4_4_4 |         \
+                                SNOR_HWCAPS_PP_8_8_8)
+
+#define SNOR_HWCAPS_DTR                (SNOR_HWCAPS_READ_1_1_1_DTR |   \
+                                SNOR_HWCAPS_READ_1_2_2_DTR |   \
+                                SNOR_HWCAPS_READ_1_4_4_DTR |   \
+                                SNOR_HWCAPS_READ_1_8_8_DTR)
+
+#define SNOR_HWCAPS_ALL                (SNOR_HWCAPS_READ_MASK |        \
+                                SNOR_HWCAPS_PP_MASK)
+
+struct spi_nor_read_command {
+       u8                      num_mode_clocks;
+       u8                      num_wait_states;
+       u8                      opcode;
+       enum spi_nor_protocol   proto;
+};
+
+struct spi_nor_pp_command {
+       u8                      opcode;
+       enum spi_nor_protocol   proto;
+};
+
+enum spi_nor_read_command_index {
+       SNOR_CMD_READ,
+       SNOR_CMD_READ_FAST,
+       SNOR_CMD_READ_1_1_1_DTR,
+
+       /* Dual SPI */
+       SNOR_CMD_READ_1_1_2,
+       SNOR_CMD_READ_1_2_2,
+       SNOR_CMD_READ_2_2_2,
+       SNOR_CMD_READ_1_2_2_DTR,
+
+       /* Quad SPI */
+       SNOR_CMD_READ_1_1_4,
+       SNOR_CMD_READ_1_4_4,
+       SNOR_CMD_READ_4_4_4,
+       SNOR_CMD_READ_1_4_4_DTR,
+
+       /* Octal SPI */
+       SNOR_CMD_READ_1_1_8,
+       SNOR_CMD_READ_1_8_8,
+       SNOR_CMD_READ_8_8_8,
+       SNOR_CMD_READ_1_8_8_DTR,
+
+       SNOR_CMD_READ_MAX
+};
+
+enum spi_nor_pp_command_index {
+       SNOR_CMD_PP,
+
+       /* Quad SPI */
+       SNOR_CMD_PP_1_1_4,
+       SNOR_CMD_PP_1_4_4,
+       SNOR_CMD_PP_4_4_4,
+
+       /* Octal SPI */
+       SNOR_CMD_PP_1_1_8,
+       SNOR_CMD_PP_1_8_8,
+       SNOR_CMD_PP_8_8_8,
+
+       SNOR_CMD_PP_MAX
+};
+
+/* Forward declaration that will be used in 'struct spi_nor_flash_parameter' */
+struct spi_nor;
+
+/**
+ * struct spi_nor_locking_ops - SPI NOR locking methods
+ * @lock:      lock a region of the SPI NOR.
+ * @unlock:    unlock a region of the SPI NOR.
+ * @is_locked: check if a region of the SPI NOR is completely locked
+ */
+struct spi_nor_locking_ops {
+       int (*lock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
+       int (*unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
+       int (*is_locked)(struct spi_nor *nor, loff_t ofs, uint64_t len);
+};
+
+/**
+ * struct spi_nor_flash_parameter - SPI NOR flash parameters and settings.
+ * Includes legacy flash parameters and settings that can be overwritten
+ * by the spi_nor_fixups hooks, or dynamically when parsing the JESD216
+ * Serial Flash Discoverable Parameters (SFDP) tables.
+ *
+ * @size:              the flash memory density in bytes.
+ * @page_size:         the page size of the SPI NOR flash memory.
+ * @hwcaps:            describes the read and page program hardware
+ *                     capabilities.
+ * @reads:             read capabilities ordered by priority: the higher index
+ *                      in the array, the higher priority.
+ * @page_programs:     page program capabilities ordered by priority: the
+ *                      higher index in the array, the higher priority.
+ * @erase_map:         the erase map parsed from the SFDP Sector Map Parameter
+ *                      Table.
+ * @quad_enable:       enables SPI NOR quad mode.
+ * @set_4byte:         puts the SPI NOR in 4 byte addressing mode.
+ * @convert_addr:      converts an absolute address into something the flash
+ *                      will understand. Particularly useful when pagesize is
+ *                      not a power-of-2.
+ * @setup:              configures the SPI NOR memory. Useful for SPI NOR
+ *                      flashes that deviate from the SPI NOR standard,
+ *                      e.g. different opcodes, specific address calculation,
+ *                      page size, etc.
+ * @locking_ops:       SPI NOR locking methods.
+ */
+struct spi_nor_flash_parameter {
+       u64                             size;
+       u32                             page_size;
+
+       struct spi_nor_hwcaps           hwcaps;
+       struct spi_nor_read_command     reads[SNOR_CMD_READ_MAX];
+       struct spi_nor_pp_command       page_programs[SNOR_CMD_PP_MAX];
+
+       struct spi_nor_erase_map        erase_map;
+
+       int (*quad_enable)(struct spi_nor *nor);
+       int (*set_4byte)(struct spi_nor *nor, bool enable);
+       u32 (*convert_addr)(struct spi_nor *nor, u32 addr);
+       int (*setup)(struct spi_nor *nor, const struct spi_nor_hwcaps *hwcaps);
+
+       const struct spi_nor_locking_ops *locking_ops;
+};
+
+/**
  * struct flash_info - Forward declaration of a structure used internally by
  *                    spi_nor_scan()
  */
@@ -344,6 +533,10 @@ struct flash_info;
  * @mtd:               point to a mtd_info structure
  * @lock:              the lock for the read/write/erase/lock/unlock operations
  * @dev:               point to a spi device, or a spi nor controller device.
+ * @spimem:            point to the spi mem device
+ * @bouncebuf:         bounce buffer used when the buffer passed by the MTD
+ *                      layer is not DMA-able
+ * @bouncebuf_size:    size of the bounce buffer
  * @info:              spi-nor part JDEC MFR id and other info
  * @page_size:         the page size of the SPI NOR
  * @addr_width:                number of address bytes
@@ -356,8 +549,6 @@ struct flash_info;
  * @read_proto:                the SPI protocol for read operations
  * @write_proto:       the SPI protocol for write operations
  * @reg_proto          the SPI protocol for read_reg/write_reg/erase operations
- * @cmd_buf:           used by the write_reg
- * @erase_map:         the erase map of the SPI NOR
  * @prepare:           [OPTIONAL] do some preparations for the
  *                     read/write/erase/lock/unlock operations
  * @unprepare:         [OPTIONAL] do some post work after the
@@ -369,19 +560,21 @@ struct flash_info;
  * @erase:             [DRIVER-SPECIFIC] erase a sector of the SPI NOR
  *                     at the offset @offs; if not provided by the driver,
  *                     spi-nor will send the erase opcode via write_reg()
- * @flash_lock:                [FLASH-SPECIFIC] lock a region of the SPI NOR
- * @flash_unlock:      [FLASH-SPECIFIC] unlock a region of the SPI NOR
- * @flash_is_locked:   [FLASH-SPECIFIC] check if a region of the SPI NOR is
- * @quad_enable:       [FLASH-SPECIFIC] enables SPI NOR quad mode
  * @clear_sr_bp:       [FLASH-SPECIFIC] clears the Block Protection Bits from
  *                     the SPI NOR Status Register.
- *                     completely locked
+ * @params:            [FLASH-SPECIFIC] SPI-NOR flash parameters and settings.
+ *                      The structure includes legacy flash parameters and
+ *                      settings that can be overwritten by the spi_nor_fixups
+ *                      hooks, or dynamically when parsing the SFDP tables.
  * @priv:              the private data
  */
 struct spi_nor {
        struct mtd_info         mtd;
        struct mutex            lock;
        struct device           *dev;
+       struct spi_mem          *spimem;
+       u8                      *bouncebuf;
+       size_t                  bouncebuf_size;
        const struct flash_info *info;
        u32                     page_size;
        u8                      addr_width;
@@ -394,8 +587,6 @@ struct spi_nor {
        enum spi_nor_protocol   reg_proto;
        bool                    sst_write_second;
        u32                     flags;
-       u8                      cmd_buf[SPI_NOR_MAX_CMD_SIZE];
-       struct spi_nor_erase_map        erase_map;
 
        int (*prepare)(struct spi_nor *nor, enum spi_nor_ops ops);
        void (*unprepare)(struct spi_nor *nor, enum spi_nor_ops ops);
@@ -408,11 +599,8 @@ struct spi_nor {
                        size_t len, const u_char *write_buf);
        int (*erase)(struct spi_nor *nor, loff_t offs);
 
-       int (*flash_lock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
-       int (*flash_unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
-       int (*flash_is_locked)(struct spi_nor *nor, loff_t ofs, uint64_t len);
-       int (*quad_enable)(struct spi_nor *nor);
        int (*clear_sr_bp)(struct spi_nor *nor);
+       struct spi_nor_flash_parameter params;
 
        void *priv;
 };
@@ -443,7 +631,7 @@ spi_nor_region_mark_overlay(struct spi_nor_erase_region *region)
 
 static bool __maybe_unused spi_nor_has_uniform_erase(const struct spi_nor *nor)
 {
-       return !!nor->erase_map.uniform_erase_type;
+       return !!nor->params.erase_map.uniform_erase_type;
 }
 
 static inline void spi_nor_set_flash_node(struct spi_nor *nor,
@@ -458,67 +646,6 @@ static inline struct device_node *spi_nor_get_flash_node(struct spi_nor *nor)
 }
 
 /**
- * struct spi_nor_hwcaps - Structure for describing the hardware capabilies
- * supported by the SPI controller (bus master).
- * @mask:              the bitmask listing all the supported hw capabilies
- */
-struct spi_nor_hwcaps {
-       u32     mask;
-};
-
-/*
- *(Fast) Read capabilities.
- * MUST be ordered by priority: the higher bit position, the higher priority.
- * As a matter of performances, it is relevant to use Octal SPI protocols first,
- * then Quad SPI protocols before Dual SPI protocols, Fast Read and lastly
- * (Slow) Read.
- */
-#define SNOR_HWCAPS_READ_MASK          GENMASK(14, 0)
-#define SNOR_HWCAPS_READ               BIT(0)
-#define SNOR_HWCAPS_READ_FAST          BIT(1)
-#define SNOR_HWCAPS_READ_1_1_1_DTR     BIT(2)
-
-#define SNOR_HWCAPS_READ_DUAL          GENMASK(6, 3)
-#define SNOR_HWCAPS_READ_1_1_2         BIT(3)
-#define SNOR_HWCAPS_READ_1_2_2         BIT(4)
-#define SNOR_HWCAPS_READ_2_2_2         BIT(5)
-#define SNOR_HWCAPS_READ_1_2_2_DTR     BIT(6)
-
-#define SNOR_HWCAPS_READ_QUAD          GENMASK(10, 7)
-#define SNOR_HWCAPS_READ_1_1_4         BIT(7)
-#define SNOR_HWCAPS_READ_1_4_4         BIT(8)
-#define SNOR_HWCAPS_READ_4_4_4         BIT(9)
-#define SNOR_HWCAPS_READ_1_4_4_DTR     BIT(10)
-
-#define SNOR_HWCAPS_READ_OCTAL         GENMASK(14, 11)
-#define SNOR_HWCAPS_READ_1_1_8         BIT(11)
-#define SNOR_HWCAPS_READ_1_8_8         BIT(12)
-#define SNOR_HWCAPS_READ_8_8_8         BIT(13)
-#define SNOR_HWCAPS_READ_1_8_8_DTR     BIT(14)
-
-/*
- * Page Program capabilities.
- * MUST be ordered by priority: the higher bit position, the higher priority.
- * Like (Fast) Read capabilities, Octal/Quad SPI protocols are preferred to the
- * legacy SPI 1-1-1 protocol.
- * Note that Dual Page Programs are not supported because there is no existing
- * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory
- * implements such commands.
- */
-#define SNOR_HWCAPS_PP_MASK    GENMASK(22, 16)
-#define SNOR_HWCAPS_PP         BIT(16)
-
-#define SNOR_HWCAPS_PP_QUAD    GENMASK(19, 17)
-#define SNOR_HWCAPS_PP_1_1_4   BIT(17)
-#define SNOR_HWCAPS_PP_1_4_4   BIT(18)
-#define SNOR_HWCAPS_PP_4_4_4   BIT(19)
-
-#define SNOR_HWCAPS_PP_OCTAL   GENMASK(22, 20)
-#define SNOR_HWCAPS_PP_1_1_8   BIT(20)
-#define SNOR_HWCAPS_PP_1_8_8   BIT(21)
-#define SNOR_HWCAPS_PP_8_8_8   BIT(22)
-
-/**
  * spi_nor_scan() - scan the SPI NOR
  * @nor:       the spi_nor structure
  * @name:      the chip type name
index 0a11712..570a60c 100644 (file)
@@ -490,6 +490,9 @@ extern const struct file_operations nfs_dir_operations;
 extern const struct dentry_operations nfs_dentry_operations;
 
 extern void nfs_force_lookup_revalidate(struct inode *dir);
+extern struct dentry *nfs_add_or_obtain(struct dentry *dentry,
+                       struct nfs_fh *fh, struct nfs_fattr *fattr,
+                       struct nfs4_label *label);
 extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh,
                        struct nfs_fattr *fattr, struct nfs4_label *label);
 extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags);
index 98d9049..10f8162 100644 (file)
@@ -6,6 +6,8 @@
 #ifndef _NVME_FC_DRIVER_H
 #define _NVME_FC_DRIVER_H 1
 
+#include <linux/scatterlist.h>
+
 
 /*
  * **********************  LLDD FC-NVME Host API ********************
index 0959295..682fd46 100644 (file)
@@ -18,6 +18,7 @@ struct page_ext_operations {
 
 enum page_ext_flags {
        PAGE_EXT_OWNER,
+       PAGE_EXT_OWNER_ACTIVE,
 #if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
        PAGE_EXT_YOUNG,
        PAGE_EXT_IDLE,
index c755245..37a4d9e 100644 (file)
@@ -333,6 +333,16 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
                        mapping_gfp_mask(mapping));
 }
 
+static inline struct page *find_subpage(struct page *page, pgoff_t offset)
+{
+       if (PageHuge(page))
+               return page;
+
+       VM_BUG_ON_PAGE(PageTail(page), page);
+
+       return page + (offset & (compound_nr(page) - 1));
+}
+
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
 struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
 unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
new file mode 100644 (file)
index 0000000..bddd975
--- /dev/null
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PAGEWALK_H
+#define _LINUX_PAGEWALK_H
+
+#include <linux/mm.h>
+
+struct mm_walk;
+
+/**
+ * struct mm_walk_ops - callbacks for walk_page_range
+ * @pud_entry:         if set, called for each non-empty PUD (2nd-level) entry
+ *                     this handler should only handle pud_trans_huge() puds.
+ *                     the pmd_entry or pte_entry callbacks will be used for
+ *                     regular PUDs.
+ * @pmd_entry:         if set, called for each non-empty PMD (3rd-level) entry
+ *                     this handler is required to be able to handle
+ *                     pmd_trans_huge() pmds.  They may simply choose to
+ *                     split_huge_page() instead of handling it explicitly.
+ * @pte_entry:         if set, called for each non-empty PTE (4th-level) entry
+ * @pte_hole:          if set, called for each hole at all levels
+ * @hugetlb_entry:     if set, called for each hugetlb entry
+ * @test_walk:         caller specific callback function to determine whether
+ *                     we walk over the current vma or not. Returning 0 means
+ *                     "do page table walk over the current vma", returning
+ *                     a negative value means "abort current page table walk
+ *                     right now" and returning 1 means "skip the current vma"
+ */
+struct mm_walk_ops {
+       int (*pud_entry)(pud_t *pud, unsigned long addr,
+                        unsigned long next, struct mm_walk *walk);
+       int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
+                        unsigned long next, struct mm_walk *walk);
+       int (*pte_entry)(pte_t *pte, unsigned long addr,
+                        unsigned long next, struct mm_walk *walk);
+       int (*pte_hole)(unsigned long addr, unsigned long next,
+                       struct mm_walk *walk);
+       int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
+                            unsigned long addr, unsigned long next,
+                            struct mm_walk *walk);
+       int (*test_walk)(unsigned long addr, unsigned long next,
+                       struct mm_walk *walk);
+};
+
+/**
+ * struct mm_walk - walk_page_range data
+ * @ops:       operation to call during the walk
+ * @mm:                mm_struct representing the target process of page table walk
+ * @vma:       vma currently walked (NULL if walking outside vmas)
+ * @private:   private data for callbacks' usage
+ *
+ * (see the comment on walk_page_range() for more details)
+ */
+struct mm_walk {
+       const struct mm_walk_ops *ops;
+       struct mm_struct *mm;
+       struct vm_area_struct *vma;
+       void *private;
+};
+
+int walk_page_range(struct mm_struct *mm, unsigned long start,
+               unsigned long end, const struct mm_walk_ops *ops,
+               void *private);
+int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
+               void *private);
+
+#endif /* _LINUX_PAGEWALK_H */
diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h
deleted file mode 100644 (file)
index 6706414..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *     aspm.h
- *
- *     PCI Express ASPM defines and function prototypes
- *
- *     Copyright (C) 2007 Intel Corp.
- *             Zhang Yanmin (yanmin.zhang@intel.com)
- *             Shaohua Li (shaohua.li@intel.com)
- *
- *     For more information, please consult the following manuals (look at
- *     http://www.pcisig.com/ for how to get them):
- *
- *     PCI Express Specification
- */
-
-#ifndef LINUX_ASPM_H
-#define LINUX_ASPM_H
-
-#include <linux/pci.h>
-
-#define PCIE_LINK_STATE_L0S    1
-#define PCIE_LINK_STATE_L1     2
-#define PCIE_LINK_STATE_CLKPM  4
-
-#ifdef CONFIG_PCIEASPM
-int pci_disable_link_state(struct pci_dev *pdev, int state);
-int pci_disable_link_state_locked(struct pci_dev *pdev, int state);
-void pcie_no_aspm(void);
-#else
-static inline int pci_disable_link_state(struct pci_dev *pdev, int state)
-{ return 0; }
-static inline void pcie_no_aspm(void) { }
-#endif
-
-#endif /* LINUX_ASPM_H */
index bca9bc3..8318a97 100644 (file)
@@ -30,8 +30,10 @@ struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev,
                                         unsigned int *nents, u32 length);
 void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl);
 void pci_p2pmem_publish(struct pci_dev *pdev, bool publish);
-int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-                     enum dma_data_direction dir);
+int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
+               int nents, enum dma_data_direction dir, unsigned long attrs);
+void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
+               int nents, enum dma_data_direction dir, unsigned long attrs);
 int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev,
                            bool *use_p2pdma);
 ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev,
@@ -81,11 +83,17 @@ static inline void pci_p2pmem_free_sgl(struct pci_dev *pdev,
 static inline void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
 {
 }
-static inline int pci_p2pdma_map_sg(struct device *dev,
-               struct scatterlist *sg, int nents, enum dma_data_direction dir)
+static inline int pci_p2pdma_map_sg_attrs(struct device *dev,
+               struct scatterlist *sg, int nents, enum dma_data_direction dir,
+               unsigned long attrs)
 {
        return 0;
 }
+static inline void pci_p2pdma_unmap_sg_attrs(struct device *dev,
+               struct scatterlist *sg, int nents, enum dma_data_direction dir,
+               unsigned long attrs)
+{
+}
 static inline int pci_p2pdma_enable_store(const char *page,
                struct pci_dev **p2p_dev, bool *use_p2pdma)
 {
@@ -111,4 +119,16 @@ static inline struct pci_dev *pci_p2pmem_find(struct device *client)
        return pci_p2pmem_find_many(&client, 1);
 }
 
+static inline int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg,
+                                   int nents, enum dma_data_direction dir)
+{
+       return pci_p2pdma_map_sg_attrs(dev, sg, nents, dir, 0);
+}
+
+static inline void pci_p2pdma_unmap_sg(struct device *dev,
+               struct scatterlist *sg, int nents, enum dma_data_direction dir)
+{
+       pci_p2pdma_unmap_sg_attrs(dev, sg, nents, dir, 0);
+}
+
 #endif /* _LINUX_PCI_P2P_H */
index 82e4cd1..f9088c8 100644 (file)
@@ -6,12 +6,18 @@
  *     Copyright 1994, Drew Eckhardt
  *     Copyright 1997--1999 Martin Mares <mj@ucw.cz>
  *
+ *     PCI Express ASPM defines and function prototypes
+ *     Copyright (c) 2007 Intel Corp.
+ *             Zhang Yanmin (yanmin.zhang@intel.com)
+ *             Shaohua Li (shaohua.li@intel.com)
+ *
  *     For more information, please consult the following manuals (look at
  *     http://www.pcisig.com/ for how to get them):
  *
  *     PCI BIOS Specification
  *     PCI Local Bus Specification
  *     PCI to PCI Bridge Specification
+ *     PCI Express Specification
  *     PCI System Design Guide
  */
 #ifndef LINUX_PCI_H
@@ -145,11 +151,6 @@ static inline const char *pci_power_name(pci_power_t state)
        return pci_power_names[1 + (__force int) state];
 }
 
-#define PCI_PM_D2_DELAY                200
-#define PCI_PM_D3_WAIT         10
-#define PCI_PM_D3COLD_WAIT     100
-#define PCI_PM_BUS_WAIT                50
-
 /**
  * typedef pci_channel_state_t
  *
@@ -418,7 +419,6 @@ struct pci_dev {
        unsigned int    broken_intx_masking:1;  /* INTx masking can't be used */
        unsigned int    io_window_1k:1;         /* Intel bridge 1K I/O windows */
        unsigned int    irq_managed:1;
-       unsigned int    has_secondary_link:1;
        unsigned int    non_compliant_bars:1;   /* Broken BARs; ignore them */
        unsigned int    is_probed:1;            /* Device probing in progress */
        unsigned int    link_active_reporting:1;/* Device capable of reporting link active */
@@ -649,9 +649,6 @@ static inline struct pci_dev *pci_upstream_bridge(struct pci_dev *dev)
        return dev->bus->self;
 }
 
-struct device *pci_get_host_bridge_device(struct pci_dev *dev);
-void pci_put_host_bridge_device(struct device *dev);
-
 #ifdef CONFIG_PCI_MSI
 static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev)
 {
@@ -925,6 +922,11 @@ enum {
        PCI_SCAN_ALL_PCIE_DEVS  = 0x00000040,   /* Scan all, not just dev 0 */
 };
 
+#define PCI_IRQ_LEGACY         (1 << 0) /* Allow legacy interrupts */
+#define PCI_IRQ_MSI            (1 << 1) /* Allow MSI interrupts */
+#define PCI_IRQ_MSIX           (1 << 2) /* Allow MSI-X interrupts */
+#define PCI_IRQ_AFFINITY       (1 << 3) /* Auto-assign affinity */
+
 /* These external functions are only available when PCI support is enabled */
 #ifdef CONFIG_PCI
 
@@ -969,7 +971,7 @@ resource_size_t pcibios_align_resource(void *, const struct resource *,
                                resource_size_t,
                                resource_size_t);
 
-/* Weak but can be overriden by arch */
+/* Weak but can be overridden by arch */
 void pci_fixup_cardbus(struct pci_bus *);
 
 /* Generic PCI functions used internally */
@@ -995,7 +997,6 @@ struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
 int pci_scan_root_bus_bridge(struct pci_host_bridge *bridge);
 struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
                                int busnr);
-void pcie_update_link_speed(struct pci_bus *bus, u16 link_status);
 struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
                                 const char *name,
                                 struct hotplug_slot *hotplug);
@@ -1241,19 +1242,12 @@ int pci_wake_from_d3(struct pci_dev *dev, bool enable);
 int pci_prepare_to_sleep(struct pci_dev *dev);
 int pci_back_from_sleep(struct pci_dev *dev);
 bool pci_dev_run_wake(struct pci_dev *dev);
-bool pci_check_pme_status(struct pci_dev *dev);
-void pci_pme_wakeup_bus(struct pci_bus *bus);
 void pci_d3cold_enable(struct pci_dev *dev);
 void pci_d3cold_disable(struct pci_dev *dev);
 bool pcie_relaxed_ordering_enabled(struct pci_dev *dev);
 void pci_wakeup_bus(struct pci_bus *bus);
 void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state);
 
-/* PCI Virtual Channel */
-int pci_save_vc_state(struct pci_dev *dev);
-void pci_restore_vc_state(struct pci_dev *dev);
-void pci_allocate_vc_save_buffers(struct pci_dev *dev);
-
 /* For use by arch with custom probe code */
 void set_pcie_port_type(struct pci_dev *pdev);
 void set_pcie_hotplug_bridge(struct pci_dev *pdev);
@@ -1297,8 +1291,6 @@ int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *);
 void pci_release_selected_regions(struct pci_dev *, int);
 
 /* drivers/pci/bus.c */
-struct pci_bus *pci_bus_get(struct pci_bus *bus);
-void pci_bus_put(struct pci_bus *bus);
 void pci_add_resource(struct list_head *resources, struct resource *res);
 void pci_add_resource_offset(struct list_head *resources, struct resource *res,
                             resource_size_t offset);
@@ -1408,11 +1400,6 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus,
 int pci_set_vga_state(struct pci_dev *pdev, bool decode,
                      unsigned int command_bits, u32 flags);
 
-#define PCI_IRQ_LEGACY         (1 << 0) /* Allow legacy interrupts */
-#define PCI_IRQ_MSI            (1 << 1) /* Allow MSI interrupts */
-#define PCI_IRQ_MSIX           (1 << 2) /* Allow MSI-X interrupts */
-#define PCI_IRQ_AFFINITY       (1 << 3) /* Auto-assign affinity */
-
 /*
  * Virtual interrupts allow for more interrupts to be allocated
  * than the device has interrupts for. These are not programmed
@@ -1517,14 +1504,6 @@ static inline int pci_irq_get_node(struct pci_dev *pdev, int vec)
 }
 #endif
 
-static inline int
-pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
-                     unsigned int max_vecs, unsigned int flags)
-{
-       return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, flags,
-                                             NULL);
-}
-
 /**
  * pci_irqd_intx_xlate() - Translate PCI INTx value to an IRQ domain hwirq
  * @d: the INTx IRQ domain
@@ -1565,10 +1544,22 @@ extern bool pcie_ports_native;
 #define pcie_ports_native      false
 #endif
 
+#define PCIE_LINK_STATE_L0S    1
+#define PCIE_LINK_STATE_L1     2
+#define PCIE_LINK_STATE_CLKPM  4
+
 #ifdef CONFIG_PCIEASPM
+int pci_disable_link_state(struct pci_dev *pdev, int state);
+int pci_disable_link_state_locked(struct pci_dev *pdev, int state);
+void pcie_no_aspm(void);
 bool pcie_aspm_support_enabled(void);
 bool pcie_aspm_enabled(struct pci_dev *pdev);
 #else
+static inline int pci_disable_link_state(struct pci_dev *pdev, int state)
+{ return 0; }
+static inline int pci_disable_link_state_locked(struct pci_dev *pdev, int state)
+{ return 0; }
+static inline void pcie_no_aspm(void) { }
 static inline bool pcie_aspm_support_enabled(void) { return false; }
 static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; }
 #endif
@@ -1579,23 +1570,8 @@ bool pci_aer_available(void);
 static inline bool pci_aer_available(void) { return false; }
 #endif
 
-#ifdef CONFIG_PCIE_ECRC
-void pcie_set_ecrc_checking(struct pci_dev *dev);
-void pcie_ecrc_get_policy(char *str);
-#else
-static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { }
-static inline void pcie_ecrc_get_policy(char *str) { }
-#endif
-
 bool pci_ats_disabled(void);
 
-#ifdef CONFIG_PCIE_PTM
-int pci_enable_ptm(struct pci_dev *dev, u8 *granularity);
-#else
-static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
-{ return -EINVAL; }
-#endif
-
 void pci_cfg_access_lock(struct pci_dev *dev);
 bool pci_cfg_access_trylock(struct pci_dev *dev);
 void pci_cfg_access_unlock(struct pci_dev *dev);
@@ -1749,11 +1725,6 @@ static inline void pci_release_regions(struct pci_dev *dev) { }
 
 static inline unsigned long pci_address_to_pio(phys_addr_t addr) { return -1; }
 
-static inline void pci_block_cfg_access(struct pci_dev *dev) { }
-static inline int pci_block_cfg_access_in_atomic(struct pci_dev *dev)
-{ return 0; }
-static inline void pci_unblock_cfg_access(struct pci_dev *dev) { }
-
 static inline struct pci_bus *pci_find_next_bus(const struct pci_bus *from)
 { return NULL; }
 static inline struct pci_dev *pci_get_slot(struct pci_bus *bus,
@@ -1782,17 +1753,36 @@ static inline const struct pci_device_id *pci_match_id(const struct pci_device_i
                                                         struct pci_dev *dev)
 { return NULL; }
 static inline bool pci_ats_disabled(void) { return true; }
+
+static inline int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
+{
+       return -EINVAL;
+}
+
+static inline int
+pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
+                              unsigned int max_vecs, unsigned int flags,
+                              struct irq_affinity *aff_desc)
+{
+       return -ENOSPC;
+}
 #endif /* CONFIG_PCI */
 
+static inline int
+pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
+                     unsigned int max_vecs, unsigned int flags)
+{
+       return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, flags,
+                                             NULL);
+}
+
 #ifdef CONFIG_PCI_ATS
 /* Address Translation Service */
-void pci_ats_init(struct pci_dev *dev);
 int pci_enable_ats(struct pci_dev *dev, int ps);
 void pci_disable_ats(struct pci_dev *dev);
 int pci_ats_queue_depth(struct pci_dev *dev);
 int pci_ats_page_aligned(struct pci_dev *dev);
 #else
-static inline void pci_ats_init(struct pci_dev *d) { }
 static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; }
 static inline void pci_disable_ats(struct pci_dev *d) { }
 static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; }
@@ -1803,7 +1793,7 @@ static inline int pci_ats_page_aligned(struct pci_dev *dev) { return 0; }
 
 #include <asm/pci.h>
 
-/* These two functions provide almost identical functionality. Depennding
+/* These two functions provide almost identical functionality. Depending
  * on the architecture, one will be implemented as a wrapper around the
  * other (in drivers/pci/mmap.c).
  *
@@ -1872,25 +1862,9 @@ static inline const char *pci_name(const struct pci_dev *pdev)
        return dev_name(&pdev->dev);
 }
 
-
-/*
- * Some archs don't want to expose struct resource to userland as-is
- * in sysfs and /proc
- */
-#ifdef HAVE_ARCH_PCI_RESOURCE_TO_USER
 void pci_resource_to_user(const struct pci_dev *dev, int bar,
                          const struct resource *rsrc,
                          resource_size_t *start, resource_size_t *end);
-#else
-static inline void pci_resource_to_user(const struct pci_dev *dev, int bar,
-               const struct resource *rsrc, resource_size_t *start,
-               resource_size_t *end)
-{
-       *start = rsrc->start;
-       *end = rsrc->end;
-}
-#endif /* HAVE_ARCH_PCI_RESOURCE_TO_USER */
-
 
 /*
  * The world is not perfect and supplies us with broken PCI devices.
@@ -2032,10 +2006,6 @@ extern unsigned long pci_cardbus_mem_size;
 extern u8 pci_dfl_cache_line_size;
 extern u8 pci_cache_line_size;
 
-extern unsigned long pci_hotplug_io_size;
-extern unsigned long pci_hotplug_mem_size;
-extern unsigned long pci_hotplug_bus_size;
-
 /* Architecture-specific versions may override these (weak) */
 void pcibios_disable_device(struct pci_dev *dev);
 void pcibios_set_master(struct pci_dev *dev);
@@ -2305,10 +2275,6 @@ int pci_vpd_find_info_keyword(const u8 *buf, unsigned int off,
 #ifdef CONFIG_OF
 struct device_node;
 struct irq_domain;
-void pci_set_of_node(struct pci_dev *dev);
-void pci_release_of_node(struct pci_dev *dev);
-void pci_set_bus_of_node(struct pci_bus *bus);
-void pci_release_bus_of_node(struct pci_bus *bus);
 struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus);
 int pci_parse_request_of_pci_ranges(struct device *dev,
                                    struct list_head *resources,
@@ -2318,10 +2284,6 @@ int pci_parse_request_of_pci_ranges(struct device *dev,
 struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus);
 
 #else  /* CONFIG_OF */
-static inline void pci_set_of_node(struct pci_dev *dev) { }
-static inline void pci_release_of_node(struct pci_dev *dev) { }
-static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
-static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
 static inline struct irq_domain *
 pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; }
 static inline int pci_parse_request_of_pci_ranges(struct device *dev,
@@ -2435,4 +2397,7 @@ void pci_uevent_ers(struct pci_dev *pdev, enum  pci_ers_result err_type);
 #define pci_notice_ratelimited(pdev, fmt, arg...) \
        dev_notice_ratelimited(&(pdev)->dev, fmt, ##arg)
 
+#define pci_info_ratelimited(pdev, fmt, arg...) \
+       dev_info_ratelimited(&(pdev)->dev, fmt, ##arg)
+
 #endif /* LINUX_PCI_H */
index f694eb2..b482e42 100644 (file)
@@ -86,114 +86,14 @@ void pci_hp_deregister(struct hotplug_slot *slot);
 #define pci_hp_initialize(slot, bus, nr, name) \
        __pci_hp_initialize(slot, bus, nr, name, THIS_MODULE, KBUILD_MODNAME)
 
-/* PCI Setting Record (Type 0) */
-struct hpp_type0 {
-       u32 revision;
-       u8  cache_line_size;
-       u8  latency_timer;
-       u8  enable_serr;
-       u8  enable_perr;
-};
-
-/* PCI-X Setting Record (Type 1) */
-struct hpp_type1 {
-       u32 revision;
-       u8  max_mem_read;
-       u8  avg_max_split;
-       u16 tot_max_split;
-};
-
-/* PCI Express Setting Record (Type 2) */
-struct hpp_type2 {
-       u32 revision;
-       u32 unc_err_mask_and;
-       u32 unc_err_mask_or;
-       u32 unc_err_sever_and;
-       u32 unc_err_sever_or;
-       u32 cor_err_mask_and;
-       u32 cor_err_mask_or;
-       u32 adv_err_cap_and;
-       u32 adv_err_cap_or;
-       u16 pci_exp_devctl_and;
-       u16 pci_exp_devctl_or;
-       u16 pci_exp_lnkctl_and;
-       u16 pci_exp_lnkctl_or;
-       u32 sec_unc_err_sever_and;
-       u32 sec_unc_err_sever_or;
-       u32 sec_unc_err_mask_and;
-       u32 sec_unc_err_mask_or;
-};
-
-/*
- * _HPX PCI Express Setting Record (Type 3)
- */
-struct hpx_type3 {
-       u16 device_type;
-       u16 function_type;
-       u16 config_space_location;
-       u16 pci_exp_cap_id;
-       u16 pci_exp_cap_ver;
-       u16 pci_exp_vendor_id;
-       u16 dvsec_id;
-       u16 dvsec_rev;
-       u16 match_offset;
-       u32 match_mask_and;
-       u32 match_value;
-       u16 reg_offset;
-       u32 reg_mask_and;
-       u32 reg_mask_or;
-};
-
-struct hotplug_program_ops {
-       void (*program_type0)(struct pci_dev *dev, struct hpp_type0 *hpp);
-       void (*program_type1)(struct pci_dev *dev, struct hpp_type1 *hpp);
-       void (*program_type2)(struct pci_dev *dev, struct hpp_type2 *hpp);
-       void (*program_type3)(struct pci_dev *dev, struct hpx_type3 *hpp);
-};
-
-enum hpx_type3_dev_type {
-       HPX_TYPE_ENDPOINT       = BIT(0),
-       HPX_TYPE_LEG_END        = BIT(1),
-       HPX_TYPE_RC_END         = BIT(2),
-       HPX_TYPE_RC_EC          = BIT(3),
-       HPX_TYPE_ROOT_PORT      = BIT(4),
-       HPX_TYPE_UPSTREAM       = BIT(5),
-       HPX_TYPE_DOWNSTREAM     = BIT(6),
-       HPX_TYPE_PCI_BRIDGE     = BIT(7),
-       HPX_TYPE_PCIE_BRIDGE    = BIT(8),
-};
-
-enum hpx_type3_fn_type {
-       HPX_FN_NORMAL           = BIT(0),
-       HPX_FN_SRIOV_PHYS       = BIT(1),
-       HPX_FN_SRIOV_VIRT       = BIT(2),
-};
-
-enum hpx_type3_cfg_loc {
-       HPX_CFG_PCICFG          = 0,
-       HPX_CFG_PCIE_CAP        = 1,
-       HPX_CFG_PCIE_CAP_EXT    = 2,
-       HPX_CFG_VEND_CAP        = 3,
-       HPX_CFG_DVSEC           = 4,
-       HPX_CFG_MAX,
-};
-
 #ifdef CONFIG_ACPI
 #include <linux/acpi.h>
-int pci_acpi_program_hp_params(struct pci_dev *dev,
-                              const struct hotplug_program_ops *hp_ops);
 bool pciehp_is_native(struct pci_dev *bridge);
 int acpi_get_hp_hw_control_from_firmware(struct pci_dev *bridge);
 bool shpchp_is_native(struct pci_dev *bridge);
 int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle);
 int acpi_pci_detect_ejectable(acpi_handle handle);
 #else
-static inline int pci_acpi_program_hp_params(struct pci_dev *dev,
-                                   const struct hotplug_program_ops *hp_ops)
-{
-       return -ENODEV;
-}
-
 static inline int acpi_get_hp_hw_control_from_firmware(struct pci_dev *bridge)
 {
        return 0;
index de1b75e..21a5724 100644 (file)
 #define PCI_VENDOR_ID_MYRICOM          0x14c1
 
 #define PCI_VENDOR_ID_MEDIATEK         0x14c3
+#define PCI_DEVICE_ID_MEDIATEK_7629    0x7629
 
 #define PCI_VENDOR_ID_TITAN            0x14D2
 #define PCI_DEVICE_ID_TITAN_010L       0x8001
 
 #define PCI_VENDOR_ID_ASMEDIA          0x1b21
 
+#define PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS    0x1c36
+
 #define PCI_VENDOR_ID_CIRCUITCO                0x1cc8
 #define PCI_SUBSYSTEM_ID_CIRCUITCO_MINNOWBOARD 0x0001
 
index 7ccb875..9841568 100644 (file)
@@ -5513,6 +5513,18 @@ struct ec_params_fp_seed {
        uint8_t seed[FP_CONTEXT_TPM_BYTES];
 } __ec_align4;
 
+#define EC_CMD_FP_ENC_STATUS 0x0409
+
+/* FP TPM seed has been set or not */
+#define FP_ENC_STATUS_SEED_SET BIT(0)
+
+struct ec_response_fp_encryption_status {
+       /* Used bits in encryption engine status */
+       uint32_t valid_flags;
+       /* Encryption engine status */
+       uint32_t status;
+} __ec_align4;
+
 /*****************************************************************************/
 /* Touchpad MCU commands: range 0x0500-0x05FF */
 
index 0c587d4..b5b7a34 100644 (file)
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
 #ifndef __TI_SYSC_DATA_H__
 #define __TI_SYSC_DATA_H__
 
@@ -47,6 +49,7 @@ struct sysc_regbits {
        s8 emufree_shift;
 };
 
+#define SYSC_MODULE_QUIRK_SGX          BIT(18)
 #define SYSC_MODULE_QUIRK_HDQ1W                BIT(17)
 #define SYSC_MODULE_QUIRK_I2C          BIT(16)
 #define SYSC_MODULE_QUIRK_WDT          BIT(15)
@@ -70,7 +73,7 @@ struct sysc_regbits {
 
 /**
  * struct sysc_capabilities - capabilities for an interconnect target module
- *
+ * @type: sysc type identifier for the module
  * @sysc_mask: bitmask of supported SYSCONFIG register bits
  * @regbits: bitmask of SYSCONFIG register bits
  * @mod_quirks: bitmask of module specific quirks
@@ -85,8 +88,9 @@ struct sysc_capabilities {
 /**
  * struct sysc_config - configuration for an interconnect target module
  * @sysc_val: configured value for sysc register
+ * @syss_mask: configured mask value for SYSSTATUS register
  * @midlemodes: bitmask of supported master idle modes
- * @sidlemodes: bitmask of supported master idle modes
+ * @sidlemodes: bitmask of supported slave idle modes
  * @srst_udelay: optional delay needed after OCP soft reset
  * @quirks: bitmask of enabled quirks
  */
index cefd374..c09d67e 100644 (file)
@@ -488,13 +488,6 @@ extern int hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
 extern void print_hex_dump(const char *level, const char *prefix_str,
                           int prefix_type, int rowsize, int groupsize,
                           const void *buf, size_t len, bool ascii);
-#if defined(CONFIG_DYNAMIC_DEBUG)
-#define print_hex_dump_bytes(prefix_str, prefix_type, buf, len)        \
-       dynamic_hex_dump(prefix_str, prefix_type, 16, 1, buf, len, true)
-#else
-extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
-                                const void *buf, size_t len);
-#endif /* defined(CONFIG_DYNAMIC_DEBUG) */
 #else
 static inline void print_hex_dump(const char *level, const char *prefix_str,
                                  int prefix_type, int rowsize, int groupsize,
@@ -526,4 +519,19 @@ static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type,
 }
 #endif
 
+/**
+ * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params
+ * @prefix_str: string to prefix each line with;
+ *  caller supplies trailing spaces for alignment if desired
+ * @prefix_type: controls whether prefix of an offset, address, or none
+ *  is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE)
+ * @buf: data blob to dump
+ * @len: number of bytes in the @buf
+ *
+ * Calls print_hex_dump(), with log level of KERN_DEBUG,
+ * rowsize of 16, groupsize of 1, and ASCII output included.
+ */
+#define print_hex_dump_bytes(prefix_str, prefix_type, buf, len)        \
+       print_hex_dump_debug(prefix_str, prefix_type, 16, 1, buf, len, true)
+
 #endif
index 24632a7..b2c9c46 100644 (file)
@@ -262,7 +262,7 @@ struct pwm_ops {
        int (*capture)(struct pwm_chip *chip, struct pwm_device *pwm,
                       struct pwm_capture *result, unsigned long timeout);
        int (*apply)(struct pwm_chip *chip, struct pwm_device *pwm,
-                    struct pwm_state *state);
+                    const struct pwm_state *state);
        void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm,
                          struct pwm_state *state);
        struct module *owner;
@@ -316,7 +316,7 @@ struct pwm_capture {
 /* PWM user APIs */
 struct pwm_device *pwm_request(int pwm_id, const char *label);
 void pwm_free(struct pwm_device *pwm);
-int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state);
+int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state);
 int pwm_adjust_config(struct pwm_device *pwm);
 
 /**
index 898f595..74efca1 100644 (file)
@@ -225,7 +225,7 @@ struct qed_rdma_start_in_params {
 
 struct qed_rdma_add_user_out_params {
        u16 dpi;
-       u64 dpi_addr;
+       void __iomem *dpi_addr;
        u64 dpi_phys_addr;
        u32 dpi_size;
        u16 wid_count;
diff --git a/include/linux/quicklist.h b/include/linux/quicklist.h
deleted file mode 100644 (file)
index 034982c..0000000
+++ /dev/null
@@ -1,94 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef LINUX_QUICKLIST_H
-#define LINUX_QUICKLIST_H
-/*
- * Fast allocations and disposal of pages. Pages must be in the condition
- * as needed after allocation when they are freed. Per cpu lists of pages
- * are kept that only contain node local pages.
- *
- * (C) 2007, SGI. Christoph Lameter <cl@linux.com>
- */
-#include <linux/kernel.h>
-#include <linux/gfp.h>
-#include <linux/percpu.h>
-
-#ifdef CONFIG_QUICKLIST
-
-struct quicklist {
-       void *page;
-       int nr_pages;
-};
-
-DECLARE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK];
-
-/*
- * The two key functions quicklist_alloc and quicklist_free are inline so
- * that they may be custom compiled for the platform.
- * Specifying a NULL ctor can remove constructor support. Specifying
- * a constant quicklist allows the determination of the exact address
- * in the per cpu area.
- *
- * The fast patch in quicklist_alloc touched only a per cpu cacheline and
- * the first cacheline of the page itself. There is minmal overhead involved.
- */
-static inline void *quicklist_alloc(int nr, gfp_t flags, void (*ctor)(void *))
-{
-       struct quicklist *q;
-       void **p = NULL;
-
-       q =&get_cpu_var(quicklist)[nr];
-       p = q->page;
-       if (likely(p)) {
-               q->page = p[0];
-               p[0] = NULL;
-               q->nr_pages--;
-       }
-       put_cpu_var(quicklist);
-       if (likely(p))
-               return p;
-
-       p = (void *)__get_free_page(flags | __GFP_ZERO);
-       if (ctor && p)
-               ctor(p);
-       return p;
-}
-
-static inline void __quicklist_free(int nr, void (*dtor)(void *), void *p,
-       struct page *page)
-{
-       struct quicklist *q;
-
-       q = &get_cpu_var(quicklist)[nr];
-       *(void **)p = q->page;
-       q->page = p;
-       q->nr_pages++;
-       put_cpu_var(quicklist);
-}
-
-static inline void quicklist_free(int nr, void (*dtor)(void *), void *pp)
-{
-       __quicklist_free(nr, dtor, pp, virt_to_page(pp));
-}
-
-static inline void quicklist_free_page(int nr, void (*dtor)(void *),
-                                                       struct page *page)
-{
-       __quicklist_free(nr, dtor, page_address(page), page);
-}
-
-void quicklist_trim(int nr, void (*dtor)(void *),
-       unsigned long min_pages, unsigned long max_free);
-
-unsigned long quicklist_total_size(void);
-
-#else
-
-static inline unsigned long quicklist_total_size(void)
-{
-       return 0;
-}
-
-#endif
-
-#endif /* LINUX_QUICKLIST_H */
-
index dc905a4..185d948 100644 (file)
@@ -22,7 +22,7 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb)
 /* i_mutex must being held */
 static inline bool is_quota_modification(struct inode *inode, struct iattr *ia)
 {
-       return (ia->ia_valid & ATTR_SIZE && ia->ia_size != inode->i_size) ||
+       return (ia->ia_valid & ATTR_SIZE) ||
                (ia->ia_valid & ATTR_UID && !uid_eq(ia->ia_uid, inode->i_uid)) ||
                (ia->ia_valid & ATTR_GID && !gid_eq(ia->ia_gid, inode->i_gid));
 }
index 179faab..fdd421b 100644 (file)
@@ -60,41 +60,87 @@ rb_insert_augmented_cached(struct rb_node *node,
        rb_insert_augmented(node, &root->rb_root, augment);
 }
 
-#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield,      \
-                            rbtype, rbaugmented, rbcompute)            \
+/*
+ * Template for declaring augmented rbtree callbacks (generic case)
+ *
+ * RBSTATIC:    'static' or empty
+ * RBNAME:      name of the rb_augment_callbacks structure
+ * RBSTRUCT:    struct type of the tree nodes
+ * RBFIELD:     name of struct rb_node field within RBSTRUCT
+ * RBAUGMENTED: name of field within RBSTRUCT holding data for subtree
+ * RBCOMPUTE:   name of function that recomputes the RBAUGMENTED data
+ */
+
+#define RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME,                         \
+                            RBSTRUCT, RBFIELD, RBAUGMENTED, RBCOMPUTE) \
 static inline void                                                     \
-rbname ## _propagate(struct rb_node *rb, struct rb_node *stop)         \
+RBNAME ## _propagate(struct rb_node *rb, struct rb_node *stop)         \
 {                                                                      \
        while (rb != stop) {                                            \
-               rbstruct *node = rb_entry(rb, rbstruct, rbfield);       \
-               rbtype augmented = rbcompute(node);                     \
-               if (node->rbaugmented == augmented)                     \
+               RBSTRUCT *node = rb_entry(rb, RBSTRUCT, RBFIELD);       \
+               if (RBCOMPUTE(node, true))                              \
                        break;                                          \
-               node->rbaugmented = augmented;                          \
-               rb = rb_parent(&node->rbfield);                         \
+               rb = rb_parent(&node->RBFIELD);                         \
        }                                                               \
 }                                                                      \
 static inline void                                                     \
-rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)                \
+RBNAME ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)                \
 {                                                                      \
-       rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);            \
-       rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);            \
-       new->rbaugmented = old->rbaugmented;                            \
+       RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD);            \
+       RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD);            \
+       new->RBAUGMENTED = old->RBAUGMENTED;                            \
 }                                                                      \
 static void                                                            \
-rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)      \
+RBNAME ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)      \
 {                                                                      \
-       rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);            \
-       rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);            \
-       new->rbaugmented = old->rbaugmented;                            \
-       old->rbaugmented = rbcompute(old);                              \
+       RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD);            \
+       RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD);            \
+       new->RBAUGMENTED = old->RBAUGMENTED;                            \
+       RBCOMPUTE(old, false);                                          \
 }                                                                      \
-rbstatic const struct rb_augment_callbacks rbname = {                  \
-       .propagate = rbname ## _propagate,                              \
-       .copy = rbname ## _copy,                                        \
-       .rotate = rbname ## _rotate                                     \
+RBSTATIC const struct rb_augment_callbacks RBNAME = {                  \
+       .propagate = RBNAME ## _propagate,                              \
+       .copy = RBNAME ## _copy,                                        \
+       .rotate = RBNAME ## _rotate                                     \
 };
 
+/*
+ * Template for declaring augmented rbtree callbacks,
+ * computing RBAUGMENTED scalar as max(RBCOMPUTE(node)) for all subtree nodes.
+ *
+ * RBSTATIC:    'static' or empty
+ * RBNAME:      name of the rb_augment_callbacks structure
+ * RBSTRUCT:    struct type of the tree nodes
+ * RBFIELD:     name of struct rb_node field within RBSTRUCT
+ * RBTYPE:      type of the RBAUGMENTED field
+ * RBAUGMENTED: name of RBTYPE field within RBSTRUCT holding data for subtree
+ * RBCOMPUTE:   name of function that returns the per-node RBTYPE scalar
+ */
+
+#define RB_DECLARE_CALLBACKS_MAX(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD,        \
+                                RBTYPE, RBAUGMENTED, RBCOMPUTE)              \
+static inline bool RBNAME ## _compute_max(RBSTRUCT *node, bool exit)         \
+{                                                                            \
+       RBSTRUCT *child;                                                      \
+       RBTYPE max = RBCOMPUTE(node);                                         \
+       if (node->RBFIELD.rb_left) {                                          \
+               child = rb_entry(node->RBFIELD.rb_left, RBSTRUCT, RBFIELD);   \
+               if (child->RBAUGMENTED > max)                                 \
+                       max = child->RBAUGMENTED;                             \
+       }                                                                     \
+       if (node->RBFIELD.rb_right) {                                         \
+               child = rb_entry(node->RBFIELD.rb_right, RBSTRUCT, RBFIELD);  \
+               if (child->RBAUGMENTED > max)                                 \
+                       max = child->RBAUGMENTED;                             \
+       }                                                                     \
+       if (exit && node->RBAUGMENTED == max)                                 \
+               return true;                                                  \
+       node->RBAUGMENTED = max;                                              \
+       return false;                                                         \
+}                                                                            \
+RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME,                                       \
+                    RBSTRUCT, RBFIELD, RBAUGMENTED, RBNAME ## _compute_max)
+
 
 #define        RB_RED          0
 #define        RB_BLACK        1
index 563290f..75c97e4 100644 (file)
@@ -6,16 +6,11 @@
 
 /*
  * rcuwait provides a way of blocking and waking up a single
- * task in an rcu-safe manner; where it is forbidden to use
- * after exit_notify(). task_struct is not properly rcu protected,
- * unless dealing with rcu-aware lists, ie: find_task_by_*().
+ * task in an rcu-safe manner.
  *
- * Alternatively we have task_rcu_dereference(), but the return
- * semantics have different implications which would break the
- * wakeup side. The only time @task is non-nil is when a user is
- * blocked (or checking if it needs to) on a condition, and reset
- * as soon as we know that the condition has succeeded and are
- * awoken.
+ * The only time @task is non-nil is when a user is blocked (or
+ * checking if it needs to) on a condition, and reset as soon as we
+ * know that the condition has succeeded and are awoken.
  */
 struct rcuwait {
        struct task_struct __rcu *task;
@@ -37,13 +32,6 @@ extern void rcuwait_wake_up(struct rcuwait *w);
  */
 #define rcuwait_wait_event(w, condition)                               \
 ({                                                                     \
-       /*                                                              \
-        * Complain if we are called after do_exit()/exit_notify(),     \
-        * as we cannot rely on the rcu critical region for the         \
-        * wakeup side.                                                 \
-        */                                                             \
-       WARN_ON(current->exit_state);                                   \
-                                                                       \
        rcu_assign_pointer((w)->task, current);                         \
        for (;;) {                                                      \
                /*                                                      \
index b75b282..2c2e56b 100644 (file)
@@ -958,6 +958,10 @@ struct task_struct {
        struct mutex_waiter             *blocked_on;
 #endif
 
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+       int                             non_block_count;
+#endif
+
 #ifdef CONFIG_TRACE_IRQFLAGS
        unsigned int                    irq_events;
        unsigned long                   hardirq_enable_ip;
@@ -1126,7 +1130,10 @@ struct task_struct {
 
        struct tlbflush_unmap_batch     tlb_ubc;
 
-       struct rcu_head                 rcu;
+       union {
+               refcount_t              rcu_users;
+               struct rcu_head         rcu;
+       };
 
        /* Cache last used pipe for splice(): */
        struct pipe_inode_info          *splice_pipe;
@@ -1835,7 +1842,10 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
  * running or not.
  */
 #ifndef vcpu_is_preempted
-# define vcpu_is_preempted(cpu)        false
+static inline bool vcpu_is_preempted(int cpu)
+{
+       return false;
+}
 #endif
 
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
index 4a79440..e677001 100644 (file)
@@ -362,16 +362,16 @@ enum {
 
 static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
 {
+       if (current->mm != mm)
+               return;
        if (likely(!(atomic_read(&mm->membarrier_state) &
                     MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE)))
                return;
        sync_core_before_usermode();
 }
 
-static inline void membarrier_execve(struct task_struct *t)
-{
-       atomic_set(&t->mm->membarrier_state, 0);
-}
+extern void membarrier_exec_mmap(struct mm_struct *mm);
+
 #else
 #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
 static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
@@ -380,7 +380,7 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
 {
 }
 #endif
-static inline void membarrier_execve(struct task_struct *t)
+static inline void membarrier_exec_mmap(struct mm_struct *mm)
 {
 }
 static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
index 3d90ed8..4b1c3b6 100644 (file)
@@ -119,7 +119,7 @@ static inline void put_task_struct(struct task_struct *t)
                __put_task_struct(t);
 }
 
-struct task_struct *task_rcu_dereference(struct task_struct **ptask);
+void put_task_struct_rcu_user(struct task_struct *task);
 
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
 extern int arch_task_struct_size __read_mostly;
index 5f7441a..a8d59d6 100644 (file)
@@ -77,6 +77,54 @@ enum lsm_event {
        LSM_POLICY_CHANGE,
 };
 
+/*
+ * These are reasons that can be passed to the security_locked_down()
+ * LSM hook. Lockdown reasons that protect kernel integrity (ie, the
+ * ability for userland to modify kernel code) are placed before
+ * LOCKDOWN_INTEGRITY_MAX.  Lockdown reasons that protect kernel
+ * confidentiality (ie, the ability for userland to extract
+ * information from the running kernel that would otherwise be
+ * restricted) are placed before LOCKDOWN_CONFIDENTIALITY_MAX.
+ *
+ * LSM authors should note that the semantics of any given lockdown
+ * reason are not guaranteed to be stable - the same reason may block
+ * one set of features in one kernel release, and a slightly different
+ * set of features in a later kernel release. LSMs that seek to expose
+ * lockdown policy at any level of granularity other than "none",
+ * "integrity" or "confidentiality" are responsible for either
+ * ensuring that they expose a consistent level of functionality to
+ * userland, or ensuring that userland is aware that this is
+ * potentially a moving target. It is easy to misuse this information
+ * in a way that could break userspace. Please be careful not to do
+ * so.
+ *
+ * If you add to this, remember to extend lockdown_reasons in
+ * security/lockdown/lockdown.c.
+ */
+enum lockdown_reason {
+       LOCKDOWN_NONE,
+       LOCKDOWN_MODULE_SIGNATURE,
+       LOCKDOWN_DEV_MEM,
+       LOCKDOWN_KEXEC,
+       LOCKDOWN_HIBERNATION,
+       LOCKDOWN_PCI_ACCESS,
+       LOCKDOWN_IOPORT,
+       LOCKDOWN_MSR,
+       LOCKDOWN_ACPI_TABLES,
+       LOCKDOWN_PCMCIA_CIS,
+       LOCKDOWN_TIOCSSERIAL,
+       LOCKDOWN_MODULE_PARAMETERS,
+       LOCKDOWN_MMIOTRACE,
+       LOCKDOWN_DEBUGFS,
+       LOCKDOWN_INTEGRITY_MAX,
+       LOCKDOWN_KCORE,
+       LOCKDOWN_KPROBES,
+       LOCKDOWN_BPF_READ,
+       LOCKDOWN_PERF,
+       LOCKDOWN_TRACEFS,
+       LOCKDOWN_CONFIDENTIALITY_MAX,
+};
+
 /* These functions are in security/commoncap.c */
 extern int cap_capable(const struct cred *cred, struct user_namespace *ns,
                       int cap, unsigned int opts);
@@ -195,6 +243,7 @@ int unregister_blocking_lsm_notifier(struct notifier_block *nb);
 
 /* prototypes */
 extern int security_init(void);
+extern int early_security_init(void);
 
 /* Security operations */
 int security_binder_set_context_mgr(struct task_struct *mgr);
@@ -259,7 +308,8 @@ int security_dentry_create_files_as(struct dentry *dentry, int mode,
                                        struct qstr *name,
                                        const struct cred *old,
                                        struct cred *new);
-
+int security_path_notify(const struct path *path, u64 mask,
+                                       unsigned int obj_type);
 int security_inode_alloc(struct inode *inode);
 void security_inode_free(struct inode *inode);
 int security_inode_init_security(struct inode *inode, struct inode *dir,
@@ -387,11 +437,11 @@ int security_ismaclabel(const char *name);
 int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
 int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
 void security_release_secctx(char *secdata, u32 seclen);
-
 void security_inode_invalidate_secctx(struct inode *inode);
 int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen);
 int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen);
 int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen);
+int security_locked_down(enum lockdown_reason what);
 #else /* CONFIG_SECURITY */
 
 static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data)
@@ -423,6 +473,11 @@ static inline int security_init(void)
        return 0;
 }
 
+static inline int early_security_init(void)
+{
+       return 0;
+}
+
 static inline int security_binder_set_context_mgr(struct task_struct *mgr)
 {
        return 0;
@@ -621,6 +676,12 @@ static inline int security_move_mount(const struct path *from_path,
        return 0;
 }
 
+static inline int security_path_notify(const struct path *path, u64 mask,
+                               unsigned int obj_type)
+{
+       return 0;
+}
+
 static inline int security_inode_alloc(struct inode *inode)
 {
        return 0;
@@ -1204,6 +1265,10 @@ static inline int security_inode_getsecctx(struct inode *inode, void **ctx, u32
 {
        return -EOPNOTSUPP;
 }
+static inline int security_locked_down(enum lockdown_reason what)
+{
+       return 0;
+}
 #endif /* CONFIG_SECURITY */
 
 #ifdef CONFIG_SECURITY_NETWORK
index 9443caf..0f80123 100644 (file)
@@ -69,7 +69,7 @@ struct shrinker {
 
        /* These are for internal use */
        struct list_head list;
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
        /* ID in shrinker_idr */
        int id;
 #endif
@@ -81,6 +81,11 @@ struct shrinker {
 /* Flags */
 #define SHRINKER_NUMA_AWARE    (1 << 0)
 #define SHRINKER_MEMCG_AWARE   (1 << 1)
+/*
+ * For now this flag only makes sense when the shrinker is also MEMCG_AWARE;
+ * a non-MEMCG_AWARE shrinker should not have this flag set.
+ */
+#define SHRINKER_NONSLAB       (1 << 2)
 
 extern int prealloc_shrinker(struct shrinker *shrinker);
 extern void register_shrinker_prepared(struct shrinker *shrinker);
index 56c9c7e..ab2b98a 100644 (file)
@@ -595,68 +595,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
        return __kmalloc_node(size, flags, node);
 }
 
-struct memcg_cache_array {
-       struct rcu_head rcu;
-       struct kmem_cache *entries[0];
-};
-
-/*
- * This is the main placeholder for memcg-related information in kmem caches.
- * Both the root cache and the child caches will have it. For the root cache,
- * this will hold a dynamically allocated array large enough to hold
- * information about the currently limited memcgs in the system. To allow the
- * array to be accessed without taking any locks, on relocation we free the old
- * version only after a grace period.
- *
- * Root and child caches hold different metadata.
- *
- * @root_cache:        Common to root and child caches.  NULL for root, pointer to
- *             the root cache for children.
- *
- * The following fields are specific to root caches.
- *
- * @memcg_caches: kmemcg ID indexed table of child caches.  This table is
- *             used to index child cachces during allocation and cleared
- *             early during shutdown.
- *
- * @root_caches_node: List node for slab_root_caches list.
- *
- * @children:  List of all child caches.  While the child caches are also
- *             reachable through @memcg_caches, a child cache remains on
- *             this list until it is actually destroyed.
- *
- * The following fields are specific to child caches.
- *
- * @memcg:     Pointer to the memcg this cache belongs to.
- *
- * @children_node: List node for @root_cache->children list.
- *
- * @kmem_caches_node: List node for @memcg->kmem_caches list.
- */
-struct memcg_cache_params {
-       struct kmem_cache *root_cache;
-       union {
-               struct {
-                       struct memcg_cache_array __rcu *memcg_caches;
-                       struct list_head __root_caches_node;
-                       struct list_head children;
-                       bool dying;
-               };
-               struct {
-                       struct mem_cgroup *memcg;
-                       struct list_head children_node;
-                       struct list_head kmem_caches_node;
-                       struct percpu_ref refcnt;
-
-                       void (*work_fn)(struct kmem_cache *);
-                       union {
-                               struct rcu_head rcu_head;
-                               struct work_struct work;
-                       };
-               };
-       };
-};
-
 int memcg_update_all_caches(int num_memcgs);
 
 /**
index bea46bd..ea78720 100644 (file)
@@ -4,6 +4,8 @@
 #ifndef __SOUNDWIRE_H
 #define __SOUNDWIRE_H
 
+#include <linux/mod_devicetable.h>
+
 struct sdw_bus;
 struct sdw_slave;
 
@@ -377,6 +379,8 @@ struct sdw_slave_prop {
  * @dynamic_frame: Dynamic frame shape supported
  * @err_threshold: Number of times that software may retry sending a single
  * command
+ * @mclk_freq: clock reference passed to SoundWire Master, in Hz.
+ * @hw_disabled: if true, the Master is not functional, typically due to pin-mux
  */
 struct sdw_master_prop {
        u32 revision;
@@ -391,6 +395,8 @@ struct sdw_master_prop {
        u32 default_col;
        bool dynamic_frame;
        u32 err_threshold;
+       u32 mclk_freq;
+       bool hw_disabled;
 };
 
 int sdw_master_read_prop(struct sdw_bus *bus);
@@ -538,6 +544,7 @@ struct sdw_slave_ops {
  * @bus: Bus handle
  * @ops: Slave callback ops
  * @prop: Slave properties
+ * @debugfs: Slave debugfs
  * @node: node for bus list
  * @port_ready: Port ready completion flag for each Slave port
  * @dev_num: Device Number assigned by Bus
@@ -549,6 +556,9 @@ struct sdw_slave {
        struct sdw_bus *bus;
        const struct sdw_slave_ops *ops;
        struct sdw_slave_prop prop;
+#ifdef CONFIG_DEBUG_FS
+       struct dentry *debugfs;
+#endif
        struct list_head node;
        struct completion *port_ready;
        u16 dev_num;
@@ -718,6 +728,7 @@ struct sdw_master_ops {
  * Bit set implies used number, bit clear implies unused number.
  * @bus_lock: bus lock
  * @msg_lock: message lock
+ * @compute_params: points to Bus resource management implementation
  * @ops: Master callback ops
  * @port_ops: Master port callback ops
  * @params: Current bus parameters
@@ -725,6 +736,7 @@ struct sdw_master_ops {
  * @m_rt_list: List of Master instance of all stream(s) running on Bus. This
  * is used to compute and program bus bandwidth, clock, frame shape,
  * transport and port parameters
+ * @debugfs: Bus debugfs
  * @defer_msg: Defer message
  * @clk_stop_timeout: Clock stop timeout computed
  * @bank_switch_timeout: Bank switch timeout computed
@@ -739,11 +751,15 @@ struct sdw_bus {
        DECLARE_BITMAP(assigned, SDW_MAX_DEVICES);
        struct mutex bus_lock;
        struct mutex msg_lock;
+       int (*compute_params)(struct sdw_bus *bus);
        const struct sdw_master_ops *ops;
        const struct sdw_master_port_ops *port_ops;
        struct sdw_bus_params params;
        struct sdw_master_prop prop;
        struct list_head m_rt_list;
+#ifdef CONFIG_DEBUG_FS
+       struct dentry *debugfs;
+#endif
        struct sdw_defer defer_msg;
        unsigned int clk_stop_timeout;
        u32 bank_switch_timeout;
@@ -828,7 +844,7 @@ struct sdw_stream_params {
  * @m_rt_count: Count of Master runtime(s) in this stream
  */
 struct sdw_stream_runtime {
-       char *name;
+       const char *name;
        struct sdw_stream_params params;
        enum sdw_stream_state state;
        enum sdw_stream_type type;
@@ -836,7 +852,7 @@ struct sdw_stream_runtime {
        int m_rt_count;
 };
 
-struct sdw_stream_runtime *sdw_alloc_stream(char *stream_name);
+struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name);
 void sdw_release_stream(struct sdw_stream_runtime *stream);
 int sdw_stream_add_master(struct sdw_bus *bus,
                struct sdw_stream_config *stream_config,
index 4d70da4..c9427cb 100644 (file)
@@ -8,6 +8,7 @@
  * struct sdw_intel_ops: Intel audio driver callback ops
  *
  * @config_stream: configure the stream with the hw_params
+ * the first argument containing the context is mandatory
  */
 struct sdw_intel_ops {
        int (*config_stream)(void *arg, void *substream,
index 4deb11f..b2f9df7 100644 (file)
@@ -474,8 +474,9 @@ static inline void memcpy_and_pad(void *dest, size_t dest_len,
  * But this can lead to bugs due to typos, or if prefix is a pointer
  * and not a constant. Instead use str_has_prefix().
  *
- * Returns: 0 if @str does not start with @prefix
-         strlen(@prefix) if @str does start with @prefix
+ * Returns:
+ * * strlen(@prefix) if @str starts with @prefix
+ * * 0 if @str does not start with @prefix
  */
 static __always_inline size_t str_has_prefix(const char *str, const char *prefix)
 {
index c7f38e8..f860372 100644 (file)
@@ -87,6 +87,7 @@ struct cache_detail {
                                              int has_died);
 
        struct cache_head *     (*alloc)(void);
+       void                    (*flush)(void);
        int                     (*match)(struct cache_head *orig, struct cache_head *new);
        void                    (*init)(struct cache_head *orig, struct cache_head *new);
        void                    (*update)(struct cache_head *orig, struct cache_head *new);
@@ -107,9 +108,9 @@ struct cache_detail {
        /* fields for communication over channel */
        struct list_head        queue;
 
-       atomic_t                readers;                /* how many time is /chennel open */
-       time_t                  last_close;             /* if no readers, when did last close */
-       time_t                  last_warn;              /* when we last warned about no readers */
+       atomic_t                writers;                /* how many times /channel is open */
+       time_t                  last_close;             /* if no writers, when did last close */
+       time_t                  last_warn;              /* when we last warned about no writers */
 
        union {
                struct proc_dir_entry   *procfs;
index 27536b9..a6ef351 100644 (file)
@@ -242,9 +242,6 @@ void                rpc_sleep_on_priority_timeout(struct rpc_wait_queue *queue,
 void           rpc_sleep_on_priority(struct rpc_wait_queue *,
                                        struct rpc_task *,
                                        int priority);
-void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
-               struct rpc_wait_queue *queue,
-               struct rpc_task *task);
 void           rpc_wake_up_queued_task(struct rpc_wait_queue *,
                                        struct rpc_task *);
 void           rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *,
index 981f0d7..40f6588 100644 (file)
@@ -42,6 +42,7 @@
 
 #ifndef SVC_RDMA_H
 #define SVC_RDMA_H
+#include <linux/llist.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/rpc_rdma.h>
@@ -107,8 +108,7 @@ struct svcxprt_rdma {
        struct list_head     sc_read_complete_q;
        struct work_struct   sc_work;
 
-       spinlock_t           sc_recv_lock;
-       struct list_head     sc_recv_ctxts;
+       struct llist_head    sc_recv_ctxts;
 };
 /* sc_flags */
 #define RDMAXPRT_CONN_PENDING  3
@@ -125,6 +125,7 @@ enum {
 #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
 
 struct svc_rdma_recv_ctxt {
+       struct llist_node       rc_node;
        struct list_head        rc_list;
        struct ib_recv_wr       rc_recv_wr;
        struct ib_cqe           rc_cqe;
@@ -200,7 +201,6 @@ extern struct svc_xprt_class svc_rdma_bc_class;
 #endif
 
 /* svc_rdma.c */
-extern struct workqueue_struct *svc_rdma_wq;
 extern int svc_rdma_init(void);
 extern void svc_rdma_cleanup(void);
 
index 8a87d8b..f33e501 100644 (file)
@@ -186,7 +186,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p)
 extern void xdr_shift_buf(struct xdr_buf *, size_t);
 extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
 extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
-extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int);
+extern int xdr_buf_read_mic(struct xdr_buf *, struct xdr_netobj *, unsigned int);
 extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 
index 13e108b..d783e15 100644 (file)
@@ -352,6 +352,7 @@ bool                        xprt_prepare_transmit(struct rpc_task *task);
 void                   xprt_request_enqueue_transmit(struct rpc_task *task);
 void                   xprt_request_enqueue_receive(struct rpc_task *task);
 void                   xprt_request_wait_receive(struct rpc_task *task);
+void                   xprt_request_dequeue_xprt(struct rpc_task *task);
 bool                   xprt_request_need_retransmit(struct rpc_task *task);
 void                   xprt_transmit(struct rpc_task *task);
 void                   xprt_end_transmit(struct rpc_task *task);
index 86fc38f..16c239e 100644 (file)
@@ -49,9 +49,9 @@
  * fully-chunked NFS message (read chunks are the largest). Note only
  * a single chunk type per message is supported currently.
  */
-#define RPCRDMA_MIN_SLOT_TABLE (2U)
+#define RPCRDMA_MIN_SLOT_TABLE (4U)
 #define RPCRDMA_DEF_SLOT_TABLE (128U)
-#define RPCRDMA_MAX_SLOT_TABLE (256U)
+#define RPCRDMA_MAX_SLOT_TABLE (16384U)
 
 #define RPCRDMA_MIN_INLINE  (1024)     /* min inline thresh */
 #define RPCRDMA_DEF_INLINE  (4096)     /* default inline thresh */
index de2c67a..063c0c1 100644 (file)
@@ -340,6 +340,7 @@ extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_all(void);
 extern void rotate_reclaimable_page(struct page *page);
 extern void deactivate_file_page(struct page *page);
+extern void deactivate_page(struct page *page);
 extern void mark_page_lazyfree(struct page *page);
 extern void swap_setup(void);
 
@@ -364,6 +365,7 @@ extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
 extern unsigned long vm_total_pages;
 
+extern unsigned long reclaim_pages(struct list_head *page_list);
 #ifdef CONFIG_NUMA
 extern int node_reclaim_mode;
 extern int sysctl_min_unmapped_ratio;
index 3e2a80c..96305a6 100644 (file)
@@ -53,18 +53,4 @@ extern const struct blk_integrity_profile t10_pi_type1_ip;
 extern const struct blk_integrity_profile t10_pi_type3_crc;
 extern const struct blk_integrity_profile t10_pi_type3_ip;
 
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-extern void t10_pi_prepare(struct request *rq, u8 protection_type);
-extern void t10_pi_complete(struct request *rq, u8 protection_type,
-                           unsigned int intervals);
-#else
-static inline void t10_pi_complete(struct request *rq, u8 protection_type,
-                                  unsigned int intervals)
-{
-}
-static inline void t10_pi_prepare(struct request *rq, u8 protection_type)
-{
-}
-#endif
-
 #endif
index 8d8821b..659a440 100644 (file)
@@ -134,7 +134,7 @@ static inline void copy_overflow(int size, unsigned long count)
        WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
 }
 
-static __always_inline bool
+static __always_inline __must_check bool
 check_copy_size(const void *addr, size_t bytes, bool is_source)
 {
        int sz = __compiletime_object_size(addr);
index 34a0385..70bbdc3 100644 (file)
@@ -55,7 +55,7 @@
  * as usual) and both source and destination can trigger faults.
  */
 
-static __always_inline unsigned long
+static __always_inline __must_check unsigned long
 __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 {
        kasan_check_write(to, n);
@@ -63,7 +63,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
        return raw_copy_from_user(to, from, n);
 }
 
-static __always_inline unsigned long
+static __always_inline __must_check unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
        might_fault();
@@ -85,7 +85,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
  * The caller should also make sure he pins the user space address
  * so that we don't result in page fault and sleep.
  */
-static __always_inline unsigned long
+static __always_inline __must_check unsigned long
 __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 {
        kasan_check_read(from, n);
@@ -93,7 +93,7 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
        return raw_copy_to_user(to, from, n);
 }
 
-static __always_inline unsigned long
+static __always_inline __must_check unsigned long
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
        might_fault();
@@ -103,7 +103,7 @@ __copy_to_user(void __user *to, const void *from, unsigned long n)
 }
 
 #ifdef INLINE_COPY_FROM_USER
-static inline unsigned long
+static inline __must_check unsigned long
 _copy_from_user(void *to, const void __user *from, unsigned long n)
 {
        unsigned long res = n;
@@ -117,12 +117,12 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
        return res;
 }
 #else
-extern unsigned long
+extern __must_check unsigned long
 _copy_from_user(void *, const void __user *, unsigned long);
 #endif
 
 #ifdef INLINE_COPY_TO_USER
-static inline unsigned long
+static inline __must_check unsigned long
 _copy_to_user(void __user *to, const void *from, unsigned long n)
 {
        might_fault();
@@ -133,7 +133,7 @@ _copy_to_user(void __user *to, const void *from, unsigned long n)
        return n;
 }
 #else
-extern unsigned long
+extern __must_check unsigned long
 _copy_to_user(void __user *, const void *, unsigned long);
 #endif
 
@@ -222,8 +222,9 @@ static inline bool pagefault_disabled(void)
 
 #ifndef ARCH_HAS_NOCACHE_UACCESS
 
-static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
-                               const void __user *from, unsigned long n)
+static inline __must_check unsigned long
+__copy_from_user_inatomic_nocache(void *to, const void __user *from,
+                                 unsigned long n)
 {
        return __copy_from_user_inatomic(to, from, n);
 }
index 32d990d..911ab7c 100644 (file)
@@ -32,6 +32,7 @@ extern const char *const key_being_used_for[NR__KEY_BEING_USED_FOR];
 #ifdef CONFIG_SYSTEM_DATA_VERIFICATION
 
 struct key;
+struct pkcs7_message;
 
 extern int verify_pkcs7_signature(const void *data, size_t len,
                                  const void *raw_pkcs7, size_t pkcs7_len,
@@ -41,6 +42,15 @@ extern int verify_pkcs7_signature(const void *data, size_t len,
                                                      const void *data, size_t len,
                                                      size_t asn1hdrlen),
                                  void *ctx);
+extern int verify_pkcs7_message_sig(const void *data, size_t len,
+                                   struct pkcs7_message *pkcs7,
+                                   struct key *trusted_keys,
+                                   enum key_being_used_for usage,
+                                   int (*view_content)(void *ctx,
+                                                       const void *data,
+                                                       size_t len,
+                                                       size_t asn1hdrlen),
+                                   void *ctx);
 
 #ifdef CONFIG_SIGNED_PE_FILE_VERIFICATION
 extern int verify_pefile_signature(const void *pebuf, unsigned pelen,
index dfa718f..4e78094 100644 (file)
@@ -53,15 +53,21 @@ struct vmap_area {
        unsigned long va_start;
        unsigned long va_end;
 
-       /*
-        * Largest available free size in subtree.
-        */
-       unsigned long subtree_max_size;
-       unsigned long flags;
        struct rb_node rb_node;         /* address sorted rbtree */
        struct list_head list;          /* address sorted list */
-       struct llist_node purge_list;    /* "lazy purge" list */
-       struct vm_struct *vm;
+
+       /*
+        * The following three variables can be packed, because
+        * a vmap_area object is always in one of three states:
+        *    1) in "free" tree (root is free_vmap_area_root)
+        *    2) in "busy" tree (root is vmap_area_root)
+        *    3) in purge list  (head is vmap_purge_list)
+        */
+       union {
+               unsigned long subtree_max_size; /* in "free" tree */
+               struct vm_struct *vm;           /* in "busy" tree */
+               struct llist_node purge_list;   /* in purge list */
+       };
 };
 
 /*
index 7238865..51bf430 100644 (file)
@@ -46,6 +46,8 @@ const char *zpool_get_type(struct zpool *pool);
 
 void zpool_destroy_pool(struct zpool *pool);
 
+bool zpool_malloc_support_movable(struct zpool *pool);
+
 int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
                        unsigned long *handle);
 
@@ -90,6 +92,7 @@ struct zpool_driver {
                        struct zpool *zpool);
        void (*destroy)(void *pool);
 
+       bool malloc_support_movable;
        int (*malloc)(void *pool, size_t size, gfp_t gfp,
                                unsigned long *handle);
        void (*free)(void *pool, unsigned long handle);
index 4f385ec..fe2fc9e 100644 (file)
@@ -36,6 +36,8 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/cred.h>
+#include <linux/uaccess.h>
+#include <linux/fs.h>
 
 struct ib_addr {
        union {
index 1052d0d..a91b2af 100644 (file)
@@ -42,7 +42,7 @@ struct ib_ucontext;
 struct ib_umem_odp;
 
 struct ib_umem {
-       struct ib_ucontext     *context;
+       struct ib_device       *ibdev;
        struct mm_struct       *owning_mm;
        size_t                  length;
        unsigned long           address;
index 479db5c..253df1a 100644 (file)
 #include <rdma/ib_verbs.h>
 #include <linux/interval_tree.h>
 
-struct umem_odp_node {
-       u64 __subtree_last;
-       struct rb_node rb;
-};
-
 struct ib_umem_odp {
        struct ib_umem umem;
        struct ib_ucontext_per_mm *per_mm;
@@ -72,7 +67,15 @@ struct ib_umem_odp {
        int npages;
 
        /* Tree tracking */
-       struct umem_odp_node    interval_tree;
+       struct interval_tree_node interval_tree;
+
+       /*
+        * An implicit odp umem cannot be DMA mapped, has 0 length, and serves
+        * only as an anchor for the driver to hold onto the per_mm. FIXME:
+        * This should be removed and drivers should work with the per_mm
+        * directly.
+        */
+       bool is_implicit_odp;
 
        struct completion       notifier_completion;
        int                     dying;
@@ -88,14 +91,13 @@ static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem)
 /* Returns the first page of an ODP umem. */
 static inline unsigned long ib_umem_start(struct ib_umem_odp *umem_odp)
 {
-       return ALIGN_DOWN(umem_odp->umem.address, 1UL << umem_odp->page_shift);
+       return umem_odp->interval_tree.start;
 }
 
 /* Returns the address of the page after the last one of an ODP umem. */
 static inline unsigned long ib_umem_end(struct ib_umem_odp *umem_odp)
 {
-       return ALIGN(umem_odp->umem.address + umem_odp->umem.length,
-                    1UL << umem_odp->page_shift);
+       return umem_odp->interval_tree.last + 1;
 }
 
 static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp)
@@ -120,25 +122,20 @@ static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp)
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 
 struct ib_ucontext_per_mm {
-       struct ib_ucontext *context;
-       struct mm_struct *mm;
+       struct mmu_notifier mn;
        struct pid *tgid;
-       bool active;
 
        struct rb_root_cached umem_tree;
        /* Protects umem_tree */
        struct rw_semaphore umem_rwsem;
-
-       struct mmu_notifier mn;
-       unsigned int odp_mrs_count;
-
-       struct list_head ucontext_list;
-       struct rcu_head rcu;
 };
 
-int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access);
-struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root_umem,
-                                     unsigned long addr, size_t size);
+struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
+                                   size_t size, int access);
+struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
+                                              int access);
+struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem,
+                                           unsigned long addr, size_t size);
 void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
 
 int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
@@ -163,8 +160,17 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
  * Find first region intersecting with address range.
  * Return NULL if not found
  */
-struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
-                                      u64 addr, u64 length);
+static inline struct ib_umem_odp *
+rbt_ib_umem_lookup(struct rb_root_cached *root, u64 addr, u64 length)
+{
+       struct interval_tree_node *node;
+
+       node = interval_tree_iter_first(root, addr, addr + length - 1);
+       if (!node)
+               return NULL;
+       return container_of(node, struct ib_umem_odp, interval_tree);
+
+}
 
 static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
                                             unsigned long mmu_seq)
@@ -185,9 +191,11 @@ static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
 
 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
-static inline int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
+static inline struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata,
+                                                 unsigned long addr,
+                                                 size_t size, int access)
 {
-       return -EINVAL;
+       return ERR_PTR(-EINVAL);
 }
 
 static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {}
index 4f22517..6a47ba8 100644 (file)
@@ -98,15 +98,54 @@ void ibdev_info(const struct ib_device *ibdev, const char *format, ...);
 #if defined(CONFIG_DYNAMIC_DEBUG)
 #define ibdev_dbg(__dev, format, args...)                       \
        dynamic_ibdev_dbg(__dev, format, ##args)
-#elif defined(DEBUG)
-#define ibdev_dbg(__dev, format, args...)                       \
-       ibdev_printk(KERN_DEBUG, __dev, format, ##args)
 #else
 __printf(2, 3) __cold
 static inline
 void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {}
 #endif
 
+#define ibdev_level_ratelimited(ibdev_level, ibdev, fmt, ...)           \
+do {                                                                    \
+       static DEFINE_RATELIMIT_STATE(_rs,                              \
+                                     DEFAULT_RATELIMIT_INTERVAL,       \
+                                     DEFAULT_RATELIMIT_BURST);         \
+       if (__ratelimit(&_rs))                                          \
+               ibdev_level(ibdev, fmt, ##__VA_ARGS__);                 \
+} while (0)
+
+#define ibdev_emerg_ratelimited(ibdev, fmt, ...) \
+       ibdev_level_ratelimited(ibdev_emerg, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_alert_ratelimited(ibdev, fmt, ...) \
+       ibdev_level_ratelimited(ibdev_alert, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_crit_ratelimited(ibdev, fmt, ...) \
+       ibdev_level_ratelimited(ibdev_crit, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_err_ratelimited(ibdev, fmt, ...) \
+       ibdev_level_ratelimited(ibdev_err, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_warn_ratelimited(ibdev, fmt, ...) \
+       ibdev_level_ratelimited(ibdev_warn, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_notice_ratelimited(ibdev, fmt, ...) \
+       ibdev_level_ratelimited(ibdev_notice, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_info_ratelimited(ibdev, fmt, ...) \
+       ibdev_level_ratelimited(ibdev_info, ibdev, fmt, ##__VA_ARGS__)
+
+#if defined(CONFIG_DYNAMIC_DEBUG)
+/* descriptor check is first to prevent flooding with "callbacks suppressed" */
+#define ibdev_dbg_ratelimited(ibdev, fmt, ...)                          \
+do {                                                                    \
+       static DEFINE_RATELIMIT_STATE(_rs,                              \
+                                     DEFAULT_RATELIMIT_INTERVAL,       \
+                                     DEFAULT_RATELIMIT_BURST);         \
+       DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);                 \
+       if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs))      \
+               __dynamic_ibdev_dbg(&descriptor, ibdev, fmt,            \
+                                   ##__VA_ARGS__);                     \
+} while (0)
+#else
+__printf(2, 3) __cold
+static inline
+void ibdev_dbg_ratelimited(const struct ib_device *ibdev, const char *format, ...) {}
+#endif
+
 union ib_gid {
        u8      raw[16];
        struct {
@@ -451,6 +490,16 @@ enum ib_port_state {
        IB_PORT_ACTIVE_DEFER    = 5
 };
 
+enum ib_port_phys_state {
+       IB_PORT_PHYS_STATE_SLEEP = 1,
+       IB_PORT_PHYS_STATE_POLLING = 2,
+       IB_PORT_PHYS_STATE_DISABLED = 3,
+       IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING = 4,
+       IB_PORT_PHYS_STATE_LINK_UP = 5,
+       IB_PORT_PHYS_STATE_LINK_ERROR_RECOVERY = 6,
+       IB_PORT_PHYS_STATE_PHY_TEST = 7,
+};
+
 enum ib_port_width {
        IB_WIDTH_1X     = 1,
        IB_WIDTH_2X     = 16,
@@ -1417,11 +1466,6 @@ struct ib_ucontext {
 
        bool cleanup_retryable;
 
-       void (*invalidate_range)(struct ib_umem_odp *umem_odp,
-                                unsigned long start, unsigned long end);
-       struct mutex per_mm_list_lock;
-       struct list_head per_mm_list;
-
        struct ib_rdmacg_object cg_obj;
        /*
         * Implementation details of the RDMA core, don't use in drivers:
@@ -2378,6 +2422,8 @@ struct ib_device_ops {
                            u64 iova);
        int (*unmap_fmr)(struct list_head *fmr_list);
        int (*dealloc_fmr)(struct ib_fmr *fmr);
+       void (*invalidate_range)(struct ib_umem_odp *umem_odp,
+                                unsigned long start, unsigned long end);
        int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
        int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
        struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device,
@@ -3713,6 +3759,25 @@ static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
                                NULL);
 }
 
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+                               int nr_cqe, enum ib_poll_context poll_ctx,
+                               const char *caller);
+
+/**
+ * ib_alloc_cq_any: Allocate kernel CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @poll_ctx: Context used for polling the CQ
+ */
+static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev,
+                                           void *private, int nr_cqe,
+                                           enum ib_poll_context poll_ctx)
+{
+       return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx,
+                                KBUILD_MODNAME);
+}
+
 /**
  * ib_free_cq_user - Free kernel/user CQ
  * @cq: The CQ to free
index b9fee7f..c895350 100644 (file)
@@ -33,6 +33,9 @@
 #ifndef _IW_PORTMAP_H
 #define _IW_PORTMAP_H
 
+#include <linux/socket.h>
+#include <linux/netlink.h>
+
 #define IWPM_ULIBNAME_SIZE     32
 #define IWPM_DEVNAME_SIZE      32
 #define IWPM_IFNAME_SIZE       16
index 7147a92..bdbfe25 100644 (file)
@@ -33,6 +33,8 @@
 #if !defined(OPA_PORT_INFO_H)
 #define OPA_PORT_INFO_H
 
+#include <rdma/opa_smi.h>
+
 #define OPA_PORT_LINK_MODE_NOP 0               /* No change */
 #define OPA_PORT_LINK_MODE_OPA 4               /* Port mode is OPA */
 
index 6631624..ab22759 100644 (file)
@@ -76,28 +76,32 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 /**
  * Send the supplied skb to a specific userspace PID.
+ * @net: Net namespace in which to send the skb
  * @skb: The netlink skb
  * @pid: Userspace netlink process ID
  * Returns 0 on success or a negative error code.
  */
-int rdma_nl_unicast(struct sk_buff *skb, u32 pid);
+int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid);
 
 /**
  * Send, with wait/1 retry, the supplied skb to a specific userspace PID.
+ * @net: Net namespace in which to send the skb
  * @skb: The netlink skb
  * @pid: Userspace netlink process ID
  * Returns 0 on success or a negative error code.
  */
-int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid);
+int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid);
 
 /**
  * Send the supplied skb to a netlink group.
+ * @net: Net namespace in which to send the skb
  * @skb: The netlink skb
  * @group: Netlink group ID
  * @flags: allocation flags
  * Returns 0 on success or a negative error code.
  */
-int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags);
+int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
+                     unsigned int group, gfp_t flags);
 
 /**
  * Check if there are any listeners to the netlink group
index 525848e..ac5a943 100644 (file)
@@ -116,6 +116,7 @@ struct rvt_ibport {
        u64 n_unaligned;
        u64 n_rc_dupreq;
        u64 n_rc_seqnak;
+       u64 n_rc_crwaits;
        u16 pkey_violations;
        u16 qkey_violations;
        u16 mkey_violations;
index 04c519e..574eb72 100644 (file)
@@ -53,6 +53,7 @@
 
 #include <linux/kthread.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
 
 /*
  * Define an ib_cq_notify value that is not valid so we know when CQ
index e06c77d..b550ae8 100644 (file)
@@ -973,6 +973,41 @@ static inline void rvt_free_rq(struct rvt_rq *rq)
        rq->wq = NULL;
 }
 
+/**
+ * rvt_to_iport - Get the ibport pointer
+ * @qp: the qp pointer
+ *
+ * This function returns the ibport pointer from the qp pointer.
+ */
+static inline struct rvt_ibport *rvt_to_iport(struct rvt_qp *qp)
+{
+       struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+       return rdi->ports[qp->port_num - 1];
+}
+
+/**
+ * rvt_rc_credit_avail - Check if there are enough RC credits for the request
+ * @qp: the qp
+ * @wqe: the request
+ *
+ * This function returns false when there are not enough credits for the given
+ * request and true otherwise.
+ */
+static inline bool rvt_rc_credit_avail(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+       lockdep_assert_held(&qp->s_lock);
+       if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
+           rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
+               struct rvt_ibport *rvp = rvt_to_iport(qp);
+
+               qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+               rvp->n_rc_crwaits++;
+               return false;
+       }
+       return true;
+}
+
 struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi,
                                     u64 v,
                                     void (*cb)(struct rvt_qp *qp, u64 v));
index f24cc2a..d16b0fc 100644 (file)
@@ -6,6 +6,8 @@
 #ifndef _RDMA_SIGNATURE_H_
 #define _RDMA_SIGNATURE_H_
 
+#include <linux/types.h>
+
 enum ib_signature_prot_cap {
        IB_PROT_T10DIF_TYPE_1 = 1,
        IB_PROT_T10DIF_TYPE_2 = 1 << 1,
index 76ed5e4..91bd749 100644 (file)
@@ -57,6 +57,7 @@ struct scsi_pointer {
 #define SCMD_TAGGED            (1 << 0)
 #define SCMD_UNCHECKED_ISA_DMA (1 << 1)
 #define SCMD_INITIALIZED       (1 << 2)
+#define SCMD_LAST              (1 << 3)
 /* flags preserved across unprep / reprep */
 #define SCMD_PRESERVED_FLAGS   (SCMD_UNCHECKED_ISA_DMA | SCMD_INITIALIZED)
 
index e03bd9d..7b196d2 100644 (file)
@@ -6,8 +6,6 @@ struct scsi_cmnd;
 struct scsi_device;
 struct scsi_sense_hdr;
 
-#define SCSI_LOG_BUFSIZE 128
-
 extern void scsi_print_command(struct scsi_cmnd *);
 extern size_t __scsi_format_command(char *, size_t,
                                   const unsigned char *, size_t);
index cc139db..31e0d6c 100644 (file)
@@ -80,8 +80,10 @@ struct scsi_host_template {
         * command block to the LLDD.  When the driver finished
         * processing the command the done callback is invoked.
         *
-        * If queuecommand returns 0, then the HBA has accepted the
-        * command.  The done() function must be called on the command
+        * If queuecommand returns 0, then the driver has accepted the
+        * command.  It must also push it to the HBA if the scsi_cmnd
+        * flag SCMD_LAST is set, or if the driver does not implement
+        * commit_rqs.  The done() function must be called on the command
         * when the driver has finished with it. (you may call done on the
         * command before queuecommand returns, but in this case you
         * *must* return 0 from queuecommand).
@@ -110,6 +112,16 @@ struct scsi_host_template {
        int (* queuecommand)(struct Scsi_Host *, struct scsi_cmnd *);
 
        /*
+        * The commit_rqs function is used to trigger a hardware
+        * doorbell after some requests have been queued with
+        * queuecommand, when an error is encountered before sending
+        * the request with SCMD_LAST set.
+        *
+        * STATUS: OPTIONAL
+        */
+       void (*commit_rqs)(struct Scsi_Host *, u16);
+
+       /*
         * This is an error handling strategy routine.  You don't need to
         * define one of these if you don't want to - there is a default
         * routine that is present that should work in most cases.  For those
index f6a4eaa..a138306 100644 (file)
@@ -451,20 +451,81 @@ TRACE_EVENT(xprtrdma_createmrs,
 
        TP_STRUCT__entry(
                __field(const void *, r_xprt)
+               __string(addr, rpcrdma_addrstr(r_xprt))
+               __string(port, rpcrdma_portstr(r_xprt))
                __field(unsigned int, count)
        ),
 
        TP_fast_assign(
                __entry->r_xprt = r_xprt;
                __entry->count = count;
+               __assign_str(addr, rpcrdma_addrstr(r_xprt));
+               __assign_str(port, rpcrdma_portstr(r_xprt));
        ),
 
-       TP_printk("r_xprt=%p: created %u MRs",
-               __entry->r_xprt, __entry->count
+       TP_printk("peer=[%s]:%s r_xprt=%p: created %u MRs",
+               __get_str(addr), __get_str(port), __entry->r_xprt,
+               __entry->count
        )
 );
 
-DEFINE_RXPRT_EVENT(xprtrdma_nomrs);
+TRACE_EVENT(xprtrdma_mr_get,
+       TP_PROTO(
+               const struct rpcrdma_req *req
+       ),
+
+       TP_ARGS(req),
+
+       TP_STRUCT__entry(
+               __field(const void *, req)
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+               __field(u32, xid)
+       ),
+
+       TP_fast_assign(
+               const struct rpc_rqst *rqst = &req->rl_slot;
+
+               __entry->req = req;
+               __entry->task_id = rqst->rq_task->tk_pid;
+               __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+               __entry->xid = be32_to_cpu(rqst->rq_xid);
+       ),
+
+       TP_printk("task:%u@%u xid=0x%08x req=%p",
+               __entry->task_id, __entry->client_id, __entry->xid,
+               __entry->req
+       )
+);
+
+TRACE_EVENT(xprtrdma_nomrs,
+       TP_PROTO(
+               const struct rpcrdma_req *req
+       ),
+
+       TP_ARGS(req),
+
+       TP_STRUCT__entry(
+               __field(const void *, req)
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+               __field(u32, xid)
+       ),
+
+       TP_fast_assign(
+               const struct rpc_rqst *rqst = &req->rl_slot;
+
+               __entry->req = req;
+               __entry->task_id = rqst->rq_task->tk_pid;
+               __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+               __entry->xid = be32_to_cpu(rqst->rq_xid);
+       ),
+
+       TP_printk("task:%u@%u xid=0x%08x req=%p",
+               __entry->task_id, __entry->client_id, __entry->xid,
+               __entry->req
+       )
+);
 
 DEFINE_RDCH_EVENT(read);
 DEFINE_WRCH_EVENT(write);
@@ -623,21 +684,21 @@ TRACE_EVENT(xprtrdma_post_send,
 
 TRACE_EVENT(xprtrdma_post_recv,
        TP_PROTO(
-               const struct ib_cqe *cqe
+               const struct rpcrdma_rep *rep
        ),
 
-       TP_ARGS(cqe),
+       TP_ARGS(rep),
 
        TP_STRUCT__entry(
-               __field(const void *, cqe)
+               __field(const void *, rep)
        ),
 
        TP_fast_assign(
-               __entry->cqe = cqe;
+               __entry->rep = rep;
        ),
 
-       TP_printk("cqe=%p",
-               __entry->cqe
+       TP_printk("rep=%p",
+               __entry->rep
        )
 );
 
@@ -715,14 +776,15 @@ TRACE_EVENT(xprtrdma_wc_receive,
        TP_ARGS(wc),
 
        TP_STRUCT__entry(
-               __field(const void *, cqe)
+               __field(const void *, rep)
                __field(u32, byte_len)
                __field(unsigned int, status)
                __field(u32, vendor_err)
        ),
 
        TP_fast_assign(
-               __entry->cqe = wc->wr_cqe;
+               __entry->rep = container_of(wc->wr_cqe, struct rpcrdma_rep,
+                                           rr_cqe);
                __entry->status = wc->status;
                if (wc->status) {
                        __entry->byte_len = 0;
@@ -733,8 +795,8 @@ TRACE_EVENT(xprtrdma_wc_receive,
                }
        ),
 
-       TP_printk("cqe=%p %u bytes: %s (%u/0x%x)",
-               __entry->cqe, __entry->byte_len,
+       TP_printk("rep=%p %u bytes: %s (%u/0x%x)",
+               __entry->rep, __entry->byte_len,
                rdma_show_wc_status(__entry->status),
                __entry->status, __entry->vendor_err
        )
index 3a27335..c2ce648 100644 (file)
@@ -66,8 +66,9 @@ DECLARE_EVENT_CLASS(writeback_page_template,
        ),
 
        TP_fast_assign(
-               strncpy(__entry->name,
-                       mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", 32);
+               strscpy_pad(__entry->name,
+                           mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)",
+                           32);
                __entry->ino = mapping ? mapping->host->i_ino : 0;
                __entry->index = page->index;
        ),
@@ -110,8 +111,8 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
                struct backing_dev_info *bdi = inode_to_bdi(inode);
 
                /* may be called for files on pseudo FSes w/ unregistered bdi */
-               strncpy(__entry->name,
-                       bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
+               strscpy_pad(__entry->name,
+                           bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
                __entry->ino            = inode->i_ino;
                __entry->state          = inode->i_state;
                __entry->flags          = flags;
@@ -316,8 +317,8 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template,
        ),
 
        TP_fast_assign(
-               strncpy(__entry->name,
-                       dev_name(inode_to_bdi(inode)->dev), 32);
+               strscpy_pad(__entry->name,
+                           dev_name(inode_to_bdi(inode)->dev), 32);
                __entry->ino            = inode->i_ino;
                __entry->sync_mode      = wbc->sync_mode;
                __entry->cgroup_ino     = __trace_wbc_assign_cgroup(wbc);
@@ -360,8 +361,9 @@ DECLARE_EVENT_CLASS(writeback_work_class,
                __field(unsigned int, cgroup_ino)
        ),
        TP_fast_assign(
-               strncpy(__entry->name,
-                       wb->bdi->dev ? dev_name(wb->bdi->dev) : "(unknown)", 32);
+               strscpy_pad(__entry->name,
+                           wb->bdi->dev ? dev_name(wb->bdi->dev) :
+                           "(unknown)", 32);
                __entry->nr_pages = work->nr_pages;
                __entry->sb_dev = work->sb ? work->sb->s_dev : 0;
                __entry->sync_mode = work->sync_mode;
@@ -414,7 +416,7 @@ DECLARE_EVENT_CLASS(writeback_class,
                __field(unsigned int, cgroup_ino)
        ),
        TP_fast_assign(
-               strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+               strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32);
                __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
        ),
        TP_printk("bdi %s: cgroup_ino=%u",
@@ -436,7 +438,7 @@ TRACE_EVENT(writeback_bdi_register,
                __array(char, name, 32)
        ),
        TP_fast_assign(
-               strncpy(__entry->name, dev_name(bdi->dev), 32);
+               strscpy_pad(__entry->name, dev_name(bdi->dev), 32);
        ),
        TP_printk("bdi %s",
                __entry->name
@@ -461,7 +463,7 @@ DECLARE_EVENT_CLASS(wbc_class,
        ),
 
        TP_fast_assign(
-               strncpy(__entry->name, dev_name(bdi->dev), 32);
+               strscpy_pad(__entry->name, dev_name(bdi->dev), 32);
                __entry->nr_to_write    = wbc->nr_to_write;
                __entry->pages_skipped  = wbc->pages_skipped;
                __entry->sync_mode      = wbc->sync_mode;
@@ -512,7 +514,7 @@ TRACE_EVENT(writeback_queue_io,
        ),
        TP_fast_assign(
                unsigned long *older_than_this = work->older_than_this;
-               strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+               strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32);
                __entry->older  = older_than_this ?  *older_than_this : 0;
                __entry->age    = older_than_this ?
                                  (jiffies - *older_than_this) * 1000 / HZ : -1;
@@ -598,7 +600,7 @@ TRACE_EVENT(bdi_dirty_ratelimit,
        ),
 
        TP_fast_assign(
-               strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32);
+               strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32);
                __entry->write_bw       = KBps(wb->write_bandwidth);
                __entry->avg_write_bw   = KBps(wb->avg_write_bandwidth);
                __entry->dirty_rate     = KBps(dirty_rate);
@@ -663,7 +665,7 @@ TRACE_EVENT(balance_dirty_pages,
 
        TP_fast_assign(
                unsigned long freerun = (thresh + bg_thresh) / 2;
-               strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32);
+               strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32);
 
                __entry->limit          = global_wb_domain.dirty_limit;
                __entry->setpoint       = (global_wb_domain.dirty_limit +
@@ -723,8 +725,8 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
        ),
 
        TP_fast_assign(
-               strncpy(__entry->name,
-                       dev_name(inode_to_bdi(inode)->dev), 32);
+               strscpy_pad(__entry->name,
+                           dev_name(inode_to_bdi(inode)->dev), 32);
                __entry->ino            = inode->i_ino;
                __entry->state          = inode->i_state;
                __entry->dirtied_when   = inode->dirtied_when;
@@ -797,8 +799,8 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
        ),
 
        TP_fast_assign(
-               strncpy(__entry->name,
-                       dev_name(inode_to_bdi(inode)->dev), 32);
+               strscpy_pad(__entry->name,
+                           dev_name(inode_to_bdi(inode)->dev), 32);
                __entry->ino            = inode->i_ino;
                __entry->state          = inode->i_state;
                __entry->dirtied_when   = inode->dirtied_when;
index 63b1f50..c160a53 100644 (file)
@@ -67,6 +67,9 @@
 #define MADV_WIPEONFORK 18             /* Zero memory on fork, child only */
 #define MADV_KEEPONFORK 19             /* Undo MADV_WIPEONFORK */
 
+#define MADV_COLD      20              /* deactivate these pages */
+#define MADV_PAGEOUT   21              /* reclaim these pages */
+
 /* compatibility flags */
 #define MAP_FILE       0
 
index e4a79f8..ab5c7e8 100644 (file)
@@ -11,6 +11,9 @@
    more information about COFF, then O'Reilly has a very excellent book.
 */
 
+#ifndef _UAPI_LINUX_COFF_H
+#define _UAPI_LINUX_COFF_H
+
 #define  E_SYMNMLEN  8   /* Number of characters in a symbol name         */
 #define  E_FILNMLEN 14   /* Number of characters in a file name           */
 #define  E_DIMNUM    4   /* Number of array dimensions in auxiliary entry */
@@ -350,3 +353,5 @@ struct COFF_reloc {
 
 /* For new sections we haven't heard of before */
 #define COFF_DEF_SECTION_ALIGNMENT       4
+
+#endif /* _UAPI_LINUX_COFF_H */
index f396a82..2df8cec 100644 (file)
@@ -243,6 +243,7 @@ enum {
        DM_TARGET_MSG_CMD,
        DM_DEV_SET_GEOMETRY_CMD,
        DM_DEV_ARM_POLL_CMD,
+       DM_GET_TARGET_VERSION_CMD,
 };
 
 #define DM_IOCTL 0xfd
@@ -265,14 +266,15 @@ enum {
 #define DM_TABLE_STATUS  _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl)
 
 #define DM_LIST_VERSIONS _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, struct dm_ioctl)
+#define DM_GET_TARGET_VERSION _IOWR(DM_IOCTL, DM_GET_TARGET_VERSION_CMD, struct dm_ioctl)
 
 #define DM_TARGET_MSG   _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl)
 #define DM_DEV_SET_GEOMETRY    _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR       4
-#define DM_VERSION_MINOR       40
+#define DM_VERSION_MINOR       41
 #define DM_VERSION_PATCHLEVEL  0
-#define DM_VERSION_EXTRA       "-ioctl (2019-01-18)"
+#define DM_VERSION_EXTRA       "-ioctl (2019-09-16)"
 
 /* Status bits */
 #define DM_READONLY_FLAG       (1 << 0) /* In/Out */
index aad225b..379a612 100644 (file)
@@ -264,6 +264,7 @@ struct fsxattr {
 #define FS_NOCOW_FL                    0x00800000 /* Do not cow file */
 #define FS_INLINE_DATA_FL              0x10000000 /* Reserved for ext4 */
 #define FS_PROJINHERIT_FL              0x20000000 /* Create with parents projid */
+#define FS_CASEFOLD_FL                 0x40000000 /* Folder is case insensitive */
 #define FS_RESERVED_FL                 0x80000000 /* reserved for ext2 lib */
 
 #define FS_FL_USER_VISIBLE             0x0003DFFF /* User visible flags */
index 2971d29..802b037 100644 (file)
  *
  *  7.31
  *  - add FUSE_WRITE_KILL_PRIV flag
+ *  - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING
+ *  - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag
  */
 
 #ifndef _LINUX_FUSE_H
@@ -274,6 +276,7 @@ struct fuse_file_lock {
  * FUSE_CACHE_SYMLINKS: cache READLINK responses
  * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir
  * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request
+ * FUSE_MAP_ALIGNMENT: map_alignment field is valid
  */
 #define FUSE_ASYNC_READ                (1 << 0)
 #define FUSE_POSIX_LOCKS       (1 << 1)
@@ -301,6 +304,7 @@ struct fuse_file_lock {
 #define FUSE_CACHE_SYMLINKS    (1 << 23)
 #define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
 #define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
+#define FUSE_MAP_ALIGNMENT     (1 << 26)
 
 /**
  * CUSE INIT request/reply flags
@@ -422,9 +426,15 @@ enum fuse_opcode {
        FUSE_RENAME2            = 45,
        FUSE_LSEEK              = 46,
        FUSE_COPY_FILE_RANGE    = 47,
+       FUSE_SETUPMAPPING       = 48,
+       FUSE_REMOVEMAPPING      = 49,
 
        /* CUSE specific operations */
        CUSE_INIT               = 4096,
+
+       /* Reserved opcodes: helpful to detect structure endian-ness */
+       CUSE_INIT_BSWAP_RESERVED        = 1048576,      /* CUSE_INIT << 8 */
+       FUSE_INIT_BSWAP_RESERVED        = 436207616,    /* FUSE_INIT << 24 */
 };
 
 enum fuse_notify_code {
@@ -652,7 +662,7 @@ struct fuse_init_out {
        uint32_t        max_write;
        uint32_t        time_gran;
        uint16_t        max_pages;
-       uint16_t        padding;
+       uint16_t        map_alignment;
        uint32_t        unused[8];
 };
 
index 96ee9d9..ea57526 100644 (file)
@@ -28,6 +28,7 @@ struct io_uring_sqe {
                __u16           poll_events;
                __u32           sync_range_flags;
                __u32           msg_flags;
+               __u32           timeout_flags;
        };
        __u64   user_data;      /* data to be passed back at completion time */
        union {
@@ -61,6 +62,7 @@ struct io_uring_sqe {
 #define IORING_OP_SYNC_FILE_RANGE      8
 #define IORING_OP_SENDMSG      9
 #define IORING_OP_RECVMSG      10
+#define IORING_OP_TIMEOUT      11
 
 /*
  * sqe->fsync_flags
index 233efbb..52641d8 100644 (file)
@@ -999,6 +999,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_ARM_PTRAUTH_GENERIC 172
 #define KVM_CAP_PMU_EVENT_FILTER 173
 #define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174
+#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1145,6 +1146,7 @@ struct kvm_dirty_tlb {
 #define KVM_REG_S390           0x5000000000000000ULL
 #define KVM_REG_ARM64          0x6000000000000000ULL
 #define KVM_REG_MIPS           0x7000000000000000ULL
+#define KVM_REG_RISCV          0x8000000000000000ULL
 
 #define KVM_REG_SIZE_SHIFT     52
 #define KVM_REG_SIZE_MASK      0x00f0000000000000ULL
index b1e9de4..a519313 100644 (file)
 #include <linux/types.h>
 
 /* latest upcall version available */
-#define CLD_UPCALL_VERSION 1
+#define CLD_UPCALL_VERSION 2
 
 /* defined by RFC3530 */
 #define NFS4_OPAQUE_LIMIT 1024
 
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE      32
+#endif
+
 enum cld_command {
        Cld_Create,             /* create a record for this cm_id */
        Cld_Remove,             /* remove record of this cm_id */
        Cld_Check,              /* is this cm_id allowed? */
        Cld_GraceDone,          /* grace period is complete */
-       Cld_GraceStart,
+       Cld_GraceStart,         /* grace start (upload client records) */
+       Cld_GetVersion,         /* query max supported upcall version */
 };
 
 /* representation of long-form NFSv4 client ID */
@@ -45,6 +50,17 @@ struct cld_name {
        unsigned char   cn_id[NFS4_OPAQUE_LIMIT];       /* client-provided */
 } __attribute__((packed));
 
+/* sha256 hash of the kerberos principal */
+struct cld_princhash {
+       __u8            cp_len;                         /* length of cp_data */
+       unsigned char   cp_data[SHA256_DIGEST_SIZE];    /* hash of principal */
+} __attribute__((packed));
+
+struct cld_clntinfo {
+       struct cld_name         cc_name;
+       struct cld_princhash    cc_princhash;
+} __attribute__((packed));
+
 /* message struct for communication with userspace */
 struct cld_msg {
        __u8            cm_vers;                /* upcall version */
@@ -54,7 +70,28 @@ struct cld_msg {
        union {
                __s64           cm_gracetime;   /* grace period start time */
                struct cld_name cm_name;
+               __u8            cm_version;     /* for getting max version */
+       } __attribute__((packed)) cm_u;
+} __attribute__((packed));
+
+/* version 2 message can include hash of kerberos principal */
+struct cld_msg_v2 {
+       __u8            cm_vers;                /* upcall version */
+       __u8            cm_cmd;                 /* upcall command */
+       __s16           cm_status;              /* return code */
+       __u32           cm_xid;                 /* transaction id */
+       union {
+               struct cld_name cm_name;
+               __u8            cm_version;     /* for getting max version */
+               struct cld_clntinfo cm_clntinfo; /* name & princ hash */
        } __attribute__((packed)) cm_u;
 } __attribute__((packed));
 
+struct cld_msg_hdr {
+       __u8            cm_vers;                /* upcall version */
+       __u8            cm_cmd;                 /* upcall command */
+       __s16           cm_status;              /* return code */
+       __u32           cm_xid;                 /* transaction id */
+} __attribute__((packed));
+
 #endif /* !_NFSD_CLD_H */
index f28e562..29d6e93 100644 (file)
 #define  PCI_EXP_SLTCTL_CCIE   0x0010  /* Command Completed Interrupt Enable */
 #define  PCI_EXP_SLTCTL_HPIE   0x0020  /* Hot-Plug Interrupt Enable */
 #define  PCI_EXP_SLTCTL_AIC    0x00c0  /* Attention Indicator Control */
+#define  PCI_EXP_SLTCTL_ATTN_IND_SHIFT 6      /* Attention Indicator shift */
 #define  PCI_EXP_SLTCTL_ATTN_IND_ON    0x0040 /* Attention Indicator on */
 #define  PCI_EXP_SLTCTL_ATTN_IND_BLINK 0x0080 /* Attention Indicator blinking */
 #define  PCI_EXP_SLTCTL_ATTN_IND_OFF   0x00c0 /* Attention Indicator off */
 #define PCI_EXT_CAP_ID_DPC     0x1D    /* Downstream Port Containment */
 #define PCI_EXT_CAP_ID_L1SS    0x1E    /* L1 PM Substates */
 #define PCI_EXT_CAP_ID_PTM     0x1F    /* Precision Time Measurement */
-#define PCI_EXT_CAP_ID_MAX     PCI_EXT_CAP_ID_PTM
+#define PCI_EXT_CAP_ID_DLF     0x25    /* Data Link Feature */
+#define PCI_EXT_CAP_ID_PL_16GT 0x26    /* Physical Layer 16.0 GT/s */
+#define PCI_EXT_CAP_ID_MAX     PCI_EXT_CAP_ID_PL_16GT
 
 #define PCI_EXT_CAP_DSN_SIZEOF 12
 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
 #define  PCI_L1SS_CTL1_LTR_L12_TH_SCALE        0xe0000000  /* LTR_L1.2_THRESHOLD_Scale */
 #define PCI_L1SS_CTL2          0x0c    /* Control 2 Register */
 
+/* Data Link Feature */
+#define PCI_DLF_CAP            0x04    /* Capabilities Register */
+#define  PCI_DLF_EXCHANGE_ENABLE       0x80000000  /* Data Link Feature Exchange Enable */
+
+/* Physical Layer 16.0 GT/s */
+#define PCI_PL_16GT_LE_CTRL    0x20    /* Lane Equalization Control Register */
+#define  PCI_PL_16GT_LE_CTRL_DSP_TX_PRESET_MASK                0x0000000F
+#define  PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK                0x000000F0
+#define  PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT       4
+
 #endif /* LINUX_PCI_REGS_H */
index 8f10748..9e843a1 100644 (file)
@@ -295,15 +295,38 @@ struct vfio_region_info_cap_type {
        __u32 subtype;  /* type specific */
 };
 
+/*
+ * List of region types, global per bus driver.
+ * If you introduce a new type, please add it here.
+ */
+
+/* PCI region type containing a PCI vendor part */
 #define VFIO_REGION_TYPE_PCI_VENDOR_TYPE       (1 << 31)
 #define VFIO_REGION_TYPE_PCI_VENDOR_MASK       (0xffff)
+#define VFIO_REGION_TYPE_GFX                    (1)
+#define VFIO_REGION_TYPE_CCW                   (2)
+
+/* sub-types for VFIO_REGION_TYPE_PCI_* */
 
-/* 8086 Vendor sub-types */
+/* 8086 vendor PCI sub-types */
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG  (3)
 
-#define VFIO_REGION_TYPE_GFX                    (1)
+/* 10de vendor PCI sub-types */
+/*
+ * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
+ */
+#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1)
+
+/* 1014 vendor PCI sub-types */
+/*
+ * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
+ * to do TLB invalidation on a GPU.
+ */
+#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD   (1)
+
+/* sub-types for VFIO_REGION_TYPE_GFX */
 #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
 
 /**
@@ -353,26 +376,10 @@ struct vfio_region_gfx_edid {
 #define VFIO_DEVICE_GFX_LINK_STATE_DOWN  2
 };
 
-#define VFIO_REGION_TYPE_CCW                   (2)
-/* ccw sub-types */
+/* sub-types for VFIO_REGION_TYPE_CCW */
 #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD      (1)
 
 /*
- * 10de vendor sub-type
- *
- * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
- */
-#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1)
-
-/*
- * 1014 vendor sub-type
- *
- * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
- * to do TLB invalidation on a GPU.
- */
-#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD   (1)
-
-/*
  * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
  * which allows direct access to non-MSIX registers which happened to be within
  * the same system page.
@@ -714,7 +721,31 @@ struct vfio_iommu_type1_info {
        __u32   argsz;
        __u32   flags;
 #define VFIO_IOMMU_INFO_PGSIZES (1 << 0)       /* supported page sizes info */
-       __u64   iova_pgsizes;           /* Bitmap of supported page sizes */
+#define VFIO_IOMMU_INFO_CAPS   (1 << 1)        /* Info supports caps */
+       __u64   iova_pgsizes;   /* Bitmap of supported page sizes */
+       __u32   cap_offset;     /* Offset within info struct of first cap */
+};
+
+/*
+ * The IOVA capability allows reporting the valid IOVA range(s),
+ * excluding any non-relaxable reserved regions exposed by
+ * devices attached to the container. Any DMA map attempt
+ * outside the valid IOVA range will return an error.
+ *
+ * The structures below define version 1 of this capability.
+ */
+#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE  1
+
+struct vfio_iova_range {
+       __u64   start;
+       __u64   end;
+};
+
+struct vfio_iommu_type1_info_cap_iova_range {
+       struct  vfio_info_cap_header header;
+       __u32   nr_iovas;
+       __u32   reserved;
+       struct  vfio_iova_range iova_ranges[];
 };
 
 #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
diff --git a/include/uapi/linux/virtio_fs.h b/include/uapi/linux/virtio_fs.h
new file mode 100644 (file)
index 0000000..b02eb2a
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+
+#ifndef _UAPI_LINUX_VIRTIO_FS_H
+#define _UAPI_LINUX_VIRTIO_FS_H
+
+#include <linux/types.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_types.h>
+
+struct virtio_fs_config {
+       /* Filesystem name (UTF-8, not NUL-terminated, padded with NULs) */
+       __u8 tag[36];
+
+       /* Number of request queues */
+       __u32 num_request_queues;
+} __attribute__((packed));
+
+#endif /* _UAPI_LINUX_VIRTIO_FS_H */
index 348fd01..585e07b 100644 (file)
@@ -44,6 +44,7 @@
 #define VIRTIO_ID_VSOCK        19 /* virtio vsock transport */
 #define VIRTIO_ID_CRYPTO       20 /* virtio crypto */
 #define VIRTIO_ID_IOMMU        23 /* virtio IOMMU */
+#define VIRTIO_ID_FS           26 /* virtio filesystem */
 #define VIRTIO_ID_PMEM         27 /* virtio pmem */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
index 7e9900b..88b6ca7 100644 (file)
@@ -43,6 +43,7 @@ enum mlx5_ib_uapi_flow_table_type {
        MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX     = 0x0,
        MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX     = 0x1,
        MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB        = 0x2,
+       MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX    = 0x3,
 };
 
 enum mlx5_ib_uapi_flow_action_packet_reformat_type {
index 52f32a6..3ae65e9 100644 (file)
@@ -8,6 +8,8 @@
 #ifndef SCSI_BSG_FC_H
 #define SCSI_BSG_FC_H
 
+#include <linux/types.h>
+
 /*
  * This file intended to be included by both kernel and user space
  */
  * with the transport upon completion of the login.
  */
 struct fc_bsg_host_add_rport {
-       uint8_t         reserved;
+       __u8    reserved;
 
        /* FC Address Identifier of the remote port to login to */
-       uint8_t         port_id[3];
+       __u8    port_id[3];
 };
 
 /* Response:
@@ -87,10 +89,10 @@ struct fc_bsg_host_add_rport {
  * remain logged in with the remote port.
  */
 struct fc_bsg_host_del_rport {
-       uint8_t         reserved;
+       __u8    reserved;
 
        /* FC Address Identifier of the remote port to logout of */
-       uint8_t         port_id[3];
+       __u8    port_id[3];
 };
 
 /* Response:
@@ -111,10 +113,10 @@ struct fc_bsg_host_els {
         * ELS Command Code being sent (must be the same as byte 0
         * of the payload)
         */
-       uint8_t         command_code;
+       __u8    command_code;
 
        /* FC Address Identifier of the remote port to send the ELS to */
-       uint8_t         port_id[3];
+       __u8    port_id[3];
 };
 
 /* Response:
@@ -151,14 +153,14 @@ struct fc_bsg_ctels_reply {
         * Note: x_RJT/BSY status will indicate that the rjt_data field
         *   is valid and contains the reason/explanation values.
         */
-       uint32_t        status;         /* See FC_CTELS_STATUS_xxx */
+       __u32   status;         /* See FC_CTELS_STATUS_xxx */
 
        /* valid if status is not FC_CTELS_STATUS_OK */
        struct  {
-               uint8_t action;         /* fragment_id for CT REJECT */
-               uint8_t reason_code;
-               uint8_t reason_explanation;
-               uint8_t vendor_unique;
+               __u8    action;         /* fragment_id for CT REJECT */
+               __u8    reason_code;
+               __u8    reason_explanation;
+               __u8    vendor_unique;
        } rjt_data;
 };
 
@@ -174,17 +176,17 @@ struct fc_bsg_ctels_reply {
  * and whether to tear it down after the request.
  */
 struct fc_bsg_host_ct {
-       uint8_t         reserved;
+       __u8    reserved;
 
        /* FC Address Identifier of the remote port to send the ELS to */
-       uint8_t         port_id[3];
+       __u8    port_id[3];
 
        /*
         * We need words 0-2 of the generic preamble for the LLD's
         */
-       uint32_t        preamble_word0; /* revision & IN_ID */
-       uint32_t        preamble_word1; /* GS_Type, GS_SubType, Options, Rsvd */
-       uint32_t        preamble_word2; /* Cmd Code, Max Size */
+       __u32   preamble_word0; /* revision & IN_ID */
+       __u32   preamble_word1; /* GS_Type, GS_SubType, Options, Rsvd */
+       __u32   preamble_word2; /* Cmd Code, Max Size */
 
 };
 /* Response:
@@ -204,17 +206,17 @@ struct fc_bsg_host_vendor {
         * Identifies the vendor that the message is formatted for. This
         * should be the recipient of the message.
         */
-       uint64_t vendor_id;
+       __u64 vendor_id;
 
        /* start of vendor command area */
-       uint32_t vendor_cmd[0];
+       __u32 vendor_cmd[0];
 };
 
 /* Response:
  */
 struct fc_bsg_host_vendor_reply {
        /* start of vendor response area */
-       uint32_t vendor_rsp[0];
+       __u32 vendor_rsp[0];
 };
 
 
@@ -233,7 +235,7 @@ struct fc_bsg_rport_els {
         * ELS Command Code being sent (must be the same as
         * byte 0 of the payload)
         */
-       uint8_t els_code;
+       __u8 els_code;
 };
 
 /* Response:
@@ -251,9 +253,9 @@ struct fc_bsg_rport_ct {
        /*
         * We need words 0-2 of the generic preamble for the LLD's
         */
-       uint32_t        preamble_word0; /* revision & IN_ID */
-       uint32_t        preamble_word1; /* GS_Type, GS_SubType, Options, Rsvd */
-       uint32_t        preamble_word2; /* Cmd Code, Max Size */
+       __u32   preamble_word0; /* revision & IN_ID */
+       __u32   preamble_word1; /* GS_Type, GS_SubType, Options, Rsvd */
+       __u32   preamble_word2; /* Cmd Code, Max Size */
 };
 /* Response:
  *
@@ -265,7 +267,7 @@ struct fc_bsg_rport_ct {
 
 /* request (CDB) structure of the sg_io_v4 */
 struct fc_bsg_request {
-       uint32_t msgcode;
+       __u32 msgcode;
        union {
                struct fc_bsg_host_add_rport    h_addrport;
                struct fc_bsg_host_del_rport    h_delrport;
@@ -289,10 +291,10 @@ struct fc_bsg_reply {
         *    msg and status fields. The per-msgcode reply structure
         *    will contain valid data.
         */
-       uint32_t result;
+       __u32 result;
 
        /* If there was reply_payload, how much was received ? */
-       uint32_t reply_payload_rcv_len;
+       __u32 reply_payload_rcv_len;
 
        union {
                struct fc_bsg_host_vendor_reply         vendor_reply;
index 5dd3820..1b1737c 100644 (file)
 
 /* SCSI_TRANSPORT_MSG event message header */
 struct scsi_nl_hdr {
-       uint8_t version;
-       uint8_t transport;
-       uint16_t magic;
-       uint16_t msgtype;
-       uint16_t msglen;
-} __attribute__((aligned(sizeof(uint64_t))));
+       __u8 version;
+       __u8 transport;
+       __u16 magic;
+       __u16 msgtype;
+       __u16 msglen;
+} __attribute__((aligned(sizeof(__u64))));
 
 /* scsi_nl_hdr->version value */
 #define SCSI_NL_VERSION                                1
@@ -75,10 +75,10 @@ struct scsi_nl_hdr {
  */
 struct scsi_nl_host_vendor_msg {
        struct scsi_nl_hdr snlh;                /* must be 1st element ! */
-       uint64_t vendor_id;
-       uint16_t host_no;
-       uint16_t vmsg_datalen;
-} __attribute__((aligned(sizeof(uint64_t))));
+       __u64 vendor_id;
+       __u16 host_no;
+       __u16 vmsg_datalen;
+} __attribute__((aligned(sizeof(__u64))));
 
 
 /*
index a390235..7535253 100644 (file)
@@ -7,6 +7,7 @@
 #ifndef SCSI_NETLINK_FC_H
 #define SCSI_NETLINK_FC_H
 
+#include <linux/types.h>
 #include <scsi/scsi_netlink.h>
 
 /*
  */
 struct fc_nl_event {
        struct scsi_nl_hdr snlh;                /* must be 1st element ! */
-       uint64_t seconds;
-       uint64_t vendor_id;
-       uint16_t host_no;
-       uint16_t event_datalen;
-       uint32_t event_num;
-       uint32_t event_code;
-       uint32_t event_data;
-} __attribute__((aligned(sizeof(uint64_t))));
+       __u64 seconds;
+       __u64 vendor_id;
+       __u16 host_no;
+       __u16 event_datalen;
+       __u32 event_num;
+       __u32 event_code;
+       __u32 event_data;
+} __attribute__((aligned(sizeof(__u64))));
 
 
 #endif /* SCSI_NETLINK_FC_H */
index f4534c5..b4daad2 100644 (file)
@@ -33,6 +33,9 @@ config CC_HAS_ASM_GOTO
 config TOOLS_SUPPORT_RELR
        def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh)
 
+config CC_HAS_ASM_INLINE
+       def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null)
+
 config CC_HAS_WARN_MAYBE_UNINITIALIZED
        def_bool $(cc-option,-Wmaybe-uninitialized)
        help
@@ -51,7 +54,6 @@ config CC_DISABLE_WARN_MAYBE_UNINITIALIZED
 
 config CONSTRUCTORS
        bool
-       depends on !UML
 
 config IRQ_WORK
        bool
@@ -1961,6 +1963,10 @@ config BASE_SMALL
        default 0 if BASE_FULL
        default 1 if !BASE_FULL
 
+config MODULE_SIG_FORMAT
+       def_bool n
+       select SYSTEM_DATA_VERIFICATION
+
 menuconfig MODULES
        bool "Enable loadable module support"
        option modules
@@ -2045,8 +2051,7 @@ config MODULE_SRCVERSION_ALL
 
 config MODULE_SIG
        bool "Module signature verification"
-       depends on MODULES
-       select SYSTEM_DATA_VERIFICATION
+       select MODULE_SIG_FORMAT
        help
          Check modules for valid signatures upon load: the signature
          is simply appended to the module. For more information see
@@ -2056,6 +2061,11 @@ config MODULE_SIG
          kernel build dependency so that the signing tool can use its crypto
          library.
 
+         You should enable this option if you wish to use either
+         CONFIG_SECURITY_LOCKDOWN_LSM or lockdown functionality imposed via
+         another LSM - otherwise unsigned modules will be loadable regardless
+         of the lockdown policy.
+
          !!!WARNING!!!  If you enable this option, you MUST make sure that the
          module DOES NOT get stripped after being signed.  This includes the
          debuginfo strip done by some packagers (such as rpmbuild) and
@@ -2122,7 +2132,6 @@ config MODULE_SIG_HASH
 
 config MODULE_COMPRESS
        bool "Compress modules on installation"
-       depends on MODULES
        help
 
          Compresses kernel modules when 'make modules_install' is run; gzip or
@@ -2158,9 +2167,38 @@ config MODULE_COMPRESS_XZ
 
 endchoice
 
+config MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS
+       bool "Allow loading of modules with missing namespace imports"
+       help
+         Symbols exported with EXPORT_SYMBOL_NS*() are considered exported in
+         a namespace. A module that makes use of a symbol exported with such a
+         namespace is required to import the namespace via MODULE_IMPORT_NS().
+         There is no technical reason to enforce correct namespace imports,
+         but it creates consistency between symbols defining namespaces and
+         users importing namespaces they make use of. This option relaxes this
+         requirement and lifts the enforcement when loading a module.
+
+         If unsure, say N.
+
+config UNUSED_SYMBOLS
+       bool "Enable unused/obsolete exported symbols"
+       default y if X86
+       help
+         Unused but exported symbols make the kernel needlessly bigger.  For
+         that reason most of these unused exports will soon be removed.  This
+         option is provided temporarily to provide a transition period in case
+         some external kernel module needs one of these symbols anyway. If you
+         encounter such a case in your module, consider if you are actually
+         using the right API.  (rationale: since nobody in the kernel is using
+         this in a module, there is a pretty good chance it's actually the
+         wrong interface to use).  If you really need the symbol, please send a
+         mail to the linux kernel mailing list mentioning the symbol and why
+         you really need it, and what the merge plan to the mainline kernel for
+         your module is.
+
 config TRIM_UNUSED_KSYMS
        bool "Trim unused exported kernel symbols"
-       depends on MODULES && !UNUSED_SYMBOLS
+       depends on !UNUSED_SYMBOLS
        help
          The kernel and some modules make many symbols available for
          other modules to use via EXPORT_SYMBOL() and variants. Depending
index 653693d..91f6ebb 100644 (file)
@@ -507,7 +507,7 @@ void __init __weak mem_encrypt_init(void) { }
 
 void __init __weak poking_init(void) { }
 
-void __init __weak pgd_cache_init(void) { }
+void __init __weak pgtable_cache_init(void) { }
 
 bool initcall_debug;
 core_param(initcall_debug, initcall_debug, bool, 0644);
@@ -556,6 +556,7 @@ static void __init mm_init(void)
        report_meminit();
        mem_init();
        kmem_cache_init();
+       kmemleak_init();
        pgtable_init();
        debug_objects_mem_init();
        vmalloc_init();
@@ -564,7 +565,6 @@ static void __init mm_init(void)
        init_espfix_bsp();
        /* Should be run after espfix64 is set up. */
        pti_init();
-       pgd_cache_init();
 }
 
 void __init __weak arch_call_rest_init(void)
@@ -593,8 +593,8 @@ asmlinkage __visible void __init start_kernel(void)
        boot_cpu_init();
        page_address_init();
        pr_notice("%s", linux_banner);
+       early_security_init();
        setup_arch(&command_line);
-       mm_init_cpumask(&init_mm);
        setup_command_line(command_line);
        setup_nr_cpu_ids();
        setup_per_cpu_areas();
@@ -740,7 +740,6 @@ asmlinkage __visible void __init start_kernel(void)
                initrd_start = 0;
        }
 #endif
-       kmemleak_init();
        setup_per_cpu_pageset();
        numa_policy_init();
        acpi_early_init();
index 7c15729..3d920ff 100644 (file)
@@ -1240,15 +1240,14 @@ static int do_mq_notify(mqd_t mqdes, const struct sigevent *notification)
 
                        /* create the notify skb */
                        nc = alloc_skb(NOTIFY_COOKIE_LEN, GFP_KERNEL);
-                       if (!nc) {
-                               ret = -ENOMEM;
-                               goto out;
-                       }
+                       if (!nc)
+                               return -ENOMEM;
+
                        if (copy_from_user(nc->data,
                                        notification->sigev_value.sival_ptr,
                                        NOTIFY_COOKIE_LEN)) {
                                ret = -EFAULT;
-                               goto out;
+                               goto free_skb;
                        }
 
                        /* TODO: add a header? */
@@ -1264,8 +1263,7 @@ retry:
                        fdput(f);
                        if (IS_ERR(sock)) {
                                ret = PTR_ERR(sock);
-                               sock = NULL;
-                               goto out;
+                               goto free_skb;
                        }
 
                        timeo = MAX_SCHEDULE_TIMEOUT;
@@ -1274,11 +1272,8 @@ retry:
                                sock = NULL;
                                goto retry;
                        }
-                       if (ret) {
-                               sock = NULL;
-                               nc = NULL;
-                               goto out;
-                       }
+                       if (ret)
+                               return ret;
                }
        }
 
@@ -1333,7 +1328,8 @@ out_fput:
 out:
        if (sock)
                netlink_detachskb(sock, nc);
-       else if (nc)
+       else
+free_skb:
                dev_kfree_skb(nc);
 
        return ret;
index 7da4504..ec97a70 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1852,7 +1852,8 @@ static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid)
 {
        struct sem_undo *un;
 
-       list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {
+       list_for_each_entry_rcu(un, &ulp->list_proc, list_proc,
+                               spin_is_locked(&ulp->lock)) {
                if (un->semid == semid)
                        return un;
        }
index 25f9d83..daad787 100644 (file)
@@ -58,6 +58,7 @@ endif
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_MODULE_SIG) += module_signing.o
+obj-$(CONFIG_MODULE_SIG_FORMAT) += module_signature.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_CRASH_CORE) += crash_core.o
index cc0d0cf..a70f720 100644 (file)
@@ -14,8 +14,9 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
 #include <linux/kdev_t.h>
-#include <linux/parser.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
@@ -583,58 +584,52 @@ static const struct super_operations bpf_super_ops = {
 
 enum {
        OPT_MODE,
-       OPT_ERR,
 };
 
-static const match_table_t bpf_mount_tokens = {
-       { OPT_MODE, "mode=%o" },
-       { OPT_ERR, NULL },
+static const struct fs_parameter_spec bpf_param_specs[] = {
+       fsparam_u32oct  ("mode",                        OPT_MODE),
+       {}
+};
+
+static const struct fs_parameter_description bpf_fs_parameters = {
+       .name           = "bpf",
+       .specs          = bpf_param_specs,
 };
 
 struct bpf_mount_opts {
        umode_t mode;
 };
 
-static int bpf_parse_options(char *data, struct bpf_mount_opts *opts)
+static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
-       substring_t args[MAX_OPT_ARGS];
-       int option, token;
-       char *ptr;
+       struct bpf_mount_opts *opts = fc->fs_private;
+       struct fs_parse_result result;
+       int opt;
 
-       opts->mode = S_IRWXUGO;
-
-       while ((ptr = strsep(&data, ",")) != NULL) {
-               if (!*ptr)
-                       continue;
-
-               token = match_token(ptr, bpf_mount_tokens, args);
-               switch (token) {
-               case OPT_MODE:
-                       if (match_octal(&args[0], &option))
-                               return -EINVAL;
-                       opts->mode = option & S_IALLUGO;
-                       break;
+       opt = fs_parse(fc, &bpf_fs_parameters, param, &result);
+       if (opt < 0)
                /* We might like to report bad mount options here, but
                 * traditionally we've ignored all mount options, so we'd
                 * better continue to ignore non-existing options for bpf.
                 */
-               }
+               return opt == -ENOPARAM ? 0 : opt;
+
+       switch (opt) {
+       case OPT_MODE:
+               opts->mode = result.uint_32 & S_IALLUGO;
+               break;
        }
 
        return 0;
 }
 
-static int bpf_fill_super(struct super_block *sb, void *data, int silent)
+static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
 {
        static const struct tree_descr bpf_rfiles[] = { { "" } };
-       struct bpf_mount_opts opts;
+       struct bpf_mount_opts *opts = fc->fs_private;
        struct inode *inode;
        int ret;
 
-       ret = bpf_parse_options(data, &opts);
-       if (ret)
-               return ret;
-
        ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
        if (ret)
                return ret;
@@ -644,21 +639,50 @@ static int bpf_fill_super(struct super_block *sb, void *data, int silent)
        inode = sb->s_root->d_inode;
        inode->i_op = &bpf_dir_iops;
        inode->i_mode &= ~S_IALLUGO;
-       inode->i_mode |= S_ISVTX | opts.mode;
+       inode->i_mode |= S_ISVTX | opts->mode;
 
        return 0;
 }
 
-static struct dentry *bpf_mount(struct file_system_type *type, int flags,
-                               const char *dev_name, void *data)
+static int bpf_get_tree(struct fs_context *fc)
+{
+       return get_tree_nodev(fc, bpf_fill_super);
+}
+
+static void bpf_free_fc(struct fs_context *fc)
 {
-       return mount_nodev(type, flags, data, bpf_fill_super);
+       kfree(fc->fs_private);
+}
+
+static const struct fs_context_operations bpf_context_ops = {
+       .free           = bpf_free_fc,
+       .parse_param    = bpf_parse_param,
+       .get_tree       = bpf_get_tree,
+};
+
+/*
+ * Set up the filesystem mount context.
+ */
+static int bpf_init_fs_context(struct fs_context *fc)
+{
+       struct bpf_mount_opts *opts;
+
+       opts = kzalloc(sizeof(struct bpf_mount_opts), GFP_KERNEL);
+       if (!opts)
+               return -ENOMEM;
+
+       opts->mode = S_IRWXUGO;
+
+       fc->fs_private = opts;
+       fc->ops = &bpf_context_ops;
+       return 0;
 }
 
 static struct file_system_type bpf_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "bpf",
-       .mount          = bpf_mount,
+       .init_fs_context = bpf_init_fs_context,
+       .parameters     = &bpf_fs_parameters,
        .kill_sb        = kill_litter_super,
 };
 
index e1967e9..fc28e17 100644 (file)
@@ -392,8 +392,7 @@ enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 
 void __init cpu_smt_disable(bool force)
 {
-       if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
-               cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+       if (!cpu_smt_possible())
                return;
 
        if (force) {
@@ -438,6 +437,14 @@ static inline bool cpu_smt_allowed(unsigned int cpu)
         */
        return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
 }
+
+/* Returns true if SMT is supported and not forcefully (irreversibly) disabled */
+bool cpu_smt_possible(void)
+{
+       return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
+               cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
+}
+EXPORT_SYMBOL_GPL(cpu_smt_possible);
 #else
 static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
 #endif
index 10f1187..f76d6f7 100644 (file)
@@ -893,30 +893,25 @@ static struct sysrq_key_op sysrq_dbg_op = {
 };
 #endif
 
-static int kgdb_panic_event(struct notifier_block *self,
-                           unsigned long val,
-                           void *data)
+void kgdb_panic(const char *msg)
 {
+       if (!kgdb_io_module_registered)
+               return;
+
        /*
-        * Avoid entering the debugger if we were triggered due to a panic
-        * We don't want to get stuck waiting for input from user in such case.
-        * panic_timeout indicates the system should automatically
+        * We don't want to get stuck waiting for input from user if
+        * "panic_timeout" indicates the system should automatically
         * reboot on panic.
         */
        if (panic_timeout)
-               return NOTIFY_DONE;
+               return;
 
        if (dbg_kdb_mode)
-               kdb_printf("PANIC: %s\n", (char *)data);
+               kdb_printf("PANIC: %s\n", msg);
+
        kgdb_breakpoint();
-       return NOTIFY_DONE;
 }
 
-static struct notifier_block kgdb_panic_event_nb = {
-       .notifier_call  = kgdb_panic_event,
-       .priority       = INT_MAX,
-};
-
 void __weak kgdb_arch_late(void)
 {
 }
@@ -965,8 +960,6 @@ static void kgdb_register_callbacks(void)
                        kgdb_arch_late();
                register_module_notifier(&dbg_module_load_nb);
                register_reboot_notifier(&dbg_reboot_notifier);
-               atomic_notifier_chain_register(&panic_notifier_list,
-                                              &kgdb_panic_event_nb);
 #ifdef CONFIG_MAGIC_SYSRQ
                register_sysrq_key('g', &sysrq_dbg_op);
 #endif
@@ -980,16 +973,14 @@ static void kgdb_register_callbacks(void)
 static void kgdb_unregister_callbacks(void)
 {
        /*
-        * When this routine is called KGDB should unregister from the
-        * panic handler and clean up, making sure it is not handling any
+        * When this routine is called KGDB should unregister from
+        * handlers and clean up, making sure it is not handling any
         * break exceptions at the time.
         */
        if (kgdb_io_module_registered) {
                kgdb_io_module_registered = 0;
                unregister_reboot_notifier(&dbg_reboot_notifier);
                unregister_module_notifier(&dbg_module_load_nb);
-               atomic_notifier_chain_unregister(&panic_notifier_list,
-                                              &kgdb_panic_event_nb);
                kgdb_arch_exit();
 #ifdef CONFIG_MAGIC_SYSRQ
                unregister_sysrq_key('g', &sysrq_dbg_op);
index fc482c8..57fb4dc 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/binfmts.h>
+#include <linux/elfcore.h>
 
 Elf_Half __weak elf_core_extra_phdrs(void)
 {
index 4f08b17..4655adb 100644 (file)
@@ -2239,7 +2239,7 @@ static void __perf_event_disable(struct perf_event *event,
  *
  * If event->ctx is a cloned context, callers must make sure that
  * every task struct that event->ctx->task could possibly point to
- * remains valid.  This condition is satisifed when called through
+ * remains valid.  This condition is satisfied when called through
  * perf_event_for_each_child or perf_event_for_each because they
  * hold the top-level event's child_mutex, so any descendant that
  * goes to exit will block in perf_event_exit_event().
@@ -6054,7 +6054,7 @@ static void perf_sample_regs_intr(struct perf_regs *regs_intr,
  * Get remaining task size from user stack pointer.
  *
  * It'd be better to take stack vma map and limit this more
- * precisly, but there's no way to get it safely under interrupt,
+ * precisely, but there's no way to get it safely under interrupt,
  * so using TASK_SIZE as limit.
  */
 static u64 perf_ustack_task_size(struct pt_regs *regs)
@@ -6616,7 +6616,7 @@ void perf_prepare_sample(struct perf_event_header *header,
 
        if (sample_type & PERF_SAMPLE_STACK_USER) {
                /*
-                * Either we need PERF_SAMPLE_STACK_USER bit to be allways
+                * Either we need PERF_SAMPLE_STACK_USER bit to be always
                 * processed as the last one or have additional check added
                 * in case new sample type is added, because we could eat
                 * up the rest of the sample size.
@@ -10917,6 +10917,13 @@ SYSCALL_DEFINE5(perf_event_open,
            perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
                return -EACCES;
 
+       err = security_locked_down(LOCKDOWN_PERF);
+       if (err && (attr.sample_type & PERF_SAMPLE_REGS_INTR))
+               /* REGS_INTR can leak data, lockdown must prevent this */
+               return err;
+
+       err = 0;
+
        /*
         * In cgroup mode, the pid argument is used to pass the fd
         * opened to the cgroup directory in cgroupfs. The cpu argument
index 84fa004..94d38a3 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/percpu-rwsem.h>
 #include <linux/task_work.h>
 #include <linux/shmem_fs.h>
+#include <linux/khugepaged.h>
 
 #include <linux/uprobes.h>
 
@@ -143,17 +144,19 @@ static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
  *
  * @vma:      vma that holds the pte pointing to page
  * @addr:     address the old @page is mapped at
- * @page:     the cowed page we are replacing by kpage
- * @kpage:    the modified page we replace page by
+ * @old_page: the page we are replacing by new_page
+ * @new_page: the modified page we replace page by
  *
- * Returns 0 on success, -EFAULT on failure.
+ * If @new_page is NULL, only unmap @old_page.
+ *
+ * Returns 0 on success, negative error code otherwise.
  */
 static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
                                struct page *old_page, struct page *new_page)
 {
        struct mm_struct *mm = vma->vm_mm;
        struct page_vma_mapped_walk pvmw = {
-               .page = old_page,
+               .page = compound_head(old_page),
                .vma = vma,
                .address = addr,
        };
@@ -164,12 +167,12 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
        mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
                                addr + PAGE_SIZE);
 
-       VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
-
-       err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
-                       false);
-       if (err)
-               return err;
+       if (new_page) {
+               err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
+                                           &memcg, false);
+               if (err)
+                       return err;
+       }
 
        /* For try_to_free_swap() and munlock_vma_page() below */
        lock_page(old_page);
@@ -177,15 +180,20 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
        mmu_notifier_invalidate_range_start(&range);
        err = -EAGAIN;
        if (!page_vma_mapped_walk(&pvmw)) {
-               mem_cgroup_cancel_charge(new_page, memcg, false);
+               if (new_page)
+                       mem_cgroup_cancel_charge(new_page, memcg, false);
                goto unlock;
        }
        VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
 
-       get_page(new_page);
-       page_add_new_anon_rmap(new_page, vma, addr, false);
-       mem_cgroup_commit_charge(new_page, memcg, false, false);
-       lru_cache_add_active_or_unevictable(new_page, vma);
+       if (new_page) {
+               get_page(new_page);
+               page_add_new_anon_rmap(new_page, vma, addr, false);
+               mem_cgroup_commit_charge(new_page, memcg, false, false);
+               lru_cache_add_active_or_unevictable(new_page, vma);
+       } else
+               /* no new page, just dec_mm_counter for old_page */
+               dec_mm_counter(mm, MM_ANONPAGES);
 
        if (!PageAnon(old_page)) {
                dec_mm_counter(mm, mm_counter_file(old_page));
@@ -194,8 +202,9 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
        flush_cache_page(vma, addr, pte_pfn(*pvmw.pte));
        ptep_clear_flush_notify(vma, addr, pvmw.pte);
-       set_pte_at_notify(mm, addr, pvmw.pte,
-                       mk_pte(new_page, vma->vm_page_prot));
+       if (new_page)
+               set_pte_at_notify(mm, addr, pvmw.pte,
+                                 mk_pte(new_page, vma->vm_page_prot));
 
        page_remove_rmap(old_page, false);
        if (!page_mapped(old_page))
@@ -464,6 +473,7 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
        struct page *old_page, *new_page;
        struct vm_area_struct *vma;
        int ret, is_register, ref_ctr_updated = 0;
+       bool orig_page_huge = false;
 
        is_register = is_swbp_insn(&opcode);
        uprobe = container_of(auprobe, struct uprobe, arch);
@@ -471,7 +481,7 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 retry:
        /* Read the page with vaddr into memory */
        ret = get_user_pages_remote(NULL, mm, vaddr, 1,
-                       FOLL_FORCE | FOLL_SPLIT, &old_page, &vma, NULL);
+                       FOLL_FORCE | FOLL_SPLIT_PMD, &old_page, &vma, NULL);
        if (ret <= 0)
                return ret;
 
@@ -488,6 +498,10 @@ retry:
                ref_ctr_updated = 1;
        }
 
+       ret = 0;
+       if (!is_register && !PageAnon(old_page))
+               goto put_old;
+
        ret = anon_vma_prepare(vma);
        if (ret)
                goto put_old;
@@ -501,8 +515,33 @@ retry:
        copy_highpage(new_page, old_page);
        copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
+       if (!is_register) {
+               struct page *orig_page;
+               pgoff_t index;
+
+               VM_BUG_ON_PAGE(!PageAnon(old_page), old_page);
+
+               index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT;
+               orig_page = find_get_page(vma->vm_file->f_inode->i_mapping,
+                                         index);
+
+               if (orig_page) {
+                       if (PageUptodate(orig_page) &&
+                           pages_identical(new_page, orig_page)) {
+                               /* let go new_page */
+                               put_page(new_page);
+                               new_page = NULL;
+
+                               if (PageCompound(orig_page))
+                                       orig_page_huge = true;
+                       }
+                       put_page(orig_page);
+               }
+       }
+
        ret = __replace_page(vma, vaddr, old_page, new_page);
-       put_page(new_page);
+       if (new_page)
+               put_page(new_page);
 put_old:
        put_page(old_page);
 
@@ -513,6 +552,10 @@ put_old:
        if (ret && is_register && ref_ctr_updated)
                update_ref_ctr(uprobe, mm, -1);
 
+       /* try collapse pmd for compound page */
+       if (!ret && orig_page_huge)
+               collapse_pte_mapped_thp(mm, vaddr);
+
        return ret;
 }
 
index 22ab6a4..a46a50d 100644 (file)
@@ -182,6 +182,11 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
        put_task_struct(tsk);
 }
 
+void put_task_struct_rcu_user(struct task_struct *task)
+{
+       if (refcount_dec_and_test(&task->rcu_users))
+               call_rcu(&task->rcu, delayed_put_task_struct);
+}
 
 void release_task(struct task_struct *p)
 {
@@ -222,76 +227,13 @@ repeat:
 
        write_unlock_irq(&tasklist_lock);
        release_thread(p);
-       call_rcu(&p->rcu, delayed_put_task_struct);
+       put_task_struct_rcu_user(p);
 
        p = leader;
        if (unlikely(zap_leader))
                goto repeat;
 }
 
-/*
- * Note that if this function returns a valid task_struct pointer (!NULL)
- * task->usage must remain >0 for the duration of the RCU critical section.
- */
-struct task_struct *task_rcu_dereference(struct task_struct **ptask)
-{
-       struct sighand_struct *sighand;
-       struct task_struct *task;
-
-       /*
-        * We need to verify that release_task() was not called and thus
-        * delayed_put_task_struct() can't run and drop the last reference
-        * before rcu_read_unlock(). We check task->sighand != NULL,
-        * but we can read the already freed and reused memory.
-        */
-retry:
-       task = rcu_dereference(*ptask);
-       if (!task)
-               return NULL;
-
-       probe_kernel_address(&task->sighand, sighand);
-
-       /*
-        * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
-        * was already freed we can not miss the preceding update of this
-        * pointer.
-        */
-       smp_rmb();
-       if (unlikely(task != READ_ONCE(*ptask)))
-               goto retry;
-
-       /*
-        * We've re-checked that "task == *ptask", now we have two different
-        * cases:
-        *
-        * 1. This is actually the same task/task_struct. In this case
-        *    sighand != NULL tells us it is still alive.
-        *
-        * 2. This is another task which got the same memory for task_struct.
-        *    We can't know this of course, and we can not trust
-        *    sighand != NULL.
-        *
-        *    In this case we actually return a random value, but this is
-        *    correct.
-        *
-        *    If we return NULL - we can pretend that we actually noticed that
-        *    *ptask was updated when the previous task has exited. Or pretend
-        *    that probe_slab_address(&sighand) reads NULL.
-        *
-        *    If we return the new task (because sighand is not NULL for any
-        *    reason) - this is fine too. This (new) task can't go away before
-        *    another gp pass.
-        *
-        *    And note: We could even eliminate the false positive if re-read
-        *    task->sighand once again to avoid the falsely NULL. But this case
-        *    is very unlikely so we don't care.
-        */
-       if (!sighand)
-               return NULL;
-
-       return task;
-}
-
 void rcuwait_wake_up(struct rcuwait *w)
 {
        struct task_struct *task;
@@ -311,10 +253,6 @@ void rcuwait_wake_up(struct rcuwait *w)
         */
        smp_mb(); /* (B) */
 
-       /*
-        * Avoid using task_rcu_dereference() magic as long as we are careful,
-        * see comment in rcuwait_wait_event() regarding ->exit_state.
-        */
        task = rcu_dereference(w->task);
        if (task)
                wake_up_process(task);
index 53e7807..f9572f4 100644 (file)
@@ -125,6 +125,15 @@ int nr_threads;                    /* The idle threads do not count.. */
 
 static int max_threads;                /* tunable limit on nr_threads */
 
+#define NAMED_ARRAY_INDEX(x)   [x] = __stringify(x)
+
+static const char * const resident_page_types[] = {
+       NAMED_ARRAY_INDEX(MM_FILEPAGES),
+       NAMED_ARRAY_INDEX(MM_ANONPAGES),
+       NAMED_ARRAY_INDEX(MM_SWAPENTS),
+       NAMED_ARRAY_INDEX(MM_SHMEMPAGES),
+};
+
 DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
@@ -645,12 +654,15 @@ static void check_mm(struct mm_struct *mm)
 {
        int i;
 
+       BUILD_BUG_ON_MSG(ARRAY_SIZE(resident_page_types) != NR_MM_COUNTERS,
+                        "Please make sure 'struct resident_page_types[]' is updated as well");
+
        for (i = 0; i < NR_MM_COUNTERS; i++) {
                long x = atomic_long_read(&mm->rss_stat.count[i]);
 
                if (unlikely(x))
-                       printk(KERN_ALERT "BUG: Bad rss-counter state "
-                                         "mm:%p idx:%d val:%ld\n", mm, i, x);
+                       pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n",
+                                mm, resident_page_types[i], x);
        }
 
        if (mm_pgtables_bytes(mm))
@@ -903,10 +915,12 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
                tsk->cpus_ptr = &tsk->cpus_mask;
 
        /*
-        * One for us, one for whoever does the "release_task()" (usually
-        * parent)
+        * One for the user space visible state that goes away when reaped.
+        * One for the scheduler.
         */
-       refcount_set(&tsk->usage, 2);
+       refcount_set(&tsk->rcu_users, 2);
+       /* One for the rcu users */
+       refcount_set(&tsk->usage, 1);
 #ifdef CONFIG_BLK_DEV_IO_TRACE
        tsk->btrace_seq = 0;
 #endif
@@ -1009,7 +1023,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
        mm_init_owner(mm, p);
        RCU_INIT_POINTER(mm->exe_file, NULL);
        mmu_notifier_mm_init(mm);
-       hmm_mm_init(mm);
        init_tlb_flush_pending(mm);
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
        mm->pmd_huge_pte = NULL;
index 3941a9c..060e8e7 100644 (file)
@@ -4,7 +4,7 @@ menu "GCOV-based kernel profiling"
 config GCOV_KERNEL
        bool "Enable gcov-based kernel profiling"
        depends on DEBUG_FS
-       select CONSTRUCTORS if !UML
+       select CONSTRUCTORS
        default n
        ---help---
        This option enables gcov-based code profiling (e.g. for code coverage
index 1b018f1..bc933c0 100644 (file)
@@ -206,6 +206,14 @@ static inline int kexec_load_check(unsigned long nr_segments,
                return result;
 
        /*
+        * kexec can be used to circumvent module loading restrictions, so
+        * prevent loading in that case
+        */
+       result = security_locked_down(LOCKDOWN_KEXEC);
+       if (result)
+               return result;
+
+       /*
         * Verify we have a legal set of flags
         * This leaves us room for future extensions.
         */
index d587072..15d70a9 100644 (file)
@@ -300,6 +300,8 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
 {
        struct page *pages;
 
+       if (fatal_signal_pending(current))
+               return NULL;
        pages = alloc_pages(gfp_mask & ~__GFP_ZERO, order);
        if (pages) {
                unsigned int count, i;
index b8cc032..79f252a 100644 (file)
@@ -88,7 +88,7 @@ int __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
        return kexec_image_post_load_cleanup_default(image);
 }
 
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC_SIG
 static int kexec_image_verify_sig_default(struct kimage *image, void *buf,
                                          unsigned long buf_len)
 {
@@ -177,6 +177,59 @@ void kimage_file_post_load_cleanup(struct kimage *image)
        image->image_loader_data = NULL;
 }
 
+#ifdef CONFIG_KEXEC_SIG
+static int
+kimage_validate_signature(struct kimage *image)
+{
+       const char *reason;
+       int ret;
+
+       ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
+                                          image->kernel_buf_len);
+       switch (ret) {
+       case 0:
+               break;
+
+               /* Certain verification errors are non-fatal if we're not
+                * checking errors, provided we aren't mandating that there
+                * must be a valid signature.
+                */
+       case -ENODATA:
+               reason = "kexec of unsigned image";
+               goto decide;
+       case -ENOPKG:
+               reason = "kexec of image with unsupported crypto";
+               goto decide;
+       case -ENOKEY:
+               reason = "kexec of image with unavailable key";
+       decide:
+               if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) {
+                       pr_notice("%s rejected\n", reason);
+                       return ret;
+               }
+
+               /* If IMA is guaranteed to appraise a signature on the kexec
+                * image, permit it even if the kernel is otherwise locked
+                * down.
+                */
+               if (!ima_appraise_signature(READING_KEXEC_IMAGE) &&
+                   security_locked_down(LOCKDOWN_KEXEC))
+                       return -EPERM;
+
+               return 0;
+
+               /* All other errors are fatal, including nomem, unparseable
+                * signatures and signature check failures - even if signatures
+                * aren't required.
+                */
+       default:
+               pr_notice("kernel signature verification failed (%d).\n", ret);
+       }
+
+       return ret;
+}
+#endif
+
 /*
  * In file mode list of segments is prepared by kernel. Copy relevant
  * data from user space, do error checking, prepare segment list
@@ -186,7 +239,7 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
                             const char __user *cmdline_ptr,
                             unsigned long cmdline_len, unsigned flags)
 {
-       int ret = 0;
+       int ret;
        void *ldata;
        loff_t size;
 
@@ -202,14 +255,11 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
        if (ret)
                goto out;
 
-#ifdef CONFIG_KEXEC_VERIFY_SIG
-       ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
-                                          image->kernel_buf_len);
-       if (ret) {
-               pr_debug("kernel signature verification failed.\n");
+#ifdef CONFIG_KEXEC_SIG
+       ret = kimage_validate_signature(image);
+
+       if (ret)
                goto out;
-       }
-       pr_debug("kernel signature verification successful.\n");
 #endif
        /* It is possible that there no initramfs is being loaded */
        if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
index c4ce08f..ab4a460 100644 (file)
@@ -1175,6 +1175,7 @@ err:
        pr_warn("patch '%s' failed for module '%s', refusing to load module '%s'\n",
                patch->mod->name, obj->mod->name, obj->mod->name);
        mod->klp_alive = false;
+       obj->mod = NULL;
        klp_cleanup_module_patches_limited(mod, patch);
        mutex_unlock(&klp_mutex);
 
index 89bab07..e84d21a 100644 (file)
@@ -269,7 +269,7 @@ pv_wait_early(struct pv_node *prev, int loop)
        if ((loop & PV_PREV_CHECK_MASK) != 0)
                return false;
 
-       return READ_ONCE(prev->state) != vcpu_running || vcpu_is_preempted(prev->cpu);
+       return READ_ONCE(prev->state) != vcpu_running;
 }
 
 /*
index 9ee9342..ff2d735 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/export.h>
 #include <linux/extable.h>
 #include <linux/moduleloader.h>
+#include <linux/module_signature.h>
 #include <linux/trace_events.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
@@ -544,12 +545,20 @@ static const char *kernel_symbol_name(const struct kernel_symbol *sym)
 #endif
 }
 
-static int cmp_name(const void *va, const void *vb)
+static const char *kernel_symbol_namespace(const struct kernel_symbol *sym)
 {
-       const char *a;
-       const struct kernel_symbol *b;
-       a = va; b = vb;
-       return strcmp(a, kernel_symbol_name(b));
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+       if (!sym->namespace_offset)
+               return NULL;
+       return offset_to_ptr(&sym->namespace_offset);
+#else
+       return sym->namespace;
+#endif
+}
+
+static int cmp_name(const void *name, const void *sym)
+{
+       return strcmp(name, kernel_symbol_name(sym));
 }
 
 static bool find_exported_symbol_in_section(const struct symsearch *syms,
@@ -1379,6 +1388,41 @@ static inline int same_magic(const char *amagic, const char *bmagic,
 }
 #endif /* CONFIG_MODVERSIONS */
 
+static char *get_modinfo(const struct load_info *info, const char *tag);
+static char *get_next_modinfo(const struct load_info *info, const char *tag,
+                             char *prev);
+
+static int verify_namespace_is_imported(const struct load_info *info,
+                                       const struct kernel_symbol *sym,
+                                       struct module *mod)
+{
+       const char *namespace;
+       char *imported_namespace;
+
+       namespace = kernel_symbol_namespace(sym);
+       if (namespace) {
+               imported_namespace = get_modinfo(info, "import_ns");
+               while (imported_namespace) {
+                       if (strcmp(namespace, imported_namespace) == 0)
+                               return 0;
+                       imported_namespace = get_next_modinfo(
+                               info, "import_ns", imported_namespace);
+               }
+#ifdef CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS
+               pr_warn(
+#else
+               pr_err(
+#endif
+                       "%s: module uses symbol (%s) from namespace %s, but does not import it.\n",
+                       mod->name, kernel_symbol_name(sym), namespace);
+#ifndef CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS
+               return -EINVAL;
+#endif
+       }
+       return 0;
+}
+
+
 /* Resolve a symbol for this module.  I.e. if we find one, record usage. */
 static const struct kernel_symbol *resolve_symbol(struct module *mod,
                                                  const struct load_info *info,
@@ -1407,6 +1451,12 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod,
                goto getname;
        }
 
+       err = verify_namespace_is_imported(info, sym, mod);
+       if (err) {
+               sym = ERR_PTR(err);
+               goto getname;
+       }
+
        err = ref_module(mod, owner);
        if (err) {
                sym = ERR_PTR(err);
@@ -2481,7 +2531,8 @@ static char *next_string(char *string, unsigned long *secsize)
        return string;
 }
 
-static char *get_modinfo(struct load_info *info, const char *tag)
+static char *get_next_modinfo(const struct load_info *info, const char *tag,
+                             char *prev)
 {
        char *p;
        unsigned int taglen = strlen(tag);
@@ -2492,13 +2543,25 @@ static char *get_modinfo(struct load_info *info, const char *tag)
         * get_modinfo() calls made before rewrite_section_headers()
         * must use sh_offset, as sh_addr isn't set!
         */
-       for (p = (char *)info->hdr + infosec->sh_offset; p; p = next_string(p, &size)) {
+       char *modinfo = (char *)info->hdr + infosec->sh_offset;
+
+       if (prev) {
+               size -= prev - modinfo;
+               modinfo = next_string(prev, &size);
+       }
+
+       for (p = modinfo; p; p = next_string(p, &size)) {
                if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=')
                        return p + taglen + 1;
        }
        return NULL;
 }
 
+static char *get_modinfo(const struct load_info *info, const char *tag)
+{
+       return get_next_modinfo(info, tag, NULL);
+}
+
 static void setup_modinfo(struct module *mod, struct load_info *info)
 {
        struct module_attribute *attr;
@@ -2776,8 +2839,9 @@ static inline void kmemleak_load_module(const struct module *mod,
 #ifdef CONFIG_MODULE_SIG
 static int module_sig_check(struct load_info *info, int flags)
 {
-       int err = -ENOKEY;
+       int err = -ENODATA;
        const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
+       const char *reason;
        const void *mod = info->hdr;
 
        /*
@@ -2792,16 +2856,38 @@ static int module_sig_check(struct load_info *info, int flags)
                err = mod_verify_sig(mod, info);
        }
 
-       if (!err) {
+       switch (err) {
+       case 0:
                info->sig_ok = true;
                return 0;
-       }
 
-       /* Not having a signature is only an error if we're strict. */
-       if (err == -ENOKEY && !is_module_sig_enforced())
-               err = 0;
+               /* We don't permit modules to be loaded into trusted kernels
+                * without a valid signature on them, but if we're not
+                * enforcing, certain errors are non-fatal.
+                */
+       case -ENODATA:
+               reason = "Loading of unsigned module";
+               goto decide;
+       case -ENOPKG:
+               reason = "Loading of module with unsupported crypto";
+               goto decide;
+       case -ENOKEY:
+               reason = "Loading of module with unavailable key";
+       decide:
+               if (is_module_sig_enforced()) {
+                       pr_notice("%s is rejected\n", reason);
+                       return -EKEYREJECTED;
+               }
 
-       return err;
+               return security_locked_down(LOCKDOWN_MODULE_SIGNATURE);
+
+               /* All other errors are fatal, including nomem, unparseable
+                * signatures and signature check failures - even if signatures
+                * aren't required.
+                */
+       default:
+               return err;
+       }
 }
 #else /* !CONFIG_MODULE_SIG */
 static int module_sig_check(struct load_info *info, int flags)
diff --git a/kernel/module_signature.c b/kernel/module_signature.c
new file mode 100644 (file)
index 0000000..4224a10
--- /dev/null
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Module signature checker
+ *
+ * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/errno.h>
+#include <linux/printk.h>
+#include <linux/module_signature.h>
+#include <asm/byteorder.h>
+
+/**
+ * mod_check_sig - check that the given signature is sane
+ *
+ * @ms:                Signature to check.
+ * @file_len:  Size of the file to which @ms is appended.
+ * @name:      What is being checked. Used for error messages.
+ */
+int mod_check_sig(const struct module_signature *ms, size_t file_len,
+                 const char *name)
+{
+       if (be32_to_cpu(ms->sig_len) >= file_len - sizeof(*ms))
+               return -EBADMSG;
+
+       if (ms->id_type != PKEY_ID_PKCS7) {
+               pr_err("%s: Module is not signed with expected PKCS#7 message\n",
+                      name);
+               return -ENOPKG;
+       }
+
+       if (ms->algo != 0 ||
+           ms->hash != 0 ||
+           ms->signer_len != 0 ||
+           ms->key_id_len != 0 ||
+           ms->__pad[0] != 0 ||
+           ms->__pad[1] != 0 ||
+           ms->__pad[2] != 0) {
+               pr_err("%s: PKCS#7 signature info has unexpected non-zero params\n",
+                      name);
+               return -EBADMSG;
+       }
+
+       return 0;
+}
index b10fb19..9d9fc67 100644 (file)
@@ -7,37 +7,13 @@
 
 #include <linux/kernel.h>
 #include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/module_signature.h>
 #include <linux/string.h>
 #include <linux/verification.h>
 #include <crypto/public_key.h>
 #include "module-internal.h"
 
-enum pkey_id_type {
-       PKEY_ID_PGP,            /* OpenPGP generated key ID */
-       PKEY_ID_X509,           /* X.509 arbitrary subjectKeyIdentifier */
-       PKEY_ID_PKCS7,          /* Signature in PKCS#7 message */
-};
-
-/*
- * Module signature information block.
- *
- * The constituents of the signature section are, in order:
- *
- *     - Signer's name
- *     - Key identifier
- *     - Signature data
- *     - Information block
- */
-struct module_signature {
-       u8      algo;           /* Public-key crypto algorithm [0] */
-       u8      hash;           /* Digest algorithm [0] */
-       u8      id_type;        /* Key identifier type [PKEY_ID_PKCS7] */
-       u8      signer_len;     /* Length of signer's name [0] */
-       u8      key_id_len;     /* Length of key identifier [0] */
-       u8      __pad[3];
-       __be32  sig_len;        /* Length of signature data */
-};
-
 /*
  * Verify the signature on a module.
  */
@@ -45,6 +21,7 @@ int mod_verify_sig(const void *mod, struct load_info *info)
 {
        struct module_signature ms;
        size_t sig_len, modlen = info->len;
+       int ret;
 
        pr_devel("==>%s(,%zu)\n", __func__, modlen);
 
@@ -52,32 +29,15 @@ int mod_verify_sig(const void *mod, struct load_info *info)
                return -EBADMSG;
 
        memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms));
-       modlen -= sizeof(ms);
+
+       ret = mod_check_sig(&ms, modlen, info->name);
+       if (ret)
+               return ret;
 
        sig_len = be32_to_cpu(ms.sig_len);
-       if (sig_len >= modlen)
-               return -EBADMSG;
-       modlen -= sig_len;
+       modlen -= sig_len + sizeof(ms);
        info->len = modlen;
 
-       if (ms.id_type != PKEY_ID_PKCS7) {
-               pr_err("%s: Module is not signed with expected PKCS#7 message\n",
-                      info->name);
-               return -ENOPKG;
-       }
-
-       if (ms.algo != 0 ||
-           ms.hash != 0 ||
-           ms.signer_len != 0 ||
-           ms.key_id_len != 0 ||
-           ms.__pad[0] != 0 ||
-           ms.__pad[1] != 0 ||
-           ms.__pad[2] != 0) {
-               pr_err("%s: PKCS#7 signature info has unexpected non-zero params\n",
-                      info->name);
-               return -EBADMSG;
-       }
-
        return verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len,
                                      VERIFY_USE_SECONDARY_KEYRING,
                                      VERIFYING_MODULE_SIGNATURE,
index 057540b..47e8ebc 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/debug_locks.h>
 #include <linux/sched/debug.h>
 #include <linux/interrupt.h>
+#include <linux/kgdb.h>
 #include <linux/kmsg_dump.h>
 #include <linux/kallsyms.h>
 #include <linux/notifier.h>
@@ -220,6 +221,13 @@ void panic(const char *fmt, ...)
 #endif
 
        /*
+        * If kgdb is enabled, give it a chance to run before we stop all
+        * the other CPUs or else we won't be able to debug processes left
+        * running on them.
+        */
+       kgdb_panic(buf);
+
+       /*
         * If we have crashed and we have a crash kernel loaded let it handle
         * everything else.
         * If we want to run this after calling panic_notifiers, pass
@@ -551,9 +559,6 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 {
        disable_trace_on_warning();
 
-       if (args)
-               pr_warn(CUT_HERE);
-
        if (file)
                pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n",
                        raw_smp_processor_id(), current->pid, file, line,
@@ -591,37 +596,26 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
        add_taint(taint, LOCKDEP_STILL_OK);
 }
 
-#ifdef WANT_WARN_ON_SLOWPATH
-void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...)
+#ifndef __WARN_FLAGS
+void warn_slowpath_fmt(const char *file, int line, unsigned taint,
+                      const char *fmt, ...)
 {
        struct warn_args args;
 
-       args.fmt = fmt;
-       va_start(args.args, fmt);
-       __warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL,
-              &args);
-       va_end(args.args);
-}
-EXPORT_SYMBOL(warn_slowpath_fmt);
+       pr_warn(CUT_HERE);
 
-void warn_slowpath_fmt_taint(const char *file, int line,
-                            unsigned taint, const char *fmt, ...)
-{
-       struct warn_args args;
+       if (!fmt) {
+               __warn(file, line, __builtin_return_address(0), taint,
+                      NULL, NULL);
+               return;
+       }
 
        args.fmt = fmt;
        va_start(args.args, fmt);
        __warn(file, line, __builtin_return_address(0), taint, NULL, &args);
        va_end(args.args);
 }
-EXPORT_SYMBOL(warn_slowpath_fmt_taint);
-
-void warn_slowpath_null(const char *file, int line)
-{
-       pr_warn(CUT_HERE);
-       __warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL, NULL);
-}
-EXPORT_SYMBOL(warn_slowpath_null);
+EXPORT_SYMBOL(warn_slowpath_fmt);
 #else
 void __warn_printk(const char *fmt, ...)
 {
index cf44878..8e56f8b 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/ctype.h>
+#include <linux/security.h>
 
 #ifdef CONFIG_SYSFS
 /* Protects all built-in parameters, modules use their own param_lock */
@@ -96,13 +97,19 @@ bool parameq(const char *a, const char *b)
        return parameqn(a, b, strlen(a)+1);
 }
 
-static void param_check_unsafe(const struct kernel_param *kp)
+static bool param_check_unsafe(const struct kernel_param *kp)
 {
+       if (kp->flags & KERNEL_PARAM_FL_HWPARAM &&
+           security_locked_down(LOCKDOWN_MODULE_PARAMETERS))
+               return false;
+
        if (kp->flags & KERNEL_PARAM_FL_UNSAFE) {
                pr_notice("Setting dangerous option %s - tainting kernel\n",
                          kp->name);
                add_taint(TAINT_USER, LOCKDEP_STILL_OK);
        }
+
+       return true;
 }
 
 static int parse_one(char *param,
@@ -132,8 +139,10 @@ static int parse_one(char *param,
                        pr_debug("handling %s with %p\n", param,
                                params[i].ops->set);
                        kernel_param_lock(params[i].mod);
-                       param_check_unsafe(&params[i]);
-                       err = params[i].ops->set(val, &params[i]);
+                       if (param_check_unsafe(&params[i]))
+                               err = params[i].ops->set(val, &params[i]);
+                       else
+                               err = -EPERM;
                        kernel_param_unlock(params[i].mod);
                        return err;
                }
@@ -553,8 +562,10 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
                return -EPERM;
 
        kernel_param_lock(mk->mod);
-       param_check_unsafe(attribute->param);
-       err = attribute->param->ops->set(buf, attribute->param);
+       if (param_check_unsafe(attribute->param))
+               err = attribute->param->ops->set(buf, attribute->param);
+       else
+               err = -EPERM;
        kernel_param_unlock(mk->mod);
        if (!err)
                return len;
index cd7434e..3c0a5a8 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/ctype.h>
 #include <linux/genhd.h>
 #include <linux/ktime.h>
+#include <linux/security.h>
 #include <trace/events/power.h>
 
 #include "power.h"
@@ -68,7 +69,7 @@ static const struct platform_hibernation_ops *hibernation_ops;
 
 bool hibernation_available(void)
 {
-       return (nohibernate == 0);
+       return nohibernate == 0 && !security_locked_down(LOCKDOWN_HIBERNATION);
 }
 
 /**
index 1d21eba..17a9591 100644 (file)
 
 int _braille_console_setup(char **str, char **brl_options)
 {
-       if (!strncmp(*str, "brl,", 4)) {
+       size_t len;
+
+       len = str_has_prefix(*str, "brl,");
+       if (len) {
                *brl_options = "";
-               *str += 4;
-       } else if (!strncmp(*str, "brl=", 4)) {
-               *brl_options = *str + 4;
+               *str += len;
+               return 0;
+       }
+
+       len = str_has_prefix(*str, "brl=");
+       if (len) {
+               *brl_options = *str + len;
                *str = strchr(*brl_options, ',');
                if (!*str) {
                        pr_err("need port name after brl=\n");
index 1888f6a..ca65327 100644 (file)
@@ -118,19 +118,29 @@ static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;
 
 static int __control_devkmsg(char *str)
 {
+       size_t len;
+
        if (!str)
                return -EINVAL;
 
-       if (!strncmp(str, "on", 2)) {
+       len = str_has_prefix(str, "on");
+       if (len) {
                devkmsg_log = DEVKMSG_LOG_MASK_ON;
-               return 2;
-       } else if (!strncmp(str, "off", 3)) {
+               return len;
+       }
+
+       len = str_has_prefix(str, "off");
+       if (len) {
                devkmsg_log = DEVKMSG_LOG_MASK_OFF;
-               return 3;
-       } else if (!strncmp(str, "ratelimit", 9)) {
+               return len;
+       }
+
+       len = str_has_prefix(str, "ratelimit");
+       if (len) {
                devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;
-               return 9;
+               return len;
        }
+
        return -EINVAL;
 }
 
@@ -3274,7 +3284,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
        /* move first record forward until length fits into the buffer */
        seq = dumper->cur_seq;
        idx = dumper->cur_idx;
-       while (l > size && seq < dumper->next_seq) {
+       while (l >= size && seq < dumper->next_seq) {
                struct printk_log *msg = log_from_idx(idx);
 
                l -= msg_print_text(msg, true, time, NULL, 0);
index 7ea4306..76036a4 100644 (file)
@@ -487,8 +487,8 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
        while (start < end &&
               !find_next_iomem_res(start, end, flags, IORES_DESC_NONE,
                                    false, &res)) {
-               pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
-               end_pfn = (res.end + 1) >> PAGE_SHIFT;
+               pfn = PFN_UP(res.start);
+               end_pfn = PFN_DOWN(res.end + 1);
                if (end_pfn > pfn)
                        ret = (*func)(pfn, end_pfn - pfn, arg);
                if (ret)
@@ -1644,19 +1644,8 @@ void resource_list_free(struct list_head *head)
 EXPORT_SYMBOL(resource_list_free);
 
 #ifdef CONFIG_DEVICE_PRIVATE
-/**
- * devm_request_free_mem_region - find free region for device private memory
- *
- * @dev: device struct to bind the resource to
- * @size: size in bytes of the device memory to add
- * @base: resource tree to look in
- *
- * This function tries to find an empty range of physical address big enough to
- * contain the new resource, so that it can later be hotplugged as ZONE_DEVICE
- * memory, which in turn allocates struct pages.
- */
-struct resource *devm_request_free_mem_region(struct device *dev,
-               struct resource *base, unsigned long size)
+static struct resource *__request_free_mem_region(struct device *dev,
+               struct resource *base, unsigned long size, const char *name)
 {
        resource_size_t end, addr;
        struct resource *res;
@@ -1670,7 +1659,10 @@ struct resource *devm_request_free_mem_region(struct device *dev,
                                REGION_DISJOINT)
                        continue;
 
-               res = devm_request_mem_region(dev, addr, size, dev_name(dev));
+               if (dev)
+                       res = devm_request_mem_region(dev, addr, size, name);
+               else
+                       res = request_mem_region(addr, size, name);
                if (!res)
                        return ERR_PTR(-ENOMEM);
                res->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY;
@@ -1679,7 +1671,32 @@ struct resource *devm_request_free_mem_region(struct device *dev,
 
        return ERR_PTR(-ERANGE);
 }
+
+/**
+ * devm_request_free_mem_region - find free region for device private memory
+ *
+ * @dev: device struct to bind the resource to
+ * @size: size in bytes of the device memory to add
+ * @base: resource tree to look in
+ *
+ * This function tries to find an empty range of physical address big enough to
+ * contain the new resource, so that it can later be hotplugged as ZONE_DEVICE
+ * memory, which in turn allocates struct pages.
+ */
+struct resource *devm_request_free_mem_region(struct device *dev,
+               struct resource *base, unsigned long size)
+{
+       return __request_free_mem_region(dev, base, size, dev_name(dev));
+}
 EXPORT_SYMBOL_GPL(devm_request_free_mem_region);
+
+struct resource *request_free_mem_region(struct resource *base,
+               unsigned long size, const char *name)
+{
+       return __request_free_mem_region(NULL, base, size, name);
+}
+EXPORT_SYMBOL_GPL(request_free_mem_region);
+
 #endif /* CONFIG_DEVICE_PRIVATE */
 
 static int __init strict_iomem(char *str)
index 5e8387b..7880f4f 100644 (file)
@@ -1656,7 +1656,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
        if (cpumask_equal(p->cpus_ptr, new_mask))
                goto out;
 
-       if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
+       dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
+       if (dest_cpu >= nr_cpu_ids) {
                ret = -EINVAL;
                goto out;
        }
@@ -1677,7 +1678,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
        if (cpumask_test_cpu(task_cpu(p), new_mask))
                goto out;
 
-       dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
        if (task_running(rq, p) || p->state == TASK_WAKING) {
                struct migration_arg arg = { p, dest_cpu };
                /* Need help from migration thread: drop lock and wait. */
@@ -3254,7 +3254,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
                /* Task is done with its stack. */
                put_task_stack(prev);
 
-               put_task_struct(prev);
+               put_task_struct_rcu_user(prev);
        }
 
        tick_nohz_task_switch();
@@ -3358,15 +3358,15 @@ context_switch(struct rq *rq, struct task_struct *prev,
                else
                        prev->active_mm = NULL;
        } else {                                        // to user
+               membarrier_switch_mm(rq, prev->active_mm, next->mm);
                /*
                 * sys_membarrier() requires an smp_mb() between setting
-                * rq->curr and returning to userspace.
+                * rq->curr / membarrier_switch_mm() and returning to userspace.
                 *
                 * The below provides this either through switch_mm(), or in
                 * case 'prev->active_mm == next->mm' through
                 * finish_task_switch()'s mmdrop().
                 */
-
                switch_mm_irqs_off(prev->active_mm, next->mm, next);
 
                if (!prev->mm) {                        // from kernel
@@ -3871,13 +3871,22 @@ static noinline void __schedule_bug(struct task_struct *prev)
 /*
  * Various schedule()-time debugging checks and statistics:
  */
-static inline void schedule_debug(struct task_struct *prev)
+static inline void schedule_debug(struct task_struct *prev, bool preempt)
 {
 #ifdef CONFIG_SCHED_STACK_END_CHECK
        if (task_stack_end_corrupted(prev))
                panic("corrupted stack end detected inside scheduler\n");
 #endif
 
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+       if (!preempt && prev->state && prev->non_block_count) {
+               printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n",
+                       prev->comm, prev->pid, prev->non_block_count);
+               dump_stack();
+               add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+       }
+#endif
+
        if (unlikely(in_atomic_preempt_off())) {
                __schedule_bug(prev);
                preempt_count_set(PREEMPT_DISABLED);
@@ -3989,7 +3998,7 @@ static void __sched notrace __schedule(bool preempt)
        rq = cpu_rq(cpu);
        prev = rq->curr;
 
-       schedule_debug(prev);
+       schedule_debug(prev, preempt);
 
        if (sched_feat(HRTICK))
                hrtick_clear(rq);
@@ -4033,7 +4042,11 @@ static void __sched notrace __schedule(bool preempt)
 
        if (likely(prev != next)) {
                rq->nr_switches++;
-               rq->curr = next;
+               /*
+                * RCU users of rcu_dereference(rq->curr) may not see
+                * changes to task_struct made by pick_next_task().
+                */
+               RCU_INIT_POINTER(rq->curr, next);
                /*
                 * The membarrier system call requires each architecture
                 * to have a full memory barrier after updating
@@ -4214,9 +4227,8 @@ static void __sched notrace preempt_schedule_common(void)
 
 #ifdef CONFIG_PREEMPTION
 /*
- * this is the entry point to schedule() from in-kernel preemption
- * off of preempt_enable. Kernel preemptions off return from interrupt
- * occur there and call schedule directly.
+ * This is the entry point to schedule() from in-kernel preemption
+ * off of preempt_enable.
  */
 asmlinkage __visible void __sched notrace preempt_schedule(void)
 {
@@ -4287,7 +4299,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
 #endif /* CONFIG_PREEMPTION */
 
 /*
- * this is the entry point to schedule() from kernel preemption
+ * This is the entry point to schedule() from kernel preemption
  * off of irq context.
  * Note, that this is called and return with irqs disabled. This will
  * protect us against recursive calling from irq.
@@ -6060,7 +6072,8 @@ void init_idle(struct task_struct *idle, int cpu)
        __set_task_cpu(idle, cpu);
        rcu_read_unlock();
 
-       rq->curr = rq->idle = idle;
+       rq->idle = idle;
+       rcu_assign_pointer(rq->curr, idle);
        idle->on_rq = TASK_ON_RQ_QUEUED;
 #ifdef CONFIG_SMP
        idle->on_cpu = 1;
@@ -6421,8 +6434,6 @@ int sched_cpu_activate(unsigned int cpu)
        }
        rq_unlock_irqrestore(rq, &rf);
 
-       update_max_interval();
-
        return 0;
 }
 
@@ -6763,7 +6774,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
        rcu_sleep_check();
 
        if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
-            !is_idle_task(current)) ||
+            !is_idle_task(current) && !current->non_block_count) ||
            system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING ||
            oops_in_progress)
                return;
@@ -6779,8 +6790,8 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
                "BUG: sleeping function called from invalid context at %s:%d\n",
                        file, line);
        printk(KERN_ERR
-               "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
-                       in_atomic(), irqs_disabled(),
+               "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n",
+                       in_atomic(), irqs_disabled(), current->non_block_count,
                        current->pid, current->comm);
 
        if (task_stack_end_corrupted(current))
index d4bbf68..83ab35e 100644 (file)
@@ -749,7 +749,6 @@ void init_entity_runnable_average(struct sched_entity *se)
        /* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
 }
 
-static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
 static void attach_entity_cfs_rq(struct sched_entity *se);
 
 /*
@@ -1603,7 +1602,7 @@ static void task_numa_compare(struct task_numa_env *env,
                return;
 
        rcu_read_lock();
-       cur = task_rcu_dereference(&dst_rq->curr);
+       cur = rcu_dereference(dst_rq->curr);
        if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
                cur = NULL;
 
@@ -4354,21 +4353,16 @@ static inline u64 sched_cfs_bandwidth_slice(void)
 }
 
 /*
- * Replenish runtime according to assigned quota and update expiration time.
- * We use sched_clock_cpu directly instead of rq->clock to avoid adding
- * additional synchronization around rq->lock.
+ * Replenish runtime according to assigned quota. We use sched_clock_cpu
+ * directly instead of rq->clock to avoid adding additional synchronization
+ * around rq->lock.
  *
  * requires cfs_b->lock
  */
 void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
 {
-       u64 now;
-
-       if (cfs_b->quota == RUNTIME_INF)
-               return;
-
-       now = sched_clock_cpu(smp_processor_id());
-       cfs_b->runtime = cfs_b->quota;
+       if (cfs_b->quota != RUNTIME_INF)
+               cfs_b->runtime = cfs_b->quota;
 }
 
 static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
@@ -4376,15 +4370,6 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
        return &tg->cfs_bandwidth;
 }
 
-/* rq->task_clock normalized against any time this cfs_rq has spent throttled */
-static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
-{
-       if (unlikely(cfs_rq->throttle_count))
-               return cfs_rq->throttled_clock_task - cfs_rq->throttled_clock_task_time;
-
-       return rq_clock_task(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time;
-}
-
 /* returns 0 on failure to allocate runtime */
 static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
@@ -4476,7 +4461,6 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
 
        cfs_rq->throttle_count--;
        if (!cfs_rq->throttle_count) {
-               /* adjust cfs_rq_clock_task() */
                cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
                                             cfs_rq->throttled_clock_task;
 
@@ -4994,15 +4978,13 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 
 void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 {
-       u64 overrun;
-
        lockdep_assert_held(&cfs_b->lock);
 
        if (cfs_b->period_active)
                return;
 
        cfs_b->period_active = 1;
-       overrun = hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
+       hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
        hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
 }
 
@@ -5080,11 +5062,6 @@ static inline bool cfs_bandwidth_used(void)
        return false;
 }
 
-static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
-{
-       return rq_clock_task(rq_of(cfs_rq));
-}
-
 static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
 static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; }
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
@@ -6412,7 +6389,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                }
 
                /* Evaluate the energy impact of using this CPU. */
-               if (max_spare_cap_cpu >= 0) {
+               if (max_spare_cap_cpu >= 0 && max_spare_cap_cpu != prev_cpu) {
                        cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
                        cur_delta -= base_energy_pd;
                        if (cur_delta < best_delta) {
index c892c62..8dad5aa 100644 (file)
@@ -238,7 +238,6 @@ static void do_idle(void)
        tick_nohz_idle_enter();
 
        while (!need_resched()) {
-               check_pgt_cache();
                rmb();
 
                local_irq_disable();
index aa8d758..a39bed2 100644 (file)
@@ -30,10 +30,42 @@ static void ipi_mb(void *info)
        smp_mb();       /* IPIs should be serializing but paranoid. */
 }
 
+static void ipi_sync_rq_state(void *info)
+{
+       struct mm_struct *mm = (struct mm_struct *) info;
+
+       if (current->mm != mm)
+               return;
+       this_cpu_write(runqueues.membarrier_state,
+                      atomic_read(&mm->membarrier_state));
+       /*
+        * Issue a memory barrier after setting
+        * MEMBARRIER_STATE_GLOBAL_EXPEDITED in the current runqueue to
+        * guarantee that no memory access following registration is reordered
+        * before registration.
+        */
+       smp_mb();
+}
+
+void membarrier_exec_mmap(struct mm_struct *mm)
+{
+       /*
+        * Issue a memory barrier before clearing membarrier_state to
+        * guarantee that no memory access prior to exec is reordered after
+        * clearing this state.
+        */
+       smp_mb();
+       atomic_set(&mm->membarrier_state, 0);
+       /*
+        * Keep the runqueue membarrier_state in sync with this mm
+        * membarrier_state.
+        */
+       this_cpu_write(runqueues.membarrier_state, 0);
+}
+
 static int membarrier_global_expedited(void)
 {
        int cpu;
-       bool fallback = false;
        cpumask_var_t tmpmask;
 
        if (num_online_cpus() == 1)
@@ -45,17 +77,11 @@ static int membarrier_global_expedited(void)
         */
        smp_mb();       /* system call entry is not a mb. */
 
-       /*
-        * Expedited membarrier commands guarantee that they won't
-        * block, hence the GFP_NOWAIT allocation flag and fallback
-        * implementation.
-        */
-       if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
-               /* Fallback for OOM. */
-               fallback = true;
-       }
+       if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+               return -ENOMEM;
 
        cpus_read_lock();
+       rcu_read_lock();
        for_each_online_cpu(cpu) {
                struct task_struct *p;
 
@@ -70,23 +96,28 @@ static int membarrier_global_expedited(void)
                if (cpu == raw_smp_processor_id())
                        continue;
 
-               rcu_read_lock();
-               p = task_rcu_dereference(&cpu_rq(cpu)->curr);
-               if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
-                                  MEMBARRIER_STATE_GLOBAL_EXPEDITED)) {
-                       if (!fallback)
-                               __cpumask_set_cpu(cpu, tmpmask);
-                       else
-                               smp_call_function_single(cpu, ipi_mb, NULL, 1);
-               }
-               rcu_read_unlock();
-       }
-       if (!fallback) {
-               preempt_disable();
-               smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
-               preempt_enable();
-               free_cpumask_var(tmpmask);
+               if (!(READ_ONCE(cpu_rq(cpu)->membarrier_state) &
+                   MEMBARRIER_STATE_GLOBAL_EXPEDITED))
+                       continue;
+
+               /*
+                * Skip the CPU if it runs a kernel thread. The scheduler
+                * leaves the prior task mm in place as an optimization when
+                * scheduling a kthread.
+                */
+               p = rcu_dereference(cpu_rq(cpu)->curr);
+               if (p->flags & PF_KTHREAD)
+                       continue;
+
+               __cpumask_set_cpu(cpu, tmpmask);
        }
+       rcu_read_unlock();
+
+       preempt_disable();
+       smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
+       preempt_enable();
+
+       free_cpumask_var(tmpmask);
        cpus_read_unlock();
 
        /*
@@ -101,22 +132,22 @@ static int membarrier_global_expedited(void)
 static int membarrier_private_expedited(int flags)
 {
        int cpu;
-       bool fallback = false;
        cpumask_var_t tmpmask;
+       struct mm_struct *mm = current->mm;
 
        if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
                if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
                        return -EINVAL;
-               if (!(atomic_read(&current->mm->membarrier_state) &
+               if (!(atomic_read(&mm->membarrier_state) &
                      MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
                        return -EPERM;
        } else {
-               if (!(atomic_read(&current->mm->membarrier_state) &
+               if (!(atomic_read(&mm->membarrier_state) &
                      MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
                        return -EPERM;
        }
 
-       if (num_online_cpus() == 1)
+       if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1)
                return 0;
 
        /*
@@ -125,17 +156,11 @@ static int membarrier_private_expedited(int flags)
         */
        smp_mb();       /* system call entry is not a mb. */
 
-       /*
-        * Expedited membarrier commands guarantee that they won't
-        * block, hence the GFP_NOWAIT allocation flag and fallback
-        * implementation.
-        */
-       if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
-               /* Fallback for OOM. */
-               fallback = true;
-       }
+       if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+               return -ENOMEM;
 
        cpus_read_lock();
+       rcu_read_lock();
        for_each_online_cpu(cpu) {
                struct task_struct *p;
 
@@ -150,21 +175,16 @@ static int membarrier_private_expedited(int flags)
                if (cpu == raw_smp_processor_id())
                        continue;
-               rcu_read_lock();
-               p = task_rcu_dereference(&cpu_rq(cpu)->curr);
-               if (p && p->mm == current->mm) {
-                       if (!fallback)
-                               __cpumask_set_cpu(cpu, tmpmask);
-                       else
-                               smp_call_function_single(cpu, ipi_mb, NULL, 1);
-               }
-               rcu_read_unlock();
-       }
-       if (!fallback) {
-               preempt_disable();
-               smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
-               preempt_enable();
-               free_cpumask_var(tmpmask);
+               p = rcu_dereference(cpu_rq(cpu)->curr);
+               if (p && p->mm == mm)
+                       __cpumask_set_cpu(cpu, tmpmask);
        }
+       rcu_read_unlock();
+
+       preempt_disable();
+       smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
+       preempt_enable();
+
+       free_cpumask_var(tmpmask);
        cpus_read_unlock();
 
        /*
@@ -177,32 +198,78 @@ static int membarrier_private_expedited(int flags)
        return 0;
 }
 
+static int sync_runqueues_membarrier_state(struct mm_struct *mm)
+{
+       int membarrier_state = atomic_read(&mm->membarrier_state);
+       cpumask_var_t tmpmask;
+       int cpu;
+
+       if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1) {
+               this_cpu_write(runqueues.membarrier_state, membarrier_state);
+
+               /*
+                * For single mm user, we can simply issue a memory barrier
+                * after setting MEMBARRIER_STATE_GLOBAL_EXPEDITED in the
+                * mm and in the current runqueue to guarantee that no memory
+                * access following registration is reordered before
+                * registration.
+                */
+               smp_mb();
+               return 0;
+       }
+
+       if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+               return -ENOMEM;
+
+       /*
+        * For mm with multiple users, we need to ensure all future
+        * scheduler executions will observe @mm's new membarrier
+        * state.
+        */
+       synchronize_rcu();
+
+       /*
+        * For each cpu runqueue, if the task's mm match @mm, ensure that all
+        * @mm's membarrier state set bits are also set in the runqueue's
+        * membarrier state. This ensures that a runqueue scheduling
+        * between threads which are users of @mm has its membarrier state
+        * updated.
+        */
+       cpus_read_lock();
+       rcu_read_lock();
+       for_each_online_cpu(cpu) {
+               struct rq *rq = cpu_rq(cpu);
+               struct task_struct *p;
+
+               p = rcu_dereference(rq->curr);
+               if (p && p->mm == mm)
+                       __cpumask_set_cpu(cpu, tmpmask);
+       }
+       rcu_read_unlock();
+
+       preempt_disable();
+       smp_call_function_many(tmpmask, ipi_sync_rq_state, mm, 1);
+       preempt_enable();
+
+       free_cpumask_var(tmpmask);
+       cpus_read_unlock();
+
+       return 0;
+}
+
 static int membarrier_register_global_expedited(void)
 {
        struct task_struct *p = current;
        struct mm_struct *mm = p->mm;
+       int ret;
 
        if (atomic_read(&mm->membarrier_state) &
            MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
                return 0;
        atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
-       if (atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1) {
-               /*
-                * For single mm user, single threaded process, we can
-                * simply issue a memory barrier after setting
-                * MEMBARRIER_STATE_GLOBAL_EXPEDITED to guarantee that
-                * no memory access following registration is reordered
-                * before registration.
-                */
-               smp_mb();
-       } else {
-               /*
-                * For multi-mm user threads, we need to ensure all
-                * future scheduler executions will observe the new
-                * thread flag state for this mm.
-                */
-               synchronize_rcu();
-       }
+       ret = sync_runqueues_membarrier_state(mm);
+       if (ret)
+               return ret;
        atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
                  &mm->membarrier_state);
 
@@ -213,12 +280,15 @@ static int membarrier_register_private_expedited(int flags)
 {
        struct task_struct *p = current;
        struct mm_struct *mm = p->mm;
-       int state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY;
+       int ready_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
+           set_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED,
+           ret;
 
        if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
                if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
                        return -EINVAL;
-               state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
+               ready_state =
+                       MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
        }
 
        /*
@@ -226,20 +296,15 @@ static int membarrier_register_private_expedited(int flags)
         * groups, which use the same mm. (CLONE_VM but not
         * CLONE_THREAD).
         */
-       if (atomic_read(&mm->membarrier_state) & state)
+       if ((atomic_read(&mm->membarrier_state) & ready_state) == ready_state)
                return 0;
-       atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
        if (flags & MEMBARRIER_FLAG_SYNC_CORE)
-               atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE,
-                         &mm->membarrier_state);
-       if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) {
-               /*
-                * Ensure all future scheduler executions will observe the
-                * new thread flag state for this process.
-                */
-               synchronize_rcu();
-       }
-       atomic_or(state, &mm->membarrier_state);
+               set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE;
+       atomic_or(set_state, &mm->membarrier_state);
+       ret = sync_runqueues_membarrier_state(mm);
+       if (ret)
+               return ret;
+       atomic_or(ready_state, &mm->membarrier_state);
 
        return 0;
 }
@@ -253,8 +318,10 @@ static int membarrier_register_private_expedited(int flags)
  * command specified does not exist, not available on the running
  * kernel, or if the command argument is invalid, this system call
  * returns -EINVAL. For a given command, with flags argument set to 0,
- * this system call is guaranteed to always return the same value until
- * reboot.
+ * if this system call returns -ENOSYS or -EINVAL, it is guaranteed to
+ * always return the same value until reboot. In addition, it can return
+ * -ENOMEM if there is not enough memory available to perform the system
+ * call.
  *
  * All memory accesses performed in program order from each targeted thread
  * is guaranteed to be ordered with respect to sys_membarrier(). If we use
index b3cb895..0db2c1b 100644 (file)
@@ -911,6 +911,10 @@ struct rq {
 
        atomic_t                nr_iowait;
 
+#ifdef CONFIG_MEMBARRIER
+       int membarrier_state;
+#endif
+
 #ifdef CONFIG_SMP
        struct root_domain              *rd;
        struct sched_domain __rcu       *sd;
@@ -2438,3 +2442,33 @@ static inline bool sched_energy_enabled(void)
 static inline bool sched_energy_enabled(void) { return false; }
 
 #endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
+
+#ifdef CONFIG_MEMBARRIER
+/*
+ * The scheduler provides memory barriers required by membarrier between:
+ * - prior user-space memory accesses and store to rq->membarrier_state,
+ * - store to rq->membarrier_state and following user-space memory accesses.
+ * In the same way it provides those guarantees around store to rq->curr.
+ */
+static inline void membarrier_switch_mm(struct rq *rq,
+                                       struct mm_struct *prev_mm,
+                                       struct mm_struct *next_mm)
+{
+       int membarrier_state;
+
+       if (prev_mm == next_mm)
+               return;
+
+       membarrier_state = atomic_read(&next_mm->membarrier_state);
+       if (READ_ONCE(rq->membarrier_state) == membarrier_state)
+               return;
+
+       WRITE_ONCE(rq->membarrier_state, membarrier_state);
+}
+#else
+static inline void membarrier_switch_mm(struct rq *rq,
+                                       struct mm_struct *prev_mm,
+                                       struct mm_struct *next_mm)
+{
+}
+#endif
index 078950d..00fcea2 100644 (file)
@@ -264,7 +264,8 @@ extern struct ctl_table epoll_table[];
 extern struct ctl_table firmware_config_table[];
 #endif
 
-#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
+#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
+    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
 int sysctl_legacy_va_layout;
 #endif
 
@@ -1573,7 +1574,8 @@ static struct ctl_table vm_table[] = {
                .proc_handler   = proc_dointvec,
                .extra1         = SYSCTL_ZERO,
        },
-#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
+#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
+    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
        {
                .procname       = "legacy_va_layout",
                .data           = &sysctl_legacy_va_layout,
index 0e315a2..4820823 100644 (file)
@@ -1678,24 +1678,26 @@ void timer_clear_idle(void)
 static int collect_expired_timers(struct timer_base *base,
                                  struct hlist_head *heads)
 {
+       unsigned long now = READ_ONCE(jiffies);
+
        /*
         * NOHZ optimization. After a long idle sleep we need to forward the
         * base to current jiffies. Avoid a loop by searching the bitfield for
         * the next expiring timer.
         */
-       if ((long)(jiffies - base->clk) > 2) {
+       if ((long)(now - base->clk) > 2) {
                unsigned long next = __next_timer_interrupt(base);
 
                /*
                 * If the next timer is ahead of time forward to current
                 * jiffies, otherwise forward to the next expiry time:
                 */
-               if (time_after(next, jiffies)) {
+               if (time_after(next, now)) {
                        /*
                         * The call site will increment base->clk and then
                         * terminate the expiry loop immediately.
                         */
-                       base->clk = jiffies;
+                       base->clk = now;
                        return 0;
                }
                base->clk = next;
index ca1255d..492a8bf 100644 (file)
@@ -142,8 +142,13 @@ BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
 {
        int ret;
 
+       ret = security_locked_down(LOCKDOWN_BPF_READ);
+       if (ret < 0)
+               goto out;
+
        ret = probe_kernel_read(dst, unsafe_ptr, size);
        if (unlikely(ret < 0))
+out:
                memset(dst, 0, size);
 
        return ret;
@@ -569,6 +574,10 @@ BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
 {
        int ret;
 
+       ret = security_locked_down(LOCKDOWN_BPF_READ);
+       if (ret < 0)
+               goto out;
+
        /*
         * The strncpy_from_unsafe() call will likely not fill the entire
         * buffer, but that's okay in this circumstance as we're probing
@@ -580,6 +589,7 @@ BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
         */
        ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
        if (unlikely(ret < 0))
+out:
                memset(dst, 0, size);
 
        return ret;
index a6697e2..324ffbe 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/uaccess.h>
 #include <linux/rculist.h>
 #include <linux/error-injection.h>
+#include <linux/security.h>
 
 #include <asm/setup.h>  /* for COMMAND_LINE_SIZE */
 
@@ -460,6 +461,10 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
 {
        int i, ret;
 
+       ret = security_locked_down(LOCKDOWN_KPROBES);
+       if (ret)
+               return ret;
+
        if (trace_kprobe_is_registered(tk))
                return -EINVAL;
 
@@ -549,10 +554,11 @@ static bool trace_kprobe_has_same_kprobe(struct trace_kprobe *orig,
                for (i = 0; i < orig->tp.nr_args; i++) {
                        if (strcmp(orig->tp.args[i].comm,
                                   comp->tp.args[i].comm))
-                               continue;
+                               break;
                }
 
-               return true;
+               if (i == orig->tp.nr_args)
+                       return true;
        }
 
        return false;
index 34dd6d0..dd88434 100644 (file)
@@ -431,10 +431,11 @@ static bool trace_uprobe_has_same_uprobe(struct trace_uprobe *orig,
                for (i = 0; i < orig->tp.nr_args; i++) {
                        if (strcmp(orig->tp.args[i].comm,
                                   comp->tp.args[i].comm))
-                               continue;
+                               break;
                }
 
-               return true;
+               if (i == orig->tp.nr_args)
+                       return true;
        }
 
        return false;
index 5960e29..93d97f9 100644 (file)
@@ -277,22 +277,6 @@ config READABLE_ASM
           to keep kernel developers who have to stare a lot at assembler listings
           sane.
 
-config UNUSED_SYMBOLS
-       bool "Enable unused/obsolete exported symbols"
-       default y if X86
-       help
-         Unused but exported symbols make the kernel needlessly bigger.  For
-         that reason most of these unused exports will soon be removed.  This
-         option is provided temporarily to provide a transition period in case
-         some external kernel module needs one of these symbols anyway. If you
-         encounter such a case in your module, consider if you are actually
-         using the right API.  (rationale: since nobody in the kernel is using
-         this in a module, there is a pretty good chance it's actually the
-         wrong interface to use).  If you really need the symbol, please send a
-         mail to the linux kernel mailing list mentioning the symbol and why
-         you really need it, and what the merge plan to the mainline kernel for
-         your module is.
-
 config DEBUG_FS
        bool "Debug Filesystem"
        help
@@ -327,7 +311,7 @@ config HEADERS_CHECK
          relevant for userspace, say 'Y'.
 
 config OPTIMIZE_INLINING
-       bool "Allow compiler to uninline functions marked 'inline'"
+       def_bool y
        help
          This option determines if the kernel forces gcc to inline the functions
          developers have marked 'inline'. Doing so takes away freedom from gcc to
@@ -338,8 +322,6 @@ config OPTIMIZE_INLINING
          decision will become the default in the future. Until then this option
          is there to test gcc for this.
 
-         If unsure, say N.
-
 config DEBUG_SECTION_MISMATCH
        bool "Enable full Section mismatch analysis"
        help
@@ -592,17 +574,18 @@ config DEBUG_KMEMLEAK
          In order to access the kmemleak file, debugfs needs to be
          mounted (usually at /sys/kernel/debug).
 
-config DEBUG_KMEMLEAK_EARLY_LOG_SIZE
-       int "Maximum kmemleak early log entries"
+config DEBUG_KMEMLEAK_MEM_POOL_SIZE
+       int "Kmemleak memory pool size"
        depends on DEBUG_KMEMLEAK
-       range 200 40000
-       default 400
+       range 200 1000000
+       default 16000
        help
          Kmemleak must track all the memory allocations to avoid
          reporting false positives. Since memory may be allocated or
-         freed before kmemleak is initialised, an early log buffer is
-         used to store these actions. If kmemleak reports "early log
-         buffer exceeded", please increase this value.
+         freed before kmemleak is fully initialised, a static pool of
+         metadata objects is used to track such callbacks. After kmemleak
+         is fully initialised, this memory pool acts as an emergency one
+         if slab allocations fail.
 
 config DEBUG_KMEMLEAK_TEST
        tristate "Simple test for the kernel memory leak detector"
index 7fa97a8..6c9682c 100644 (file)
@@ -134,6 +134,14 @@ config KASAN_S390_4_LEVEL_PAGING
          to 3TB of RAM with KASan enabled). This options allows to force
          4-level paging instead.
 
+config KASAN_SW_TAGS_IDENTIFY
+       bool "Enable memory corruption identification"
+       depends on KASAN_SW_TAGS
+       help
+         This option enables best-effort identification of the bug type
+         (use-after-free or out-of-bounds) at the cost of increased
+         memory consumption.
+
 config TEST_KASAN
        tristate "Module for testing KASAN for bug detection"
        depends on m && KASAN
index 1077366..8c98af0 100644 (file)
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -181,6 +181,15 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
                }
        }
 
+       /*
+        * BUG() and WARN_ON() families don't print a custom debug message
+        * before triggering the exception handler, so we must add the
+        * "cut here" line now. WARN() issues its own "cut here" before the
+        * extra debugging message it writes before triggering the handler.
+        */
+       if ((bug->flags & BUGFLAG_NO_CUT_HERE) == 0)
+               printk(KERN_DEFAULT CUT_HERE);
+
        if (warning) {
                /* this is a WARN_ON rather than BUG/BUG_ON */
                __warn(file, line, (void *)bugaddr, BUG_GET_TAINT(bug), regs,
@@ -188,8 +197,6 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
                return BUG_TRAP_TYPE_WARN;
        }
 
-       printk(KERN_DEFAULT CUT_HERE);
-
        if (file)
                pr_crit("kernel BUG at %s:%u!\n", file, line);
        else
index 25da407..c3e59ca 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/init.h>
 #include <linux/sort.h>
 #include <linux/uaccess.h>
+#include <linux/extable.h>
 
 #ifndef ARCH_HAS_RELATIVE_EXTABLE
 #define ex_to_insn(x)  ((x)->insn)
index a7bafc4..ae25e2f 100644 (file)
@@ -36,12 +36,12 @@ static inline size_t genradix_depth_size(unsigned depth)
 #define GENRADIX_DEPTH_MASK                            \
        ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1))
 
-unsigned genradix_root_to_depth(struct genradix_root *r)
+static inline unsigned genradix_root_to_depth(struct genradix_root *r)
 {
        return (unsigned long) r & GENRADIX_DEPTH_MASK;
 }
 
-struct genradix_node *genradix_root_to_node(struct genradix_root *r)
+static inline struct genradix_node *genradix_root_to_node(struct genradix_root *r)
 {
        return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK);
 }
index b1d55b6..147133f 100644 (file)
@@ -270,25 +270,4 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
 }
 EXPORT_SYMBOL(print_hex_dump);
 
-#if !defined(CONFIG_DYNAMIC_DEBUG)
-/**
- * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params
- * @prefix_str: string to prefix each line with;
- *  caller supplies trailing spaces for alignment if desired
- * @prefix_type: controls whether prefix of an offset, address, or none
- *  is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE)
- * @buf: data blob to dump
- * @len: number of bytes in the @buf
- *
- * Calls print_hex_dump(), with log level of KERN_DEBUG,
- * rowsize of 16, groupsize of 1, and ASCII output included.
- */
-void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
-                         const void *buf, size_t len)
-{
-       print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1,
-                      buf, len, true);
-}
-EXPORT_SYMBOL(print_hex_dump_bytes);
-#endif /* !defined(CONFIG_DYNAMIC_DEBUG) */
 #endif /* defined(CONFIG_PRINTK) */
index f1e0569..639d5e7 100644 (file)
@@ -878,7 +878,7 @@ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
        head = compound_head(page);
        v += (page - head) << PAGE_SHIFT;
 
-       if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head))))
+       if (likely(n <= v && v <= (page_size(head))))
                return true;
        WARN_ON(1);
        return false;
index ba16c08..717c940 100644 (file)
@@ -83,17 +83,19 @@ next:
                                        ALIGN((uintptr_t)ir, 4)) &&
                                        (ir < limit) && (*ir == 0))
                                ir++;
-                       for (; (ir + 4) <= limit; ir += 4) {
-                               dv = *((u32 *)ir);
-                               if (dv) {
+                       if (IS_ALIGNED((uintptr_t)ir, 4)) {
+                               for (; (ir + 4) <= limit; ir += 4) {
+                                       dv = *((u32 *)ir);
+                                       if (dv) {
 #  if defined(__LITTLE_ENDIAN)
-                                       ir += __builtin_ctz(dv) >> 3;
+                                               ir += __builtin_ctz(dv) >> 3;
 #  elif defined(__BIG_ENDIAN)
-                                       ir += __builtin_clz(dv) >> 3;
+                                               ir += __builtin_clz(dv) >> 3;
 #  else
 #    error "missing endian definition"
 #  endif
-                                       break;
+                                               break;
+                                       }
                                }
                        }
 #endif
index 62b8ee9..41ae3c7 100644 (file)
@@ -77,26 +77,10 @@ static inline void erase_cached(struct test_node *node, struct rb_root_cached *r
 }
 
 
-static inline u32 augment_recompute(struct test_node *node)
-{
-       u32 max = node->val, child_augmented;
-       if (node->rb.rb_left) {
-               child_augmented = rb_entry(node->rb.rb_left, struct test_node,
-                                          rb)->augmented;
-               if (max < child_augmented)
-                       max = child_augmented;
-       }
-       if (node->rb.rb_right) {
-               child_augmented = rb_entry(node->rb.rb_right, struct test_node,
-                                          rb)->augmented;
-               if (max < child_augmented)
-                       max = child_augmented;
-       }
-       return max;
-}
+#define NODE_VAL(node) ((node)->val)
 
-RB_DECLARE_CALLBACKS(static, augment_callbacks, struct test_node, rb,
-                    u32, augmented, augment_recompute)
+RB_DECLARE_CALLBACKS_MAX(static, augment_callbacks,
+                        struct test_node, rb, u32, augmented, NODE_VAL)
 
 static void insert_augmented(struct test_node *node,
                             struct rb_root_cached *root)
@@ -238,7 +222,20 @@ static void check_augmented(int nr_nodes)
        check(nr_nodes);
        for (rb = rb_first(&root.rb_root); rb; rb = rb_next(rb)) {
                struct test_node *node = rb_entry(rb, struct test_node, rb);
-               WARN_ON_ONCE(node->augmented != augment_recompute(node));
+               u32 subtree, max = node->val;
+               if (node->rb.rb_left) {
+                       subtree = rb_entry(node->rb.rb_left, struct test_node,
+                                          rb)->augmented;
+                       if (max < subtree)
+                               max = subtree;
+               }
+               if (node->rb.rb_right) {
+                       subtree = rb_entry(node->rb.rb_right, struct test_node,
+                                          rb)->augmented;
+                       if (max < subtree)
+                               max = subtree;
+               }
+               WARN_ON_ONCE(node->augmented != max);
        }
 }
 
index 5c86ef4..1c26c14 100644 (file)
@@ -6,7 +6,6 @@
  */
 
 #include <linux/mm.h>
-#include <linux/quicklist.h>
 #include <linux/cma.h>
 
 void show_mem(unsigned int filter, nodemask_t *nodemask)
@@ -39,10 +38,6 @@ void show_mem(unsigned int filter, nodemask_t *nodemask)
 #ifdef CONFIG_CMA
        printk("%lu pages cma reserved\n", totalcma_pages);
 #endif
-#ifdef CONFIG_QUICKLIST
-       printk("%lu pages in pagetable cache\n",
-               quicklist_total_size());
-#endif
 #ifdef CONFIG_MEMORY_FAILURE
        printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages));
 #endif
index 461fb62..cd7a10c 100644 (file)
@@ -173,8 +173,9 @@ EXPORT_SYMBOL(strlcpy);
  * doesn't unnecessarily force the tail of the destination buffer to be
  * zeroed.  If zeroing is desired please use strscpy_pad().
  *
- * Return: The number of characters copied (not including the trailing
- *         %NUL) or -E2BIG if the destination buffer wasn't big enough.
+ * Returns:
+ * * The number of characters copied (not including the trailing %NUL)
+ * * -E2BIG if @count is 0 or exceeds INT_MAX, or @src was truncated.
  */
 ssize_t strscpy(char *dest, const char *src, size_t count)
 {
@@ -182,7 +183,7 @@ ssize_t strscpy(char *dest, const char *src, size_t count)
        size_t max = count;
        long res = 0;
 
-       if (count == 0)
+       if (count == 0 || WARN_ON_ONCE(count > INT_MAX))
                return -E2BIG;
 
 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -253,8 +254,9 @@ EXPORT_SYMBOL(strscpy);
  * For full explanation of why you may want to consider using the
  * 'strscpy' functions please see the function docstring for strscpy().
  *
- * Return: The number of characters copied (not including the trailing
- *         %NUL) or -E2BIG if the destination buffer wasn't big enough.
+ * Returns:
+ * * The number of characters copied (not including the trailing %NUL)
+ * * -E2BIG if count is 0 or @src was truncated.
  */
 ssize_t strscpy_pad(char *dest, const char *src, size_t count)
 {
index 023ba9f..dccb95a 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
+#include <linux/mm.h>
 
 #include <asm/byteorder.h>
 #include <asm/word-at-a-time.h>
@@ -108,7 +109,7 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
                return 0;
 
        max_addr = user_addr_max();
-       src_addr = (unsigned long)src;
+       src_addr = (unsigned long)untagged_addr(src);
        if (likely(src_addr < max_addr)) {
                unsigned long max = max_addr - src_addr;
                long retval;
index 7f2db3f..28ff554 100644 (file)
@@ -2,6 +2,7 @@
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/uaccess.h>
+#include <linux/mm.h>
 
 #include <asm/word-at-a-time.h>
 
@@ -109,7 +110,7 @@ long strnlen_user(const char __user *str, long count)
                return 0;
 
        max_addr = user_addr_max();
-       src_addr = (unsigned long)str;
+       src_addr = (unsigned long)untagged_addr(str);
        if (likely(src_addr < max_addr)) {
                unsigned long max = max_addr - src_addr;
                long retval;
index b63b367..49cc4d5 100644 (file)
@@ -18,6 +18,9 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
+#include <linux/io.h>
+
+#include <asm/page.h>
 
 /*
  * Note: test functions are marked noinline so that their names appear in
@@ -337,6 +340,42 @@ static noinline void __init kmalloc_uaf2(void)
        kfree(ptr2);
 }
 
+static noinline void __init kfree_via_page(void)
+{
+       char *ptr;
+       size_t size = 8;
+       struct page *page;
+       unsigned long offset;
+
+       pr_info("invalid-free false positive (via page)\n");
+       ptr = kmalloc(size, GFP_KERNEL);
+       if (!ptr) {
+               pr_err("Allocation failed\n");
+               return;
+       }
+
+       page = virt_to_page(ptr);
+       offset = offset_in_page(ptr);
+       kfree(page_address(page) + offset);
+}
+
+static noinline void __init kfree_via_phys(void)
+{
+       char *ptr;
+       size_t size = 8;
+       phys_addr_t phys;
+
+       pr_info("invalid-free false positive (via phys)\n");
+       ptr = kmalloc(size, GFP_KERNEL);
+       if (!ptr) {
+               pr_err("Allocation failed\n");
+               return;
+       }
+
+       phys = virt_to_phys(ptr);
+       kfree(phys_to_virt(phys));
+}
+
 static noinline void __init kmem_cache_oob(void)
 {
        char *p;
@@ -737,6 +776,8 @@ static int __init kmalloc_tests_init(void)
        kmalloc_uaf();
        kmalloc_uaf_memset();
        kmalloc_uaf2();
+       kfree_via_page();
+       kfree_via_phys();
        kmem_cache_oob();
        memcg_accounted_kmem_cache();
        kasan_stack_oob();
index 944eb50..5d94cbf 100644 (file)
@@ -455,6 +455,11 @@ dentry(void)
        test("foo", "%pd", &test_dentry[0]);
        test("foo", "%pd2", &test_dentry[0]);
 
+       test("(null)", "%pd", NULL);
+       test("(efault)", "%pd", PTR_INVALID);
+       test("(null)", "%pD", NULL);
+       test("(efault)", "%pD", PTR_INVALID);
+
        test("romeo", "%pd", &test_dentry[3]);
        test("alfa/romeo", "%pd2", &test_dentry[3]);
        test("bravo/alfa/romeo", "%pd3", &test_dentry[3]);
index b0967cf..e78017a 100644 (file)
@@ -869,6 +869,15 @@ char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_sp
        return widen_string(buf, n, end, spec);
 }
 
+static noinline_for_stack
+char *file_dentry_name(char *buf, char *end, const struct file *f,
+                       struct printf_spec spec, const char *fmt)
+{
+       if (check_pointer(&buf, end, f, spec))
+               return buf;
+
+       return dentry_name(buf, end, f->f_path.dentry, spec, fmt);
+}
 #ifdef CONFIG_BLOCK
 static noinline_for_stack
 char *bdev_name(char *buf, char *end, struct block_device *bdev,
@@ -2166,9 +2175,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
        case 'C':
                return clock(buf, end, ptr, spec, fmt);
        case 'D':
-               return dentry_name(buf, end,
-                                  ((const struct file *)ptr)->f_path.dentry,
-                                  spec, fmt);
+               return file_dentry_name(buf, end, ptr, spec, fmt);
 #ifdef CONFIG_BLOCK
        case 'g':
                return bdev_name(buf, end, ptr, spec, fmt);
index 3a0f34c..93d7a2c 100644 (file)
@@ -27,7 +27,7 @@
 /*-****************************************
 *  Compiler specifics
 ******************************************/
-#define ZSTD_STATIC static __inline __attribute__((unused))
+#define ZSTD_STATIC static inline
 
 /*-**************************************************************
 *  Basic Types
index 56cec63..a5dae9a 100644 (file)
@@ -273,11 +273,6 @@ config BOUNCE
          by default when ZONE_DMA or HIGHMEM is selected, but you
          may say n to override this.
 
-config NR_QUICK
-       int
-       depends on QUICKLIST
-       default "1"
-
 config VIRT_TO_BUS
        bool
        help
@@ -669,23 +664,17 @@ config ZONE_DEVICE
 
          If FS_DAX is enabled, then say Y.
 
-config MIGRATE_VMA_HELPER
-       bool
-
 config DEV_PAGEMAP_OPS
        bool
 
+#
+# Helpers to mirror a range of the CPU page tables of a process into device
+# page tables.
+#
 config HMM_MIRROR
-       bool "HMM mirror CPU page table into a device page table"
-       depends on (X86_64 || PPC64)
-       depends on MMU && 64BIT
-       select MMU_NOTIFIER
-       help
-         Select HMM_MIRROR if you want to mirror range of the CPU page table of a
-         process into a device page table. Here, mirror means "keep synchronized".
-         Prerequisites: the device must provide the ability to write-protect its
-         page tables (at PAGE_SIZE granularity), and must be able to recover from
-         the resulting potential page faults.
+       bool
+       depends on MMU
+       depends on MMU_NOTIFIER
 
 config DEVICE_PRIVATE
        bool "Unaddressable device memory (GPU memory, ...)"
@@ -723,6 +712,17 @@ config GUP_BENCHMARK
 config GUP_GET_PTE_LOW_HIGH
        bool
 
+config READ_ONLY_THP_FOR_FS
+       bool "Read-only THP for filesystems (EXPERIMENTAL)"
+       depends on TRANSPARENT_HUGE_PAGECACHE && SHMEM
+
+       help
+         Allow khugepaged to put read-only file-backed pages in THP.
+
+         This is marked experimental because it is a new feature. Write
+         support of file THPs will be developed in the next few release
+         cycles.
+
 config ARCH_HAS_PTE_SPECIAL
        bool
 
index 82b6a20..327b3eb 100644 (file)
@@ -21,7 +21,9 @@ config DEBUG_PAGEALLOC
          Also, the state of page tracking structures is checked more often as
          pages are being allocated and freed, as unexpected state changes
          often happen for same reasons as memory corruption (e.g. double free,
-         use-after-free).
+         use-after-free). The error reports for these checks can be augmented
+         with stack traces of the last allocation and freeing of the page
+         when PAGE_OWNER is also selected and enabled at boot.
 
          For architectures which don't enable ARCH_SUPPORTS_DEBUG_PAGEALLOC,
          fill the pages with poison patterns after free_pages() and verify
index d0b295c..d996846 100644 (file)
@@ -21,6 +21,9 @@ KCOV_INSTRUMENT_memcontrol.o := n
 KCOV_INSTRUMENT_mmzone.o := n
 KCOV_INSTRUMENT_vmstat.o := n
 
+CFLAGS_init-mm.o += $(call cc-disable-warning, override-init)
+CFLAGS_init-mm.o += $(call cc-disable-warning, initializer-overrides)
+
 mmu-y                  := nommu.o
 mmu-$(CONFIG_MMU)      := highmem.o memory.o mincore.o \
                           mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \
@@ -72,7 +75,6 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_MEMTEST)          += memtest.o
 obj-$(CONFIG_MIGRATION) += migrate.o
-obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
 obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
 obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
index 952dc2f..ce08b39 100644 (file)
@@ -969,7 +969,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                         * is safe to read and it's 0 for tail pages.
                         */
                        if (unlikely(PageCompound(page))) {
-                               low_pfn += (1UL << compound_order(page)) - 1;
+                               low_pfn += compound_nr(page) - 1;
                                goto isolate_fail;
                        }
                }
@@ -1737,8 +1737,7 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc)
  * starting at the block pointed to by the migrate scanner pfn within
  * compact_control.
  */
-static isolate_migrate_t isolate_migratepages(struct zone *zone,
-                                       struct compact_control *cc)
+static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
 {
        unsigned long block_start_pfn;
        unsigned long block_end_pfn;
@@ -1756,8 +1755,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
         */
        low_pfn = fast_find_migrateblock(cc);
        block_start_pfn = pageblock_start_pfn(low_pfn);
-       if (block_start_pfn < zone->zone_start_pfn)
-               block_start_pfn = zone->zone_start_pfn;
+       if (block_start_pfn < cc->zone->zone_start_pfn)
+               block_start_pfn = cc->zone->zone_start_pfn;
 
        /*
         * fast_find_migrateblock marks a pageblock skipped so to avoid
@@ -1787,8 +1786,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
                if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)))
                        cond_resched();
 
-               page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
-                                                                       zone);
+               page = pageblock_pfn_to_page(block_start_pfn,
+                                               block_end_pfn, cc->zone);
                if (!page)
                        continue;
 
@@ -2078,6 +2077,17 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
        const bool sync = cc->mode != MIGRATE_ASYNC;
        bool update_cached;
 
+       /*
+        * These counters track activities during zone compaction.  Initialize
+        * them before compacting a new zone.
+        */
+       cc->total_migrate_scanned = 0;
+       cc->total_free_scanned = 0;
+       cc->nr_migratepages = 0;
+       cc->nr_freepages = 0;
+       INIT_LIST_HEAD(&cc->freepages);
+       INIT_LIST_HEAD(&cc->migratepages);
+
        cc->migratetype = gfpflags_to_migratetype(cc->gfp_mask);
        ret = compaction_suitable(cc->zone, cc->order, cc->alloc_flags,
                                                        cc->classzone_idx);
@@ -2158,7 +2168,7 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
                        cc->rescan = true;
                }
 
-               switch (isolate_migratepages(cc->zone, cc)) {
+               switch (isolate_migratepages(cc)) {
                case ISOLATE_ABORT:
                        ret = COMPACT_CONTENDED;
                        putback_movable_pages(&cc->migratepages);
@@ -2281,10 +2291,6 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
 {
        enum compact_result ret;
        struct compact_control cc = {
-               .nr_freepages = 0,
-               .nr_migratepages = 0,
-               .total_migrate_scanned = 0,
-               .total_free_scanned = 0,
                .order = order,
                .search_order = order,
                .gfp_mask = gfp_mask,
@@ -2305,8 +2311,6 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
 
        if (capture)
                current->capture_control = &capc;
-       INIT_LIST_HEAD(&cc.freepages);
-       INIT_LIST_HEAD(&cc.migratepages);
 
        ret = compact_zone(&cc, &capc);
 
@@ -2408,8 +2412,6 @@ static void compact_node(int nid)
        struct zone *zone;
        struct compact_control cc = {
                .order = -1,
-               .total_migrate_scanned = 0,
-               .total_free_scanned = 0,
                .mode = MIGRATE_SYNC,
                .ignore_skip_hint = true,
                .whole_zone = true,
@@ -2423,11 +2425,7 @@ static void compact_node(int nid)
                if (!populated_zone(zone))
                        continue;
 
-               cc.nr_freepages = 0;
-               cc.nr_migratepages = 0;
                cc.zone = zone;
-               INIT_LIST_HEAD(&cc.freepages);
-               INIT_LIST_HEAD(&cc.migratepages);
 
                compact_zone(&cc, NULL);
 
@@ -2529,8 +2527,6 @@ static void kcompactd_do_work(pg_data_t *pgdat)
        struct compact_control cc = {
                .order = pgdat->kcompactd_max_order,
                .search_order = pgdat->kcompactd_max_order,
-               .total_migrate_scanned = 0,
-               .total_free_scanned = 0,
                .classzone_idx = pgdat->kcompactd_classzone_idx,
                .mode = MIGRATE_SYNC_LIGHT,
                .ignore_skip_hint = false,
@@ -2554,16 +2550,10 @@ static void kcompactd_do_work(pg_data_t *pgdat)
                                                        COMPACT_CONTINUE)
                        continue;
 
-               cc.nr_freepages = 0;
-               cc.nr_migratepages = 0;
-               cc.total_migrate_scanned = 0;
-               cc.total_free_scanned = 0;
-               cc.zone = zone;
-               INIT_LIST_HEAD(&cc.freepages);
-               INIT_LIST_HEAD(&cc.migratepages);
-
                if (kthread_should_stop())
                        return;
+
+               cc.zone = zone;
                status = compact_zone(&cc, NULL);
 
                if (status == COMPACT_SUCCESS) {
index 40667c2..1146fcf 100644 (file)
@@ -126,7 +126,7 @@ static void page_cache_delete(struct address_space *mapping,
        /* hugetlb pages are represented by a single entry in the xarray */
        if (!PageHuge(page)) {
                xas_set_order(&xas, page->index, compound_order(page));
-               nr = 1U << compound_order(page);
+               nr = compound_nr(page);
        }
 
        VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -203,8 +203,9 @@ static void unaccount_page_cache_page(struct address_space *mapping,
                __mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
                if (PageTransHuge(page))
                        __dec_node_page_state(page, NR_SHMEM_THPS);
-       } else {
-               VM_BUG_ON_PAGE(PageTransHuge(page), page);
+       } else if (PageTransHuge(page)) {
+               __dec_node_page_state(page, NR_FILE_THPS);
+               filemap_nr_thps_dec(mapping);
        }
 
        /*
@@ -281,11 +282,11 @@ EXPORT_SYMBOL(delete_from_page_cache);
  * @pvec: pagevec with pages to delete
  *
  * The function walks over mapping->i_pages and removes pages passed in @pvec
- * from the mapping. The function expects @pvec to be sorted by page index.
+ * from the mapping. The function expects @pvec to be sorted by page index
+ * and is optimised for it to be dense.
  * It tolerates holes in @pvec (mapping entries at those indices are not
  * modified). The function expects only THP head pages to be present in the
- * @pvec and takes care to delete all corresponding tail pages from the
- * mapping as well.
+ * @pvec.
  *
  * The function expects the i_pages lock to be held.
  */
@@ -294,40 +295,43 @@ static void page_cache_delete_batch(struct address_space *mapping,
 {
        XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
        int total_pages = 0;
-       int i = 0, tail_pages = 0;
+       int i = 0;
        struct page *page;
 
        mapping_set_update(&xas, mapping);
        xas_for_each(&xas, page, ULONG_MAX) {
-               if (i >= pagevec_count(pvec) && !tail_pages)
+               if (i >= pagevec_count(pvec))
                        break;
+
+               /* A swap/dax/shadow entry got inserted? Skip it. */
                if (xa_is_value(page))
                        continue;
-               if (!tail_pages) {
-                       /*
-                        * Some page got inserted in our range? Skip it. We
-                        * have our pages locked so they are protected from
-                        * being removed.
-                        */
-                       if (page != pvec->pages[i]) {
-                               VM_BUG_ON_PAGE(page->index >
-                                               pvec->pages[i]->index, page);
-                               continue;
-                       }
-                       WARN_ON_ONCE(!PageLocked(page));
-                       if (PageTransHuge(page) && !PageHuge(page))
-                               tail_pages = HPAGE_PMD_NR - 1;
+               /*
+                * A page got inserted in our range? Skip it. We have our
+                * pages locked so they are protected from being removed.
+                * If we see a page whose index is higher than ours, it
+                * means our page has been removed, which shouldn't be
+                * possible because we're holding the PageLock.
+                */
+               if (page != pvec->pages[i]) {
+                       VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
+                                       page);
+                       continue;
+               }
+
+               WARN_ON_ONCE(!PageLocked(page));
+
+               if (page->index == xas.xa_index)
                        page->mapping = NULL;
-                       /*
-                        * Leave page->index set: truncation lookup relies
-                        * upon it
-                        */
+               /* Leave page->index set: truncation lookup relies on it */
+
+               /*
+                * Move to the next page in the vector if this is a regular
+                * page or the index is of the last sub-page of this compound
+                * page.
+                */
+               if (page->index + compound_nr(page) - 1 == xas.xa_index)
                        i++;
-               } else {
-                       VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
-                                       != pvec->pages[i]->index, page);
-                       tail_pages--;
-               }
                xas_store(&xas, NULL);
                total_pages++;
        }
@@ -408,7 +412,8 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
                .range_end = end,
        };
 
-       if (!mapping_cap_writeback_dirty(mapping))
+       if (!mapping_cap_writeback_dirty(mapping) ||
+           !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
                return 0;
 
        wbc_attach_fdatawrite_inode(&wbc, mapping->host);
@@ -617,10 +622,13 @@ int filemap_fdatawait_keep_errors(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_fdatawait_keep_errors);
 
+/* Returns true if writeback might be needed or already in progress. */
 static bool mapping_needs_writeback(struct address_space *mapping)
 {
-       return (!dax_mapping(mapping) && mapping->nrpages) ||
-           (dax_mapping(mapping) && mapping->nrexceptional);
+       if (dax_mapping(mapping))
+               return mapping->nrexceptional;
+
+       return mapping->nrpages;
 }
 
 int filemap_write_and_wait(struct address_space *mapping)
@@ -1516,7 +1524,7 @@ EXPORT_SYMBOL(page_cache_prev_miss);
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
 {
        XA_STATE(xas, &mapping->i_pages, offset);
-       struct page *head, *page;
+       struct page *page;
 
        rcu_read_lock();
 repeat:
@@ -1531,25 +1539,19 @@ repeat:
        if (!page || xa_is_value(page))
                goto out;
 
-       head = compound_head(page);
-       if (!page_cache_get_speculative(head))
+       if (!page_cache_get_speculative(page))
                goto repeat;
 
-       /* The page was split under us? */
-       if (compound_head(page) != head) {
-               put_page(head);
-               goto repeat;
-       }
-
        /*
-        * Has the page moved?
+        * Has the page moved or been split?
         * This is part of the lockless pagecache protocol. See
         * include/linux/pagemap.h for details.
         */
        if (unlikely(page != xas_reload(&xas))) {
-               put_page(head);
+               put_page(page);
                goto repeat;
        }
+       page = find_subpage(page, offset);
 out:
        rcu_read_unlock();
 
@@ -1646,7 +1648,7 @@ repeat:
                }
 
                /* Has the page been truncated? */
-               if (unlikely(page->mapping != mapping)) {
+               if (unlikely(compound_head(page)->mapping != mapping)) {
                        unlock_page(page);
                        put_page(page);
                        goto repeat;
@@ -1731,7 +1733,6 @@ unsigned find_get_entries(struct address_space *mapping,
 
        rcu_read_lock();
        xas_for_each(&xas, page, ULONG_MAX) {
-               struct page *head;
                if (xas_retry(&xas, page))
                        continue;
                /*
@@ -1742,17 +1743,13 @@ unsigned find_get_entries(struct address_space *mapping,
                if (xa_is_value(page))
                        goto export;
 
-               head = compound_head(page);
-               if (!page_cache_get_speculative(head))
+               if (!page_cache_get_speculative(page))
                        goto retry;
 
-               /* The page was split under us? */
-               if (compound_head(page) != head)
-                       goto put_page;
-
-               /* Has the page moved? */
+               /* Has the page moved or been split? */
                if (unlikely(page != xas_reload(&xas)))
                        goto put_page;
+               page = find_subpage(page, xas.xa_index);
 
 export:
                indices[ret] = xas.xa_index;
@@ -1761,7 +1758,7 @@ export:
                        break;
                continue;
 put_page:
-               put_page(head);
+               put_page(page);
 retry:
                xas_reset(&xas);
        }
@@ -1803,33 +1800,27 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
 
        rcu_read_lock();
        xas_for_each(&xas, page, end) {
-               struct page *head;
                if (xas_retry(&xas, page))
                        continue;
                /* Skip over shadow, swap and DAX entries */
                if (xa_is_value(page))
                        continue;
 
-               head = compound_head(page);
-               if (!page_cache_get_speculative(head))
+               if (!page_cache_get_speculative(page))
                        goto retry;
 
-               /* The page was split under us? */
-               if (compound_head(page) != head)
-                       goto put_page;
-
-               /* Has the page moved? */
+               /* Has the page moved or been split? */
                if (unlikely(page != xas_reload(&xas)))
                        goto put_page;
 
-               pages[ret] = page;
+               pages[ret] = find_subpage(page, xas.xa_index);
                if (++ret == nr_pages) {
                        *start = xas.xa_index + 1;
                        goto out;
                }
                continue;
 put_page:
-               put_page(head);
+               put_page(page);
 retry:
                xas_reset(&xas);
        }
@@ -1874,7 +1865,6 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 
        rcu_read_lock();
        for (page = xas_load(&xas); page; page = xas_next(&xas)) {
-               struct page *head;
                if (xas_retry(&xas, page))
                        continue;
                /*
@@ -1884,24 +1874,19 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
                if (xa_is_value(page))
                        break;
 
-               head = compound_head(page);
-               if (!page_cache_get_speculative(head))
+               if (!page_cache_get_speculative(page))
                        goto retry;
 
-               /* The page was split under us? */
-               if (compound_head(page) != head)
-                       goto put_page;
-
-               /* Has the page moved? */
+               /* Has the page moved or been split? */
                if (unlikely(page != xas_reload(&xas)))
                        goto put_page;
 
-               pages[ret] = page;
+               pages[ret] = find_subpage(page, xas.xa_index);
                if (++ret == nr_pages)
                        break;
                continue;
 put_page:
-               put_page(head);
+               put_page(page);
 retry:
                xas_reset(&xas);
        }
@@ -1937,7 +1922,6 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
 
        rcu_read_lock();
        xas_for_each_marked(&xas, page, end, tag) {
-               struct page *head;
                if (xas_retry(&xas, page))
                        continue;
                /*
@@ -1948,26 +1932,21 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
                if (xa_is_value(page))
                        continue;
 
-               head = compound_head(page);
-               if (!page_cache_get_speculative(head))
+               if (!page_cache_get_speculative(page))
                        goto retry;
 
-               /* The page was split under us? */
-               if (compound_head(page) != head)
-                       goto put_page;
-
-               /* Has the page moved? */
+               /* Has the page moved or been split? */
                if (unlikely(page != xas_reload(&xas)))
                        goto put_page;
 
-               pages[ret] = page;
+               pages[ret] = find_subpage(page, xas.xa_index);
                if (++ret == nr_pages) {
                        *index = xas.xa_index + 1;
                        goto out;
                }
                continue;
 put_page:
-               put_page(head);
+               put_page(page);
 retry:
                xas_reset(&xas);
        }
@@ -2562,12 +2541,12 @@ retry_find:
                goto out_retry;
 
        /* Did it get truncated? */
-       if (unlikely(page->mapping != mapping)) {
+       if (unlikely(compound_head(page)->mapping != mapping)) {
                unlock_page(page);
                put_page(page);
                goto retry_find;
        }
-       VM_BUG_ON_PAGE(page->index != offset, page);
+       VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
 
        /*
         * We have a locked page in the page cache, now we need to check
@@ -2648,7 +2627,7 @@ void filemap_map_pages(struct vm_fault *vmf,
        pgoff_t last_pgoff = start_pgoff;
        unsigned long max_idx;
        XA_STATE(xas, &mapping->i_pages, start_pgoff);
-       struct page *head, *page;
+       struct page *page;
 
        rcu_read_lock();
        xas_for_each(&xas, page, end_pgoff) {
@@ -2657,24 +2636,19 @@ void filemap_map_pages(struct vm_fault *vmf,
                if (xa_is_value(page))
                        goto next;
 
-               head = compound_head(page);
-
                /*
                 * Check for a locked page first, as a speculative
                 * reference may adversely influence page migration.
                 */
-               if (PageLocked(head))
+               if (PageLocked(page))
                        goto next;
-               if (!page_cache_get_speculative(head))
+               if (!page_cache_get_speculative(page))
                        goto next;
 
-               /* The page was split under us? */
-               if (compound_head(page) != head)
-                       goto skip;
-
-               /* Has the page moved? */
+               /* Has the page moved or been split? */
                if (unlikely(page != xas_reload(&xas)))
                        goto skip;
+               page = find_subpage(page, xas.xa_index);
 
                if (!PageUptodate(page) ||
                                PageReadahead(page) ||
index c64dca6..c431ca8 100644 (file)
@@ -46,6 +46,8 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
        if (WARN_ON_ONCE(nr_frames > vec->nr_allocated))
                nr_frames = vec->nr_allocated;
 
+       start = untagged_addr(start);
+
        down_read(&mm->mmap_sem);
        locked = 1;
        vma = find_vma_intersection(mm, start, start + 1);
index 98f13ab..23a9f9c 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -29,85 +29,70 @@ struct follow_page_context {
        unsigned int page_mask;
 };
 
-typedef int (*set_dirty_func_t)(struct page *page);
-
-static void __put_user_pages_dirty(struct page **pages,
-                                  unsigned long npages,
-                                  set_dirty_func_t sdf)
-{
-       unsigned long index;
-
-       for (index = 0; index < npages; index++) {
-               struct page *page = compound_head(pages[index]);
-
-               /*
-                * Checking PageDirty at this point may race with
-                * clear_page_dirty_for_io(), but that's OK. Two key cases:
-                *
-                * 1) This code sees the page as already dirty, so it skips
-                * the call to sdf(). That could happen because
-                * clear_page_dirty_for_io() called page_mkclean(),
-                * followed by set_page_dirty(). However, now the page is
-                * going to get written back, which meets the original
-                * intention of setting it dirty, so all is well:
-                * clear_page_dirty_for_io() goes on to call
-                * TestClearPageDirty(), and write the page back.
-                *
-                * 2) This code sees the page as clean, so it calls sdf().
-                * The page stays dirty, despite being written back, so it
-                * gets written back again in the next writeback cycle.
-                * This is harmless.
-                */
-               if (!PageDirty(page))
-                       sdf(page);
-
-               put_user_page(page);
-       }
-}
-
 /**
- * put_user_pages_dirty() - release and dirty an array of gup-pinned pages
- * @pages:  array of pages to be marked dirty and released.
+ * put_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
+ * @pages:  array of pages to be maybe marked dirty, and definitely released.
  * @npages: number of pages in the @pages array.
+ * @make_dirty: whether to mark the pages dirty
  *
  * "gup-pinned page" refers to a page that has had one of the get_user_pages()
  * variants called on that page.
  *
  * For each page in the @pages array, make that page (or its head page, if a
- * compound page) dirty, if it was previously listed as clean. Then, release
- * the page using put_user_page().
+ * compound page) dirty, if @make_dirty is true, and if the page was previously
+ * listed as clean. In any case, releases all pages using put_user_page(),
+ * possibly via put_user_pages(), for the non-dirty case.
  *
  * Please see the put_user_page() documentation for details.
  *
- * set_page_dirty(), which does not lock the page, is used here.
- * Therefore, it is the caller's responsibility to ensure that this is
- * safe. If not, then put_user_pages_dirty_lock() should be called instead.
+ * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is
+ * required, then the caller should a) verify that this is really correct,
+ * because _lock() is usually required, and b) hand code it:
+ * set_page_dirty_lock(), put_user_page().
  *
  */
-void put_user_pages_dirty(struct page **pages, unsigned long npages)
+void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
+                              bool make_dirty)
 {
-       __put_user_pages_dirty(pages, npages, set_page_dirty);
-}
-EXPORT_SYMBOL(put_user_pages_dirty);
+       unsigned long index;
 
-/**
- * put_user_pages_dirty_lock() - release and dirty an array of gup-pinned pages
- * @pages:  array of pages to be marked dirty and released.
- * @npages: number of pages in the @pages array.
- *
- * For each page in the @pages array, make that page (or its head page, if a
- * compound page) dirty, if it was previously listed as clean. Then, release
- * the page using put_user_page().
- *
- * Please see the put_user_page() documentation for details.
- *
- * This is just like put_user_pages_dirty(), except that it invokes
- * set_page_dirty_lock(), instead of set_page_dirty().
- *
- */
-void put_user_pages_dirty_lock(struct page **pages, unsigned long npages)
-{
-       __put_user_pages_dirty(pages, npages, set_page_dirty_lock);
+       /*
+        * TODO: this can be optimized for huge pages: if a series of pages is
+        * physically contiguous and part of the same compound page, then a
+        * single operation to the head page should suffice.
+        */
+
+       if (!make_dirty) {
+               put_user_pages(pages, npages);
+               return;
+       }
+
+       for (index = 0; index < npages; index++) {
+               struct page *page = compound_head(pages[index]);
+               /*
+                * Checking PageDirty at this point may race with
+                * clear_page_dirty_for_io(), but that's OK. Two key
+                * cases:
+                *
+                * 1) This code sees the page as already dirty, so it
+                * skips the call to set_page_dirty(). That could happen
+                * because clear_page_dirty_for_io() called
+                * page_mkclean(), followed by set_page_dirty().
+                * However, now the page is going to get written back,
+                * which meets the original intention of setting it
+                * dirty, so all is well: clear_page_dirty_for_io() goes
+                * on to call TestClearPageDirty(), and write the page
+                * back.
+                *
+                * 2) This code sees the page as clean, so it calls
+                * set_page_dirty(). The page stays dirty, despite being
+                * written back, so it gets written back again in the
+                * next writeback cycle. This is harmless.
+                */
+               if (!PageDirty(page))
+                       set_page_dirty_lock(page);
+               put_user_page(page);
+       }
 }
 EXPORT_SYMBOL(put_user_pages_dirty_lock);
 
@@ -399,7 +384,7 @@ retry_locked:
                spin_unlock(ptl);
                return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
        }
-       if (flags & FOLL_SPLIT) {
+       if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) {
                int ret;
                page = pmd_page(*pmd);
                if (is_huge_zero_page(page)) {
@@ -408,7 +393,7 @@ retry_locked:
                        split_huge_pmd(vma, pmd, address);
                        if (pmd_trans_unstable(pmd))
                                ret = -EBUSY;
-               } else {
+               } else if (flags & FOLL_SPLIT) {
                        if (unlikely(!try_get_page(page))) {
                                spin_unlock(ptl);
                                return ERR_PTR(-ENOMEM);
@@ -420,6 +405,10 @@ retry_locked:
                        put_page(page);
                        if (pmd_none(*pmd))
                                return no_page_table(vma, flags);
+               } else {  /* flags & FOLL_SPLIT_PMD */
+                       spin_unlock(ptl);
+                       split_huge_pmd(vma, pmd, address);
+                       ret = pte_alloc(mm, pmd) ? -ENOMEM : 0;
                }
 
                return ret ? ERR_PTR(ret) :
@@ -799,6 +788,8 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
        if (!nr_pages)
                return 0;
 
+       start = untagged_addr(start);
+
        VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
 
        /*
@@ -961,6 +952,8 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
        struct vm_area_struct *vma;
        vm_fault_t ret, major = 0;
 
+       address = untagged_addr(address);
+
        if (unlocked)
                fault_flags |= FAULT_FLAG_ALLOW_RETRY;
 
@@ -1460,7 +1453,7 @@ check_again:
                 * gup may start from a tail page. Advance step by the left
                 * part.
                 */
-               step = (1 << compound_order(head)) - (pages[i] - head);
+               step = compound_nr(head) - (pages[i] - head);
                /*
                 * If we get a page from the CMA zone, since we are going to
                 * be pinning these entries, we might as well move them out
index 16b6731..902f5fa 100644 (file)
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -8,7 +8,7 @@
  * Refer to include/linux/hmm.h for information about heterogeneous memory
  * management or HMM for short.
  */
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/hmm.h>
 #include <linux/init.h>
 #include <linux/rmap.h>
 #include <linux/mmu_notifier.h>
 #include <linux/memory_hotplug.h>
 
-static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
-
-/**
- * hmm_get_or_create - register HMM against an mm (HMM internal)
- *
- * @mm: mm struct to attach to
- * Returns: returns an HMM object, either by referencing the existing
- *          (per-process) object, or by creating a new one.
- *
- * This is not intended to be used directly by device drivers. If mm already
- * has an HMM struct then it get a reference on it and returns it. Otherwise
- * it allocates an HMM struct, initializes it, associate it with the mm and
- * returns it.
- */
-static struct hmm *hmm_get_or_create(struct mm_struct *mm)
+static struct mmu_notifier *hmm_alloc_notifier(struct mm_struct *mm)
 {
        struct hmm *hmm;
 
-       lockdep_assert_held_write(&mm->mmap_sem);
-
-       /* Abuse the page_table_lock to also protect mm->hmm. */
-       spin_lock(&mm->page_table_lock);
-       hmm = mm->hmm;
-       if (mm->hmm && kref_get_unless_zero(&mm->hmm->kref))
-               goto out_unlock;
-       spin_unlock(&mm->page_table_lock);
-
-       hmm = kmalloc(sizeof(*hmm), GFP_KERNEL);
+       hmm = kzalloc(sizeof(*hmm), GFP_KERNEL);
        if (!hmm)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
+
        init_waitqueue_head(&hmm->wq);
        INIT_LIST_HEAD(&hmm->mirrors);
        init_rwsem(&hmm->mirrors_sem);
-       hmm->mmu_notifier.ops = NULL;
        INIT_LIST_HEAD(&hmm->ranges);
        spin_lock_init(&hmm->ranges_lock);
-       kref_init(&hmm->kref);
        hmm->notifiers = 0;
-       hmm->mm = mm;
-
-       hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
-       if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) {
-               kfree(hmm);
-               return NULL;
-       }
-
-       mmgrab(hmm->mm);
-
-       /*
-        * We hold the exclusive mmap_sem here so we know that mm->hmm is
-        * still NULL or 0 kref, and is safe to update.
-        */
-       spin_lock(&mm->page_table_lock);
-       mm->hmm = hmm;
-
-out_unlock:
-       spin_unlock(&mm->page_table_lock);
-       return hmm;
+       return &hmm->mmu_notifier;
 }
 
-static void hmm_free_rcu(struct rcu_head *rcu)
+static void hmm_free_notifier(struct mmu_notifier *mn)
 {
-       struct hmm *hmm = container_of(rcu, struct hmm, rcu);
+       struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
 
-       mmdrop(hmm->mm);
+       WARN_ON(!list_empty(&hmm->ranges));
+       WARN_ON(!list_empty(&hmm->mirrors));
        kfree(hmm);
 }
 
-static void hmm_free(struct kref *kref)
-{
-       struct hmm *hmm = container_of(kref, struct hmm, kref);
-
-       spin_lock(&hmm->mm->page_table_lock);
-       if (hmm->mm->hmm == hmm)
-               hmm->mm->hmm = NULL;
-       spin_unlock(&hmm->mm->page_table_lock);
-
-       mmu_notifier_unregister_no_release(&hmm->mmu_notifier, hmm->mm);
-       mmu_notifier_call_srcu(&hmm->rcu, hmm_free_rcu);
-}
-
-static inline void hmm_put(struct hmm *hmm)
-{
-       kref_put(&hmm->kref, hmm_free);
-}
-
 static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 {
        struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
        struct hmm_mirror *mirror;
 
-       /* Bail out if hmm is in the process of being freed */
-       if (!kref_get_unless_zero(&hmm->kref))
-               return;
-
        /*
         * Since hmm_range_register() holds the mmget() lock hmm_release() is
         * prevented as long as a range exists.
@@ -137,8 +73,6 @@ static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
                        mirror->ops->release(mirror);
        }
        up_read(&hmm->mirrors_sem);
-
-       hmm_put(hmm);
 }
 
 static void notifiers_decrement(struct hmm *hmm)
@@ -165,23 +99,14 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn,
 {
        struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
        struct hmm_mirror *mirror;
-       struct hmm_update update;
        struct hmm_range *range;
        unsigned long flags;
        int ret = 0;
 
-       if (!kref_get_unless_zero(&hmm->kref))
-               return 0;
-
-       update.start = nrange->start;
-       update.end = nrange->end;
-       update.event = HMM_UPDATE_INVALIDATE;
-       update.blockable = mmu_notifier_range_blockable(nrange);
-
        spin_lock_irqsave(&hmm->ranges_lock, flags);
        hmm->notifiers++;
        list_for_each_entry(range, &hmm->ranges, list) {
-               if (update.end < range->start || update.start >= range->end)
+               if (nrange->end < range->start || nrange->start >= range->end)
                        continue;
 
                range->valid = false;
@@ -198,9 +123,10 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn,
        list_for_each_entry(mirror, &hmm->mirrors, list) {
                int rc;
 
-               rc = mirror->ops->sync_cpu_device_pagetables(mirror, &update);
+               rc = mirror->ops->sync_cpu_device_pagetables(mirror, nrange);
                if (rc) {
-                       if (WARN_ON(update.blockable || rc != -EAGAIN))
+                       if (WARN_ON(mmu_notifier_range_blockable(nrange) ||
+                           rc != -EAGAIN))
                                continue;
                        ret = -EAGAIN;
                        break;
@@ -211,7 +137,6 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn,
 out:
        if (ret)
                notifiers_decrement(hmm);
-       hmm_put(hmm);
        return ret;
 }
 
@@ -220,17 +145,15 @@ static void hmm_invalidate_range_end(struct mmu_notifier *mn,
 {
        struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
 
-       if (!kref_get_unless_zero(&hmm->kref))
-               return;
-
        notifiers_decrement(hmm);
-       hmm_put(hmm);
 }
 
 static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
        .release                = hmm_release,
        .invalidate_range_start = hmm_invalidate_range_start,
        .invalidate_range_end   = hmm_invalidate_range_end,
+       .alloc_notifier         = hmm_alloc_notifier,
+       .free_notifier          = hmm_free_notifier,
 };
 
 /*
@@ -242,18 +165,27 @@ static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
  *
  * To start mirroring a process address space, the device driver must register
  * an HMM mirror struct.
+ *
+ * The caller cannot unregister the hmm_mirror while any ranges are
+ * registered.
+ *
+ * Callers using this function must put a call to mmu_notifier_synchronize()
+ * in their module exit functions.
  */
 int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm)
 {
+       struct mmu_notifier *mn;
+
        lockdep_assert_held_write(&mm->mmap_sem);
 
        /* Sanity check */
        if (!mm || !mirror || !mirror->ops)
                return -EINVAL;
 
-       mirror->hmm = hmm_get_or_create(mm);
-       if (!mirror->hmm)
-               return -ENOMEM;
+       mn = mmu_notifier_get_locked(&hmm_mmu_notifier_ops, mm);
+       if (IS_ERR(mn))
+               return PTR_ERR(mn);
+       mirror->hmm = container_of(mn, struct hmm, mmu_notifier);
 
        down_write(&mirror->hmm->mirrors_sem);
        list_add(&mirror->list, &mirror->hmm->mirrors);
@@ -277,7 +209,7 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror)
        down_write(&hmm->mirrors_sem);
        list_del(&mirror->list);
        up_write(&hmm->mirrors_sem);
-       hmm_put(hmm);
+       mmu_notifier_put(&hmm->mmu_notifier);
 }
 EXPORT_SYMBOL(hmm_mirror_unregister);
 
@@ -285,8 +217,7 @@ struct hmm_vma_walk {
        struct hmm_range        *range;
        struct dev_pagemap      *pgmap;
        unsigned long           last;
-       bool                    fault;
-       bool                    block;
+       unsigned int            flags;
 };
 
 static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
@@ -298,17 +229,27 @@ static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
        struct vm_area_struct *vma = walk->vma;
        vm_fault_t ret;
 
-       flags |= hmm_vma_walk->block ? 0 : FAULT_FLAG_ALLOW_RETRY;
-       flags |= write_fault ? FAULT_FLAG_WRITE : 0;
+       if (!vma)
+               goto err;
+
+       if (hmm_vma_walk->flags & HMM_FAULT_ALLOW_RETRY)
+               flags |= FAULT_FLAG_ALLOW_RETRY;
+       if (write_fault)
+               flags |= FAULT_FLAG_WRITE;
+
        ret = handle_mm_fault(vma, addr, flags);
-       if (ret & VM_FAULT_RETRY)
+       if (ret & VM_FAULT_RETRY) {
+               /* Note, handle_mm_fault did up_read(&mm->mmap_sem)) */
                return -EAGAIN;
-       if (ret & VM_FAULT_ERROR) {
-               *pfn = range->values[HMM_PFN_ERROR];
-               return -EFAULT;
        }
+       if (ret & VM_FAULT_ERROR)
+               goto err;
 
        return -EBUSY;
+
+err:
+       *pfn = range->values[HMM_PFN_ERROR];
+       return -EFAULT;
 }
 
 static int hmm_pfns_bad(unsigned long addr,
@@ -328,8 +269,8 @@ static int hmm_pfns_bad(unsigned long addr,
 }
 
 /*
- * hmm_vma_walk_hole() - handle a range lacking valid pmd or pte(s)
- * @start: range virtual start address (inclusive)
+ * hmm_vma_walk_hole_() - handle a range lacking valid pmd or pte(s)
+ * @addr: range virtual start address (inclusive)
  * @end: range virtual end address (exclusive)
  * @fault: should we fault or not ?
  * @write_fault: write fault ?
@@ -346,13 +287,15 @@ static int hmm_vma_walk_hole_(unsigned long addr, unsigned long end,
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
        uint64_t *pfns = range->pfns;
-       unsigned long i, page_size;
+       unsigned long i;
 
        hmm_vma_walk->last = addr;
-       page_size = hmm_range_page_size(range);
-       i = (addr - range->start) >> range->page_shift;
+       i = (addr - range->start) >> PAGE_SHIFT;
+
+       if (write_fault && walk->vma && !(walk->vma->vm_flags & VM_WRITE))
+               return -EPERM;
 
-       for (; addr < end; addr += page_size, i++) {
+       for (; addr < end; addr += PAGE_SIZE, i++) {
                pfns[i] = range->values[HMM_PFN_NONE];
                if (fault || write_fault) {
                        int ret;
@@ -373,15 +316,15 @@ static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
 {
        struct hmm_range *range = hmm_vma_walk->range;
 
-       if (!hmm_vma_walk->fault)
+       if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT)
                return;
 
        /*
         * So we not only consider the individual per page request we also
         * consider the default flags requested for the range. The API can
-        * be use in 2 fashions. The first one where the HMM user coalesce
-        * multiple page fault into one request and set flags per pfns for
-        * of those faults. The second one where the HMM user want to pre-
+        * be used 2 ways. The first one where the HMM user coalesces
+        * multiple page faults into one request and sets flags per pfn for
+        * those faults. The second one where the HMM user wants to pre-
         * fault a range with specific flags. For the latter one it is a
         * waste to have the user pre-fill the pfn arrays with a default
         * flags value.
@@ -391,7 +334,7 @@ static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
        /* We aren't ask to do anything ... */
        if (!(pfns & range->flags[HMM_PFN_VALID]))
                return;
-       /* If this is device memory than only fault if explicitly requested */
+       /* If this is device memory then only fault if explicitly requested */
        if ((cpu_flags & range->flags[HMM_PFN_DEVICE_PRIVATE])) {
                /* Do we fault on device memory ? */
                if (pfns & range->flags[HMM_PFN_DEVICE_PRIVATE]) {
@@ -418,7 +361,7 @@ static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
 {
        unsigned long i;
 
-       if (!hmm_vma_walk->fault) {
+       if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT) {
                *fault = *write_fault = false;
                return;
        }
@@ -458,22 +401,10 @@ static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
                                range->flags[HMM_PFN_VALID];
 }
 
-static inline uint64_t pud_to_hmm_pfn_flags(struct hmm_range *range, pud_t pud)
-{
-       if (!pud_present(pud))
-               return 0;
-       return pud_write(pud) ? range->flags[HMM_PFN_VALID] |
-                               range->flags[HMM_PFN_WRITE] :
-                               range->flags[HMM_PFN_VALID];
-}
-
-static int hmm_vma_handle_pmd(struct mm_walk *walk,
-                             unsigned long addr,
-                             unsigned long end,
-                             uint64_t *pfns,
-                             pmd_t pmd)
-{
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
+               unsigned long end, uint64_t *pfns, pmd_t pmd)
+{
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
        unsigned long pfn, npages, i;
@@ -488,7 +419,7 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk,
        if (pmd_protnone(pmd) || fault || write_fault)
                return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
 
-       pfn = pmd_pfn(pmd) + pte_index(addr);
+       pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
        for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++) {
                if (pmd_devmap(pmd)) {
                        hmm_vma_walk->pgmap = get_dev_pagemap(pfn,
@@ -504,11 +435,12 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk,
        }
        hmm_vma_walk->last = end;
        return 0;
-#else
-       /* If THP is not enabled then we should never reach that code ! */
-       return -EINVAL;
-#endif
 }
+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
+/* stub to allow the code below to compile */
+int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
+               unsigned long end, uint64_t *pfns, pmd_t pmd);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte)
 {
@@ -525,7 +457,6 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 {
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
-       struct vm_area_struct *vma = walk->vma;
        bool fault, write_fault;
        uint64_t cpu_flags;
        pte_t pte = *ptep;
@@ -546,6 +477,9 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
                swp_entry_t entry = pte_to_swp_entry(pte);
 
                if (!non_swap_entry(entry)) {
+                       cpu_flags = pte_to_hmm_pfn_flags(range, pte);
+                       hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
+                                          &fault, &write_fault);
                        if (fault || write_fault)
                                goto fault;
                        return 0;
@@ -574,8 +508,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
                        if (fault || write_fault) {
                                pte_unmap(ptep);
                                hmm_vma_walk->last = addr;
-                               migration_entry_wait(vma->vm_mm,
-                                                    pmdp, addr);
+                               migration_entry_wait(walk->mm, pmdp, addr);
                                return -EBUSY;
                        }
                        return 0;
@@ -623,21 +556,16 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 {
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
-       struct vm_area_struct *vma = walk->vma;
        uint64_t *pfns = range->pfns;
        unsigned long addr = start, i;
        pte_t *ptep;
        pmd_t pmd;
 
-
 again:
        pmd = READ_ONCE(*pmdp);
        if (pmd_none(pmd))
                return hmm_vma_walk_hole(start, end, walk);
 
-       if (pmd_huge(pmd) && (range->vma->vm_flags & VM_HUGETLB))
-               return hmm_pfns_bad(start, end, walk);
-
        if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
                bool fault, write_fault;
                unsigned long npages;
@@ -651,7 +579,7 @@ again:
                                     0, &fault, &write_fault);
                if (fault || write_fault) {
                        hmm_vma_walk->last = addr;
-                       pmd_migration_entry_wait(vma->vm_mm, pmdp);
+                       pmd_migration_entry_wait(walk->mm, pmdp);
                        return -EBUSY;
                }
                return 0;
@@ -660,11 +588,11 @@ again:
 
        if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
                /*
-                * No need to take pmd_lock here, even if some other threads
+                * No need to take pmd_lock here, even if some other thread
                 * is splitting the huge pmd we will get that event through
                 * mmu_notifier callback.
                 *
-                * So just read pmd value and check again its a transparent
+                * So just read pmd value and check again it's a transparent
                 * huge or device mapping one and compute corresponding pfn
                 * values.
                 */
@@ -678,7 +606,7 @@ again:
        }
 
        /*
-        * We have handled all the valid case above ie either none, migration,
+        * We have handled all the valid cases above ie either none, migration,
         * huge or transparent huge. At this point either it is a valid pmd
         * entry pointing to pte directory or it is a bad pmd that will not
         * recover.
@@ -714,10 +642,19 @@ again:
        return 0;
 }
 
-static int hmm_vma_walk_pud(pud_t *pudp,
-                           unsigned long start,
-                           unsigned long end,
-                           struct mm_walk *walk)
+#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && \
+    defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+static inline uint64_t pud_to_hmm_pfn_flags(struct hmm_range *range, pud_t pud)
+{
+       if (!pud_present(pud))
+               return 0;
+       return pud_write(pud) ? range->flags[HMM_PFN_VALID] |
+                               range->flags[HMM_PFN_WRITE] :
+                               range->flags[HMM_PFN_VALID];
+}
+
+static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
+               struct mm_walk *walk)
 {
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
@@ -781,42 +718,29 @@ again:
 
        return 0;
 }
+#else
+#define hmm_vma_walk_pud       NULL
+#endif
 
+#ifdef CONFIG_HUGETLB_PAGE
 static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
                                      unsigned long start, unsigned long end,
                                      struct mm_walk *walk)
 {
-#ifdef CONFIG_HUGETLB_PAGE
-       unsigned long addr = start, i, pfn, mask, size, pfn_inc;
+       unsigned long addr = start, i, pfn;
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
        struct vm_area_struct *vma = walk->vma;
-       struct hstate *h = hstate_vma(vma);
        uint64_t orig_pfn, cpu_flags;
        bool fault, write_fault;
        spinlock_t *ptl;
        pte_t entry;
        int ret = 0;
 
-       size = 1UL << huge_page_shift(h);
-       mask = size - 1;
-       if (range->page_shift != PAGE_SHIFT) {
-               /* Make sure we are looking at full page. */
-               if (start & mask)
-                       return -EINVAL;
-               if (end < (start + size))
-                       return -EINVAL;
-               pfn_inc = size >> PAGE_SHIFT;
-       } else {
-               pfn_inc = 1;
-               size = PAGE_SIZE;
-       }
-
-
-       ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
+       ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
        entry = huge_ptep_get(pte);
 
-       i = (start - range->start) >> range->page_shift;
+       i = (start - range->start) >> PAGE_SHIFT;
        orig_pfn = range->pfns[i];
        range->pfns[i] = range->values[HMM_PFN_NONE];
        cpu_flags = pte_to_hmm_pfn_flags(range, entry);
@@ -828,8 +752,8 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
                goto unlock;
        }
 
-       pfn = pte_pfn(entry) + ((start & mask) >> range->page_shift);
-       for (; addr < end; addr += size, i++, pfn += pfn_inc)
+       pfn = pte_pfn(entry) + ((start & ~hmask) >> PAGE_SHIFT);
+       for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
                range->pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
                                 cpu_flags;
        hmm_vma_walk->last = end;
@@ -841,10 +765,10 @@ unlock:
                return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
 
        return ret;
-#else /* CONFIG_HUGETLB_PAGE */
-       return -EINVAL;
-#endif
 }
+#else
+#define hmm_vma_walk_hugetlb_entry NULL
+#endif /* CONFIG_HUGETLB_PAGE */
 
 static void hmm_pfns_clear(struct hmm_range *range,
                           uint64_t *pfns,
@@ -859,44 +783,32 @@ static void hmm_pfns_clear(struct hmm_range *range,
  * hmm_range_register() - start tracking change to CPU page table over a range
  * @range: range
  * @mm: the mm struct for the range of virtual address
- * @start: start virtual address (inclusive)
- * @end: end virtual address (exclusive)
- * @page_shift: expect page shift for the range
- * Returns 0 on success, -EFAULT if the address space is no longer valid
+ *
+ * Return: 0 on success, -EFAULT if the address space is no longer valid
  *
  * Track updates to the CPU page table see include/linux/hmm.h
  */
-int hmm_range_register(struct hmm_range *range,
-                      struct hmm_mirror *mirror,
-                      unsigned long start,
-                      unsigned long end,
-                      unsigned page_shift)
+int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror)
 {
-       unsigned long mask = ((1UL << page_shift) - 1UL);
        struct hmm *hmm = mirror->hmm;
        unsigned long flags;
 
        range->valid = false;
        range->hmm = NULL;
 
-       if ((start & mask) || (end & mask))
+       if ((range->start & (PAGE_SIZE - 1)) || (range->end & (PAGE_SIZE - 1)))
                return -EINVAL;
-       if (start >= end)
+       if (range->start >= range->end)
                return -EINVAL;
 
-       range->page_shift = page_shift;
-       range->start = start;
-       range->end = end;
-
        /* Prevent hmm_release() from running while the range is valid */
-       if (!mmget_not_zero(hmm->mm))
+       if (!mmget_not_zero(hmm->mmu_notifier.mm))
                return -EFAULT;
 
        /* Initialize range to track CPU page table updates. */
        spin_lock_irqsave(&hmm->ranges_lock, flags);
 
        range->hmm = hmm;
-       kref_get(&hmm->kref);
        list_add(&range->list, &hmm->ranges);
 
        /*
@@ -928,8 +840,7 @@ void hmm_range_unregister(struct hmm_range *range)
        spin_unlock_irqrestore(&hmm->ranges_lock, flags);
 
        /* Drop reference taken by hmm_range_register() */
-       mmput(hmm->mm);
-       hmm_put(hmm);
+       mmput(hmm->mmu_notifier.mm);
 
        /*
         * The range is now invalid and the ref on the hmm is dropped, so
@@ -941,105 +852,33 @@ void hmm_range_unregister(struct hmm_range *range)
 }
 EXPORT_SYMBOL(hmm_range_unregister);
 
-/*
- * hmm_range_snapshot() - snapshot CPU page table for a range
- * @range: range
- * Return: -EINVAL if invalid argument, -ENOMEM out of memory, -EPERM invalid
- *          permission (for instance asking for write and range is read only),
- *          -EBUSY if you need to retry, -EFAULT invalid (ie either no valid
- *          vma or it is illegal to access that range), number of valid pages
- *          in range->pfns[] (from range start address).
- *
- * This snapshots the CPU page table for a range of virtual addresses. Snapshot
- * validity is tracked by range struct. See in include/linux/hmm.h for example
- * on how to use.
- */
-long hmm_range_snapshot(struct hmm_range *range)
-{
-       const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
-       unsigned long start = range->start, end;
-       struct hmm_vma_walk hmm_vma_walk;
-       struct hmm *hmm = range->hmm;
-       struct vm_area_struct *vma;
-       struct mm_walk mm_walk;
-
-       lockdep_assert_held(&hmm->mm->mmap_sem);
-       do {
-               /* If range is no longer valid force retry. */
-               if (!range->valid)
-                       return -EBUSY;
-
-               vma = find_vma(hmm->mm, start);
-               if (vma == NULL || (vma->vm_flags & device_vma))
-                       return -EFAULT;
-
-               if (is_vm_hugetlb_page(vma)) {
-                       if (huge_page_shift(hstate_vma(vma)) !=
-                                   range->page_shift &&
-                           range->page_shift != PAGE_SHIFT)
-                               return -EINVAL;
-               } else {
-                       if (range->page_shift != PAGE_SHIFT)
-                               return -EINVAL;
-               }
-
-               if (!(vma->vm_flags & VM_READ)) {
-                       /*
-                        * If vma do not allow read access, then assume that it
-                        * does not allow write access, either. HMM does not
-                        * support architecture that allow write without read.
-                        */
-                       hmm_pfns_clear(range, range->pfns,
-                               range->start, range->end);
-                       return -EPERM;
-               }
-
-               range->vma = vma;
-               hmm_vma_walk.pgmap = NULL;
-               hmm_vma_walk.last = start;
-               hmm_vma_walk.fault = false;
-               hmm_vma_walk.range = range;
-               mm_walk.private = &hmm_vma_walk;
-               end = min(range->end, vma->vm_end);
-
-               mm_walk.vma = vma;
-               mm_walk.mm = vma->vm_mm;
-               mm_walk.pte_entry = NULL;
-               mm_walk.test_walk = NULL;
-               mm_walk.hugetlb_entry = NULL;
-               mm_walk.pud_entry = hmm_vma_walk_pud;
-               mm_walk.pmd_entry = hmm_vma_walk_pmd;
-               mm_walk.pte_hole = hmm_vma_walk_hole;
-               mm_walk.hugetlb_entry = hmm_vma_walk_hugetlb_entry;
-
-               walk_page_range(start, end, &mm_walk);
-               start = end;
-       } while (start < range->end);
-
-       return (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
-}
-EXPORT_SYMBOL(hmm_range_snapshot);
+static const struct mm_walk_ops hmm_walk_ops = {
+       .pud_entry      = hmm_vma_walk_pud,
+       .pmd_entry      = hmm_vma_walk_pmd,
+       .pte_hole       = hmm_vma_walk_hole,
+       .hugetlb_entry  = hmm_vma_walk_hugetlb_entry,
+};
 
-/*
- * hmm_range_fault() - try to fault some address in a virtual address range
- * @range: range being faulted
- * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem)
- * Return: number of valid pages in range->pfns[] (from range start
- *          address). This may be zero. If the return value is negative,
- *          then one of the following values may be returned:
+/**
+ * hmm_range_fault - try to fault some address in a virtual address range
+ * @range:     range being faulted
+ * @flags:     HMM_FAULT_* flags
  *
- *           -EINVAL  invalid arguments or mm or virtual address are in an
- *                    invalid vma (for instance device file vma).
- *           -ENOMEM: Out of memory.
- *           -EPERM:  Invalid permission (for instance asking for write and
- *                    range is read only).
- *           -EAGAIN: If you need to retry and mmap_sem was drop. This can only
- *                    happens if block argument is false.
- *           -EBUSY:  If the the range is being invalidated and you should wait
- *                    for invalidation to finish.
- *           -EFAULT: Invalid (ie either no valid vma or it is illegal to access
- *                    that range), number of valid pages in range->pfns[] (from
- *                    range start address).
+ * Return: the number of valid pages in range->pfns[] (from range start
+ * address), which may be zero.  On error one of the following status codes
+ * can be returned:
+ *
+ * -EINVAL:    Invalid arguments or mm or virtual address is in an invalid vma
+ *             (e.g., device file vma).
+ * -ENOMEM:    Out of memory.
+ * -EPERM:     Invalid permission (e.g., asking for write and range is read
+ *             only).
+ * -EAGAIN:    A page fault needs to be retried and mmap_sem was dropped.
+ * -EBUSY:     The range has been invalidated and the caller needs to wait for
+ *             the invalidation to finish.
+ * -EFAULT:    Invalid address (i.e., either there is no valid vma or it is
+ *             illegal to access that range).  No count of valid pages in
+ *             range->pfns[] is returned in this case.
  *
  * This is similar to a regular CPU page fault except that it will not trigger
  * any memory migration if the memory being faulted is not accessible by CPUs
@@ -1048,37 +887,26 @@ EXPORT_SYMBOL(hmm_range_snapshot);
  * On error, for one virtual address in the range, the function will mark the
  * corresponding HMM pfn entry with an error flag.
  */
-long hmm_range_fault(struct hmm_range *range, bool block)
+long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 {
        const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
        unsigned long start = range->start, end;
        struct hmm_vma_walk hmm_vma_walk;
        struct hmm *hmm = range->hmm;
        struct vm_area_struct *vma;
-       struct mm_walk mm_walk;
        int ret;
 
-       lockdep_assert_held(&hmm->mm->mmap_sem);
+       lockdep_assert_held(&hmm->mmu_notifier.mm->mmap_sem);
 
        do {
                /* If range is no longer valid force retry. */
                if (!range->valid)
                        return -EBUSY;
 
-               vma = find_vma(hmm->mm, start);
+               vma = find_vma(hmm->mmu_notifier.mm, start);
                if (vma == NULL || (vma->vm_flags & device_vma))
                        return -EFAULT;
 
-               if (is_vm_hugetlb_page(vma)) {
-                       if (huge_page_shift(hstate_vma(vma)) !=
-                           range->page_shift &&
-                           range->page_shift != PAGE_SHIFT)
-                               return -EINVAL;
-               } else {
-                       if (range->page_shift != PAGE_SHIFT)
-                               return -EINVAL;
-               }
-
                if (!(vma->vm_flags & VM_READ)) {
                        /*
                         * If vma do not allow read access, then assume that it
@@ -1090,27 +918,15 @@ long hmm_range_fault(struct hmm_range *range, bool block)
                        return -EPERM;
                }
 
-               range->vma = vma;
                hmm_vma_walk.pgmap = NULL;
                hmm_vma_walk.last = start;
-               hmm_vma_walk.fault = true;
-               hmm_vma_walk.block = block;
+               hmm_vma_walk.flags = flags;
                hmm_vma_walk.range = range;
-               mm_walk.private = &hmm_vma_walk;
                end = min(range->end, vma->vm_end);
 
-               mm_walk.vma = vma;
-               mm_walk.mm = vma->vm_mm;
-               mm_walk.pte_entry = NULL;
-               mm_walk.test_walk = NULL;
-               mm_walk.hugetlb_entry = NULL;
-               mm_walk.pud_entry = hmm_vma_walk_pud;
-               mm_walk.pmd_entry = hmm_vma_walk_pmd;
-               mm_walk.pte_hole = hmm_vma_walk_hole;
-               mm_walk.hugetlb_entry = hmm_vma_walk_hugetlb_entry;
-
                do {
-                       ret = walk_page_range(start, end, &mm_walk);
+                       ret = walk_page_range(vma->vm_mm, start, end,
+                                       &hmm_walk_ops, &hmm_vma_walk);
                        start = hmm_vma_walk.last;
 
                        /* Keep trying while the range is valid. */
@@ -1133,25 +952,22 @@ long hmm_range_fault(struct hmm_range *range, bool block)
 EXPORT_SYMBOL(hmm_range_fault);
 
 /**
- * hmm_range_dma_map() - hmm_range_fault() and dma map page all in one.
- * @range: range being faulted
- * @device: device against to dma map page to
- * @daddrs: dma address of mapped pages
- * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem)
- * Return: number of pages mapped on success, -EAGAIN if mmap_sem have been
- *          drop and you need to try again, some other error value otherwise
+ * hmm_range_dma_map - hmm_range_fault() and dma map page all in one.
+ * @range:     range being faulted
+ * @device:    device to map page to
+ * @daddrs:    array of dma addresses for the mapped pages
+ * @flags:     HMM_FAULT_*
  *
- * Note same usage pattern as hmm_range_fault().
+ * Return: the number of pages mapped on success (including zero), or any
+ * status return from hmm_range_fault() otherwise.
  */
-long hmm_range_dma_map(struct hmm_range *range,
-                      struct device *device,
-                      dma_addr_t *daddrs,
-                      bool block)
+long hmm_range_dma_map(struct hmm_range *range, struct device *device,
+               dma_addr_t *daddrs, unsigned int flags)
 {
        unsigned long i, npages, mapped;
        long ret;
 
-       ret = hmm_range_fault(range, block);
+       ret = hmm_range_fault(range, flags);
        if (ret <= 0)
                return ret ? ret : -EBUSY;
 
@@ -1222,7 +1038,6 @@ EXPORT_SYMBOL(hmm_range_dma_map);
 /**
  * hmm_range_dma_unmap() - unmap range of that was map with hmm_range_dma_map()
  * @range: range being unmapped
- * @vma: the vma against which the range (optional)
  * @device: device against which dma map was done
  * @daddrs: dma address of mapped pages
  * @dirty: dirty page if it had the write flag set
@@ -1234,7 +1049,6 @@ EXPORT_SYMBOL(hmm_range_dma_map);
  * concurrent mmu notifier or sync_cpu_device_pagetables() to make progress.
  */
 long hmm_range_dma_unmap(struct hmm_range *range,
-                        struct vm_area_struct *vma,
                         struct device *device,
                         dma_addr_t *daddrs,
                         bool dirty)
index de1f159..73fc517 100644 (file)
@@ -496,11 +496,25 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
        return pmd;
 }
 
-static inline struct list_head *page_deferred_list(struct page *page)
+#ifdef CONFIG_MEMCG
+static inline struct deferred_split *get_deferred_split_queue(struct page *page)
 {
-       /* ->lru in the tail pages is occupied by compound_head. */
-       return &page[2].deferred_list;
+       struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+       struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+
+       if (memcg)
+               return &memcg->deferred_split_queue;
+       else
+               return &pgdat->deferred_split_queue;
+}
+#else
+static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+{
+       struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+
+       return &pgdat->deferred_split_queue;
 }
+#endif
 
 void prep_transhuge_page(struct page *page)
 {
@@ -2497,6 +2511,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        struct page *head = compound_head(page);
        pg_data_t *pgdat = page_pgdat(head);
        struct lruvec *lruvec;
+       struct address_space *swap_cache = NULL;
+       unsigned long offset = 0;
        int i;
 
        lruvec = mem_cgroup_page_lruvec(head, pgdat);
@@ -2504,6 +2520,14 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        /* complete memcg works before add pages to LRU */
        mem_cgroup_split_huge_fixup(head);
 
+       if (PageAnon(head) && PageSwapCache(head)) {
+               swp_entry_t entry = { .val = page_private(head) };
+
+               offset = swp_offset(entry);
+               swap_cache = swap_address_space(entry);
+               xa_lock(&swap_cache->i_pages);
+       }
+
        for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
                __split_huge_page_tail(head, i, lruvec, list);
                /* Some pages can be beyond i_size: drop them from page cache */
@@ -2513,6 +2537,12 @@ static void __split_huge_page(struct page *page, struct list_head *list,
                        if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
                                shmem_uncharge(head->mapping->host, 1);
                        put_page(head + i);
+               } else if (!PageAnon(page)) {
+                       __xa_store(&head->mapping->i_pages, head[i].index,
+                                       head + i, 0);
+               } else if (swap_cache) {
+                       __xa_store(&swap_cache->i_pages, offset + i,
+                                       head + i, 0);
                }
        }
 
@@ -2523,10 +2553,12 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        /* See comment in __split_huge_page_tail() */
        if (PageAnon(head)) {
                /* Additional pin to swap cache */
-               if (PageSwapCache(head))
+               if (PageSwapCache(head)) {
                        page_ref_add(head, 2);
-               else
+                       xa_unlock(&swap_cache->i_pages);
+               } else {
                        page_ref_inc(head);
+               }
        } else {
                /* Additional pin to page cache */
                page_ref_add(head, 2);
@@ -2673,6 +2705,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
        struct page *head = compound_head(page);
        struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
+       struct deferred_split *ds_queue = get_deferred_split_queue(page);
        struct anon_vma *anon_vma = NULL;
        struct address_space *mapping = NULL;
        int count, mapcount, extra_pins, ret;
@@ -2759,17 +2792,17 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        }
 
        /* Prevent deferred_split_scan() touching ->_refcount */
-       spin_lock(&pgdata->split_queue_lock);
+       spin_lock(&ds_queue->split_queue_lock);
        count = page_count(head);
        mapcount = total_mapcount(head);
        if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
                if (!list_empty(page_deferred_list(head))) {
-                       pgdata->split_queue_len--;
+                       ds_queue->split_queue_len--;
                        list_del(page_deferred_list(head));
                }
                if (mapping)
                        __dec_node_page_state(page, NR_SHMEM_THPS);
-               spin_unlock(&pgdata->split_queue_lock);
+               spin_unlock(&ds_queue->split_queue_lock);
                __split_huge_page(page, list, end, flags);
                if (PageSwapCache(head)) {
                        swp_entry_t entry = { .val = page_private(head) };
@@ -2786,7 +2819,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                        dump_page(page, "total_mapcount(head) > 0");
                        BUG();
                }
-               spin_unlock(&pgdata->split_queue_lock);
+               spin_unlock(&ds_queue->split_queue_lock);
 fail:          if (mapping)
                        xa_unlock(&mapping->i_pages);
                spin_unlock_irqrestore(&pgdata->lru_lock, flags);
@@ -2808,53 +2841,86 @@ out:
 
 void free_transhuge_page(struct page *page)
 {
-       struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
+       struct deferred_split *ds_queue = get_deferred_split_queue(page);
        unsigned long flags;
 
-       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+       spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
        if (!list_empty(page_deferred_list(page))) {
-               pgdata->split_queue_len--;
+               ds_queue->split_queue_len--;
                list_del(page_deferred_list(page));
        }
-       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+       spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
        free_compound_page(page);
 }
 
 void deferred_split_huge_page(struct page *page)
 {
-       struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
+       struct deferred_split *ds_queue = get_deferred_split_queue(page);
+#ifdef CONFIG_MEMCG
+       struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+#endif
        unsigned long flags;
 
        VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 
-       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+       /*
+        * The try_to_unmap() in the page reclaim path might reach here too,
+        * and that race could corrupt the deferred split queue.  Besides,
+        * if page reclaim is already handling the page, there is no need
+        * for the shrinker to handle it again.
+        *
+        * Check PageSwapCache to determine whether the page is being
+        * handled by page reclaim, since THP swap adds the page to the
+        * swap cache before calling try_to_unmap().
+        */
+       if (PageSwapCache(page))
+               return;
+
+       spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
        if (list_empty(page_deferred_list(page))) {
                count_vm_event(THP_DEFERRED_SPLIT_PAGE);
-               list_add_tail(page_deferred_list(page), &pgdata->split_queue);
-               pgdata->split_queue_len++;
+               list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
+               ds_queue->split_queue_len++;
+#ifdef CONFIG_MEMCG
+               if (memcg)
+                       memcg_set_shrinker_bit(memcg, page_to_nid(page),
+                                              deferred_split_shrinker.id);
+#endif
        }
-       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+       spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 }
 
 static unsigned long deferred_split_count(struct shrinker *shrink,
                struct shrink_control *sc)
 {
        struct pglist_data *pgdata = NODE_DATA(sc->nid);
-       return READ_ONCE(pgdata->split_queue_len);
+       struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+
+#ifdef CONFIG_MEMCG
+       if (sc->memcg)
+               ds_queue = &sc->memcg->deferred_split_queue;
+#endif
+       return READ_ONCE(ds_queue->split_queue_len);
 }
 
 static unsigned long deferred_split_scan(struct shrinker *shrink,
                struct shrink_control *sc)
 {
        struct pglist_data *pgdata = NODE_DATA(sc->nid);
+       struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
        unsigned long flags;
        LIST_HEAD(list), *pos, *next;
        struct page *page;
        int split = 0;
 
-       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+#ifdef CONFIG_MEMCG
+       if (sc->memcg)
+               ds_queue = &sc->memcg->deferred_split_queue;
+#endif
+
+       spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
        /* Take pin on all head pages to avoid freeing them under us */
-       list_for_each_safe(pos, next, &pgdata->split_queue) {
+       list_for_each_safe(pos, next, &ds_queue->split_queue) {
                page = list_entry((void *)pos, struct page, mapping);
                page = compound_head(page);
                if (get_page_unless_zero(page)) {
@@ -2862,12 +2928,12 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
                } else {
                        /* We lost race with put_compound_page() */
                        list_del_init(page_deferred_list(page));
-                       pgdata->split_queue_len--;
+                       ds_queue->split_queue_len--;
                }
                if (!--sc->nr_to_scan)
                        break;
        }
-       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+       spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 
        list_for_each_safe(pos, next, &list) {
                page = list_entry((void *)pos, struct page, mapping);
@@ -2881,15 +2947,15 @@ next:
                put_page(page);
        }
 
-       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
-       list_splice_tail(&list, &pgdata->split_queue);
-       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+       spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+       list_splice_tail(&list, &ds_queue->split_queue);
+       spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 
        /*
         * Stop shrinker if we didn't split any page, but the queue is empty.
         * This can happen if pages were freed under us.
         */
-       if (!split && list_empty(&pgdata->split_queue))
+       if (!split && list_empty(&ds_queue->split_queue))
                return SHRINK_STOP;
        return split;
 }
@@ -2898,7 +2964,8 @@ static struct shrinker deferred_split_shrinker = {
        .count_objects = deferred_split_count,
        .scan_objects = deferred_split_scan,
        .seeks = DEFAULT_SEEKS,
-       .flags = SHRINKER_NUMA_AWARE,
+       .flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE |
+                SHRINKER_NONSLAB,
 };
 
 #ifdef CONFIG_DEBUG_FS
index 6d7296d..ef37c85 100644 (file)
@@ -1405,12 +1405,25 @@ pgoff_t __basepage_index(struct page *page)
 }
 
 static struct page *alloc_buddy_huge_page(struct hstate *h,
-               gfp_t gfp_mask, int nid, nodemask_t *nmask)
+               gfp_t gfp_mask, int nid, nodemask_t *nmask,
+               nodemask_t *node_alloc_noretry)
 {
        int order = huge_page_order(h);
        struct page *page;
+       bool alloc_try_hard = true;
 
-       gfp_mask |= __GFP_COMP|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
+       /*
+        * By default we always try hard to allocate the page with
+        * __GFP_RETRY_MAYFAIL flag.  However, if we are allocating pages in
+        * a loop (to adjust global huge page counts) and previous allocation
+        * failed, do not continue to try hard on the same node.  Use the
+        * node_alloc_noretry bitmap to manage this state information.
+        */
+       if (node_alloc_noretry && node_isset(nid, *node_alloc_noretry))
+               alloc_try_hard = false;
+       gfp_mask |= __GFP_COMP|__GFP_NOWARN;
+       if (alloc_try_hard)
+               gfp_mask |= __GFP_RETRY_MAYFAIL;
        if (nid == NUMA_NO_NODE)
                nid = numa_mem_id();
        page = __alloc_pages_nodemask(gfp_mask, order, nid, nmask);
@@ -1419,6 +1432,22 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
        else
                __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
 
+       /*
+        * If we did not specify __GFP_RETRY_MAYFAIL, but still got a page this
+        * indicates an overall state change.  Clear bit so that we resume
+        * normal 'try hard' allocations.
+        */
+       if (node_alloc_noretry && page && !alloc_try_hard)
+               node_clear(nid, *node_alloc_noretry);
+
+       /*
+        * If we tried hard to get a page but failed, set bit so that
+        * subsequent attempts will not try as hard until there is an
+        * overall state change.
+        */
+       if (node_alloc_noretry && !page && alloc_try_hard)
+               node_set(nid, *node_alloc_noretry);
+
        return page;
 }
 
@@ -1427,7 +1456,8 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
  * should use this function to get new hugetlb pages
  */
 static struct page *alloc_fresh_huge_page(struct hstate *h,
-               gfp_t gfp_mask, int nid, nodemask_t *nmask)
+               gfp_t gfp_mask, int nid, nodemask_t *nmask,
+               nodemask_t *node_alloc_noretry)
 {
        struct page *page;
 
@@ -1435,7 +1465,7 @@ static struct page *alloc_fresh_huge_page(struct hstate *h,
                page = alloc_gigantic_page(h, gfp_mask, nid, nmask);
        else
                page = alloc_buddy_huge_page(h, gfp_mask,
-                               nid, nmask);
+                               nid, nmask, node_alloc_noretry);
        if (!page)
                return NULL;
 
@@ -1450,14 +1480,16 @@ static struct page *alloc_fresh_huge_page(struct hstate *h,
  * Allocates a fresh page to the hugetlb allocator pool in the node interleaved
  * manner.
  */
-static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
+static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
+                               nodemask_t *node_alloc_noretry)
 {
        struct page *page;
        int nr_nodes, node;
        gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
 
        for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
-               page = alloc_fresh_huge_page(h, gfp_mask, node, nodes_allowed);
+               page = alloc_fresh_huge_page(h, gfp_mask, node, nodes_allowed,
+                                               node_alloc_noretry);
                if (page)
                        break;
        }
@@ -1601,7 +1633,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
                goto out_unlock;
        spin_unlock(&hugetlb_lock);
 
-       page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask);
+       page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
        if (!page)
                return NULL;
 
@@ -1637,7 +1669,7 @@ struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
        if (hstate_is_gigantic(h))
                return NULL;
 
-       page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask);
+       page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
        if (!page)
                return NULL;
 
@@ -2207,13 +2239,33 @@ static void __init gather_bootmem_prealloc(void)
 static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 {
        unsigned long i;
+       nodemask_t *node_alloc_noretry;
+
+       if (!hstate_is_gigantic(h)) {
+               /*
+                * Bit mask controlling how hard we retry per-node allocations.
+                * Ignore errors as lower level routines can deal with
+                * node_alloc_noretry == NULL.  If this kmalloc fails at boot
+                * time, we are likely in bigger trouble.
+                */
+               node_alloc_noretry = kmalloc(sizeof(*node_alloc_noretry),
+                                               GFP_KERNEL);
+       } else {
+               /* allocations done at boot time */
+               node_alloc_noretry = NULL;
+       }
+
+       /* bit mask controlling how hard we retry per-node allocations */
+       if (node_alloc_noretry)
+               nodes_clear(*node_alloc_noretry);
 
        for (i = 0; i < h->max_huge_pages; ++i) {
                if (hstate_is_gigantic(h)) {
                        if (!alloc_bootmem_huge_page(h))
                                break;
                } else if (!alloc_pool_huge_page(h,
-                                        &node_states[N_MEMORY]))
+                                        &node_states[N_MEMORY],
+                                        node_alloc_noretry))
                        break;
                cond_resched();
        }
@@ -2225,6 +2277,8 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
                        h->max_huge_pages, buf, i);
                h->max_huge_pages = i;
        }
+
+       kfree(node_alloc_noretry);
 }
 
 static void __init hugetlb_init_hstates(void)
@@ -2323,6 +2377,17 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
                              nodemask_t *nodes_allowed)
 {
        unsigned long min_count, ret;
+       NODEMASK_ALLOC(nodemask_t, node_alloc_noretry, GFP_KERNEL);
+
+       /*
+        * Bit mask controlling how hard we retry per-node allocations.
+        * If we can not allocate the bit mask, do not attempt to allocate
+        * the requested huge pages.
+        */
+       if (node_alloc_noretry)
+               nodes_clear(*node_alloc_noretry);
+       else
+               return -ENOMEM;
 
        spin_lock(&hugetlb_lock);
 
@@ -2356,6 +2421,7 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
        if (hstate_is_gigantic(h) && !IS_ENABLED(CONFIG_CONTIG_ALLOC)) {
                if (count > persistent_huge_pages(h)) {
                        spin_unlock(&hugetlb_lock);
+                       NODEMASK_FREE(node_alloc_noretry);
                        return -EINVAL;
                }
                /* Fall through to decrease pool */
@@ -2388,7 +2454,8 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
                /* yield cpu to avoid soft lockup */
                cond_resched();
 
-               ret = alloc_pool_huge_page(h, nodes_allowed);
+               ret = alloc_pool_huge_page(h, nodes_allowed,
+                                               node_alloc_noretry);
                spin_lock(&hugetlb_lock);
                if (!ret)
                        goto out;
@@ -2429,6 +2496,8 @@ out:
        h->max_huge_pages = persistent_huge_pages(h);
        spin_unlock(&hugetlb_lock);
 
+       NODEMASK_FREE(node_alloc_noretry);
+
        return 0;
 }
 
index 68c2f2f..f1930fa 100644 (file)
@@ -139,7 +139,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
        if (!page_hcg || page_hcg != h_cg)
                goto out;
 
-       nr_pages = 1 << compound_order(page);
+       nr_pages = compound_nr(page);
        if (!parent) {
                parent = root_h_cgroup;
                /* root has no limit */
index a787a31..fb1e150 100644 (file)
@@ -35,6 +35,6 @@ struct mm_struct init_mm = {
        .arg_lock       =  __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
        .mmlist         = LIST_HEAD_INIT(init_mm.mmlist),
        .user_ns        = &init_user_ns,
-       .cpu_bitmap     = { [BITS_TO_LONGS(NR_CPUS)] = 0},
+       .cpu_bitmap     = CPU_BITS_NONE,
        INIT_MM_CONTEXT(init_mm)
 };
index e323908..0d5f720 100644 (file)
@@ -39,7 +39,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf);
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
                unsigned long floor, unsigned long ceiling);
 
-static inline bool can_madv_dontneed_vma(struct vm_area_struct *vma)
+static inline bool can_madv_lru_vma(struct vm_area_struct *vma)
 {
        return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP));
 }
index 95d16a4..6814d6d 100644 (file)
@@ -304,7 +304,6 @@ size_t kasan_metadata_size(struct kmem_cache *cache)
 struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
                                        const void *object)
 {
-       BUILD_BUG_ON(sizeof(struct kasan_alloc_meta) > 32);
        return (void *)object + cache->kasan_info.alloc_meta_offset;
 }
 
@@ -315,14 +314,31 @@ struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
        return (void *)object + cache->kasan_info.free_meta_offset;
 }
 
+/* Store free-time tracking info for @object in its kasan_alloc_meta. */
+static void kasan_set_free_info(struct kmem_cache *cache,
+               void *object, u8 tag)
+{
+       struct kasan_alloc_meta *alloc_meta;
+       u8 idx = 0;     /* the only slot when !CONFIG_KASAN_SW_TAGS_IDENTIFY */
+
+       alloc_meta = get_alloc_info(cache, object);
+       /* Use slot free_track_idx, remember @tag there, advance cyclically. */
+#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+       idx = alloc_meta->free_track_idx;
+       alloc_meta->free_pointer_tag[idx] = tag;
+       alloc_meta->free_track_idx = (idx + 1) % KASAN_NR_FREE_STACKS;
+#endif
+
+       set_track(&alloc_meta->free_track[idx], GFP_NOWAIT);
+}
+
 void kasan_poison_slab(struct page *page)
 {
        unsigned long i;
 
-       for (i = 0; i < (1 << compound_order(page)); i++)
+       for (i = 0; i < compound_nr(page); i++)
                page_kasan_tag_reset(page + i);
-       kasan_poison_shadow(page_address(page),
-                       PAGE_SIZE << compound_order(page),
+       kasan_poison_shadow(page_address(page), page_size(page),
                        KASAN_KMALLOC_REDZONE);
 }
 
@@ -452,7 +468,8 @@ static bool __kasan_slab_free(struct kmem_cache *cache, void *object,
                        unlikely(!(cache->flags & SLAB_KASAN)))
                return false;
 
-       set_track(&get_alloc_info(cache, object)->free_track, GFP_NOWAIT);
+       kasan_set_free_info(cache, object, tag);
+
        quarantine_put(get_free_info(cache, object), cache);
 
        return IS_ENABLED(CONFIG_KASAN_GENERIC);
@@ -524,7 +541,7 @@ void * __must_check kasan_kmalloc_large(const void *ptr, size_t size,
        page = virt_to_page(ptr);
        redzone_start = round_up((unsigned long)(ptr + size),
                                KASAN_SHADOW_SCALE_SIZE);
-       redzone_end = (unsigned long)ptr + (PAGE_SIZE << compound_order(page));
+       redzone_end = (unsigned long)ptr + page_size(page);
 
        kasan_unpoison_shadow(ptr, size);
        kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start,
@@ -560,8 +577,7 @@ void kasan_poison_kfree(void *ptr, unsigned long ip)
                        kasan_report_invalid_free(ptr, ip);
                        return;
                }
-               kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page),
-                               KASAN_FREE_PAGE);
+               kasan_poison_shadow(ptr, page_size(page), KASAN_FREE_PAGE);
        } else {
                __kasan_slab_free(page->slab_cache, ptr, ip, false);
        }
index 014f19e..35cff6b 100644 (file)
@@ -95,9 +95,19 @@ struct kasan_track {
        depot_stack_handle_t stack;
 };
 
+#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+#define KASAN_NR_FREE_STACKS 5
+#else
+#define KASAN_NR_FREE_STACKS 1
+#endif
+
 struct kasan_alloc_meta {
        struct kasan_track alloc_track;
-       struct kasan_track free_track;
+       struct kasan_track free_track[KASAN_NR_FREE_STACKS];
+#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+       u8 free_pointer_tag[KASAN_NR_FREE_STACKS];
+       u8 free_track_idx;
+#endif
 };
 
 struct qlist_node {
@@ -146,6 +156,8 @@ void kasan_report(unsigned long addr, size_t size,
                bool is_write, unsigned long ip);
 void kasan_report_invalid_free(void *object, unsigned long ip);
 
+struct page *kasan_addr_to_page(const void *addr);
+
 #if defined(CONFIG_KASAN_GENERIC) && \
        (defined(CONFIG_SLAB) || defined(CONFIG_SLUB))
 void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache);
index 0e5f965..6217821 100644 (file)
@@ -111,7 +111,7 @@ static void print_track(struct kasan_track *track, const char *prefix)
        }
 }
 
-static struct page *addr_to_page(const void *addr)
+struct page *kasan_addr_to_page(const void *addr)
 {
        if ((addr >= (void *)PAGE_OFFSET) &&
                        (addr < high_memory))
@@ -151,15 +151,38 @@ static void describe_object_addr(struct kmem_cache *cache, void *object,
                (void *)(object_addr + cache->object_size));
 }
 
+static struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
+               void *object, u8 tag)
+{
+       struct kasan_alloc_meta *alloc_meta;
+       int i = 0;      /* slot 0 when !CONFIG_KASAN_SW_TAGS_IDENTIFY */
+
+       alloc_meta = get_alloc_info(cache, object);
+       /* Use the slot recorded with @tag, else slot free_track_idx. */
+#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+       for (i = 0; i < KASAN_NR_FREE_STACKS; i++) {
+               if (alloc_meta->free_pointer_tag[i] == tag)
+                       break;
+       }
+       if (i == KASAN_NR_FREE_STACKS)
+               i = alloc_meta->free_track_idx;
+#endif
+
+       return &alloc_meta->free_track[i];
+}
+
 static void describe_object(struct kmem_cache *cache, void *object,
-                               const void *addr)
+                               const void *addr, u8 tag)
 {
        struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object);
 
        if (cache->flags & SLAB_KASAN) {
+               struct kasan_track *free_track;
+
                print_track(&alloc_info->alloc_track, "Allocated");
                pr_err("\n");
-               print_track(&alloc_info->free_track, "Freed");
+               free_track = kasan_get_free_track(cache, object, tag);
+               print_track(free_track, "Freed");
                pr_err("\n");
        }
 
@@ -344,9 +367,9 @@ static void print_address_stack_frame(const void *addr)
        print_decoded_frame_descr(frame_descr);
 }
 
-static void print_address_description(void *addr)
+static void print_address_description(void *addr, u8 tag)
 {
-       struct page *page = addr_to_page(addr);
+       struct page *page = kasan_addr_to_page(addr);
 
        dump_stack();
        pr_err("\n");
@@ -355,7 +378,7 @@ static void print_address_description(void *addr)
                struct kmem_cache *cache = page->slab_cache;
                void *object = nearest_obj(cache, page, addr);
 
-               describe_object(cache, object, addr);
+               describe_object(cache, object, addr, tag);
        }
 
        if (kernel_or_module_addr(addr) && !init_task_stack_addr(addr)) {
@@ -435,13 +458,14 @@ static bool report_enabled(void)
 void kasan_report_invalid_free(void *object, unsigned long ip)
 {
        unsigned long flags;
+       u8 tag = get_tag(object);
 
+       object = reset_tag(object);
        start_report(&flags);
        pr_err("BUG: KASAN: double-free or invalid-free in %pS\n", (void *)ip);
-       print_tags(get_tag(object), reset_tag(object));
-       object = reset_tag(object);
+       print_tags(tag, object);
        pr_err("\n");
-       print_address_description(object);
+       print_address_description(object, tag);
        pr_err("\n");
        print_shadow_for_address(object);
        end_report(&flags);
@@ -479,7 +503,7 @@ void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned lon
        pr_err("\n");
 
        if (addr_has_shadow(untagged_addr)) {
-               print_address_description(untagged_addr);
+               print_address_description(untagged_addr, get_tag(tagged_addr));
                pr_err("\n");
                print_shadow_for_address(info.first_bad_addr);
        } else {
index 8eaf5f7..969ae08 100644 (file)
 
 const char *get_bug_type(struct kasan_access_info *info)
 {
+#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+       struct kasan_alloc_meta *alloc_meta;
+       struct kmem_cache *cache;
+       struct page *page;
+       const void *addr;
+       void *object;
+       u8 tag;
+       int i;
+
+       tag = get_tag(info->access_addr);
+       addr = reset_tag(info->access_addr);
+       page = kasan_addr_to_page(addr);
+       if (page && PageSlab(page)) {
+               cache = page->slab_cache;
+               object = nearest_obj(cache, page, (void *)addr);
+               alloc_meta = get_alloc_info(cache, object);
+               /* A recorded free with the access tag means use-after-free. */
+               for (i = 0; i < KASAN_NR_FREE_STACKS; i++)
+                       if (alloc_meta->free_pointer_tag[i] == tag)
+                               return "use-after-free";
+               return "out-of-bounds";
+       }
+       /* No slab page for the address: use the generic bug type below. */
+#endif
        return "invalid-access";
 }
 
index ccede24..0a1b4b4 100644 (file)
@@ -48,6 +48,7 @@ enum scan_result {
        SCAN_CGROUP_CHARGE_FAIL,
        SCAN_EXCEED_SWAP_PTE,
        SCAN_TRUNCATED,
+       SCAN_PAGE_HAS_PRIVATE,
 };
 
 #define CREATE_TRACE_POINTS
@@ -76,6 +77,8 @@ static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
 
 static struct kmem_cache *mm_slot_cache __read_mostly;
 
+#define MAX_PTE_MAPPED_THP 8
+
 /**
  * struct mm_slot - hash lookup from mm to mm_slot
  * @hash: hash collision list
@@ -86,6 +89,10 @@ struct mm_slot {
        struct hlist_node hash;
        struct list_head mm_node;
        struct mm_struct *mm;
+
+       /* pte-mapped THP in this mm */
+       int nr_pte_mapped_thp;
+       unsigned long pte_mapped_thp[MAX_PTE_MAPPED_THP];
 };
 
 /**
@@ -404,7 +411,11 @@ static bool hugepage_vma_check(struct vm_area_struct *vma,
            (vm_flags & VM_NOHUGEPAGE) ||
            test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
                return false;
-       if (shmem_file(vma->vm_file)) {
+
+       if (shmem_file(vma->vm_file) ||
+           (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+            vma->vm_file &&
+            (vm_flags & VM_DENYWRITE))) {
                if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
                        return false;
                return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
@@ -456,8 +467,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
        unsigned long hstart, hend;
 
        /*
-        * khugepaged does not yet work on non-shmem files or special
-        * mappings. And file-private shmem THP is not supported.
+        * khugepaged only supports read-only files for non-shmem files.
+        * khugepaged does not yet work on special mappings. And
+        * file-private shmem THP is not supported.
         */
        if (!hugepage_vma_check(vma, vm_flags))
                return 0;
@@ -1248,6 +1260,159 @@ static void collect_mm_slot(struct mm_slot *mm_slot)
 }
 
 #if defined(CONFIG_SHMEM) && defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE)
+/*
+ * Notify khugepaged that addr in mm is a pte-mapped THP to collapse later.
+ * Silently dropped if mm has no mm_slot or its array is full; returns 0.
+ */
+static int khugepaged_add_pte_mapped_thp(struct mm_struct *mm,
+                                        unsigned long addr)
+{
+       struct mm_slot *mm_slot;
+
+       VM_BUG_ON(addr & ~HPAGE_PMD_MASK);      /* addr must be PMD-aligned */
+
+       spin_lock(&khugepaged_mm_lock);
+       mm_slot = get_mm_slot(mm);
+       if (likely(mm_slot && mm_slot->nr_pte_mapped_thp < MAX_PTE_MAPPED_THP))
+               mm_slot->pte_mapped_thp[mm_slot->nr_pte_mapped_thp++] = addr;
+       spin_unlock(&khugepaged_mm_lock);
+       return 0;
+}
+
+/**
+ * Try to collapse a pte-mapped THP for mm at addr, rounded down to haddr.
+ *
+ * This function checks whether all the PTEs in the PMD are pointing to the
+ * right THP. If so, retract the page table so the THP can refault in as
+ * pmd-mapped. The PMD flush must cover the range starting at haddr.
+ */
+void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+{
+       unsigned long haddr = addr & HPAGE_PMD_MASK;
+       struct vm_area_struct *vma = find_vma(mm, haddr);
+       struct page *hpage = NULL;
+       pte_t *start_pte, *pte;
+       pmd_t *pmd, _pmd;
+       spinlock_t *ptl;
+       int count = 0;
+       int i;
+
+       if (!vma || !vma->vm_file ||
+           vma->vm_start > haddr || vma->vm_end < haddr + HPAGE_PMD_SIZE)
+               return;
+
+       /*
+        * This vm_flags may not have VM_HUGEPAGE if the page was not
+        * collapsed by this mm. But we can still collapse if the page is
+        * the valid THP. Add extra VM_HUGEPAGE so hugepage_vma_check()
+        * will not fail the vma for missing VM_HUGEPAGE
+        */
+       if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE))
+               return;
+
+       pmd = mm_find_pmd(mm, haddr);
+       if (!pmd)
+               return;
+
+       start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
+
+       /* step 1: check all mapped PTEs are to the right huge page */
+       for (i = 0, addr = haddr, pte = start_pte;
+            i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) {
+               struct page *page;
+
+               /* empty pte, skip */
+               if (pte_none(*pte))
+                       continue;
+
+               /* page swapped out, abort */
+               if (!pte_present(*pte))
+                       goto abort;
+
+               page = vm_normal_page(vma, addr, *pte);
+
+               if (!page || !PageCompound(page))
+                       goto abort;
+
+               if (!hpage) {
+                       hpage = compound_head(page);
+                       /*
+                        * The mapping of the THP should not change.
+                        *
+                        * Note that uprobe, debugger, or MAP_PRIVATE may
+                        * change the page table, but the new page will
+                        * not pass PageCompound() check.
+                        */
+                       if (WARN_ON(hpage->mapping != vma->vm_file->f_mapping))
+                               goto abort;
+               }
+
+               /*
+                * Confirm the page maps to the correct subpage.
+                *
+                * Note that uprobe, debugger, or MAP_PRIVATE may change
+                * the page table, but the new page will not pass
+                * PageCompound() check.
+                */
+               if (WARN_ON(hpage + i != page))
+                       goto abort;
+               count++;
+       }
+
+       /* step 2: adjust rmap */
+       for (i = 0, addr = haddr, pte = start_pte;
+            i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) {
+               struct page *page;
+
+               if (pte_none(*pte))
+                       continue;
+               page = vm_normal_page(vma, addr, *pte);
+               page_remove_rmap(page, false);
+       }
+
+       pte_unmap_unlock(start_pte, ptl);
+
+       /* step 3: set proper refcount and mm_counters. */
+       if (hpage) {
+               page_ref_sub(hpage, count);
+               add_mm_counter(vma->vm_mm, mm_counter_file(hpage), -count);
+       }
+
+       /* step 4: collapse pmd (addr was advanced above; flush at haddr) */
+       ptl = pmd_lock(vma->vm_mm, pmd);
+       _pmd = pmdp_collapse_flush(vma, haddr, pmd);
+       spin_unlock(ptl);
+       mm_dec_nr_ptes(mm);
+       pte_free(mm, pmd_pgtable(_pmd));
+       return;
+
+abort:
+       pte_unmap_unlock(start_pte, ptl);
+}
+
+static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
+{
+       struct mm_struct *mm = mm_slot->mm;
+       int i;
+
+       if (likely(mm_slot->nr_pte_mapped_thp == 0))
+               return 0;       /* nothing queued for this mm */
+       /* collapse_pte_mapped_thp() below runs with mmap_sem held for write. */
+       if (!down_write_trylock(&mm->mmap_sem))
+               return -EBUSY;  /* queued addresses are kept */
+
+       if (unlikely(khugepaged_test_exit(mm)))
+               goto out;       /* queue is still reset at out: */
+
+       for (i = 0; i < mm_slot->nr_pte_mapped_thp; i++)
+               collapse_pte_mapped_thp(mm, mm_slot->pte_mapped_thp[i]);
+
+out:
+       mm_slot->nr_pte_mapped_thp = 0;
+       up_write(&mm->mmap_sem);
+       return 0;
+}
+
 static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 {
        struct vm_area_struct *vma;
@@ -1256,7 +1421,22 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 
        i_mmap_lock_write(mapping);
        vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
-               /* probably overkill */
+               /*
+                * Check vma->anon_vma to exclude MAP_PRIVATE mappings that
+                * got written to. These VMAs are likely not worth investing
+                * down_write(mmap_sem) as PMD-mapping is likely to be split
+                * later.
+                *
+                * Note that vma->anon_vma check is racy: it can be set up after
+                * the check but before we took mmap_sem by the fault path.
+                * But page lock would prevent establishing any new ptes of the
+                * page, so we are safe.
+                *
+                * An alternative would be to drop the check, but check that
+                * page table is clear before calling pmdp_collapse_flush()
+                * under ptl. It has higher chance to recover THP for the VMA,
+                * but has higher cost too.
+                */
                if (vma->anon_vma)
                        continue;
                addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
@@ -1269,9 +1449,10 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
                        continue;
                /*
                 * We need exclusive mmap_sem to retract page table.
-                * If trylock fails we would end up with pte-mapped THP after
-                * re-fault. Not ideal, but it's more important to not disturb
-                * the system too much.
+                *
+                * We use trylock due to lock inversion: we need to acquire
+                * mmap_sem while holding page lock. Fault path does it in
+                * reverse order. Trylock is a way to avoid deadlock.
                 */
                if (down_write_trylock(&vma->vm_mm->mmap_sem)) {
                        spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);
@@ -1281,18 +1462,21 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
                        up_write(&vma->vm_mm->mmap_sem);
                        mm_dec_nr_ptes(vma->vm_mm);
                        pte_free(vma->vm_mm, pmd_pgtable(_pmd));
+               } else {
+                       /* Try again later */
+                       khugepaged_add_pte_mapped_thp(vma->vm_mm, addr);
                }
        }
        i_mmap_unlock_write(mapping);
 }
 
 /**
- * collapse_shmem - collapse small tmpfs/shmem pages into huge one.
+ * collapse_file - collapse filemap/tmpfs/shmem pages into huge one.
  *
  * Basic scheme is simple, details are more complex:
  *  - allocate and lock a new huge page;
  *  - scan page cache replacing old pages with the new one
- *    + swap in pages if necessary;
+ *    + swap/gup in pages if necessary;
  *    + fill in gaps;
  *    + keep old pages around in case rollback is required;
  *  - if replacing succeeds:
@@ -1304,10 +1488,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  *    + restore gaps in the page cache;
  *    + unlock and free huge page;
  */
-static void collapse_shmem(struct mm_struct *mm,
-               struct address_space *mapping, pgoff_t start,
+static void collapse_file(struct mm_struct *mm,
+               struct file *file, pgoff_t start,
                struct page **hpage, int node)
 {
+       struct address_space *mapping = file->f_mapping;
        gfp_t gfp;
        struct page *new_page;
        struct mem_cgroup *memcg;
@@ -1315,7 +1500,9 @@ static void collapse_shmem(struct mm_struct *mm,
        LIST_HEAD(pagelist);
        XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
        int nr_none = 0, result = SCAN_SUCCEED;
+       bool is_shmem = shmem_file(file);
 
+       VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
        VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
 
        /* Only allocate from the target node */
@@ -1347,7 +1534,8 @@ static void collapse_shmem(struct mm_struct *mm,
        } while (1);
 
        __SetPageLocked(new_page);
-       __SetPageSwapBacked(new_page);
+       if (is_shmem)
+               __SetPageSwapBacked(new_page);
        new_page->index = start;
        new_page->mapping = mapping;
 
@@ -1362,41 +1550,75 @@ static void collapse_shmem(struct mm_struct *mm,
                struct page *page = xas_next(&xas);
 
                VM_BUG_ON(index != xas.xa_index);
-               if (!page) {
-                       /*
-                        * Stop if extent has been truncated or hole-punched,
-                        * and is now completely empty.
-                        */
-                       if (index == start) {
-                               if (!xas_next_entry(&xas, end - 1)) {
-                                       result = SCAN_TRUNCATED;
+               if (is_shmem) {
+                       if (!page) {
+                               /*
+                                * Stop if extent has been truncated or
+                                * hole-punched, and is now completely
+                                * empty.
+                                */
+                               if (index == start) {
+                                       if (!xas_next_entry(&xas, end - 1)) {
+                                               result = SCAN_TRUNCATED;
+                                               goto xa_locked;
+                                       }
+                                       xas_set(&xas, index);
+                               }
+                               if (!shmem_charge(mapping->host, 1)) {
+                                       result = SCAN_FAIL;
                                        goto xa_locked;
                                }
-                               xas_set(&xas, index);
+                               xas_store(&xas, new_page);
+                               nr_none++;
+                               continue;
                        }
-                       if (!shmem_charge(mapping->host, 1)) {
-                               result = SCAN_FAIL;
+
+                       if (xa_is_value(page) || !PageUptodate(page)) {
+                               xas_unlock_irq(&xas);
+                               /* swap in or instantiate fallocated page */
+                               if (shmem_getpage(mapping->host, index, &page,
+                                                 SGP_NOHUGE)) {
+                                       result = SCAN_FAIL;
+                                       goto xa_unlocked;
+                               }
+                       } else if (trylock_page(page)) {
+                               get_page(page);
+                               xas_unlock_irq(&xas);
+                       } else {
+                               result = SCAN_PAGE_LOCK;
                                goto xa_locked;
                        }
-                       xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
-                       nr_none++;
-                       continue;
-               }
-
-               if (xa_is_value(page) || !PageUptodate(page)) {
-                       xas_unlock_irq(&xas);
-                       /* swap in or instantiate fallocated page */
-                       if (shmem_getpage(mapping->host, index, &page,
-                                               SGP_NOHUGE)) {
+               } else {        /* !is_shmem */
+                       if (!page || xa_is_value(page)) {
+                               xas_unlock_irq(&xas);
+                               page_cache_sync_readahead(mapping, &file->f_ra,
+                                                         file, index,
+                                                         PAGE_SIZE);
+                               /* drain pagevecs to help isolate_lru_page() */
+                               lru_add_drain();
+                               page = find_lock_page(mapping, index);
+                               if (unlikely(page == NULL)) {
+                                       result = SCAN_FAIL;
+                                       goto xa_unlocked;
+                               }
+                       } else if (!PageUptodate(page)) {
+                               xas_unlock_irq(&xas);
+                               wait_on_page_locked(page);
+                               if (!trylock_page(page)) {
+                                       result = SCAN_PAGE_LOCK;
+                                       goto xa_unlocked;
+                               }
+                               get_page(page);
+                       } else if (PageDirty(page)) {
                                result = SCAN_FAIL;
-                               goto xa_unlocked;
+                               goto xa_locked;
+                       } else if (trylock_page(page)) {
+                               get_page(page);
+                               xas_unlock_irq(&xas);
+                       } else {
+                               result = SCAN_PAGE_LOCK;
+                               goto xa_locked;
                        }
-               } else if (trylock_page(page)) {
-                       get_page(page);
-                       xas_unlock_irq(&xas);
-               } else {
-                       result = SCAN_PAGE_LOCK;
-                       goto xa_locked;
                }
 
                /*
@@ -1425,6 +1647,12 @@ static void collapse_shmem(struct mm_struct *mm,
                        goto out_unlock;
                }
 
+               if (page_has_private(page) &&
+                   !try_to_release_page(page, GFP_KERNEL)) {
+                       result = SCAN_PAGE_HAS_PRIVATE;
+                       goto out_unlock;
+               }
+
                if (page_mapped(page))
                        unmap_mapping_pages(mapping, index, 1, false);
 
@@ -1454,7 +1682,7 @@ static void collapse_shmem(struct mm_struct *mm,
                list_add_tail(&page->lru, &pagelist);
 
                /* Finally, replace with the new page. */
-               xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
+               xas_store(&xas, new_page);
                continue;
 out_unlock:
                unlock_page(page);
@@ -1462,12 +1690,20 @@ out_unlock:
                goto xa_unlocked;
        }
 
-       __inc_node_page_state(new_page, NR_SHMEM_THPS);
+       if (is_shmem)
+               __inc_node_page_state(new_page, NR_SHMEM_THPS);
+       else {
+               __inc_node_page_state(new_page, NR_FILE_THPS);
+               filemap_nr_thps_inc(mapping);
+       }
+
        if (nr_none) {
                struct zone *zone = page_zone(new_page);
 
                __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
-               __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+               if (is_shmem)
+                       __mod_node_page_state(zone->zone_pgdat,
+                                             NR_SHMEM, nr_none);
        }
 
 xa_locked:
@@ -1505,10 +1741,15 @@ xa_unlocked:
 
                SetPageUptodate(new_page);
                page_ref_add(new_page, HPAGE_PMD_NR - 1);
-               set_page_dirty(new_page);
                mem_cgroup_commit_charge(new_page, memcg, false, true);
+
+               if (is_shmem) {
+                       set_page_dirty(new_page);
+                       lru_cache_add_anon(new_page);
+               } else {
+                       lru_cache_add_file(new_page);
+               }
                count_memcg_events(memcg, THP_COLLAPSE_ALLOC, 1);
-               lru_cache_add_anon(new_page);
 
                /*
                 * Remove pte page tables, so we can re-fault the page as huge.
@@ -1523,7 +1764,9 @@ xa_unlocked:
                /* Something went wrong: roll back page cache changes */
                xas_lock_irq(&xas);
                mapping->nrpages -= nr_none;
-               shmem_uncharge(mapping->host, nr_none);
+
+               if (is_shmem)
+                       shmem_uncharge(mapping->host, nr_none);
 
                xas_set(&xas, start);
                xas_for_each(&xas, page, end - 1) {
@@ -1563,11 +1806,11 @@ out:
        /* TODO: tracepoints */
 }
 
-static void khugepaged_scan_shmem(struct mm_struct *mm,
-               struct address_space *mapping,
-               pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm,
+               struct file *file, pgoff_t start, struct page **hpage)
 {
        struct page *page = NULL;
+       struct address_space *mapping = file->f_mapping;
        XA_STATE(xas, &mapping->i_pages, start);
        int present, swap;
        int node = NUMA_NO_NODE;
@@ -1606,7 +1849,8 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
                        break;
                }
 
-               if (page_count(page) != 1 + page_mapcount(page)) {
+               if (page_count(page) !=
+                   1 + page_mapcount(page) + page_has_private(page)) {
                        result = SCAN_PAGE_COUNT;
                        break;
                }
@@ -1631,19 +1875,23 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
                        result = SCAN_EXCEED_NONE_PTE;
                } else {
                        node = khugepaged_find_target_node();
-                       collapse_shmem(mm, mapping, start, hpage, node);
+                       collapse_file(mm, file, start, hpage, node);
                }
        }
 
        /* TODO: tracepoints */
 }
 #else
-static void khugepaged_scan_shmem(struct mm_struct *mm,
-               struct address_space *mapping,
-               pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm,
+               struct file *file, pgoff_t start, struct page **hpage)
 {
        BUILD_BUG();
 }
+
+static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
+{
+       return 0;
+}
 #endif
 
 static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
@@ -1668,6 +1916,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
                khugepaged_scan.mm_slot = mm_slot;
        }
        spin_unlock(&khugepaged_mm_lock);
+       khugepaged_collapse_pte_mapped_thps(mm_slot);
 
        mm = mm_slot->mm;
        /*
@@ -1713,17 +1962,18 @@ skip:
                        VM_BUG_ON(khugepaged_scan.address < hstart ||
                                  khugepaged_scan.address + HPAGE_PMD_SIZE >
                                  hend);
-                       if (shmem_file(vma->vm_file)) {
+                       if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) {
                                struct file *file;
                                pgoff_t pgoff = linear_page_index(vma,
                                                khugepaged_scan.address);
-                               if (!shmem_huge_enabled(vma))
+
+                               if (shmem_file(vma->vm_file)
+                                   && !shmem_huge_enabled(vma))
                                        goto skip;
                                file = get_file(vma->vm_file);
                                up_read(&mm->mmap_sem);
                                ret = 1;
-                               khugepaged_scan_shmem(mm, file->f_mapping,
-                                               pgoff, hpage);
+                               khugepaged_scan_file(mm, file, pgoff, hpage);
                                fput(file);
                        } else {
                                ret = khugepaged_scan_pmd(mm, vma,
index f6e6029..03a8d84 100644 (file)
@@ -168,6 +168,8 @@ struct kmemleak_object {
 #define OBJECT_REPORTED                (1 << 1)
 /* flag set to not scan the object */
 #define OBJECT_NO_SCAN         (1 << 2)
+/* flag set to fully scan the object when scan_area allocation failed */
+#define OBJECT_FULL_SCAN       (1 << 3)
 
 #define HEX_PREFIX             "    "
 /* number of bytes to print per line; must be 16 or 32 */
@@ -183,6 +185,10 @@ struct kmemleak_object {
 static LIST_HEAD(object_list);
 /* the list of gray-colored objects (see color_gray comment below) */
 static LIST_HEAD(gray_list);
+/* memory pool allocation */
+static struct kmemleak_object mem_pool[CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE];
+static int mem_pool_free_count = ARRAY_SIZE(mem_pool);
+static LIST_HEAD(mem_pool_free_list);
 /* search tree for object boundaries */
 static struct rb_root object_tree_root = RB_ROOT;
 /* rw_lock protecting the access to object_list and object_tree_root */
@@ -193,13 +199,11 @@ static struct kmem_cache *object_cache;
 static struct kmem_cache *scan_area_cache;
 
 /* set if tracing memory operations is enabled */
-static int kmemleak_enabled;
+static int kmemleak_enabled = 1;
 /* same as above but only for the kmemleak_free() callback */
-static int kmemleak_free_enabled;
+static int kmemleak_free_enabled = 1;
 /* set in the late_initcall if there were no errors */
 static int kmemleak_initialized;
-/* enables or disables early logging of the memory operations */
-static int kmemleak_early_log = 1;
 /* set if a kmemleak warning was issued */
 static int kmemleak_warning;
 /* set if a fatal kmemleak error has occurred */
@@ -227,49 +231,6 @@ static bool kmemleak_found_leaks;
 static bool kmemleak_verbose;
 module_param_named(verbose, kmemleak_verbose, bool, 0600);
 
-/*
- * Early object allocation/freeing logging. Kmemleak is initialized after the
- * kernel allocator. However, both the kernel allocator and kmemleak may
- * allocate memory blocks which need to be tracked. Kmemleak defines an
- * arbitrary buffer to hold the allocation/freeing information before it is
- * fully initialized.
- */
-
-/* kmemleak operation type for early logging */
-enum {
-       KMEMLEAK_ALLOC,
-       KMEMLEAK_ALLOC_PERCPU,
-       KMEMLEAK_FREE,
-       KMEMLEAK_FREE_PART,
-       KMEMLEAK_FREE_PERCPU,
-       KMEMLEAK_NOT_LEAK,
-       KMEMLEAK_IGNORE,
-       KMEMLEAK_SCAN_AREA,
-       KMEMLEAK_NO_SCAN,
-       KMEMLEAK_SET_EXCESS_REF
-};
-
-/*
- * Structure holding the information passed to kmemleak callbacks during the
- * early logging.
- */
-struct early_log {
-       int op_type;                    /* kmemleak operation type */
-       int min_count;                  /* minimum reference count */
-       const void *ptr;                /* allocated/freed memory block */
-       union {
-               size_t size;            /* memory block size */
-               unsigned long excess_ref; /* surplus reference passing */
-       };
-       unsigned long trace[MAX_TRACE]; /* stack trace */
-       unsigned int trace_len;         /* stack trace length */
-};
-
-/* early logging buffer and current position */
-static struct early_log
-       early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE] __initdata;
-static int crt_early_log __initdata;
-
 static void kmemleak_disable(void);
 
 /*
@@ -450,6 +411,54 @@ static int get_object(struct kmemleak_object *object)
 }
 
 /*
+ * Memory pool allocation and freeing. kmemleak_lock must not be held.
+ */
+static struct kmemleak_object *mem_pool_alloc(gfp_t gfp)
+{
+       unsigned long flags;
+       struct kmemleak_object *object;
+
+       /* try the slab allocator first */
+       if (object_cache) {
+               object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
+               if (object)
+                       return object;
+       }
+
+       /* slab allocation failed, try the memory pool */
+       write_lock_irqsave(&kmemleak_lock, flags);
+       object = list_first_entry_or_null(&mem_pool_free_list,
+                                         typeof(*object), object_list);
+       if (object)
+               list_del(&object->object_list);
+       else if (mem_pool_free_count)
+               object = &mem_pool[--mem_pool_free_count];
+       else
+               pr_warn_once("Memory pool empty, consider increasing CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE\n");
+       write_unlock_irqrestore(&kmemleak_lock, flags);
+
+       return object;
+}
+
+/*
+ * Return the object to either the slab allocator or the memory pool.
+ */
+static void mem_pool_free(struct kmemleak_object *object)
+{
+       unsigned long flags;
+
+       if (object < mem_pool || object >= mem_pool + ARRAY_SIZE(mem_pool)) {
+               kmem_cache_free(object_cache, object);
+               return;
+       }
+
+       /* add the object to the memory pool free list */
+       write_lock_irqsave(&kmemleak_lock, flags);
+       list_add(&object->object_list, &mem_pool_free_list);
+       write_unlock_irqrestore(&kmemleak_lock, flags);
+}
+
+/*
  * RCU callback to free a kmemleak_object.
  */
 static void free_object_rcu(struct rcu_head *rcu)
@@ -467,7 +476,7 @@ static void free_object_rcu(struct rcu_head *rcu)
                hlist_del(&area->node);
                kmem_cache_free(scan_area_cache, area);
        }
-       kmem_cache_free(object_cache, object);
+       mem_pool_free(object);
 }
 
 /*
@@ -485,7 +494,15 @@ static void put_object(struct kmemleak_object *object)
        /* should only get here after delete_object was called */
        WARN_ON(object->flags & OBJECT_ALLOCATED);
 
-       call_rcu(&object->rcu, free_object_rcu);
+       /*
+        * It may be too early for the RCU callbacks, however, there is no
+        * concurrent object_list traversal when !object_cache and all objects
+        * came from the memory pool. Free the object directly.
+        */
+       if (object_cache)
+               call_rcu(&object->rcu, free_object_rcu);
+       else
+               free_object_rcu(&object->rcu);
 }
 
 /*
@@ -550,7 +567,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
        struct rb_node **link, *rb_parent;
        unsigned long untagged_ptr;
 
-       object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
+       object = mem_pool_alloc(gfp);
        if (!object) {
                pr_warn("Cannot allocate a kmemleak_object structure\n");
                kmemleak_disable();
@@ -689,9 +706,7 @@ static void delete_object_part(unsigned long ptr, size_t size)
        /*
         * Create one or two objects that may result from the memory block
         * split. Note that partial freeing is only done by free_bootmem() and
-        * this happens before kmemleak_init() is called. The path below is
-        * only executed during early log recording in kmemleak_init(), so
-        * GFP_KERNEL is enough.
+        * this happens before kmemleak_init() is called.
         */
        start = object->pointer;
        end = object->pointer + object->size;
@@ -763,7 +778,7 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
 {
        unsigned long flags;
        struct kmemleak_object *object;
-       struct kmemleak_scan_area *area;
+       struct kmemleak_scan_area *area = NULL;
 
        object = find_and_get_object(ptr, 1);
        if (!object) {
@@ -772,13 +787,16 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
                return;
        }
 
-       area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp));
-       if (!area) {
-               pr_warn("Cannot allocate a scan area\n");
-               goto out;
-       }
+       if (scan_area_cache)
+               area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp));
 
        spin_lock_irqsave(&object->lock, flags);
+       if (!area) {
+               pr_warn_once("Cannot allocate a scan area, scanning the full object\n");
+               /* mark the object for full scan to avoid false positives */
+               object->flags |= OBJECT_FULL_SCAN;
+               goto out_unlock;
+       }
        if (size == SIZE_MAX) {
                size = object->pointer + object->size - ptr;
        } else if (ptr + size > object->pointer + object->size) {
@@ -795,7 +813,6 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
        hlist_add_head(&area->node, &object->area_list);
 out_unlock:
        spin_unlock_irqrestore(&object->lock, flags);
-out:
        put_object(object);
 }
 
@@ -845,86 +862,6 @@ static void object_no_scan(unsigned long ptr)
        put_object(object);
 }
 
-/*
- * Log an early kmemleak_* call to the early_log buffer. These calls will be
- * processed later once kmemleak is fully initialized.
- */
-static void __init log_early(int op_type, const void *ptr, size_t size,
-                            int min_count)
-{
-       unsigned long flags;
-       struct early_log *log;
-
-       if (kmemleak_error) {
-               /* kmemleak stopped recording, just count the requests */
-               crt_early_log++;
-               return;
-       }
-
-       if (crt_early_log >= ARRAY_SIZE(early_log)) {
-               crt_early_log++;
-               kmemleak_disable();
-               return;
-       }
-
-       /*
-        * There is no need for locking since the kernel is still in UP mode
-        * at this stage. Disabling the IRQs is enough.
-        */
-       local_irq_save(flags);
-       log = &early_log[crt_early_log];
-       log->op_type = op_type;
-       log->ptr = ptr;
-       log->size = size;
-       log->min_count = min_count;
-       log->trace_len = __save_stack_trace(log->trace);
-       crt_early_log++;
-       local_irq_restore(flags);
-}
-
-/*
- * Log an early allocated block and populate the stack trace.
- */
-static void early_alloc(struct early_log *log)
-{
-       struct kmemleak_object *object;
-       unsigned long flags;
-       int i;
-
-       if (!kmemleak_enabled || !log->ptr || IS_ERR(log->ptr))
-               return;
-
-       /*
-        * RCU locking needed to ensure object is not freed via put_object().
-        */
-       rcu_read_lock();
-       object = create_object((unsigned long)log->ptr, log->size,
-                              log->min_count, GFP_ATOMIC);
-       if (!object)
-               goto out;
-       spin_lock_irqsave(&object->lock, flags);
-       for (i = 0; i < log->trace_len; i++)
-               object->trace[i] = log->trace[i];
-       object->trace_len = log->trace_len;
-       spin_unlock_irqrestore(&object->lock, flags);
-out:
-       rcu_read_unlock();
-}
-
-/*
- * Log an early allocated block and populate the stack trace.
- */
-static void early_alloc_percpu(struct early_log *log)
-{
-       unsigned int cpu;
-       const void __percpu *ptr = log->ptr;
-
-       for_each_possible_cpu(cpu) {
-               log->ptr = per_cpu_ptr(ptr, cpu);
-               early_alloc(log);
-       }
-}
-
 /**
  * kmemleak_alloc - register a newly allocated object
  * @ptr:       pointer to beginning of the object
@@ -946,8 +883,6 @@ void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count,
 
        if (kmemleak_enabled && ptr && !IS_ERR(ptr))
                create_object((unsigned long)ptr, size, min_count, gfp);
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_ALLOC, ptr, size, min_count);
 }
 EXPORT_SYMBOL_GPL(kmemleak_alloc);
 
@@ -975,8 +910,6 @@ void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
                for_each_possible_cpu(cpu)
                        create_object((unsigned long)per_cpu_ptr(ptr, cpu),
                                      size, 0, gfp);
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_ALLOC_PERCPU, ptr, size, 0);
 }
 EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu);
 
@@ -1001,11 +934,6 @@ void __ref kmemleak_vmalloc(const struct vm_struct *area, size_t size, gfp_t gfp
                create_object((unsigned long)area->addr, size, 2, gfp);
                object_set_excess_ref((unsigned long)area,
                                      (unsigned long)area->addr);
-       } else if (kmemleak_early_log) {
-               log_early(KMEMLEAK_ALLOC, area->addr, size, 2);
-               /* reusing early_log.size for storing area->addr */
-               log_early(KMEMLEAK_SET_EXCESS_REF,
-                         area, (unsigned long)area->addr, 0);
        }
 }
 EXPORT_SYMBOL_GPL(kmemleak_vmalloc);
@@ -1023,8 +951,6 @@ void __ref kmemleak_free(const void *ptr)
 
        if (kmemleak_free_enabled && ptr && !IS_ERR(ptr))
                delete_object_full((unsigned long)ptr);
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_FREE, ptr, 0, 0);
 }
 EXPORT_SYMBOL_GPL(kmemleak_free);
 
@@ -1043,8 +969,6 @@ void __ref kmemleak_free_part(const void *ptr, size_t size)
 
        if (kmemleak_enabled && ptr && !IS_ERR(ptr))
                delete_object_part((unsigned long)ptr, size);
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_FREE_PART, ptr, size, 0);
 }
 EXPORT_SYMBOL_GPL(kmemleak_free_part);
 
@@ -1065,8 +989,6 @@ void __ref kmemleak_free_percpu(const void __percpu *ptr)
                for_each_possible_cpu(cpu)
                        delete_object_full((unsigned long)per_cpu_ptr(ptr,
                                                                      cpu));
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_FREE_PERCPU, ptr, 0, 0);
 }
 EXPORT_SYMBOL_GPL(kmemleak_free_percpu);
 
@@ -1117,8 +1039,6 @@ void __ref kmemleak_not_leak(const void *ptr)
 
        if (kmemleak_enabled && ptr && !IS_ERR(ptr))
                make_gray_object((unsigned long)ptr);
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_not_leak);
 
@@ -1137,8 +1057,6 @@ void __ref kmemleak_ignore(const void *ptr)
 
        if (kmemleak_enabled && ptr && !IS_ERR(ptr))
                make_black_object((unsigned long)ptr);
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_IGNORE, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_ignore);
 
@@ -1159,8 +1077,6 @@ void __ref kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp)
 
        if (kmemleak_enabled && ptr && size && !IS_ERR(ptr))
                add_scan_area((unsigned long)ptr, size, gfp);
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_SCAN_AREA, ptr, size, 0);
 }
 EXPORT_SYMBOL(kmemleak_scan_area);
 
@@ -1179,8 +1095,6 @@ void __ref kmemleak_no_scan(const void *ptr)
 
        if (kmemleak_enabled && ptr && !IS_ERR(ptr))
                object_no_scan((unsigned long)ptr);
-       else if (kmemleak_early_log)
-               log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_no_scan);
 
@@ -1408,7 +1322,8 @@ static void scan_object(struct kmemleak_object *object)
        if (!(object->flags & OBJECT_ALLOCATED))
                /* already freed object */
                goto out;
-       if (hlist_empty(&object->area_list)) {
+       if (hlist_empty(&object->area_list) ||
+           object->flags & OBJECT_FULL_SCAN) {
                void *start = (void *)object->pointer;
                void *end = (void *)(object->pointer + object->size);
                void *next;
@@ -1966,7 +1881,6 @@ static void kmemleak_disable(void)
 
        /* stop any memory operation tracing */
        kmemleak_enabled = 0;
-       kmemleak_early_log = 0;
 
        /* check whether it is too early for a kernel thread */
        if (kmemleak_initialized)
@@ -1994,20 +1908,11 @@ static int __init kmemleak_boot_config(char *str)
 }
 early_param("kmemleak", kmemleak_boot_config);
 
-static void __init print_log_trace(struct early_log *log)
-{
-       pr_notice("Early log backtrace:\n");
-       stack_trace_print(log->trace, log->trace_len, 2);
-}
-
 /*
  * Kmemleak initialization.
  */
 void __init kmemleak_init(void)
 {
-       int i;
-       unsigned long flags;
-
 #ifdef CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF
        if (!kmemleak_skip_disable) {
                kmemleak_disable();
@@ -2015,28 +1920,15 @@ void __init kmemleak_init(void)
        }
 #endif
 
+       if (kmemleak_error)
+               return;
+
        jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
        jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);
 
        object_cache = KMEM_CACHE(kmemleak_object, SLAB_NOLEAKTRACE);
        scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE);
 
-       if (crt_early_log > ARRAY_SIZE(early_log))
-               pr_warn("Early log buffer exceeded (%d), please increase DEBUG_KMEMLEAK_EARLY_LOG_SIZE\n",
-                       crt_early_log);
-
-       /* the kernel is still in UP mode, so disabling the IRQs is enough */
-       local_irq_save(flags);
-       kmemleak_early_log = 0;
-       if (kmemleak_error) {
-               local_irq_restore(flags);
-               return;
-       } else {
-               kmemleak_enabled = 1;
-               kmemleak_free_enabled = 1;
-       }
-       local_irq_restore(flags);
-
        /* register the data/bss sections */
        create_object((unsigned long)_sdata, _edata - _sdata,
                      KMEMLEAK_GREY, GFP_ATOMIC);
@@ -2047,57 +1939,6 @@ void __init kmemleak_init(void)
                create_object((unsigned long)__start_ro_after_init,
                              __end_ro_after_init - __start_ro_after_init,
                              KMEMLEAK_GREY, GFP_ATOMIC);
-
-       /*
-        * This is the point where tracking allocations is safe. Automatic
-        * scanning is started during the late initcall. Add the early logged
-        * callbacks to the kmemleak infrastructure.
-        */
-       for (i = 0; i < crt_early_log; i++) {
-               struct early_log *log = &early_log[i];
-
-               switch (log->op_type) {
-               case KMEMLEAK_ALLOC:
-                       early_alloc(log);
-                       break;
-               case KMEMLEAK_ALLOC_PERCPU:
-                       early_alloc_percpu(log);
-                       break;
-               case KMEMLEAK_FREE:
-                       kmemleak_free(log->ptr);
-                       break;
-               case KMEMLEAK_FREE_PART:
-                       kmemleak_free_part(log->ptr, log->size);
-                       break;
-               case KMEMLEAK_FREE_PERCPU:
-                       kmemleak_free_percpu(log->ptr);
-                       break;
-               case KMEMLEAK_NOT_LEAK:
-                       kmemleak_not_leak(log->ptr);
-                       break;
-               case KMEMLEAK_IGNORE:
-                       kmemleak_ignore(log->ptr);
-                       break;
-               case KMEMLEAK_SCAN_AREA:
-                       kmemleak_scan_area(log->ptr, log->size, GFP_KERNEL);
-                       break;
-               case KMEMLEAK_NO_SCAN:
-                       kmemleak_no_scan(log->ptr);
-                       break;
-               case KMEMLEAK_SET_EXCESS_REF:
-                       object_set_excess_ref((unsigned long)log->ptr,
-                                             log->excess_ref);
-                       break;
-               default:
-                       kmemleak_warn("Unknown early log operation: %d\n",
-                                     log->op_type);
-               }
-
-               if (kmemleak_warning) {
-                       print_log_trace(log);
-                       kmemleak_warning = 0;
-               }
-       }
 }
 
 /*
@@ -2126,7 +1967,8 @@ static int __init kmemleak_late_init(void)
                mutex_unlock(&scan_mutex);
        }
 
-       pr_info("Kernel memory leak detector initialized\n");
+       pr_info("Kernel memory leak detector initialized (mem pool available: %d)\n",
+               mem_pool_free_count);
 
        return 0;
 }
index 3dc4346..dbee2eb 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1029,24 +1029,6 @@ static u32 calc_checksum(struct page *page)
        return checksum;
 }
 
-static int memcmp_pages(struct page *page1, struct page *page2)
-{
-       char *addr1, *addr2;
-       int ret;
-
-       addr1 = kmap_atomic(page1);
-       addr2 = kmap_atomic(page2);
-       ret = memcmp(addr1, addr2, PAGE_SIZE);
-       kunmap_atomic(addr2);
-       kunmap_atomic(addr1);
-       return ret;
-}
-
-static inline int pages_identical(struct page *page1, struct page *page2)
-{
-       return !memcmp_pages(page1, page2);
-}
-
 static int write_protect_page(struct vm_area_struct *vma, struct page *page,
                              pte_t *orig_pte)
 {
index bac973b..2be9f3f 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/syscalls.h>
 #include <linux/mempolicy.h>
 #include <linux/page-isolation.h>
+#include <linux/page_idle.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/hugetlb.h>
 #include <linux/falloc.h>
@@ -21,6 +22,7 @@
 #include <linux/file.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
+#include <linux/pagewalk.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/shmem_fs.h>
 
 #include "internal.h"
 
+struct madvise_walk_private {
+       struct mmu_gather *tlb;
+       bool pageout;
+};
+
 /*
  * Any behaviour which results in changes to the vma->vm_flags needs to
  * take mmap_sem for writing. Others, which simply traverse vmas, need
@@ -41,6 +48,8 @@ static int madvise_need_mmap_write(int behavior)
        case MADV_REMOVE:
        case MADV_WILLNEED:
        case MADV_DONTNEED:
+       case MADV_COLD:
+       case MADV_PAGEOUT:
        case MADV_FREE:
                return 0;
        default:
@@ -106,28 +115,14 @@ static long madvise_behavior(struct vm_area_struct *vma,
        case MADV_MERGEABLE:
        case MADV_UNMERGEABLE:
                error = ksm_madvise(vma, start, end, behavior, &new_flags);
-               if (error) {
-                       /*
-                        * madvise() returns EAGAIN if kernel resources, such as
-                        * slab, are temporarily unavailable.
-                        */
-                       if (error == -ENOMEM)
-                               error = -EAGAIN;
-                       goto out;
-               }
+               if (error)
+                       goto out_convert_errno;
                break;
        case MADV_HUGEPAGE:
        case MADV_NOHUGEPAGE:
                error = hugepage_madvise(vma, &new_flags, behavior);
-               if (error) {
-                       /*
-                        * madvise() returns EAGAIN if kernel resources, such as
-                        * slab, are temporarily unavailable.
-                        */
-                       if (error == -ENOMEM)
-                               error = -EAGAIN;
-                       goto out;
-               }
+               if (error)
+                       goto out_convert_errno;
                break;
        }
 
@@ -153,15 +148,8 @@ static long madvise_behavior(struct vm_area_struct *vma,
                        goto out;
                }
                error = __split_vma(mm, vma, start, 1);
-               if (error) {
-                       /*
-                        * madvise() returns EAGAIN if kernel resources, such as
-                        * slab, are temporarily unavailable.
-                        */
-                       if (error == -ENOMEM)
-                               error = -EAGAIN;
-                       goto out;
-               }
+               if (error)
+                       goto out_convert_errno;
        }
 
        if (end != vma->vm_end) {
@@ -170,15 +158,8 @@ static long madvise_behavior(struct vm_area_struct *vma,
                        goto out;
                }
                error = __split_vma(mm, vma, end, 0);
-               if (error) {
-                       /*
-                        * madvise() returns EAGAIN if kernel resources, such as
-                        * slab, are temporarily unavailable.
-                        */
-                       if (error == -ENOMEM)
-                               error = -EAGAIN;
-                       goto out;
-               }
+               if (error)
+                       goto out_convert_errno;
        }
 
 success:
@@ -186,6 +167,14 @@ success:
         * vm_flags is protected by the mmap_sem held in write mode.
         */
        vma->vm_flags = new_flags;
+
+out_convert_errno:
+       /*
+        * madvise() returns EAGAIN if kernel resources, such as
+        * slab, are temporarily unavailable.
+        */
+       if (error == -ENOMEM)
+               error = -EAGAIN;
 out:
        return error;
 }
@@ -226,19 +215,9 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
        return 0;
 }
 
-static void force_swapin_readahead(struct vm_area_struct *vma,
-               unsigned long start, unsigned long end)
-{
-       struct mm_walk walk = {
-               .mm = vma->vm_mm,
-               .pmd_entry = swapin_walk_pmd_entry,
-               .private = vma,
-       };
-
-       walk_page_range(start, end, &walk);
-
-       lru_add_drain();        /* Push any new pages onto the LRU now */
-}
+static const struct mm_walk_ops swapin_walk_ops = {
+       .pmd_entry              = swapin_walk_pmd_entry,
+};
 
 static void force_shm_swapin_readahead(struct vm_area_struct *vma,
                unsigned long start, unsigned long end,
@@ -281,7 +260,8 @@ static long madvise_willneed(struct vm_area_struct *vma,
        *prev = vma;
 #ifdef CONFIG_SWAP
        if (!file) {
-               force_swapin_readahead(vma, start, end);
+               walk_page_range(vma->vm_mm, start, end, &swapin_walk_ops, vma);
+               lru_add_drain(); /* Push any new pages onto the LRU now */
                return 0;
        }
 
@@ -317,6 +297,254 @@ static long madvise_willneed(struct vm_area_struct *vma,
        return 0;
 }
 
+static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
+                               unsigned long addr, unsigned long end,
+                               struct mm_walk *walk)
+{
+       struct madvise_walk_private *private = walk->private;
+       struct mmu_gather *tlb = private->tlb;
+       bool pageout = private->pageout;
+       struct mm_struct *mm = tlb->mm;
+       struct vm_area_struct *vma = walk->vma;
+       pte_t *orig_pte, *pte, ptent;
+       spinlock_t *ptl;
+       struct page *page = NULL;
+       LIST_HEAD(page_list);
+
+       if (fatal_signal_pending(current))
+               return -EINTR;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       if (pmd_trans_huge(*pmd)) {
+               pmd_t orig_pmd;
+               unsigned long next = pmd_addr_end(addr, end);
+
+               tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
+               ptl = pmd_trans_huge_lock(pmd, vma);
+               if (!ptl)
+                       return 0;
+
+               orig_pmd = *pmd;
+               if (is_huge_zero_pmd(orig_pmd))
+                       goto huge_unlock;
+
+               if (unlikely(!pmd_present(orig_pmd))) {
+                       VM_BUG_ON(thp_migration_supported() &&
+                                       !is_pmd_migration_entry(orig_pmd));
+                       goto huge_unlock;
+               }
+
+               page = pmd_page(orig_pmd);
+               if (next - addr != HPAGE_PMD_SIZE) {
+                       int err;
+
+                       if (page_mapcount(page) != 1)
+                               goto huge_unlock;
+
+                       get_page(page);
+                       spin_unlock(ptl);
+                       lock_page(page);
+                       err = split_huge_page(page);
+                       unlock_page(page);
+                       put_page(page);
+                       if (!err)
+                               goto regular_page;
+                       return 0;
+               }
+
+               if (pmd_young(orig_pmd)) {
+                       pmdp_invalidate(vma, addr, pmd);
+                       orig_pmd = pmd_mkold(orig_pmd);
+
+                       set_pmd_at(mm, addr, pmd, orig_pmd);
+                       tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
+               }
+
+               ClearPageReferenced(page);
+               test_and_clear_page_young(page);
+               if (pageout) {
+                       if (!isolate_lru_page(page))
+                               list_add(&page->lru, &page_list);
+               } else
+                       deactivate_page(page);
+huge_unlock:
+               spin_unlock(ptl);
+               if (pageout)
+                       reclaim_pages(&page_list);
+               return 0;
+       }
+
+       if (pmd_trans_unstable(pmd))
+               return 0;
+regular_page:
+#endif
+       tlb_change_page_size(tlb, PAGE_SIZE);
+       orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+       flush_tlb_batched_pending(mm);
+       arch_enter_lazy_mmu_mode();
+       for (; addr < end; pte++, addr += PAGE_SIZE) {
+               ptent = *pte;
+
+               if (pte_none(ptent))
+                       continue;
+
+               if (!pte_present(ptent))
+                       continue;
+
+               page = vm_normal_page(vma, addr, ptent);
+               if (!page)
+                       continue;
+
+               /*
+                * Creating a THP page is expensive so split it only if we
+                * are sure it's worth it. Split it only if we are the sole owner.
+                */
+               if (PageTransCompound(page)) {
+                       if (page_mapcount(page) != 1)
+                               break;
+                       get_page(page);
+                       if (!trylock_page(page)) {
+                               put_page(page);
+                               break;
+                       }
+                       pte_unmap_unlock(orig_pte, ptl);
+                       if (split_huge_page(page)) {
+                               unlock_page(page);
+                               put_page(page);
+                               pte_offset_map_lock(mm, pmd, addr, &ptl);
+                               break;
+                       }
+                       unlock_page(page);
+                       put_page(page);
+                       pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+                       pte--;
+                       addr -= PAGE_SIZE;
+                       continue;
+               }
+
+               VM_BUG_ON_PAGE(PageTransCompound(page), page);
+
+               if (pte_young(ptent)) {
+                       ptent = ptep_get_and_clear_full(mm, addr, pte,
+                                                       tlb->fullmm);
+                       ptent = pte_mkold(ptent);
+                       set_pte_at(mm, addr, pte, ptent);
+                       tlb_remove_tlb_entry(tlb, pte, addr);
+               }
+
+               /*
+                * We are deactivating a page to accelerate its reclaim.
+                * The VM cannot reclaim the page unless we clear PG_young.
+                * As a side effect, this confuses idle-page tracking,
+                * which will miss the recent reference history.
+                */
+               ClearPageReferenced(page);
+               test_and_clear_page_young(page);
+               if (pageout) {
+                       if (!isolate_lru_page(page))
+                               list_add(&page->lru, &page_list);
+               } else
+                       deactivate_page(page);
+       }
+
+       arch_leave_lazy_mmu_mode();
+       pte_unmap_unlock(orig_pte, ptl);
+       if (pageout)
+               reclaim_pages(&page_list);
+       cond_resched();
+
+       return 0;
+}
+
+static const struct mm_walk_ops cold_walk_ops = {
+       .pmd_entry = madvise_cold_or_pageout_pte_range,
+};
+
+static void madvise_cold_page_range(struct mmu_gather *tlb,
+                            struct vm_area_struct *vma,
+                            unsigned long addr, unsigned long end)
+{
+       struct madvise_walk_private walk_private = {
+               .pageout = false,
+               .tlb = tlb,
+       };
+
+       tlb_start_vma(tlb, vma);
+       walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private);
+       tlb_end_vma(tlb, vma);
+}
+
+static long madvise_cold(struct vm_area_struct *vma,
+                       struct vm_area_struct **prev,
+                       unsigned long start_addr, unsigned long end_addr)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       struct mmu_gather tlb;
+
+       *prev = vma;
+       if (!can_madv_lru_vma(vma))
+               return -EINVAL;
+
+       lru_add_drain();
+       tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
+       madvise_cold_page_range(&tlb, vma, start_addr, end_addr);
+       tlb_finish_mmu(&tlb, start_addr, end_addr);
+
+       return 0;
+}
+
+static void madvise_pageout_page_range(struct mmu_gather *tlb,
+                            struct vm_area_struct *vma,
+                            unsigned long addr, unsigned long end)
+{
+       struct madvise_walk_private walk_private = {
+               .pageout = true,
+               .tlb = tlb,
+       };
+
+       tlb_start_vma(tlb, vma);
+       walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private);
+       tlb_end_vma(tlb, vma);
+}
+
+static inline bool can_do_pageout(struct vm_area_struct *vma)
+{
+       if (vma_is_anonymous(vma))
+               return true;
+       if (!vma->vm_file)
+               return false;
+       /*
+        * paging out pagecache only for non-anonymous mappings that correspond
+        * to the files the calling process could (if tried) open for writing;
+        * otherwise we'd be including shared non-exclusive mappings, which
+        * opens a side channel.
+        */
+       return inode_owner_or_capable(file_inode(vma->vm_file)) ||
+               inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
+}
+
+static long madvise_pageout(struct vm_area_struct *vma,
+                       struct vm_area_struct **prev,
+                       unsigned long start_addr, unsigned long end_addr)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       struct mmu_gather tlb;
+
+       *prev = vma;
+       if (!can_madv_lru_vma(vma))
+               return -EINVAL;
+
+       if (!can_do_pageout(vma))
+               return 0;
+
+       lru_add_drain();
+       tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
+       madvise_pageout_page_range(&tlb, vma, start_addr, end_addr);
+       tlb_finish_mmu(&tlb, start_addr, end_addr);
+
+       return 0;
+}
+
 static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
                                unsigned long end, struct mm_walk *walk)
 
@@ -450,20 +678,9 @@ next:
        return 0;
 }
 
-static void madvise_free_page_range(struct mmu_gather *tlb,
-                            struct vm_area_struct *vma,
-                            unsigned long addr, unsigned long end)
-{
-       struct mm_walk free_walk = {
-               .pmd_entry = madvise_free_pte_range,
-               .mm = vma->vm_mm,
-               .private = tlb,
-       };
-
-       tlb_start_vma(tlb, vma);
-       walk_page_range(addr, end, &free_walk);
-       tlb_end_vma(tlb, vma);
-}
+static const struct mm_walk_ops madvise_free_walk_ops = {
+       .pmd_entry              = madvise_free_pte_range,
+};
 
 static int madvise_free_single_vma(struct vm_area_struct *vma,
                        unsigned long start_addr, unsigned long end_addr)
@@ -490,7 +707,10 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
        update_hiwater_rss(mm);
 
        mmu_notifier_invalidate_range_start(&range);
-       madvise_free_page_range(&tlb, vma, range.start, range.end);
+       tlb_start_vma(&tlb, vma);
+       walk_page_range(vma->vm_mm, range.start, range.end,
+                       &madvise_free_walk_ops, &tlb);
+       tlb_end_vma(&tlb, vma);
        mmu_notifier_invalidate_range_end(&range);
        tlb_finish_mmu(&tlb, range.start, range.end);
 
@@ -529,7 +749,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
                                  int behavior)
 {
        *prev = vma;
-       if (!can_madv_dontneed_vma(vma))
+       if (!can_madv_lru_vma(vma))
                return -EINVAL;
 
        if (!userfaultfd_remove(vma, start, end)) {
@@ -551,7 +771,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
                         */
                        return -ENOMEM;
                }
-               if (!can_madv_dontneed_vma(vma))
+               if (!can_madv_lru_vma(vma))
                        return -EINVAL;
                if (end > vma->vm_end) {
                        /*
@@ -705,6 +925,10 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
                return madvise_remove(vma, prev, start, end);
        case MADV_WILLNEED:
                return madvise_willneed(vma, prev, start, end);
+       case MADV_COLD:
+               return madvise_cold(vma, prev, start, end);
+       case MADV_PAGEOUT:
+               return madvise_pageout(vma, prev, start, end);
        case MADV_FREE:
        case MADV_DONTNEED:
                return madvise_dontneed_free(vma, prev, start, end, behavior);
@@ -726,6 +950,8 @@ madvise_behavior_valid(int behavior)
        case MADV_WILLNEED:
        case MADV_DONTNEED:
        case MADV_FREE:
+       case MADV_COLD:
+       case MADV_PAGEOUT:
 #ifdef CONFIG_KSM
        case MADV_MERGEABLE:
        case MADV_UNMERGEABLE:
@@ -820,6 +1046,8 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
        size_t len;
        struct blk_plug plug;
 
+       start = untagged_addr(start);
+
        if (!madvise_behavior_valid(behavior))
                return error;
 
index 597d581..c313c49 100644 (file)
@@ -25,7 +25,7 @@
 #include <linux/page_counter.h>
 #include <linux/memcontrol.h>
 #include <linux/cgroup.h>
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/sched/mm.h>
 #include <linux/shmem_fs.h>
 #include <linux/hugetlb.h>
@@ -57,6 +57,7 @@
 #include <linux/lockdep.h>
 #include <linux/file.h>
 #include <linux/tracehook.h>
+#include <linux/psi.h>
 #include <linux/seq_buf.h>
 #include "internal.h"
 #include <net/sock.h>
@@ -317,6 +318,7 @@ DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key);
 EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 struct workqueue_struct *memcg_kmem_cache_wq;
+#endif
 
 static int memcg_shrinker_map_size;
 static DEFINE_MUTEX(memcg_shrinker_map_mutex);
@@ -440,14 +442,6 @@ void memcg_set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
        }
 }
 
-#else /* CONFIG_MEMCG_KMEM */
-static int memcg_alloc_shrinker_maps(struct mem_cgroup *memcg)
-{
-       return 0;
-}
-static void memcg_free_shrinker_maps(struct mem_cgroup *memcg) { }
-#endif /* CONFIG_MEMCG_KMEM */
-
 /**
  * mem_cgroup_css_from_page - css of the memcg associated with a page
  * @page: page of interest
@@ -2270,21 +2264,22 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
        for_each_online_cpu(cpu) {
                struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
                struct mem_cgroup *memcg;
+               bool flush = false;
 
+               rcu_read_lock();
                memcg = stock->cached;
-               if (!memcg || !stock->nr_pages || !css_tryget(&memcg->css))
-                       continue;
-               if (!mem_cgroup_is_descendant(memcg, root_memcg)) {
-                       css_put(&memcg->css);
-                       continue;
-               }
-               if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
+               if (memcg && stock->nr_pages &&
+                   mem_cgroup_is_descendant(memcg, root_memcg))
+                       flush = true;
+               rcu_read_unlock();
+
+               if (flush &&
+                   !test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
                        if (cpu == curcpu)
                                drain_local_stock(&stock->work);
                        else
                                schedule_work_on(cpu, &stock->work);
                }
-               css_put(&memcg->css);
        }
        put_cpu();
        mutex_unlock(&percpu_charge_mutex);
@@ -2359,11 +2354,67 @@ static void high_work_func(struct work_struct *work)
 }
 
 /*
+ * Clamp the maximum sleep time per allocation batch to 2 seconds. This is
+ * enough to still cause a significant slowdown in most cases, while still
+ * allowing diagnostics and tracing to proceed without becoming stuck.
+ */
+#define MEMCG_MAX_HIGH_DELAY_JIFFIES (2UL*HZ)
+
+/*
+ * When calculating the delay, we use these on either side of the exponentiation
+ * to maintain precision and scale to a reasonable number of jiffies (see the
+ * table below).
+ *
+ * - MEMCG_DELAY_PRECISION_SHIFT: Extra precision bits while translating the
+ *   overage ratio to a delay.
+ * - MEMCG_DELAY_SCALING_SHIFT: The number of bits to scale down the
+ *   proposed penalty in order to reduce to a reasonable number of jiffies, and
+ *   to produce a reasonable delay curve.
+ *
+ * MEMCG_DELAY_SCALING_SHIFT just happens to be a number that produces a
+ * reasonable delay curve compared to precision-adjusted overage, not
+ * penalising heavily at first, but still making sure that growth beyond the
+ * limit penalises misbehaving cgroups by slowing them down exponentially. For
+ * example, with a high of 100 megabytes:
+ *
+ *  +-------+------------------------+
+ *  | usage | time to allocate in ms |
+ *  +-------+------------------------+
+ *  | 100M  |                      0 |
+ *  | 101M  |                      6 |
+ *  | 102M  |                     25 |
+ *  | 103M  |                     57 |
+ *  | 104M  |                    102 |
+ *  | 105M  |                    159 |
+ *  | 106M  |                    230 |
+ *  | 107M  |                    313 |
+ *  | 108M  |                    409 |
+ *  | 109M  |                    518 |
+ *  | 110M  |                    639 |
+ *  | 111M  |                    774 |
+ *  | 112M  |                    921 |
+ *  | 113M  |                   1081 |
+ *  | 114M  |                   1254 |
+ *  | 115M  |                   1439 |
+ *  | 116M  |                   1638 |
+ *  | 117M  |                   1849 |
+ *  | 118M  |                   2000 |
+ *  | 119M  |                   2000 |
+ *  | 120M  |                   2000 |
+ *  +-------+------------------------+
+ */
+ #define MEMCG_DELAY_PRECISION_SHIFT 20
+ #define MEMCG_DELAY_SCALING_SHIFT 14
+
+/*
  * Scheduled by try_charge() to be executed from the userland return path
  * and reclaims memory over the high limit.
  */
 void mem_cgroup_handle_over_high(void)
 {
+       unsigned long usage, high, clamped_high;
+       unsigned long pflags;
+       unsigned long penalty_jiffies, overage;
        unsigned int nr_pages = current->memcg_nr_pages_over_high;
        struct mem_cgroup *memcg;
 
@@ -2372,8 +2423,75 @@ void mem_cgroup_handle_over_high(void)
 
        memcg = get_mem_cgroup_from_mm(current->mm);
        reclaim_high(memcg, nr_pages, GFP_KERNEL);
-       css_put(&memcg->css);
        current->memcg_nr_pages_over_high = 0;
+
+       /*
+        * memory.high is breached and reclaim is unable to keep up. Throttle
+        * allocators proactively to slow down excessive growth.
+        *
+        * We use overage compared to memory.high to calculate the number of
+        * jiffies to sleep (penalty_jiffies). Ideally this value should be
+        * fairly lenient on small overages, and increasingly harsh when the
+        * memcg in question makes it clear that it has no intention of stopping
+        * its crazy behaviour, so we exponentially increase the delay based on
+        * overage amount.
+        */
+
+       usage = page_counter_read(&memcg->memory);
+       high = READ_ONCE(memcg->high);
+
+       if (usage <= high)
+               goto out;
+
+       /*
+        * Prevent division by 0 in overage calculation by acting as if it was a
+        * threshold of 1 page
+        */
+       clamped_high = max(high, 1UL);
+
+       overage = div_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
+                         clamped_high);
+
+       penalty_jiffies = ((u64)overage * overage * HZ)
+               >> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT);
+
+       /*
+        * Factor in the task's own contribution to the overage, such that four
+        * N-sized allocations are throttled approximately the same as one
+        * 4N-sized allocation.
+        *
+        * MEMCG_CHARGE_BATCH pages is nominal, so work out how much smaller or
+        * larger the current charge batch is than that.
+        */
+       penalty_jiffies = penalty_jiffies * nr_pages / MEMCG_CHARGE_BATCH;
+
+       /*
+        * Clamp the max delay per usermode return so as to still keep the
+        * application moving forwards and also permit diagnostics, albeit
+        * extremely slowly.
+        */
+       penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+
+       /*
+        * Don't sleep if the amount of jiffies this memcg owes us is so low
+        * that it's not even worth doing, in an attempt to be nice to those who
+        * go only a small amount over their memory.high value and maybe haven't
+        * been aggressively reclaimed enough yet.
+        */
+       if (penalty_jiffies <= HZ / 100)
+               goto out;
+
+       /*
+        * If we exit early, we're guaranteed to die (since
+        * schedule_timeout_killable sets TASK_KILLABLE). This means we don't
+        * need to account for any ill-begotten jiffies to pay them off later.
+        */
+       psi_memstall_enter(&pflags);
+       schedule_timeout_killable(penalty_jiffies);
+       psi_memstall_leave(&pflags);
+
+out:
+       css_put(&memcg->css);
 }
 
 static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
@@ -2825,6 +2943,16 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
 
        if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
            !page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
+
+               /*
+                * Enforce __GFP_NOFAIL allocation because callers are not
+                * prepared to see failures and likely do not have any failure
+                * handling code.
+                */
+               if (gfp & __GFP_NOFAIL) {
+                       page_counter_charge(&memcg->kmem, nr_pages);
+                       return 0;
+               }
                cancel_charge(memcg, nr_pages);
                return -ENOMEM;
        }
@@ -3512,6 +3640,9 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
                        ret = mem_cgroup_resize_max(memcg, nr_pages, true);
                        break;
                case _KMEM:
+                       pr_warn_once("kmem.limit_in_bytes is deprecated and will be removed. "
+                                    "Please report your usecase to linux-mm@kvack.org if you "
+                                    "depend on this functionality.\n");
                        ret = memcg_update_kmem_max(memcg, nr_pages);
                        break;
                case _TCP:
@@ -4805,11 +4936,6 @@ static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
        }
 }
 
-static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
-{
-       mem_cgroup_id_get_many(memcg, 1);
-}
-
 static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
 {
        mem_cgroup_id_put_many(memcg, 1);
@@ -4955,6 +5081,11 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
                memcg->cgwb_frn[i].done =
                        __WB_COMPLETION_INIT(&memcg_cgwb_frn_waitq);
 #endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       spin_lock_init(&memcg->deferred_split_queue.split_queue_lock);
+       INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue);
+       memcg->deferred_split_queue.split_queue_len = 0;
+#endif
        idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
        return memcg;
 fail:
@@ -5333,6 +5464,14 @@ static int mem_cgroup_move_account(struct page *page,
                __mod_memcg_state(to, NR_WRITEBACK, nr_pages);
        }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       if (compound && !list_empty(page_deferred_list(page))) {
+               spin_lock(&from->deferred_split_queue.split_queue_lock);
+               list_del_init(page_deferred_list(page));
+               from->deferred_split_queue.split_queue_len--;
+               spin_unlock(&from->deferred_split_queue.split_queue_lock);
+       }
+#endif
        /*
         * It is safe to change page->mem_cgroup here because the page
         * is referenced, charged, and isolated - we can't race with
@@ -5341,6 +5480,17 @@ static int mem_cgroup_move_account(struct page *page,
 
        /* caller should have done css_get */
        page->mem_cgroup = to;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       if (compound && list_empty(page_deferred_list(page))) {
+               spin_lock(&to->deferred_split_queue.split_queue_lock);
+               list_add_tail(page_deferred_list(page),
+                             &to->deferred_split_queue.split_queue);
+               to->deferred_split_queue.split_queue_len++;
+               spin_unlock(&to->deferred_split_queue.split_queue_lock);
+       }
+#endif
+
        spin_unlock_irqrestore(&from->move_lock, flags);
 
        ret = 0;
@@ -5499,17 +5649,16 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
        return 0;
 }
 
+static const struct mm_walk_ops precharge_walk_ops = {
+       .pmd_entry      = mem_cgroup_count_precharge_pte_range,
+};
+
 static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
 {
        unsigned long precharge;
 
-       struct mm_walk mem_cgroup_count_precharge_walk = {
-               .pmd_entry = mem_cgroup_count_precharge_pte_range,
-               .mm = mm,
-       };
        down_read(&mm->mmap_sem);
-       walk_page_range(0, mm->highest_vm_end,
-                       &mem_cgroup_count_precharge_walk);
+       walk_page_range(mm, 0, mm->highest_vm_end, &precharge_walk_ops, NULL);
        up_read(&mm->mmap_sem);
 
        precharge = mc.precharge;
@@ -5778,13 +5927,12 @@ put:                    /* get_mctgt_type() gets the page */
        return ret;
 }
 
+static const struct mm_walk_ops charge_walk_ops = {
+       .pmd_entry      = mem_cgroup_move_charge_pte_range,
+};
+
 static void mem_cgroup_move_charge(void)
 {
-       struct mm_walk mem_cgroup_move_charge_walk = {
-               .pmd_entry = mem_cgroup_move_charge_pte_range,
-               .mm = mc.mm,
-       };
-
        lru_add_drain_all();
        /*
         * Signal lock_page_memcg() to take the memcg's move_lock
@@ -5810,7 +5958,8 @@ retry:
         * When we have consumed all precharges and failed in doing
         * additional charge, the page walk just aborts.
         */
-       walk_page_range(0, mc.mm->highest_vm_end, &mem_cgroup_move_charge_walk);
+       walk_page_range(mc.mm, 0, mc.mm->highest_vm_end, &charge_walk_ops,
+                       NULL);
 
        up_read(&mc.mm->mmap_sem);
        atomic_dec(&mc.from->moving_account);
@@ -6512,7 +6661,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
                unsigned int nr_pages = 1;
 
                if (PageTransHuge(page)) {
-                       nr_pages <<= compound_order(page);
+                       nr_pages = compound_nr(page);
                        ug->nr_huge += nr_pages;
                }
                if (PageAnon(page))
@@ -6524,7 +6673,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
                }
                ug->pgpgout++;
        } else {
-               ug->nr_kmem += 1 << compound_order(page);
+               ug->nr_kmem += compound_nr(page);
                __ClearPageKmemcg(page);
        }
 
index 650e65a..2647c89 100644 (file)
@@ -39,6 +39,7 @@ static void memfd_tag_pins(struct xa_state *xas)
        xas_for_each(xas, page, ULONG_MAX) {
                if (xa_is_value(page))
                        continue;
+               page = find_subpage(page, xas->xa_index);
                if (page_count(page) - page_mapcount(page) > 1)
                        xas_set_mark(xas, MEMFD_TAG_PINNED);
 
@@ -88,6 +89,7 @@ static int memfd_wait_for_pins(struct address_space *mapping)
                        bool clear = true;
                        if (xa_is_value(page))
                                continue;
+                       page = find_subpage(page, xas.xa_index);
                        if (page_count(page) - page_mapcount(page) != 1) {
                                /*
                                 * On the last scan, we clean up all those tags
index b1dff75..b1ca51a 100644 (file)
@@ -518,7 +518,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
                 (long long)pte_val(pte), (long long)pmd_val(*pmd));
        if (page)
                dump_page(page, "bad pte");
-       pr_alert("addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",
+       pr_alert("addr:%px vm_flags:%08lx anon_vma:%px mapping:%px index:%lx\n",
                 (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
        pr_alert("file:%pD fault:%ps mmap:%ps readpage:%ps\n",
                 vma->vm_file,
@@ -1026,6 +1026,9 @@ again:
                if (pte_none(ptent))
                        continue;
 
+               if (need_resched())
+                       break;
+
                if (pte_present(ptent)) {
                        struct page *page;
 
@@ -1093,7 +1096,6 @@ again:
                if (unlikely(details))
                        continue;
 
-               entry = pte_to_swp_entry(ptent);
                if (!non_swap_entry(entry))
                        rss[MM_SWAPENTS]--;
                else if (is_migration_entry(entry)) {
@@ -1124,8 +1126,11 @@ again:
        if (force_flush) {
                force_flush = 0;
                tlb_flush_mmu(tlb);
-               if (addr != end)
-                       goto again;
+       }
+
+       if (addr != end) {
+               cond_resched();
+               goto again;
        }
 
        return addr;
index c73f099..b1be791 100644 (file)
@@ -632,33 +632,30 @@ static void generic_online_page(struct page *page, unsigned int order)
 #endif
 }
 
-static int online_pages_blocks(unsigned long start, unsigned long nr_pages)
-{
-       unsigned long end = start + nr_pages;
-       int order, onlined_pages = 0;
-
-       while (start < end) {
-               order = min(MAX_ORDER - 1,
-                       get_order(PFN_PHYS(end) - PFN_PHYS(start)));
-               (*online_page_callback)(pfn_to_page(start), order);
-
-               onlined_pages += (1UL << order);
-               start += (1UL << order);
-       }
-       return onlined_pages;
-}
-
 static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
                        void *arg)
 {
-       unsigned long onlined_pages = *(unsigned long *)arg;
+       const unsigned long end_pfn = start_pfn + nr_pages;
+       unsigned long pfn;
+       int order;
 
-       if (PageReserved(pfn_to_page(start_pfn)))
-               onlined_pages += online_pages_blocks(start_pfn, nr_pages);
+       /*
+        * Online the pages. The callback might decide to keep some pages
+        * PG_reserved (to add them to the buddy later), but we still account
+        * them as being online/belonging to this zone ("present").
+        */
+       for (pfn = start_pfn; pfn < end_pfn; pfn += 1ul << order) {
+               order = min(MAX_ORDER - 1, get_order(PFN_PHYS(end_pfn - pfn)));
+               /* __free_pages_core() wants pfns to be aligned to the order */
+               if (WARN_ON_ONCE(!IS_ALIGNED(pfn, 1ul << order)))
+                       order = 0;
+               (*online_page_callback)(pfn_to_page(pfn), order);
+       }
 
-       online_mem_sections(start_pfn, start_pfn + nr_pages);
+       /* mark all involved sections as online */
+       online_mem_sections(start_pfn, end_pfn);
 
-       *(unsigned long *)arg = onlined_pages;
+       *(unsigned long *)arg += nr_pages;
        return 0;
 }
 
@@ -714,8 +711,13 @@ static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon
                pgdat->node_start_pfn = start_pfn;
 
        pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
-}
 
+}
+/*
+ * Associate the pfn range with the given zone, initializing the memmaps
+ * and resizing the pgdat/zone data to span the added pages. After this
+ * call, all affected pages are PG_reserved.
+ */
 void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
                unsigned long nr_pages, struct vmem_altmap *altmap)
 {
@@ -804,20 +806,6 @@ struct zone * zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
        return default_zone_for_pfn(nid, start_pfn, nr_pages);
 }
 
-/*
- * Associates the given pfn range with the given node and the zone appropriate
- * for the given online type.
- */
-static struct zone * __meminit move_pfn_range(int online_type, int nid,
-               unsigned long start_pfn, unsigned long nr_pages)
-{
-       struct zone *zone;
-
-       zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
-       move_pfn_range_to_zone(zone, start_pfn, nr_pages, NULL);
-       return zone;
-}
-
 int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)
 {
        unsigned long flags;
@@ -840,7 +828,8 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
        put_device(&mem->dev);
 
        /* associate pfn range with the zone */
-       zone = move_pfn_range(online_type, nid, pfn, nr_pages);
+       zone = zone_for_pfn_range(online_type, nid, pfn, nr_pages);
+       move_pfn_range_to_zone(zone, pfn, nr_pages, NULL);
 
        arg.start_pfn = pfn;
        arg.nr_pages = nr_pages;
@@ -864,6 +853,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
        ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages,
                online_pages_range);
        if (ret) {
+               /* not a single memory resource was applicable */
                if (need_zonelists_rebuild)
                        zone_pcp_reset(zone);
                goto failed_addition;
@@ -877,27 +867,22 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
 
        shuffle_zone(zone);
 
-       if (onlined_pages) {
-               node_states_set_node(nid, &arg);
-               if (need_zonelists_rebuild)
-                       build_all_zonelists(NULL);
-               else
-                       zone_pcp_update(zone);
-       }
+       node_states_set_node(nid, &arg);
+       if (need_zonelists_rebuild)
+               build_all_zonelists(NULL);
+       else
+               zone_pcp_update(zone);
 
        init_per_zone_wmark_min();
 
-       if (onlined_pages) {
-               kswapd_run(nid);
-               kcompactd_run(nid);
-       }
+       kswapd_run(nid);
+       kcompactd_run(nid);
 
        vm_total_pages = nr_free_pagecache_pages();
 
        writeback_set_ratelimit();
 
-       if (onlined_pages)
-               memory_notify(MEM_ONLINE, &arg);
+       memory_notify(MEM_ONLINE, &arg);
        mem_hotplug_done();
        return 0;
 
@@ -933,8 +918,11 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
                if (!pgdat)
                        return NULL;
 
+               pgdat->per_cpu_nodestats =
+                       alloc_percpu(struct per_cpu_nodestat);
                arch_refresh_nodedata(nid, pgdat);
        } else {
+               int cpu;
                /*
                 * Reset the nr_zones, order and classzone_idx before reuse.
                 * Note that kswapd will init kswapd_classzone_idx properly
@@ -943,6 +931,12 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
                pgdat->nr_zones = 0;
                pgdat->kswapd_order = 0;
                pgdat->kswapd_classzone_idx = 0;
+               for_each_online_cpu(cpu) {
+                       struct per_cpu_nodestat *p;
+
+                       p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
+                       memset(p, 0, sizeof(*p));
+               }
        }
 
        /* we can use NODE_DATA(nid) from here */
@@ -952,7 +946,6 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 
        /* init node's zones as empty zones, we don't have any present pages.*/
        free_area_init_core_hotplug(nid);
-       pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
 
        /*
         * The node we allocated has no zone fallback lists. For avoiding
@@ -1309,7 +1302,7 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
                head = compound_head(page);
                if (page_huge_active(head))
                        return pfn;
-               skip = (1 << compound_order(head)) - (page - head);
+               skip = compound_nr(head) - (page - head);
                pfn += skip - 1;
        }
        return 0;
@@ -1347,7 +1340,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 
                if (PageHuge(page)) {
                        struct page *head = compound_head(page);
-                       pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
+                       pfn = page_to_pfn(head) + compound_nr(head) - 1;
                        isolate_huge_page(head, &source);
                        continue;
                } else if (PageTransHuge(page))
@@ -1662,7 +1655,7 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
                phys_addr_t beginpa, endpa;
 
                beginpa = PFN_PHYS(section_nr_to_pfn(mem->start_section_nr));
-               endpa = PFN_PHYS(section_nr_to_pfn(mem->end_section_nr + 1))-1;
+               endpa = beginpa + memory_block_size_bytes() - 1;
                pr_warn("removing memory fails, because memory [%pa-%pa] is onlined\n",
                        &beginpa, &endpa);
 
@@ -1800,7 +1793,7 @@ void __remove_memory(int nid, u64 start, u64 size)
 {
 
        /*
-        * trigger BUG() is some memory is not offlined prior to calling this
+        * trigger BUG() if some memory is not offlined prior to calling this
         * function
         */
        if (try_remove_memory(nid, start, size))
index 65e0874..de27d08 100644 (file)
@@ -68,7 +68,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/mempolicy.h>
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/highmem.h>
 #include <linux/hugetlb.h>
 #include <linux/kernel.h>
@@ -655,6 +655,12 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
        return 1;
 }
 
+static const struct mm_walk_ops queue_pages_walk_ops = {
+       .hugetlb_entry          = queue_pages_hugetlb,
+       .pmd_entry              = queue_pages_pte_range,
+       .test_walk              = queue_pages_test_walk,
+};
+
 /*
  * Walk through page tables and collect pages to be migrated.
  *
@@ -679,15 +685,8 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
                .nmask = nodes,
                .prev = NULL,
        };
-       struct mm_walk queue_pages_walk = {
-               .hugetlb_entry = queue_pages_hugetlb,
-               .pmd_entry = queue_pages_pte_range,
-               .test_walk = queue_pages_test_walk,
-               .mm = mm,
-               .private = &qp,
-       };
 
-       return walk_page_range(start, end, &queue_pages_walk);
+       return walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
 }
 
 /*
@@ -1406,6 +1405,7 @@ static long kernel_mbind(unsigned long start, unsigned long len,
        int err;
        unsigned short mode_flags;
 
+       start = untagged_addr(start);
        mode_flags = mode & MPOL_MODE_FLAGS;
        mode &= ~MPOL_MODE_FLAGS;
        if (mode >= MPOL_MAX)
@@ -1513,10 +1513,6 @@ static int kernel_migrate_pages(pid_t pid, unsigned long maxnode,
        if (nodes_empty(*new))
                goto out_put;
 
-       nodes_and(*new, *new, node_states[N_MEMORY]);
-       if (nodes_empty(*new))
-               goto out_put;
-
        err = security_task_movememory(task);
        if (err)
                goto out_put;
@@ -1563,6 +1559,8 @@ static int kernel_get_mempolicy(int __user *policy,
        int uninitialized_var(pval);
        nodemask_t nodes;
 
+       addr = untagged_addr(addr);
+
        if (nmask != NULL && maxnode < nr_node_ids)
                return -EINVAL;
 
index ed70c4e..32c79b5 100644 (file)
@@ -21,13 +21,13 @@ DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
 EXPORT_SYMBOL(devmap_managed_key);
 static atomic_t devmap_managed_enable;
 
-static void devmap_managed_enable_put(void *data)
+static void devmap_managed_enable_put(void)
 {
        if (atomic_dec_and_test(&devmap_managed_enable))
                static_branch_disable(&devmap_managed_key);
 }
 
-static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap)
+static int devmap_managed_enable_get(struct dev_pagemap *pgmap)
 {
        if (!pgmap->ops || !pgmap->ops->page_free) {
                WARN(1, "Missing page_free method\n");
@@ -36,13 +36,16 @@ static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgm
 
        if (atomic_inc_return(&devmap_managed_enable) == 1)
                static_branch_enable(&devmap_managed_key);
-       return devm_add_action_or_reset(dev, devmap_managed_enable_put, NULL);
+       return 0;
 }
 #else
-static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap)
+static int devmap_managed_enable_get(struct dev_pagemap *pgmap)
 {
        return -EINVAL;
 }
+static void devmap_managed_enable_put(void)
+{
+}
 #endif /* CONFIG_DEV_PAGEMAP_OPS */
 
 static void pgmap_array_delete(struct resource *res)
@@ -99,10 +102,8 @@ static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
                pgmap->ref = NULL;
 }
 
-static void devm_memremap_pages_release(void *data)
+void memunmap_pages(struct dev_pagemap *pgmap)
 {
-       struct dev_pagemap *pgmap = data;
-       struct device *dev = pgmap->dev;
        struct resource *res = &pgmap->res;
        unsigned long pfn;
        int nid;
@@ -129,8 +130,14 @@ static void devm_memremap_pages_release(void *data)
 
        untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
        pgmap_array_delete(res);
-       dev_WARN_ONCE(dev, pgmap->altmap.alloc,
-                     "%s: failed to free all reserved pages\n", __func__);
+       WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n");
+       devmap_managed_enable_put();
+}
+EXPORT_SYMBOL_GPL(memunmap_pages);
+
+static void devm_memremap_pages_release(void *data)
+{
+       memunmap_pages(data);
 }
 
 static void dev_pagemap_percpu_release(struct percpu_ref *ref)
@@ -141,27 +148,12 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref)
        complete(&pgmap->done);
 }
 
-/**
- * devm_memremap_pages - remap and provide memmap backing for the given resource
- * @dev: hosting device for @res
- * @pgmap: pointer to a struct dev_pagemap
- *
- * Notes:
- * 1/ At a minimum the res and type members of @pgmap must be initialized
- *    by the caller before passing it to this function
- *
- * 2/ The altmap field may optionally be initialized, in which case
- *    PGMAP_ALTMAP_VALID must be set in pgmap->flags.
- *
- * 3/ The ref field may optionally be provided, in which pgmap->ref must be
- *    'live' on entry and will be killed and reaped at
- *    devm_memremap_pages_release() time, or if this routine fails.
- *
- * 4/ res is expected to be a host memory range that could feasibly be
- *    treated as a "System RAM" range, i.e. not a device mmio range, but
- *    this is not enforced.
+/*
+ * Non-device-managed version of devm_memremap_pages(), undone by
+ * memunmap_pages().  Please use devm_memremap_pages() if you have a struct
+ * device available.
  */
-void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
+void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 {
        struct resource *res = &pgmap->res;
        struct dev_pagemap *conflict_pgmap;
@@ -172,7 +164,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
                .altmap = pgmap_altmap(pgmap),
        };
        pgprot_t pgprot = PAGE_KERNEL;
-       int error, nid, is_ram;
+       int error, is_ram;
        bool need_devmap_managed = true;
 
        switch (pgmap->type) {
@@ -220,14 +212,14 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
        }
 
        if (need_devmap_managed) {
-               error = devmap_managed_enable_get(dev, pgmap);
+               error = devmap_managed_enable_get(pgmap);
                if (error)
                        return ERR_PTR(error);
        }
 
        conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->start), NULL);
        if (conflict_pgmap) {
-               dev_WARN(dev, "Conflicting mapping in same section\n");
+               WARN(1, "Conflicting mapping in same section\n");
                put_dev_pagemap(conflict_pgmap);
                error = -ENOMEM;
                goto err_array;
@@ -235,7 +227,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 
        conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->end), NULL);
        if (conflict_pgmap) {
-               dev_WARN(dev, "Conflicting mapping in same section\n");
+               WARN(1, "Conflicting mapping in same section\n");
                put_dev_pagemap(conflict_pgmap);
                error = -ENOMEM;
                goto err_array;
@@ -251,14 +243,11 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
                goto err_array;
        }
 
-       pgmap->dev = dev;
-
        error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start),
                                PHYS_PFN(res->end), pgmap, GFP_KERNEL));
        if (error)
                goto err_array;
 
-       nid = dev_to_node(dev);
        if (nid < 0)
                nid = numa_mem_id();
 
@@ -314,12 +303,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
                                PHYS_PFN(res->start),
                                PHYS_PFN(resource_size(res)), pgmap);
        percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap));
-
-       error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
-                       pgmap);
-       if (error)
-               return ERR_PTR(error);
-
        return __va(res->start);
 
  err_add_memory:
@@ -331,8 +314,46 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
  err_array:
        dev_pagemap_kill(pgmap);
        dev_pagemap_cleanup(pgmap);
+       devmap_managed_enable_put();
        return ERR_PTR(error);
 }
+EXPORT_SYMBOL_GPL(memremap_pages);
+
+/**
+ * devm_memremap_pages - remap and provide memmap backing for the given resource
+ * @dev: hosting device for @res
+ * @pgmap: pointer to a struct dev_pagemap
+ *
+ * Notes:
+ * 1/ At a minimum the res and type members of @pgmap must be initialized
+ *    by the caller before passing it to this function
+ *
+ * 2/ The altmap field may optionally be initialized, in which case
+ *    PGMAP_ALTMAP_VALID must be set in pgmap->flags.
+ *
+ * 3/ The ref field may optionally be provided, in which pgmap->ref must be
+ *    'live' on entry and will be killed and reaped at
+ *    devm_memremap_pages_release() time, or if this routine fails.
+ *
+ * 4/ res is expected to be a host memory range that could feasibly be
+ *    treated as a "System RAM" range, i.e. not a device mmio range, but
+ *    this is not enforced.
+ */
+void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
+{
+       int error;
+       void *ret;
+
+       ret = memremap_pages(pgmap, dev_to_node(dev));
+       if (IS_ERR(ret))
+               return ret;
+
+       error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
+                       pgmap);
+       if (error)
+               return ERR_PTR(error);
+       return ret;
+}
 EXPORT_SYMBOL_GPL(devm_memremap_pages);
 
 void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap)
index a42858d..4fe45d1 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/hugetlb.h>
 #include <linux/hugetlb_cgroup.h>
 #include <linux/gfp.h>
+#include <linux/pagewalk.h>
 #include <linux/pfn_t.h>
 #include <linux/memremap.h>
 #include <linux/userfaultfd_k.h>
@@ -459,7 +460,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 
                for (i = 1; i < HPAGE_PMD_NR; i++) {
                        xas_next(&xas);
-                       xas_store(&xas, newpage + i);
+                       xas_store(&xas, newpage);
                }
        }
 
@@ -1611,7 +1612,7 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
                        goto out_flush;
                if (get_user(node, nodes + i))
                        goto out_flush;
-               addr = (unsigned long)p;
+               addr = (unsigned long)untagged_addr(p);
 
                err = -ENODEV;
                if (node < 0 || node >= MAX_NUMNODES)
@@ -1891,7 +1892,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
        VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
 
        /* Avoid migrating to a node that is nearly full */
-       if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
+       if (!migrate_balanced_pgdat(pgdat, compound_nr(page)))
                return 0;
 
        if (isolate_lru_page(page))
@@ -2119,17 +2120,7 @@ out_unlock:
 
 #endif /* CONFIG_NUMA */
 
-#if defined(CONFIG_MIGRATE_VMA_HELPER)
-struct migrate_vma {
-       struct vm_area_struct   *vma;
-       unsigned long           *dst;
-       unsigned long           *src;
-       unsigned long           cpages;
-       unsigned long           npages;
-       unsigned long           start;
-       unsigned long           end;
-};
-
+#ifdef CONFIG_DEVICE_PRIVATE
 static int migrate_vma_collect_hole(unsigned long start,
                                    unsigned long end,
                                    struct mm_walk *walk)
@@ -2227,17 +2218,15 @@ again:
                pte_t pte;
 
                pte = *ptep;
-               pfn = pte_pfn(pte);
 
                if (pte_none(pte)) {
                        mpfn = MIGRATE_PFN_MIGRATE;
                        migrate->cpages++;
-                       pfn = 0;
                        goto next;
                }
 
                if (!pte_present(pte)) {
-                       mpfn = pfn = 0;
+                       mpfn = 0;
 
                        /*
                         * Only care about unaddressable device page special
@@ -2249,15 +2238,15 @@ again:
                                goto next;
 
                        page = device_private_entry_to_page(entry);
-                       mpfn = migrate_pfn(page_to_pfn(page))|
-                               MIGRATE_PFN_DEVICE | MIGRATE_PFN_MIGRATE;
+                       mpfn = migrate_pfn(page_to_pfn(page)) |
+                                       MIGRATE_PFN_MIGRATE;
                        if (is_write_device_private_entry(entry))
                                mpfn |= MIGRATE_PFN_WRITE;
                } else {
+                       pfn = pte_pfn(pte);
                        if (is_zero_pfn(pfn)) {
                                mpfn = MIGRATE_PFN_MIGRATE;
                                migrate->cpages++;
-                               pfn = 0;
                                goto next;
                        }
                        page = vm_normal_page(migrate->vma, addr, pte);
@@ -2267,10 +2256,9 @@ again:
 
                /* FIXME support THP */
                if (!page || !page->mapping || PageTransCompound(page)) {
-                       mpfn = pfn = 0;
+                       mpfn = 0;
                        goto next;
                }
-               pfn = page_to_pfn(page);
 
                /*
                 * By getting a reference on the page we pin it and that blocks
@@ -2329,6 +2317,11 @@ next:
        return 0;
 }
 
+static const struct mm_walk_ops migrate_vma_walk_ops = {
+       .pmd_entry              = migrate_vma_collect_pmd,
+       .pte_hole               = migrate_vma_collect_hole,
+};
+
 /*
  * migrate_vma_collect() - collect pages over a range of virtual addresses
  * @migrate: migrate struct containing all migration information
@@ -2340,21 +2333,15 @@ next:
 static void migrate_vma_collect(struct migrate_vma *migrate)
 {
        struct mmu_notifier_range range;
-       struct mm_walk mm_walk = {
-               .pmd_entry = migrate_vma_collect_pmd,
-               .pte_hole = migrate_vma_collect_hole,
-               .vma = migrate->vma,
-               .mm = migrate->vma->vm_mm,
-               .private = migrate,
-       };
 
-       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm_walk.mm,
-                               migrate->start,
-                               migrate->end);
+       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL,
+                       migrate->vma->vm_mm, migrate->start, migrate->end);
        mmu_notifier_invalidate_range_start(&range);
-       walk_page_range(migrate->start, migrate->end, &mm_walk);
-       mmu_notifier_invalidate_range_end(&range);
 
+       walk_page_range(migrate->vma->vm_mm, migrate->start, migrate->end,
+                       &migrate_vma_walk_ops, migrate);
+
+       mmu_notifier_invalidate_range_end(&range);
        migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
 }
 
@@ -2577,6 +2564,110 @@ restore:
        }
 }
 
+/**
+ * migrate_vma_setup() - prepare to migrate a range of memory
+ * @args: contains the vma, start, and pfns arrays for the migration
+ *
+ * Returns: negative errno on failures, 0 when 0 or more pages were migrated
+ * without an error.
+ *
+ * Prepare to migrate a range of virtual memory addresses by collecting all
+ * the pages backing each virtual address in the range, saving them inside the
+ * src array.  Then lock those pages and unmap them. Once the pages are locked
+ * and unmapped, check whether each page is pinned or not.  Pages that aren't
+ * pinned have the MIGRATE_PFN_MIGRATE flag set (by this function) in the
+ * corresponding src array entry.  Then restores any pages that are pinned, by
+ * remapping and unlocking those pages.
+ *
+ * The caller should then allocate destination memory and copy source memory to
+ * it for all those entries (ie with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE
+ * flag set).  Once these are allocated and copied, the caller must update each
+ * corresponding entry in the dst array with the pfn value of the destination
+ * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_LOCKED flags set
+ * (destination pages must have their struct pages locked, via lock_page()).
+ *
+ * Note that the caller does not have to migrate all the pages that are marked
+ * with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration from
+ * device memory to system memory.  If the caller cannot migrate a device page
+ * back to system memory, then it must return VM_FAULT_SIGBUS, which has severe
+ * consequences for the userspace process, so it must be avoided if at all
+ * possible.
+ *
+ * For empty entries inside CPU page table (pte_none() or pmd_none() is true) we
+ * do set MIGRATE_PFN_MIGRATE flag inside the corresponding source array thus
+ * allowing the caller to allocate device memory for those unbacked virtual
+ * addresses.  For this the caller simply has to allocate device memory and
+ * properly set the destination entry like for regular migration.  Note that
+ * this can still fail, and thus the device driver must check if the
+ * migration was successful for those entries after calling migrate_vma_pages()
+ * just like for regular migration.
+ *
+ * After that, the callers must call migrate_vma_pages() to go over each entry
+ * in the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag
+ * set. If the corresponding entry in dst array has MIGRATE_PFN_VALID flag set,
+ * then migrate_vma_pages() tries to migrate struct page information from the
+ * source struct page to the destination struct page.  If it fails to migrate
+ * the struct page information, then it clears the MIGRATE_PFN_MIGRATE flag in
+ * the src array.
+ *
+ * At this point all successfully migrated pages have an entry in the src
+ * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst
+ * array entry with MIGRATE_PFN_VALID flag set.
+ *
+ * Once migrate_vma_pages() returns the caller may inspect which pages were
+ * successfully migrated, and which were not.  Successfully migrated pages will
+ * have the MIGRATE_PFN_MIGRATE flag set for their src array entry.
+ *
+ * It is safe to update device page table after migrate_vma_pages() because
+ * both destination and source page are still locked, and the mmap_sem is held
+ * in read mode (hence no one can unmap the range being migrated).
+ *
+ * Once the caller is done cleaning up things and updating its page table (if it
+ * chose to do so, this is not an obligation) it finally calls
+ * migrate_vma_finalize() to update the CPU page table to point to new pages
+ * for successfully migrated pages or otherwise restore the CPU page table to
+ * point to the original source pages.
+ */
+int migrate_vma_setup(struct migrate_vma *args)
+{
+       long nr_pages = (args->end - args->start) >> PAGE_SHIFT;
+
+       args->start &= PAGE_MASK;
+       args->end &= PAGE_MASK;
+       if (!args->vma || is_vm_hugetlb_page(args->vma) ||
+           (args->vma->vm_flags & VM_SPECIAL) || vma_is_dax(args->vma))
+               return -EINVAL;
+       if (nr_pages <= 0)
+               return -EINVAL;
+       if (args->start < args->vma->vm_start ||
+           args->start >= args->vma->vm_end)
+               return -EINVAL;
+       if (args->end <= args->vma->vm_start || args->end > args->vma->vm_end)
+               return -EINVAL;
+       if (!args->src || !args->dst)
+               return -EINVAL;
+
+       memset(args->src, 0, sizeof(*args->src) * nr_pages);
+       args->cpages = 0;
+       args->npages = 0;
+
+       migrate_vma_collect(args);
+
+       if (args->cpages)
+               migrate_vma_prepare(args);
+       if (args->cpages)
+               migrate_vma_unmap(args);
+
+       /*
+        * At this point pages are locked and unmapped, and thus they have
+        * stable content and can safely be copied to destination memory that
+        * is allocated by the drivers.
+        */
+       return 0;
+
+}
+EXPORT_SYMBOL(migrate_vma_setup);
+
 static void migrate_vma_insert_page(struct migrate_vma *migrate,
                                    unsigned long addr,
                                    struct page *page,
@@ -2708,7 +2799,7 @@ abort:
        *src &= ~MIGRATE_PFN_MIGRATE;
 }
 
-/*
+/**
  * migrate_vma_pages() - migrate meta-data from src page to dst page
  * @migrate: migrate struct containing all migration information
  *
@@ -2716,7 +2807,7 @@ abort:
  * struct page. This effectively finishes the migration from source page to the
  * destination page.
  */
-static void migrate_vma_pages(struct migrate_vma *migrate)
+void migrate_vma_pages(struct migrate_vma *migrate)
 {
        const unsigned long npages = migrate->npages;
        const unsigned long start = migrate->start;
@@ -2790,8 +2881,9 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
        if (notified)
                mmu_notifier_invalidate_range_only_end(&range);
 }
+EXPORT_SYMBOL(migrate_vma_pages);
 
-/*
+/**
  * migrate_vma_finalize() - restore CPU page table entry
  * @migrate: migrate struct containing all migration information
  *
@@ -2802,7 +2894,7 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
  * This also unlocks the pages and puts them back on the lru, or drops the extra
  * refcount, for device pages.
  */
-static void migrate_vma_finalize(struct migrate_vma *migrate)
+void migrate_vma_finalize(struct migrate_vma *migrate)
 {
        const unsigned long npages = migrate->npages;
        unsigned long i;
@@ -2845,124 +2937,5 @@ static void migrate_vma_finalize(struct migrate_vma *migrate)
                }
        }
 }
-
-/*
- * migrate_vma() - migrate a range of memory inside vma
- *
- * @ops: migration callback for allocating destination memory and copying
- * @vma: virtual memory area containing the range to be migrated
- * @start: start address of the range to migrate (inclusive)
- * @end: end address of the range to migrate (exclusive)
- * @src: array of hmm_pfn_t containing source pfns
- * @dst: array of hmm_pfn_t containing destination pfns
- * @private: pointer passed back to each of the callback
- * Returns: 0 on success, error code otherwise
- *
- * This function tries to migrate a range of memory virtual address range, using
- * callbacks to allocate and copy memory from source to destination. First it
- * collects all the pages backing each virtual address in the range, saving this
- * inside the src array. Then it locks those pages and unmaps them. Once the pages
- * are locked and unmapped, it checks whether each page is pinned or not. Pages
- * that aren't pinned have the MIGRATE_PFN_MIGRATE flag set (by this function)
- * in the corresponding src array entry. It then restores any pages that are
- * pinned, by remapping and unlocking those pages.
- *
- * At this point it calls the alloc_and_copy() callback. For documentation on
- * what is expected from that callback, see struct migrate_vma_ops comments in
- * include/linux/migrate.h
- *
- * After the alloc_and_copy() callback, this function goes over each entry in
- * the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag
- * set. If the corresponding entry in dst array has MIGRATE_PFN_VALID flag set,
- * then the function tries to migrate struct page information from the source
- * struct page to the destination struct page. If it fails to migrate the struct
- * page information, then it clears the MIGRATE_PFN_MIGRATE flag in the src
- * array.
- *
- * At this point all successfully migrated pages have an entry in the src
- * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst
- * array entry with MIGRATE_PFN_VALID flag set.
- *
- * It then calls the finalize_and_map() callback. See comments for "struct
- * migrate_vma_ops", in include/linux/migrate.h for details about
- * finalize_and_map() behavior.
- *
- * After the finalize_and_map() callback, for successfully migrated pages, this
- * function updates the CPU page table to point to new pages, otherwise it
- * restores the CPU page table to point to the original source pages.
- *
- * Function returns 0 after the above steps, even if no pages were migrated
- * (The function only returns an error if any of the arguments are invalid.)
- *
- * Both src and dst array must be big enough for (end - start) >> PAGE_SHIFT
- * unsigned long entries.
- */
-int migrate_vma(const struct migrate_vma_ops *ops,
-               struct vm_area_struct *vma,
-               unsigned long start,
-               unsigned long end,
-               unsigned long *src,
-               unsigned long *dst,
-               void *private)
-{
-       struct migrate_vma migrate;
-
-       /* Sanity check the arguments */
-       start &= PAGE_MASK;
-       end &= PAGE_MASK;
-       if (!vma || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) ||
-                       vma_is_dax(vma))
-               return -EINVAL;
-       if (start < vma->vm_start || start >= vma->vm_end)
-               return -EINVAL;
-       if (end <= vma->vm_start || end > vma->vm_end)
-               return -EINVAL;
-       if (!ops || !src || !dst || start >= end)
-               return -EINVAL;
-
-       memset(src, 0, sizeof(*src) * ((end - start) >> PAGE_SHIFT));
-       migrate.src = src;
-       migrate.dst = dst;
-       migrate.start = start;
-       migrate.npages = 0;
-       migrate.cpages = 0;
-       migrate.end = end;
-       migrate.vma = vma;
-
-       /* Collect, and try to unmap source pages */
-       migrate_vma_collect(&migrate);
-       if (!migrate.cpages)
-               return 0;
-
-       /* Lock and isolate page */
-       migrate_vma_prepare(&migrate);
-       if (!migrate.cpages)
-               return 0;
-
-       /* Unmap pages */
-       migrate_vma_unmap(&migrate);
-       if (!migrate.cpages)
-               return 0;
-
-       /*
-        * At this point pages are locked and unmapped, and thus they have
-        * stable content and can safely be copied to destination memory that
-        * is allocated by the callback.
-        *
-        * Note that migration can fail in migrate_vma_struct_page() for each
-        * individual page.
-        */
-       ops->alloc_and_copy(vma, src, dst, start, end, private);
-
-       /* This does the real migration of struct page */
-       migrate_vma_pages(&migrate);
-
-       ops->finalize_and_map(vma, src, dst, start, end, private);
-
-       /* Unlock and remap pages */
-       migrate_vma_finalize(&migrate);
-
-       return 0;
-}
-EXPORT_SYMBOL(migrate_vma);
-#endif /* defined(MIGRATE_VMA_HELPER) */
+EXPORT_SYMBOL(migrate_vma_finalize);
+#endif /* CONFIG_DEVICE_PRIVATE */
index 4fe91d4..49b6fa2 100644 (file)
@@ -10,7 +10,7 @@
  */
 #include <linux/pagemap.h>
 #include <linux/gfp.h>
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/mman.h>
 #include <linux/syscalls.h>
 #include <linux/swap.h>
@@ -193,6 +193,12 @@ static inline bool can_do_mincore(struct vm_area_struct *vma)
                inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
 }
 
+static const struct mm_walk_ops mincore_walk_ops = {
+       .pmd_entry              = mincore_pte_range,
+       .pte_hole               = mincore_unmapped_range,
+       .hugetlb_entry          = mincore_hugetlb,
+};
+
 /*
  * Do a chunk of "sys_mincore()". We've already checked
  * all the arguments, we hold the mmap semaphore: we should
@@ -203,12 +209,6 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
        struct vm_area_struct *vma;
        unsigned long end;
        int err;
-       struct mm_walk mincore_walk = {
-               .pmd_entry = mincore_pte_range,
-               .pte_hole = mincore_unmapped_range,
-               .hugetlb_entry = mincore_hugetlb,
-               .private = vec,
-       };
 
        vma = find_vma(current->mm, addr);
        if (!vma || addr < vma->vm_start)
@@ -219,8 +219,7 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
                memset(vec, 1, pages);
                return pages;
        }
-       mincore_walk.mm = vma->vm_mm;
-       err = walk_page_range(addr, end, &mincore_walk);
+       err = walk_page_range(vma->vm_mm, addr, end, &mincore_walk_ops, vec);
        if (err < 0)
                return err;
        return (end - addr) >> PAGE_SHIFT;
@@ -257,6 +256,8 @@ SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
        unsigned long pages;
        unsigned char *tmp;
 
+       start = untagged_addr(start);
+
        /* Check the start address: needs to be page-aligned.. */
        if (start & ~PAGE_MASK)
                return -EINVAL;
index a90099d..a72c1ee 100644 (file)
@@ -674,6 +674,8 @@ static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t fla
        unsigned long lock_limit;
        int error = -ENOMEM;
 
+       start = untagged_addr(start);
+
        if (!can_do_mlock())
                return -EPERM;
 
@@ -735,6 +737,8 @@ SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
 {
        int ret;
 
+       start = untagged_addr(start);
+
        len = PAGE_ALIGN(len + (offset_in_page(start)));
        start &= PAGE_MASK;
 
index 6bc21fc..a7d8c84 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -201,6 +201,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
        bool downgraded = false;
        LIST_HEAD(uf);
 
+       brk = untagged_addr(brk);
+
        if (down_write_killable(&mm->mmap_sem))
                return -EINTR;
 
@@ -289,9 +291,9 @@ out:
        return retval;
 }
 
-static long vma_compute_subtree_gap(struct vm_area_struct *vma)
+static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
 {
-       unsigned long max, prev_end, subtree_gap;
+       unsigned long gap, prev_end;
 
        /*
         * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
@@ -299,14 +301,21 @@ static long vma_compute_subtree_gap(struct vm_area_struct *vma)
         * an unmapped area; whereas when expanding we only require one.
         * That's a little inconsistent, but keeps the code here simpler.
         */
-       max = vm_start_gap(vma);
+       gap = vm_start_gap(vma);
        if (vma->vm_prev) {
                prev_end = vm_end_gap(vma->vm_prev);
-               if (max > prev_end)
-                       max -= prev_end;
+               if (gap > prev_end)
+                       gap -= prev_end;
                else
-                       max = 0;
+                       gap = 0;
        }
+       return gap;
+}
+
+#ifdef CONFIG_DEBUG_VM_RB
+static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma)
+{
+       unsigned long max = vma_compute_gap(vma), subtree_gap;
        if (vma->vm_rb.rb_left) {
                subtree_gap = rb_entry(vma->vm_rb.rb_left,
                                struct vm_area_struct, vm_rb)->rb_subtree_gap;
@@ -322,7 +331,6 @@ static long vma_compute_subtree_gap(struct vm_area_struct *vma)
        return max;
 }
 
-#ifdef CONFIG_DEBUG_VM_RB
 static int browse_rb(struct mm_struct *mm)
 {
        struct rb_root *root = &mm->mm_rb;
@@ -428,8 +436,9 @@ static void validate_mm(struct mm_struct *mm)
 #define validate_mm(mm) do { } while (0)
 #endif
 
-RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
-                    unsigned long, rb_subtree_gap, vma_compute_subtree_gap)
+RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks,
+                        struct vm_area_struct, vm_rb,
+                        unsigned long, rb_subtree_gap, vma_compute_gap)
 
 /*
  * Update augmented rbtree rb_subtree_gap values after vma->vm_start or
@@ -439,8 +448,8 @@ RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
 static void vma_gap_update(struct vm_area_struct *vma)
 {
        /*
-        * As it turns out, RB_DECLARE_CALLBACKS() already created a callback
-        * function that does exactly what we want.
+        * As it turns out, RB_DECLARE_CALLBACKS_MAX() already created
+        * a callback function that does exactly what we want.
         */
        vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
 }
@@ -1358,6 +1367,9 @@ static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
        if (S_ISBLK(inode->i_mode))
                return MAX_LFS_FILESIZE;
 
+       if (S_ISSOCK(inode->i_mode))
+               return MAX_LFS_FILESIZE;
+
        /* Special "we do even unsigned file positions" case */
        if (file->f_mode & FMODE_UNSIGNED_OFFSET)
                return 0;
@@ -1577,6 +1589,8 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
        struct file *file = NULL;
        unsigned long retval;
 
+       addr = untagged_addr(addr);
+
        if (!(flags & MAP_ANONYMOUS)) {
                audit_mmap_fd(fd, flags);
                file = fget(fd);
@@ -2274,12 +2288,9 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
        if (vma) {
                *pprev = vma->vm_prev;
        } else {
-               struct rb_node *rb_node = mm->mm_rb.rb_node;
-               *pprev = NULL;
-               while (rb_node) {
-                       *pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
-                       rb_node = rb_node->rb_right;
-               }
+               struct rb_node *rb_node = rb_last(&mm->mm_rb);
+
+               *pprev = rb_node ? rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL;
        }
        return vma;
 }
@@ -2878,6 +2889,7 @@ EXPORT_SYMBOL(vm_munmap);
 
 SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
 {
+       addr = untagged_addr(addr);
        profile_munmap(addr);
        return __vm_munmap(addr, len, true);
 }
index 8c943a6..7d70e5c 100644 (file)
@@ -271,8 +271,6 @@ void tlb_finish_mmu(struct mmu_gather *tlb,
 
        tlb_flush_mmu(tlb);
 
-       /* keep the page table cache within bounds */
-       check_pgt_cache();
 #ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
        tlb_batch_list_free(tlb);
 #endif
index b567062..7fde886 100644 (file)
 /* global SRCU for all MMs */
 DEFINE_STATIC_SRCU(srcu);
 
-/*
- * This function allows mmu_notifier::release callback to delay a call to
- * a function that will free appropriate resources. The function must be
- * quick and must not block.
- */
-void mmu_notifier_call_srcu(struct rcu_head *rcu,
-                           void (*func)(struct rcu_head *rcu))
-{
-       call_srcu(&srcu, rcu, func);
-}
-EXPORT_SYMBOL_GPL(mmu_notifier_call_srcu);
+#ifdef CONFIG_LOCKDEP
+struct lockdep_map __mmu_notifier_invalidate_range_start_map = {
+       .name = "mmu_notifier_invalidate_range_start"
+};
+#endif
 
 /*
  * This function can't run concurrently against mmu_notifier_register
@@ -174,11 +168,19 @@ int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
        id = srcu_read_lock(&srcu);
        hlist_for_each_entry_rcu(mn, &range->mm->mmu_notifier_mm->list, hlist) {
                if (mn->ops->invalidate_range_start) {
-                       int _ret = mn->ops->invalidate_range_start(mn, range);
+                       int _ret;
+
+                       if (!mmu_notifier_range_blockable(range))
+                               non_block_start();
+                       _ret = mn->ops->invalidate_range_start(mn, range);
+                       if (!mmu_notifier_range_blockable(range))
+                               non_block_end();
                        if (_ret) {
                                pr_info("%pS callback failed with %d in %sblockable context.\n",
                                        mn->ops->invalidate_range_start, _ret,
                                        !mmu_notifier_range_blockable(range) ? "non-" : "");
+                               WARN_ON(mmu_notifier_range_blockable(range) ||
+                                       ret != -EAGAIN);
                                ret = _ret;
                        }
                }
@@ -187,7 +189,6 @@ int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
 
        return ret;
 }
-EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_start);
 
 void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range,
                                         bool only_end)
@@ -195,6 +196,7 @@ void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range,
        struct mmu_notifier *mn;
        int id;
 
+       lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
        id = srcu_read_lock(&srcu);
        hlist_for_each_entry_rcu(mn, &range->mm->mmu_notifier_mm->list, hlist) {
                /*
@@ -214,12 +216,17 @@ void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range,
                        mn->ops->invalidate_range(mn, range->mm,
                                                  range->start,
                                                  range->end);
-               if (mn->ops->invalidate_range_end)
+               if (mn->ops->invalidate_range_end) {
+                       if (!mmu_notifier_range_blockable(range))
+                               non_block_start();
                        mn->ops->invalidate_range_end(mn, range);
+                       if (!mmu_notifier_range_blockable(range))
+                               non_block_end();
+               }
        }
        srcu_read_unlock(&srcu, id);
+       lock_map_release(&__mmu_notifier_invalidate_range_start_map);
 }
-EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_end);
 
 void __mmu_notifier_invalidate_range(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
@@ -234,35 +241,49 @@ void __mmu_notifier_invalidate_range(struct mm_struct *mm,
        }
        srcu_read_unlock(&srcu, id);
 }
-EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range);
 
-static int do_mmu_notifier_register(struct mmu_notifier *mn,
-                                   struct mm_struct *mm,
-                                   int take_mmap_sem)
+/*
+ * Same as mmu_notifier_register but here the caller must hold the
+ * mmap_sem in write mode.
+ */
+int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
 {
-       struct mmu_notifier_mm *mmu_notifier_mm;
+       struct mmu_notifier_mm *mmu_notifier_mm = NULL;
        int ret;
 
+       lockdep_assert_held_write(&mm->mmap_sem);
        BUG_ON(atomic_read(&mm->mm_users) <= 0);
 
-       ret = -ENOMEM;
-       mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
-       if (unlikely(!mmu_notifier_mm))
-               goto out;
+       if (IS_ENABLED(CONFIG_LOCKDEP)) {
+               fs_reclaim_acquire(GFP_KERNEL);
+               lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
+               lock_map_release(&__mmu_notifier_invalidate_range_start_map);
+               fs_reclaim_release(GFP_KERNEL);
+       }
 
-       if (take_mmap_sem)
-               down_write(&mm->mmap_sem);
-       ret = mm_take_all_locks(mm);
-       if (unlikely(ret))
-               goto out_clean;
+       mn->mm = mm;
+       mn->users = 1;
+
+       if (!mm->mmu_notifier_mm) {
+               /*
+                * kmalloc cannot be called under mm_take_all_locks(), but we
+                * know that mm->mmu_notifier_mm can't change while we hold
+                * the write side of the mmap_sem.
+                */
+               mmu_notifier_mm =
+                       kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
+               if (!mmu_notifier_mm)
+                       return -ENOMEM;
 
-       if (!mm_has_notifiers(mm)) {
                INIT_HLIST_HEAD(&mmu_notifier_mm->list);
                spin_lock_init(&mmu_notifier_mm->lock);
-
-               mm->mmu_notifier_mm = mmu_notifier_mm;
-               mmu_notifier_mm = NULL;
        }
+
+       ret = mm_take_all_locks(mm);
+       if (unlikely(ret))
+               goto out_clean;
+
+       /* Pairs with the mmdrop in mmu_notifier_unregister_* */
        mmgrab(mm);
 
        /*
@@ -273,48 +294,118 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
         * We can't race against any other mmu notifier method either
         * thanks to mm_take_all_locks().
         */
+       if (mmu_notifier_mm)
+               mm->mmu_notifier_mm = mmu_notifier_mm;
+
        spin_lock(&mm->mmu_notifier_mm->lock);
        hlist_add_head_rcu(&mn->hlist, &mm->mmu_notifier_mm->list);
        spin_unlock(&mm->mmu_notifier_mm->lock);
 
        mm_drop_all_locks(mm);
+       BUG_ON(atomic_read(&mm->mm_users) <= 0);
+       return 0;
+
 out_clean:
-       if (take_mmap_sem)
-               up_write(&mm->mmap_sem);
        kfree(mmu_notifier_mm);
-out:
-       BUG_ON(atomic_read(&mm->mm_users) <= 0);
        return ret;
 }
+EXPORT_SYMBOL_GPL(__mmu_notifier_register);
 
-/*
+/**
+ * mmu_notifier_register - Register a notifier on a mm
+ * @mn: The notifier to attach
+ * @mm: The mm to attach the notifier to
+ *
  * Must not hold mmap_sem nor any other VM related lock when calling
  * this registration function. Must also ensure mm_users can't go down
  * to zero while this runs to avoid races with mmu_notifier_release,
  * so mm has to be current->mm or the mm should be pinned safely such
  * as with get_task_mm(). If the mm is not current->mm, the mm_users
  * pin should be released by calling mmput after mmu_notifier_register
- * returns. mmu_notifier_unregister must be always called to
- * unregister the notifier. mm_count is automatically pinned to allow
- * mmu_notifier_unregister to safely run at any time later, before or
- * after exit_mmap. ->release will always be called before exit_mmap
- * frees the pages.
+ * returns.
+ *
+ * mmu_notifier_unregister() or mmu_notifier_put() must be always called to
+ * unregister the notifier.
+ *
+ * While the caller has a mmu_notifier get the mn->mm pointer will remain
+ * valid, and can be converted to an active mm pointer via mmget_not_zero().
  */
 int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
 {
-       return do_mmu_notifier_register(mn, mm, 1);
+       int ret;
+
+       down_write(&mm->mmap_sem);
+       ret = __mmu_notifier_register(mn, mm);
+       up_write(&mm->mmap_sem);
+       return ret;
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_register);
 
-/*
- * Same as mmu_notifier_register but here the caller must hold the
- * mmap_sem in write mode.
+static struct mmu_notifier *
+find_get_mmu_notifier(struct mm_struct *mm, const struct mmu_notifier_ops *ops)
+{
+       struct mmu_notifier *mn;
+
+       spin_lock(&mm->mmu_notifier_mm->lock);
+       hlist_for_each_entry_rcu (mn, &mm->mmu_notifier_mm->list, hlist) {
+               if (mn->ops != ops)
+                       continue;
+
+               if (likely(mn->users != UINT_MAX))
+                       mn->users++;
+               else
+                       mn = ERR_PTR(-EOVERFLOW);
+               spin_unlock(&mm->mmu_notifier_mm->lock);
+               return mn;
+       }
+       spin_unlock(&mm->mmu_notifier_mm->lock);
+       return NULL;
+}
+
+/**
+ * mmu_notifier_get_locked - Return the single struct mmu_notifier for
+ *                           the mm & ops
+ * @ops: The operations struct being subscribed with
+ * @mm : The mm to attach notifiers to
+ *
+ * This function either allocates a new mmu_notifier via
+ * ops->alloc_notifier(), or returns an already existing notifier on the
+ * list. The value of the ops pointer is used to determine when two notifiers
+ * are the same.
+ *
+ * Each call to mmu_notifier_get() must be paired with a call to
+ * mmu_notifier_put(). The caller must hold the write side of mm->mmap_sem.
+ *
+ * While the caller has a mmu_notifier get the mm pointer will remain valid,
+ * and can be converted to an active mm pointer via mmget_not_zero().
  */
-int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
+struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops,
+                                            struct mm_struct *mm)
 {
-       return do_mmu_notifier_register(mn, mm, 0);
+       struct mmu_notifier *mn;
+       int ret;
+
+       lockdep_assert_held_write(&mm->mmap_sem);
+
+       if (mm->mmu_notifier_mm) {
+               mn = find_get_mmu_notifier(mm, ops);
+               if (mn)
+                       return mn;
+       }
+
+       mn = ops->alloc_notifier(mm);
+       if (IS_ERR(mn))
+               return mn;
+       mn->ops = ops;
+       ret = __mmu_notifier_register(mn, mm);
+       if (ret)
+               goto out_free;
+       return mn;
+out_free:
+       mn->ops->free_notifier(mn);
+       return ERR_PTR(ret);
 }
-EXPORT_SYMBOL_GPL(__mmu_notifier_register);
+EXPORT_SYMBOL_GPL(mmu_notifier_get_locked);
 
 /* this is called after the last mmu_notifier_unregister() returned */
 void __mmu_notifier_mm_destroy(struct mm_struct *mm)
@@ -375,24 +466,74 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
 
-/*
- * Same as mmu_notifier_unregister but no callback and no srcu synchronization.
+static void mmu_notifier_free_rcu(struct rcu_head *rcu)
+{
+       struct mmu_notifier *mn = container_of(rcu, struct mmu_notifier, rcu);
+       struct mm_struct *mm = mn->mm;
+
+       mn->ops->free_notifier(mn);
+       /* Pairs with the get in __mmu_notifier_register() */
+       mmdrop(mm);
+}
+
+/**
+ * mmu_notifier_put - Release the reference on the notifier
+ * @mn: The notifier to act on
+ *
+ * This function must be paired with each mmu_notifier_get(), it releases the
+ * reference obtained by the get. If this is the last reference then the
+ * process to free the notifier will be run asynchronously.
+ *
+ * Unlike mmu_notifier_unregister() the get/put flow only calls ops->release
+ * when the mm_struct is destroyed. Instead free_notifier is always called to
+ * release any resources held by the user.
+ *
+ * As ops->release is not guaranteed to be called, the user must ensure that
+ * all sptes are dropped, and no new sptes can be established before
+ * mmu_notifier_put() is called.
+ *
+ * This function can be called from the ops->release callback, however the
+ * caller must still ensure it is called pairwise with mmu_notifier_get().
+ *
+ * Modules calling this function must call mmu_notifier_synchronize() in
+ * their __exit functions to ensure the async work is completed.
  */
-void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
-                                       struct mm_struct *mm)
+void mmu_notifier_put(struct mmu_notifier *mn)
 {
+       struct mm_struct *mm = mn->mm;
+
        spin_lock(&mm->mmu_notifier_mm->lock);
-       /*
-        * Can not use list_del_rcu() since __mmu_notifier_release
-        * can delete it before we hold the lock.
-        */
+       if (WARN_ON(!mn->users) || --mn->users)
+               goto out_unlock;
        hlist_del_init_rcu(&mn->hlist);
        spin_unlock(&mm->mmu_notifier_mm->lock);
 
-       BUG_ON(atomic_read(&mm->mm_count) <= 0);
-       mmdrop(mm);
+       call_srcu(&srcu, &mn->rcu, mmu_notifier_free_rcu);
+       return;
+
+out_unlock:
+       spin_unlock(&mm->mmu_notifier_mm->lock);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_put);
+
+/**
+ * mmu_notifier_synchronize - Ensure all mmu_notifiers are freed
+ *
+ * This function ensures that all outstanding async SRCU work from
+ * mmu_notifier_put() is completed. After it returns any mmu_notifier_ops
+ * associated with an unused mmu_notifier will no longer be called.
+ *
+ * Before using the caller must ensure that all of its mmu_notifiers have been
+ * fully released via mmu_notifier_put().
+ *
+ * Modules using the mmu_notifier_put() API should call this in their __exit
+ * function to avoid module unloading races.
+ */
+void mmu_notifier_synchronize(void)
+{
+       synchronize_srcu(&srcu);
 }
-EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);
+EXPORT_SYMBOL_GPL(mmu_notifier_synchronize);
 
 bool
 mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range)
index bf38dfb..7967825 100644 (file)
@@ -9,7 +9,7 @@
  *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
  */
 
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/hugetlb.h>
 #include <linux/shm.h>
 #include <linux/mman.h>
@@ -329,20 +329,11 @@ static int prot_none_test(unsigned long addr, unsigned long next,
        return 0;
 }
 
-static int prot_none_walk(struct vm_area_struct *vma, unsigned long start,
-                          unsigned long end, unsigned long newflags)
-{
-       pgprot_t new_pgprot = vm_get_page_prot(newflags);
-       struct mm_walk prot_none_walk = {
-               .pte_entry = prot_none_pte_entry,
-               .hugetlb_entry = prot_none_hugetlb_entry,
-               .test_walk = prot_none_test,
-               .mm = current->mm,
-               .private = &new_pgprot,
-       };
-
-       return walk_page_range(start, end, &prot_none_walk);
-}
+static const struct mm_walk_ops prot_none_walk_ops = {
+       .pte_entry              = prot_none_pte_entry,
+       .hugetlb_entry          = prot_none_hugetlb_entry,
+       .test_walk              = prot_none_test,
+};
 
 int
 mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
@@ -369,7 +360,10 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
        if (arch_has_pfn_modify_check() &&
            (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) &&
            (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) {
-               error = prot_none_walk(vma, start, end, newflags);
+               pgprot_t new_pgprot = vm_get_page_prot(newflags);
+
+               error = walk_page_range(current->mm, start, end,
+                               &prot_none_walk_ops, &new_pgprot);
                if (error)
                        return error;
        }
@@ -465,6 +459,8 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
        const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
                                (prot & PROT_READ);
 
+       start = untagged_addr(start);
+
        prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
        if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
                return -EINVAL;
index fc241d2..1fc8a29 100644 (file)
@@ -606,6 +606,9 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
        LIST_HEAD(uf_unmap_early);
        LIST_HEAD(uf_unmap);
 
+       addr = untagged_addr(addr);
+       new_addr = untagged_addr(new_addr);
+
        if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
                return ret;
 
index ef30a42..c3bd3e7 100644 (file)
@@ -37,6 +37,8 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
        int unmapped_error = 0;
        int error = -EINVAL;
 
+       start = untagged_addr(start);
+
        if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
                goto out;
        if (offset_in_page(start))
index fed1b6e..99b7ec3 100644 (file)
@@ -108,7 +108,7 @@ unsigned int kobjsize(const void *objp)
         * The ksize() function is only guaranteed to work for pointers
         * returned by kmalloc(). So handle arbitrary pointers here.
         */
-       return PAGE_SIZE << compound_order(page);
+       return page_size(page);
 }
 
 /**
index eda2e2a..71e3ace 100644 (file)
@@ -73,7 +73,7 @@ static inline bool is_memcg_oom(struct oom_control *oc)
 /**
  * oom_cpuset_eligible() - check task eligiblity for kill
  * @start: task struct of which task to consider
- * @mask: nodemask passed to page allocator for mempolicy ooms
+ * @oc: pointer to struct oom_control
  *
  * Task eligibility is determined by whether or not a candidate task, @tsk,
  * shares the same mempolicy nodes as current if it is bound by such a policy
@@ -287,7 +287,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc)
            !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
                oc->totalpages = total_swap_pages;
                for_each_node_mask(nid, *oc->nodemask)
-                       oc->totalpages += node_spanned_pages(nid);
+                       oc->totalpages += node_present_pages(nid);
                return CONSTRAINT_MEMORY_POLICY;
        }
 
@@ -300,7 +300,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc)
        if (cpuset_limited) {
                oc->totalpages = total_swap_pages;
                for_each_node_mask(nid, cpuset_current_mems_allowed)
-                       oc->totalpages += node_spanned_pages(nid);
+                       oc->totalpages += node_present_pages(nid);
                return CONSTRAINT_CPUSET;
        }
        return CONSTRAINT_NONE;
@@ -523,7 +523,7 @@ bool __oom_reap_task_mm(struct mm_struct *mm)
        set_bit(MMF_UNSTABLE, &mm->flags);
 
        for (vma = mm->mmap ; vma; vma = vma->vm_next) {
-               if (!can_madv_dontneed_vma(vma))
+               if (!can_madv_lru_vma(vma))
                        continue;
 
                /*
@@ -884,12 +884,13 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
         */
        do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID);
        mark_oom_victim(victim);
-       pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
-               message, task_pid_nr(victim), victim->comm,
-               K(victim->mm->total_vm),
-               K(get_mm_counter(victim->mm, MM_ANONPAGES)),
-               K(get_mm_counter(victim->mm, MM_FILEPAGES)),
-               K(get_mm_counter(victim->mm, MM_SHMEMPAGES)));
+       pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB, UID:%u pgtables:%lukB oom_score_adj:%hd\n",
+               message, task_pid_nr(victim), victim->comm, K(mm->total_vm),
+               K(get_mm_counter(mm, MM_ANONPAGES)),
+               K(get_mm_counter(mm, MM_FILEPAGES)),
+               K(get_mm_counter(mm, MM_SHMEMPAGES)),
+               from_kuid(&init_user_ns, task_uid(victim)),
+               mm_pgtables_bytes(mm), victim->signal->oom_score_adj);
        task_unlock(victim);
 
        /*
@@ -1068,9 +1069,10 @@ bool out_of_memory(struct oom_control *oc)
         * The OOM killer does not compensate for IO-less reclaim.
         * pagefault_out_of_memory lost its gfp context so we have to
         * make sure exclude 0 mask - all other users should have at least
-        * ___GFP_DIRECT_RECLAIM to get here.
+        * ___GFP_DIRECT_RECLAIM to get here. But mem_cgroup_oom() has to
+        * invoke the OOM killer even if it is a GFP_NOFS allocation.
         */
-       if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS))
+       if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS) && !is_memcg_oom(oc))
                return true;
 
        /*
index 6991cce..3334a76 100644 (file)
@@ -670,6 +670,7 @@ out:
 
 void free_compound_page(struct page *page)
 {
+       mem_cgroup_uncharge(page);
        __free_pages_ok(page, compound_order(page));
 }
 
@@ -3955,14 +3956,22 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
                goto check_priority;
 
        /*
+        * compaction was skipped because there are not enough order-0 pages
+        * to work with, so we retry only if it looks like reclaim can help.
+        */
+       if (compaction_needs_reclaim(compact_result)) {
+               ret = compaction_zonelist_suitable(ac, order, alloc_flags);
+               goto out;
+       }
+
+       /*
         * make sure the compaction wasn't deferred or didn't bail out early
         * due to locks contention before we declare that we should give up.
-        * But do not retry if the given zonelist is not suitable for
-        * compaction.
+        * But the next retry should use a higher priority if allowed, so
+        * we don't just keep bailing out endlessly.
         */
        if (compaction_withdrawn(compact_result)) {
-               ret = compaction_zonelist_suitable(ac, order, alloc_flags);
-               goto out;
+               goto check_priority;
        }
 
        /*
@@ -5971,7 +5980,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
                }
        }
 
-       pr_info("%s initialised, %lu pages in %ums\n", dev_name(pgmap->dev),
+       pr_info("%s initialised %lu pages in %ums\n", __func__,
                size, jiffies_to_msecs(jiffies - start));
 }
 
@@ -6638,9 +6647,11 @@ static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void pgdat_init_split_queue(struct pglist_data *pgdat)
 {
-       spin_lock_init(&pgdat->split_queue_lock);
-       INIT_LIST_HEAD(&pgdat->split_queue);
-       pgdat->split_queue_len = 0;
+       struct deferred_split *ds_queue = &pgdat->deferred_split_queue;
+
+       spin_lock_init(&ds_queue->split_queue_lock);
+       INIT_LIST_HEAD(&ds_queue->split_queue);
+       ds_queue->split_queue_len = 0;
 }
 #else
 static void pgdat_init_split_queue(struct pglist_data *pgdat) {}
@@ -8196,7 +8207,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
                        if (!hugepage_migration_supported(page_hstate(head)))
                                goto unmovable;
 
-                       skip_pages = (1 << compound_order(head)) - (page - head);
+                       skip_pages = compound_nr(head) - (page - head);
                        iter += skip_pages - 1;
                        continue;
                }
index addcbb2..dee9311 100644 (file)
@@ -24,6 +24,9 @@ struct page_owner {
        short last_migrate_reason;
        gfp_t gfp_mask;
        depot_stack_handle_t handle;
+#ifdef CONFIG_DEBUG_PAGEALLOC
+       depot_stack_handle_t free_handle;
+#endif
 };
 
 static bool page_owner_disabled = true;
@@ -102,19 +105,6 @@ static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
        return (void *)page_ext + page_owner_ops.offset;
 }
 
-void __reset_page_owner(struct page *page, unsigned int order)
-{
-       int i;
-       struct page_ext *page_ext;
-
-       for (i = 0; i < (1 << order); i++) {
-               page_ext = lookup_page_ext(page + i);
-               if (unlikely(!page_ext))
-                       continue;
-               __clear_bit(PAGE_EXT_OWNER, &page_ext->flags);
-       }
-}
-
 static inline bool check_recursive_alloc(unsigned long *entries,
                                         unsigned int nr_entries,
                                         unsigned long ip)
@@ -154,18 +144,50 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags)
        return handle;
 }
 
-static inline void __set_page_owner_handle(struct page_ext *page_ext,
-       depot_stack_handle_t handle, unsigned int order, gfp_t gfp_mask)
+void __reset_page_owner(struct page *page, unsigned int order)
 {
+       int i;
+       struct page_ext *page_ext;
+#ifdef CONFIG_DEBUG_PAGEALLOC
+       depot_stack_handle_t handle = 0;
        struct page_owner *page_owner;
 
-       page_owner = get_page_owner(page_ext);
-       page_owner->handle = handle;
-       page_owner->order = order;
-       page_owner->gfp_mask = gfp_mask;
-       page_owner->last_migrate_reason = -1;
+       if (debug_pagealloc_enabled())
+               handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
+#endif
 
-       __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+       for (i = 0; i < (1 << order); i++) {
+               page_ext = lookup_page_ext(page + i);
+               if (unlikely(!page_ext))
+                       continue;
+               __clear_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
+#ifdef CONFIG_DEBUG_PAGEALLOC
+               if (debug_pagealloc_enabled()) {
+                       page_owner = get_page_owner(page_ext);
+                       page_owner->free_handle = handle;
+               }
+#endif
+       }
+}
+
+static inline void __set_page_owner_handle(struct page *page,
+       struct page_ext *page_ext, depot_stack_handle_t handle,
+       unsigned int order, gfp_t gfp_mask)
+{
+       struct page_owner *page_owner;
+       int i;
+
+       for (i = 0; i < (1 << order); i++) {
+               page_owner = get_page_owner(page_ext);
+               page_owner->handle = handle;
+               page_owner->order = order;
+               page_owner->gfp_mask = gfp_mask;
+               page_owner->last_migrate_reason = -1;
+               __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+               __set_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
+
+               page_ext = lookup_page_ext(page + i);
+       }
 }
 
 noinline void __set_page_owner(struct page *page, unsigned int order,
@@ -178,7 +200,7 @@ noinline void __set_page_owner(struct page *page, unsigned int order,
                return;
 
        handle = save_stack(gfp_mask);
-       __set_page_owner_handle(page_ext, handle, order, gfp_mask);
+       __set_page_owner_handle(page, page_ext, handle, order, gfp_mask);
 }
 
 void __set_page_owner_migrate_reason(struct page *page, int reason)
@@ -204,8 +226,11 @@ void __split_page_owner(struct page *page, unsigned int order)
 
        page_owner = get_page_owner(page_ext);
        page_owner->order = 0;
-       for (i = 1; i < (1 << order); i++)
-               __copy_page_owner(page, page + i);
+       for (i = 1; i < (1 << order); i++) {
+               page_ext = lookup_page_ext(page + i);
+               page_owner = get_page_owner(page_ext);
+               page_owner->order = 0;
+       }
 }
 
 void __copy_page_owner(struct page *oldpage, struct page *newpage)
@@ -235,6 +260,7 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
         * the new page, which will be freed.
         */
        __set_bit(PAGE_EXT_OWNER, &new_ext->flags);
+       __set_bit(PAGE_EXT_OWNER_ACTIVE, &new_ext->flags);
 }
 
 void pagetypeinfo_showmixedcount_print(struct seq_file *m,
@@ -294,7 +320,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
                        if (unlikely(!page_ext))
                                continue;
 
-                       if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
+                       if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
                                continue;
 
                        page_owner = get_page_owner(page_ext);
@@ -405,20 +431,36 @@ void __dump_page_owner(struct page *page)
        mt = gfpflags_to_migratetype(gfp_mask);
 
        if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
-               pr_alert("page_owner info is not active (free page?)\n");
+               pr_alert("page_owner info is not present (never set?)\n");
                return;
        }
 
+       if (test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+               pr_alert("page_owner tracks the page as allocated\n");
+       else
+               pr_alert("page_owner tracks the page as freed\n");
+
+       pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
+                page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask);
+
        handle = READ_ONCE(page_owner->handle);
        if (!handle) {
-               pr_alert("page_owner info is not active (free page?)\n");
-               return;
+               pr_alert("page_owner allocation stack trace missing\n");
+       } else {
+               nr_entries = stack_depot_fetch(handle, &entries);
+               stack_trace_print(entries, nr_entries, 0);
        }
 
-       nr_entries = stack_depot_fetch(handle, &entries);
-       pr_alert("page allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
-                page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask);
-       stack_trace_print(entries, nr_entries, 0);
+#ifdef CONFIG_DEBUG_PAGEALLOC
+       handle = READ_ONCE(page_owner->free_handle);
+       if (!handle) {
+               pr_alert("page_owner free stack trace missing\n");
+       } else {
+               nr_entries = stack_depot_fetch(handle, &entries);
+               pr_alert("page last free stack trace:\n");
+               stack_trace_print(entries, nr_entries, 0);
+       }
+#endif
 
        if (page_owner->last_migrate_reason != -1)
                pr_alert("page has been migrated, last migrate reason: %s\n",
@@ -481,9 +523,23 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
                if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
                        continue;
 
+               /*
+                * Although we do have the info about past allocation of free
+                * pages, it's not relevant for current memory usage.
+                */
+               if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+                       continue;
+
                page_owner = get_page_owner(page_ext);
 
                /*
+                * Don't print "tail" pages of high-order allocations as that
+                * would inflate the stats.
+                */
+               if (!IS_ALIGNED(pfn, 1 << page_owner->order))
+                       continue;
+
+               /*
                 * Access to page_ext->handle isn't synchronous so we should
                 * be careful to access it.
                 */
@@ -562,7 +618,8 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
                                continue;
 
                        /* Found early allocated page */
-                       __set_page_owner_handle(page_ext, early_handle, 0, 0);
+                       __set_page_owner_handle(page, page_ext, early_handle,
+                                               0, 0);
                        count++;
                }
                cond_resched();
index 21d4f97..34b9181 100644 (file)
@@ -101,7 +101,7 @@ static void unpoison_page(struct page *page)
        /*
         * Page poisoning when enabled poisons each and every page
         * that is freed to buddy. Thus no extra check is done to
-        * see if a page was posioned.
+        * see if a page was poisoned.
         */
        check_poison_mem(addr, PAGE_SIZE);
        kunmap_atomic(addr);
index 11df03e..eff4b45 100644 (file)
@@ -153,8 +153,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 
        if (unlikely(PageHuge(pvmw->page))) {
                /* when pud is not present, pte will be NULL */
-               pvmw->pte = huge_pte_offset(mm, pvmw->address,
-                                           PAGE_SIZE << compound_order(page));
+               pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page));
                if (!pvmw->pte)
                        return false;
 
index c3084ff..d48c2a9 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/highmem.h>
 #include <linux/sched.h>
 #include <linux/hugetlb.h>
@@ -9,10 +9,11 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 {
        pte_t *pte;
        int err = 0;
+       const struct mm_walk_ops *ops = walk->ops;
 
        pte = pte_offset_map(pmd, addr);
        for (;;) {
-               err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
+               err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
                if (err)
                       break;
                addr += PAGE_SIZE;
@@ -30,6 +31,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 {
        pmd_t *pmd;
        unsigned long next;
+       const struct mm_walk_ops *ops = walk->ops;
        int err = 0;
 
        pmd = pmd_offset(pud, addr);
@@ -37,8 +39,8 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 again:
                next = pmd_addr_end(addr, end);
                if (pmd_none(*pmd) || !walk->vma) {
-                       if (walk->pte_hole)
-                               err = walk->pte_hole(addr, next, walk);
+                       if (ops->pte_hole)
+                               err = ops->pte_hole(addr, next, walk);
                        if (err)
                                break;
                        continue;
@@ -47,8 +49,8 @@ again:
                 * This implies that each ->pmd_entry() handler
                 * needs to know about pmd_trans_huge() pmds
                 */
-               if (walk->pmd_entry)
-                       err = walk->pmd_entry(pmd, addr, next, walk);
+               if (ops->pmd_entry)
+                       err = ops->pmd_entry(pmd, addr, next, walk);
                if (err)
                        break;
 
@@ -56,7 +58,7 @@ again:
                 * Check this here so we only break down trans_huge
                 * pages when we _need_ to
                 */
-               if (!walk->pte_entry)
+               if (!ops->pte_entry)
                        continue;
 
                split_huge_pmd(walk->vma, pmd, addr);
@@ -75,6 +77,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
 {
        pud_t *pud;
        unsigned long next;
+       const struct mm_walk_ops *ops = walk->ops;
        int err = 0;
 
        pud = pud_offset(p4d, addr);
@@ -82,18 +85,18 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
  again:
                next = pud_addr_end(addr, end);
                if (pud_none(*pud) || !walk->vma) {
-                       if (walk->pte_hole)
-                               err = walk->pte_hole(addr, next, walk);
+                       if (ops->pte_hole)
+                               err = ops->pte_hole(addr, next, walk);
                        if (err)
                                break;
                        continue;
                }
 
-               if (walk->pud_entry) {
+               if (ops->pud_entry) {
                        spinlock_t *ptl = pud_trans_huge_lock(pud, walk->vma);
 
                        if (ptl) {
-                               err = walk->pud_entry(pud, addr, next, walk);
+                               err = ops->pud_entry(pud, addr, next, walk);
                                spin_unlock(ptl);
                                if (err)
                                        break;
@@ -105,7 +108,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
                if (pud_none(*pud))
                        goto again;
 
-               if (walk->pmd_entry || walk->pte_entry)
+               if (ops->pmd_entry || ops->pte_entry)
                        err = walk_pmd_range(pud, addr, next, walk);
                if (err)
                        break;
@@ -119,19 +122,20 @@ static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 {
        p4d_t *p4d;
        unsigned long next;
+       const struct mm_walk_ops *ops = walk->ops;
        int err = 0;
 
        p4d = p4d_offset(pgd, addr);
        do {
                next = p4d_addr_end(addr, end);
                if (p4d_none_or_clear_bad(p4d)) {
-                       if (walk->pte_hole)
-                               err = walk->pte_hole(addr, next, walk);
+                       if (ops->pte_hole)
+                               err = ops->pte_hole(addr, next, walk);
                        if (err)
                                break;
                        continue;
                }
-               if (walk->pmd_entry || walk->pte_entry)
+               if (ops->pmd_entry || ops->pte_entry)
                        err = walk_pud_range(p4d, addr, next, walk);
                if (err)
                        break;
@@ -145,19 +149,20 @@ static int walk_pgd_range(unsigned long addr, unsigned long end,
 {
        pgd_t *pgd;
        unsigned long next;
+       const struct mm_walk_ops *ops = walk->ops;
        int err = 0;
 
        pgd = pgd_offset(walk->mm, addr);
        do {
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd)) {
-                       if (walk->pte_hole)
-                               err = walk->pte_hole(addr, next, walk);
+                       if (ops->pte_hole)
+                               err = ops->pte_hole(addr, next, walk);
                        if (err)
                                break;
                        continue;
                }
-               if (walk->pmd_entry || walk->pte_entry)
+               if (ops->pmd_entry || ops->pte_entry)
                        err = walk_p4d_range(pgd, addr, next, walk);
                if (err)
                        break;
@@ -183,6 +188,7 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
        unsigned long hmask = huge_page_mask(h);
        unsigned long sz = huge_page_size(h);
        pte_t *pte;
+       const struct mm_walk_ops *ops = walk->ops;
        int err = 0;
 
        do {
@@ -190,9 +196,9 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
                pte = huge_pte_offset(walk->mm, addr & hmask, sz);
 
                if (pte)
-                       err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
-               else if (walk->pte_hole)
-                       err = walk->pte_hole(addr, next, walk);
+                       err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
+               else if (ops->pte_hole)
+                       err = ops->pte_hole(addr, next, walk);
 
                if (err)
                        break;
@@ -220,9 +226,10 @@ static int walk_page_test(unsigned long start, unsigned long end,
                        struct mm_walk *walk)
 {
        struct vm_area_struct *vma = walk->vma;
+       const struct mm_walk_ops *ops = walk->ops;
 
-       if (walk->test_walk)
-               return walk->test_walk(start, end, walk);
+       if (ops->test_walk)
+               return ops->test_walk(start, end, walk);
 
        /*
         * vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP
@@ -234,8 +241,8 @@ static int walk_page_test(unsigned long start, unsigned long end,
         */
        if (vma->vm_flags & VM_PFNMAP) {
                int err = 1;
-               if (walk->pte_hole)
-                       err = walk->pte_hole(start, end, walk);
+               if (ops->pte_hole)
+                       err = ops->pte_hole(start, end, walk);
                return err ? err : 1;
        }
        return 0;
@@ -248,7 +255,7 @@ static int __walk_page_range(unsigned long start, unsigned long end,
        struct vm_area_struct *vma = walk->vma;
 
        if (vma && is_vm_hugetlb_page(vma)) {
-               if (walk->hugetlb_entry)
+               if (walk->ops->hugetlb_entry)
                        err = walk_hugetlb_range(start, end, walk);
        } else
                err = walk_pgd_range(start, end, walk);
@@ -258,11 +265,13 @@ static int __walk_page_range(unsigned long start, unsigned long end,
 
 /**
  * walk_page_range - walk page table with caller specific callbacks
- * @start: start address of the virtual address range
- * @end: end address of the virtual address range
- * @walk: mm_walk structure defining the callbacks and the target address space
+ * @mm:                mm_struct representing the target process of page table walk
+ * @start:     start address of the virtual address range
+ * @end:       end address of the virtual address range
+ * @ops:       operations to call during the walk
+ * @private:   private data for callbacks' usage
  *
- * Recursively walk the page table tree of the process represented by @walk->mm
+ * Recursively walk the page table tree of the process represented by @mm
  * within the virtual address range [@start, @end). During walking, we can do
  * some caller-specific works for each entry, by setting up pmd_entry(),
  * pte_entry(), and/or hugetlb_entry(). If you don't set up for some of these
@@ -278,47 +287,52 @@ static int __walk_page_range(unsigned long start, unsigned long end,
  *
  * Before starting to walk page table, some callers want to check whether
  * they really want to walk over the current vma, typically by checking
- * its vm_flags. walk_page_test() and @walk->test_walk() are used for this
+ * its vm_flags. walk_page_test() and @ops->test_walk() are used for this
  * purpose.
  *
  * struct mm_walk keeps current values of some common data like vma and pmd,
  * which are useful for the access from callbacks. If you want to pass some
- * caller-specific data to callbacks, @walk->private should be helpful.
+ * caller-specific data to callbacks, @private should be helpful.
  *
  * Locking:
- *   Callers of walk_page_range() and walk_page_vma() should hold
- *   @walk->mm->mmap_sem, because these function traverse vma list and/or
- *   access to vma's data.
+ *   Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_sem,
+ *   because these functions traverse the vma list and/or access the vma's data.
  */
-int walk_page_range(unsigned long start, unsigned long end,
-                   struct mm_walk *walk)
+int walk_page_range(struct mm_struct *mm, unsigned long start,
+               unsigned long end, const struct mm_walk_ops *ops,
+               void *private)
 {
        int err = 0;
        unsigned long next;
        struct vm_area_struct *vma;
+       struct mm_walk walk = {
+               .ops            = ops,
+               .mm             = mm,
+               .private        = private,
+       };
 
        if (start >= end)
                return -EINVAL;
 
-       if (!walk->mm)
+       if (!walk.mm)
                return -EINVAL;
 
-       VM_BUG_ON_MM(!rwsem_is_locked(&walk->mm->mmap_sem), walk->mm);
+       lockdep_assert_held(&walk.mm->mmap_sem);
 
-       vma = find_vma(walk->mm, start);
+       vma = find_vma(walk.mm, start);
        do {
                if (!vma) { /* after the last vma */
-                       walk->vma = NULL;
+                       walk.vma = NULL;
                        next = end;
                } else if (start < vma->vm_start) { /* outside vma */
-                       walk->vma = NULL;
+                       walk.vma = NULL;
                        next = min(end, vma->vm_start);
                } else { /* inside vma */
-                       walk->vma = vma;
+                       walk.vma = vma;
                        next = min(end, vma->vm_end);
                        vma = vma->vm_next;
 
-                       err = walk_page_test(start, next, walk);
+                       err = walk_page_test(start, next, &walk);
                        if (err > 0) {
                                /*
                                 * positive return values are purely for
@@ -331,28 +345,34 @@ int walk_page_range(unsigned long start, unsigned long end,
                        if (err < 0)
                                break;
                }
-               if (walk->vma || walk->pte_hole)
-                       err = __walk_page_range(start, next, walk);
+               if (walk.vma || walk.ops->pte_hole)
+                       err = __walk_page_range(start, next, &walk);
                if (err)
                        break;
        } while (start = next, start < end);
        return err;
 }
 
-int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk)
+int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
+               void *private)
 {
+       struct mm_walk walk = {
+               .ops            = ops,
+               .mm             = vma->vm_mm,
+               .vma            = vma,
+               .private        = private,
+       };
        int err;
 
-       if (!walk->mm)
+       if (!walk.mm)
                return -EINVAL;
 
-       VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
-       VM_BUG_ON(!vma);
-       walk->vma = vma;
-       err = walk_page_test(vma->vm_start, vma->vm_end, walk);
+       lockdep_assert_held(&walk.mm->mmap_sem);
+
+       err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
        if (err > 0)
                return 0;
        if (err < 0)
                return err;
-       return __walk_page_range(vma->vm_start, vma->vm_end, walk);
+       return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
 }
diff --git a/mm/quicklist.c b/mm/quicklist.c
deleted file mode 100644 (file)
index 5e98ac7..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Quicklist support.
- *
- * Quicklists are light weight lists of pages that have a defined state
- * on alloc and free. Pages must be in the quicklist specific defined state
- * (zero by default) when the page is freed. It seems that the initial idea
- * for such lists first came from Dave Miller and then various other people
- * improved on it.
- *
- * Copyright (C) 2007 SGI,
- *     Christoph Lameter <cl@linux.com>
- *             Generalized, added support for multiple lists and
- *             constructors / destructors.
- */
-#include <linux/kernel.h>
-
-#include <linux/gfp.h>
-#include <linux/mm.h>
-#include <linux/mmzone.h>
-#include <linux/quicklist.h>
-
-DEFINE_PER_CPU(struct quicklist [CONFIG_NR_QUICK], quicklist);
-
-#define FRACTION_OF_NODE_MEM   16
-
-static unsigned long max_pages(unsigned long min_pages)
-{
-       unsigned long node_free_pages, max;
-       int node = numa_node_id();
-       struct zone *zones = NODE_DATA(node)->node_zones;
-       int num_cpus_on_node;
-
-       node_free_pages =
-#ifdef CONFIG_ZONE_DMA
-               zone_page_state(&zones[ZONE_DMA], NR_FREE_PAGES) +
-#endif
-#ifdef CONFIG_ZONE_DMA32
-               zone_page_state(&zones[ZONE_DMA32], NR_FREE_PAGES) +
-#endif
-               zone_page_state(&zones[ZONE_NORMAL], NR_FREE_PAGES);
-
-       max = node_free_pages / FRACTION_OF_NODE_MEM;
-
-       num_cpus_on_node = cpumask_weight(cpumask_of_node(node));
-       max /= num_cpus_on_node;
-
-       return max(max, min_pages);
-}
-
-static long min_pages_to_free(struct quicklist *q,
-       unsigned long min_pages, long max_free)
-{
-       long pages_to_free;
-
-       pages_to_free = q->nr_pages - max_pages(min_pages);
-
-       return min(pages_to_free, max_free);
-}
-
-/*
- * Trim down the number of pages in the quicklist
- */
-void quicklist_trim(int nr, void (*dtor)(void *),
-       unsigned long min_pages, unsigned long max_free)
-{
-       long pages_to_free;
-       struct quicklist *q;
-
-       q = &get_cpu_var(quicklist)[nr];
-       if (q->nr_pages > min_pages) {
-               pages_to_free = min_pages_to_free(q, min_pages, max_free);
-
-               while (pages_to_free > 0) {
-                       /*
-                        * We pass a gfp_t of 0 to quicklist_alloc here
-                        * because we will never call into the page allocator.
-                        */
-                       void *p = quicklist_alloc(nr, 0, NULL);
-
-                       if (dtor)
-                               dtor(p);
-                       free_page((unsigned long)p);
-                       pages_to_free--;
-               }
-       }
-       put_cpu_var(quicklist);
-}
-
-unsigned long quicklist_total_size(void)
-{
-       unsigned long count = 0;
-       int cpu;
-       struct quicklist *ql, *q;
-
-       for_each_online_cpu(cpu) {
-               ql = per_cpu(quicklist, cpu);
-               for (q = ql; q < ql + CONFIG_NR_QUICK; q++)
-                       count += q->nr_pages;
-       }
-       return count;
-}
-
index 003377e..d9a23bb 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -898,15 +898,13 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
         */
        mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
                                0, vma, vma->vm_mm, address,
-                               min(vma->vm_end, address +
-                                   (PAGE_SIZE << compound_order(page))));
+                               min(vma->vm_end, address + page_size(page)));
        mmu_notifier_invalidate_range_start(&range);
 
        while (page_vma_mapped_walk(&pvmw)) {
-               unsigned long cstart;
                int ret = 0;
 
-               cstart = address = pvmw.address;
+               address = pvmw.address;
                if (pvmw.pte) {
                        pte_t entry;
                        pte_t *pte = pvmw.pte;
@@ -933,7 +931,6 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
                        entry = pmd_wrprotect(entry);
                        entry = pmd_mkclean(entry);
                        set_pmd_at(vma->vm_mm, address, pmd, entry);
-                       cstart &= PMD_MASK;
                        ret = 1;
 #else
                        /* unexpected pmd-mapped page? */
@@ -1192,8 +1189,10 @@ void page_add_file_rmap(struct page *page, bool compound)
                }
                if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
                        goto out;
-               VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
-               __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
+               if (PageSwapBacked(page))
+                       __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
+               else
+                       __inc_node_page_state(page, NR_FILE_PMDMAPPED);
        } else {
                if (PageTransCompound(page) && page_mapping(page)) {
                        VM_WARN_ON_ONCE(!PageLocked(page));
@@ -1232,8 +1231,10 @@ static void page_remove_file_rmap(struct page *page, bool compound)
                }
                if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
                        goto out;
-               VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
-               __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
+               if (PageSwapBacked(page))
+                       __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
+               else
+                       __dec_node_page_state(page, NR_FILE_PMDMAPPED);
        } else {
                if (!atomic_add_negative(-1, &page->_mapcount))
                        goto out;
@@ -1374,8 +1375,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         */
        mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
                                address,
-                               min(vma->vm_end, address +
-                                   (PAGE_SIZE << compound_order(page))));
+                               min(vma->vm_end, address + page_size(page)));
        if (PageHuge(page)) {
                /*
                 * If sharing is possible, start and end will be adjusted
@@ -1524,8 +1524,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
                        pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
                        if (PageHuge(page)) {
-                               int nr = 1 << compound_order(page);
-                               hugetlb_count_sub(nr, mm);
+                               hugetlb_count_sub(compound_nr(page), mm);
                                set_huge_swap_pte_at(mm, address,
                                                     pvmw.pte, pteval,
                                                     vma_mmu_pagesize(vma));
index 0f7fd4a..30ce722 100644 (file)
@@ -609,7 +609,7 @@ static int shmem_add_to_page_cache(struct page *page,
 {
        XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page));
        unsigned long i = 0;
-       unsigned long nr = 1UL << compound_order(page);
+       unsigned long nr = compound_nr(page);
 
        VM_BUG_ON_PAGE(PageTail(page), page);
        VM_BUG_ON_PAGE(index != round_down(index, nr), page);
@@ -631,7 +631,7 @@ static int shmem_add_to_page_cache(struct page *page,
                if (xas_error(&xas))
                        goto unlock;
 next:
-               xas_store(&xas, page + i);
+               xas_store(&xas, page);
                if (++i < nr) {
                        xas_next(&xas);
                        goto next;
@@ -1734,7 +1734,7 @@ unlock:
  * vm. If we swap it in we mark it dirty since we also free the swap
  * entry since a page cannot live in both the swap and page cache.
  *
- * fault_mm and fault_type are only supplied by shmem_fault:
+ * vmf and fault_type are only supplied by shmem_fault:
  * otherwise they are NULL.
  */
 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
@@ -1884,7 +1884,7 @@ alloc_nohuge:
        lru_cache_add_anon(page);
 
        spin_lock_irq(&info->lock);
-       info->alloced += 1 << compound_order(page);
+       info->alloced += compound_nr(page);
        inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page);
        shmem_recalc_inode(inode);
        spin_unlock_irq(&info->lock);
@@ -1925,7 +1925,7 @@ clear:
                struct page *head = compound_head(page);
                int i;
 
-               for (i = 0; i < (1 << compound_order(head)); i++) {
+               for (i = 0; i < compound_nr(head); i++) {
                        clear_highpage(head + i);
                        flush_dcache_page(head + i);
                }
@@ -1952,7 +1952,7 @@ clear:
         * Error recovery.
         */
 unacct:
-       shmem_inode_unacct_blocks(inode, 1 << compound_order(page));
+       shmem_inode_unacct_blocks(inode, compound_nr(page));
 
        if (PageTransHuge(page)) {
                unlock_page(page);
index 9057b80..68e455f 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -30,6 +30,69 @@ struct kmem_cache {
        struct list_head list;  /* List of all slab caches on the system */
 };
 
+#else /* !CONFIG_SLOB */
+
+struct memcg_cache_array {
+       struct rcu_head rcu;
+       struct kmem_cache *entries[0];
+};
+
+/*
+ * This is the main placeholder for memcg-related information in kmem caches.
+ * Both the root cache and the child caches will have it. For the root cache,
+ * this will hold a dynamically allocated array large enough to hold
+ * information about the currently limited memcgs in the system. To allow the
+ * array to be accessed without taking any locks, on relocation we free the old
+ * version only after a grace period.
+ *
+ * Root and child caches hold different metadata.
+ *
+ * @root_cache:        Common to root and child caches.  NULL for root, pointer to
+ *             the root cache for children.
+ *
+ * The following fields are specific to root caches.
+ *
+ * @memcg_caches: kmemcg ID indexed table of child caches.  This table is
+ *             used to index child caches during allocation and cleared
+ *             early during shutdown.
+ *
+ * @root_caches_node: List node for slab_root_caches list.
+ *
+ * @children:  List of all child caches.  While the child caches are also
+ *             reachable through @memcg_caches, a child cache remains on
+ *             this list until it is actually destroyed.
+ *
+ * The following fields are specific to child caches.
+ *
+ * @memcg:     Pointer to the memcg this cache belongs to.
+ *
+ * @children_node: List node for @root_cache->children list.
+ *
+ * @kmem_caches_node: List node for @memcg->kmem_caches list.
+ */
+struct memcg_cache_params {
+       struct kmem_cache *root_cache;
+       union {
+               struct {
+                       struct memcg_cache_array __rcu *memcg_caches;
+                       struct list_head __root_caches_node;
+                       struct list_head children;
+                       bool dying;
+               };
+               struct {
+                       struct mem_cgroup *memcg;
+                       struct list_head children_node;
+                       struct list_head kmem_caches_node;
+                       struct percpu_ref refcnt;
+
+                       void (*work_fn)(struct kmem_cache *);
+                       union {
+                               struct rcu_head rcu_head;
+                               struct work_struct work;
+                       };
+               };
+       };
+};
 #endif /* CONFIG_SLOB */
 
 #ifdef CONFIG_SLAB
@@ -174,6 +237,7 @@ int __kmem_cache_shrink(struct kmem_cache *);
 void __kmemcg_cache_deactivate(struct kmem_cache *s);
 void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s);
 void slab_kmem_cache_release(struct kmem_cache *);
+void kmem_cache_shrink_all(struct kmem_cache *s);
 
 struct seq_file;
 struct file;
index 807490f..6491c3a 100644 (file)
@@ -981,6 +981,43 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
 }
 EXPORT_SYMBOL(kmem_cache_shrink);
 
+/**
+ * kmem_cache_shrink_all - shrink a cache and all memcg caches for root cache
+ * @s: The cache pointer; non-root caches are shrunk via kmem_cache_shrink()
+ */
+void kmem_cache_shrink_all(struct kmem_cache *s)
+{
+       struct kmem_cache *c;
+
+       if (!IS_ENABLED(CONFIG_MEMCG_KMEM) || !is_root_cache(s)) {
+               kmem_cache_shrink(s);
+               return;
+       }
+
+       get_online_cpus();
+       get_online_mems();
+       kasan_cache_shrink(s);
+       __kmem_cache_shrink(s);
+
+       /*
+        * We have to take the slab_mutex to protect from the memcg list
+        * modification.
+        */
+       mutex_lock(&slab_mutex);
+       for_each_memcg_cache(c, s) {
+               /*
+                * Skip deactivated memcg caches: test the child c, not root s.
+                */
+               if (c->flags & SLAB_DEACTIVATED)
+                       continue;
+               kasan_cache_shrink(c);
+               __kmem_cache_shrink(c);
+       }
+       mutex_unlock(&slab_mutex);
+       put_online_mems();
+       put_online_cpus();
+}
+
 bool slab_is_available(void)
 {
        return slab_state >= UP;
index 7f421d0..cf377be 100644 (file)
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -539,7 +539,7 @@ size_t __ksize(const void *block)
 
        sp = virt_to_page(block);
        if (unlikely(!PageSlab(sp)))
-               return PAGE_SIZE << compound_order(sp);
+               return page_size(sp);
 
        align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
        m = (unsigned int *)(block - align);
index 8834563..42c1b3a 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -829,7 +829,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
                return 1;
 
        start = page_address(page);
-       length = PAGE_SIZE << compound_order(page);
+       length = page_size(page);
        end = start + length;
        remainder = length % s->size;
        if (!remainder)
@@ -1074,13 +1074,14 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page,
        init_tracking(s, object);
 }
 
-static void setup_page_debug(struct kmem_cache *s, void *addr, int order)
+static
+void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
 {
        if (!(s->flags & SLAB_POISON))
                return;
 
        metadata_access_enable();
-       memset(addr, POISON_INUSE, PAGE_SIZE << order);
+       memset(addr, POISON_INUSE, page_size(page));
        metadata_access_disable();
 }
 
@@ -1340,8 +1341,8 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
 #else /* !CONFIG_SLUB_DEBUG */
 static inline void setup_object_debug(struct kmem_cache *s,
                        struct page *page, void *object) {}
-static inline void setup_page_debug(struct kmem_cache *s,
-                       void *addr, int order) {}
+static inline
+void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
 
 static inline int alloc_debug_processing(struct kmem_cache *s,
        struct page *page, void *object, unsigned long addr) { return 0; }
@@ -1639,7 +1640,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
        struct kmem_cache_order_objects oo = s->oo;
        gfp_t alloc_gfp;
        void *start, *p, *next;
-       int idx, order;
+       int idx;
        bool shuffle;
 
        flags &= gfp_allowed_mask;
@@ -1673,7 +1674,6 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 
        page->objects = oo_objects(oo);
 
-       order = compound_order(page);
        page->slab_cache = s;
        __SetPageSlab(page);
        if (page_is_pfmemalloc(page))
@@ -1683,7 +1683,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 
        start = page_address(page);
 
-       setup_page_debug(s, start, order);
+       setup_page_debug(s, page, start);
 
        shuffle = shuffle_freelist(s, page);
 
@@ -2004,6 +2004,7 @@ static inline unsigned long next_tid(unsigned long tid)
        return tid + TID_STEP;
 }
 
+#ifdef SLUB_DEBUG_CMPXCHG
 static inline unsigned int tid_to_cpu(unsigned long tid)
 {
        return tid % TID_STEP;
@@ -2013,6 +2014,7 @@ static inline unsigned long tid_to_event(unsigned long tid)
 {
        return tid / TID_STEP;
 }
+#endif
 
 static inline unsigned int init_tid(int cpu)
 {
@@ -3930,7 +3932,7 @@ size_t __ksize(const void *object)
 
        if (unlikely(!PageSlab(page))) {
                WARN_ON(!PageCompound(page));
-               return PAGE_SIZE << compound_order(page);
+               return page_size(page);
        }
 
        return slab_ksize(page->slab_cache);
@@ -5298,7 +5300,7 @@ static ssize_t shrink_store(struct kmem_cache *s,
                        const char *buf, size_t length)
 {
        if (buf[0] == '1')
-               kmem_cache_shrink(s);
+               kmem_cache_shrink_all(s);
        else
                return -EINVAL;
        return length;
index 72f010d..bf32de9 100644 (file)
@@ -11,6 +11,8 @@
 #include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
 
 #include "internal.h"
 #include <asm/dma.h>
@@ -470,6 +472,12 @@ struct page __init *__populate_section_memmap(unsigned long pfn,
 static void *sparsemap_buf __meminitdata;
 static void *sparsemap_buf_end __meminitdata;
 
+static inline void __meminit sparse_buffer_free(unsigned long size)
+{
+       WARN_ON(!sparsemap_buf || size == 0);
+       memblock_free_early(__pa(sparsemap_buf), size);
+}
+
 static void __init sparse_buffer_init(unsigned long size, int nid)
 {
        phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
@@ -486,7 +494,7 @@ static void __init sparse_buffer_fini(void)
        unsigned long size = sparsemap_buf_end - sparsemap_buf;
 
        if (sparsemap_buf && size > 0)
-               memblock_free_early(__pa(sparsemap_buf), size);
+               sparse_buffer_free(size);
        sparsemap_buf = NULL;
 }
 
@@ -495,11 +503,15 @@ void * __meminit sparse_buffer_alloc(unsigned long size)
        void *ptr = NULL;
 
        if (sparsemap_buf) {
-               ptr = PTR_ALIGN(sparsemap_buf, size);
+               ptr = (void *) roundup((unsigned long)sparsemap_buf, size);
                if (ptr + size > sparsemap_buf_end)
                        ptr = NULL;
-               else
+               else {
+                       /* Free redundant aligned space */
+                       if ((unsigned long)(ptr - sparsemap_buf) > 0)
+                               sparse_buffer_free((unsigned long)(ptr - sparsemap_buf));
                        sparsemap_buf = ptr + size;
+               }
        }
        return ptr;
 }
@@ -867,7 +879,7 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
         */
        page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
 
-       ms = __pfn_to_section(start_pfn);
+       ms = __nr_to_section(section_nr);
        set_section_nid(section_nr, nid);
        section_mark_present(ms);
 
@@ -884,9 +896,6 @@ static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
 {
        int i;
 
-       if (!memmap)
-               return;
-
        /*
         * A further optimization is to have per section refcounted
         * num_poisoned_pages.  But that would need more space per memmap, so
@@ -898,7 +907,7 @@ static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
 
        for (i = 0; i < nr_pages; i++) {
                if (PageHWPoison(&memmap[i])) {
-                       atomic_long_sub(1, &num_poisoned_pages);
+                       num_poisoned_pages_dec();
                        ClearPageHWPoison(&memmap[i]);
                }
        }
index ae30039..38c3fa4 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -47,6 +47,7 @@ int page_cluster;
 static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
 static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
+static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
 static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs);
 #ifdef CONFIG_SMP
 static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
@@ -71,12 +72,12 @@ static void __page_cache_release(struct page *page)
                spin_unlock_irqrestore(&pgdat->lru_lock, flags);
        }
        __ClearPageWaiters(page);
-       mem_cgroup_uncharge(page);
 }
 
 static void __put_single_page(struct page *page)
 {
        __page_cache_release(page);
+       mem_cgroup_uncharge(page);
        free_unref_page(page);
 }
 
@@ -515,7 +516,6 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
        del_page_from_lru_list(page, lruvec, lru + active);
        ClearPageActive(page);
        ClearPageReferenced(page);
-       add_page_to_lru_list(page, lruvec, lru);
 
        if (PageWriteback(page) || PageDirty(page)) {
                /*
@@ -523,13 +523,14 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
                 * It can make readahead confusing.  But race window
                 * is _really_ small and  it's non-critical problem.
                 */
+               add_page_to_lru_list(page, lruvec, lru);
                SetPageReclaim(page);
        } else {
                /*
                 * The page's writeback ends up during pagevec
                 * We moves tha page into tail of inactive.
                 */
-               list_move_tail(&page->lru, &lruvec->lists[lru]);
+               add_page_to_lru_list_tail(page, lruvec, lru);
                __count_vm_event(PGROTATED);
        }
 
@@ -538,6 +539,22 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
        update_page_reclaim_stat(lruvec, file, 0);
 }
 
+static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
+                           void *arg)
+{
+       if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
+               int file = page_is_file_cache(page);
+               int lru = page_lru_base_type(page);
+
+               del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE);
+               ClearPageActive(page);
+               ClearPageReferenced(page);
+               add_page_to_lru_list(page, lruvec, lru);
+
+               __count_vm_events(PGDEACTIVATE, hpage_nr_pages(page));
+               update_page_reclaim_stat(lruvec, file, 0);
+       }
+}
 
 static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
                            void *arg)
@@ -590,6 +607,10 @@ void lru_add_drain_cpu(int cpu)
        if (pagevec_count(pvec))
                pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
 
+       pvec = &per_cpu(lru_deactivate_pvecs, cpu);
+       if (pagevec_count(pvec))
+               pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+
        pvec = &per_cpu(lru_lazyfree_pvecs, cpu);
        if (pagevec_count(pvec))
                pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
@@ -623,6 +644,26 @@ void deactivate_file_page(struct page *page)
        }
 }
 
+/*
+ * deactivate_page - deactivate a page
+ * @page: page to deactivate
+ *
+ * deactivate_page() moves @page to the inactive list if @page was on the active
+ * list and was not an unevictable page.  This is done to accelerate the reclaim
+ * of @page.
+ */
+void deactivate_page(struct page *page)
+{
+       if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
+               struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+
+               get_page(page);
+               if (!pagevec_add(pvec, page) || PageCompound(page))
+                       pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+               put_cpu_var(lru_deactivate_pvecs);
+       }
+}
+
 /**
  * mark_page_lazyfree - make an anon page lazyfree
  * @page: page to deactivate
@@ -687,6 +728,7 @@ void lru_add_drain_all(void)
                if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
                    pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
                    pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
+                   pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
                    pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||
                    need_activate_page_drain(cpu)) {
                        INIT_WORK(work, lru_add_drain_per_cpu);
@@ -844,17 +886,15 @@ void lru_add_page_tail(struct page *page, struct page *page_tail,
                get_page(page_tail);
                list_add_tail(&page_tail->lru, list);
        } else {
-               struct list_head *list_head;
                /*
                 * Head page has not yet been counted, as an hpage,
                 * so we must account for each subpage individually.
                 *
-                * Use the standard add function to put page_tail on the list,
-                * but then correct its position so they all end up in order.
+                * Put page_tail on the list at the correct position
+                * so they all end up in order.
                 */
-               add_page_to_lru_list(page_tail, lruvec, page_lru(page_tail));
-               list_head = page_tail->lru.prev;
-               list_move_tail(&page_tail->lru, list_head);
+               add_page_to_lru_list_tail(page_tail, lruvec,
+                                         page_lru(page_tail));
        }
 
        if (!PageUnevictable(page))
index 8368621..8e7ce9a 100644 (file)
@@ -116,7 +116,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp)
        struct address_space *address_space = swap_address_space(entry);
        pgoff_t idx = swp_offset(entry);
        XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page));
-       unsigned long i, nr = 1UL << compound_order(page);
+       unsigned long i, nr = compound_nr(page);
 
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(PageSwapCache(page), page);
@@ -133,7 +133,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp)
                for (i = 0; i < nr; i++) {
                        VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
                        set_page_private(page + i, entry.val + i);
-                       xas_store(&xas, page + i);
+                       xas_store(&xas, page);
                        xas_next(&xas);
                }
                address_space->nrpages += nr;
@@ -168,7 +168,7 @@ void __delete_from_swap_cache(struct page *page, swp_entry_t entry)
 
        for (i = 0; i < nr; i++) {
                void *entry = xas_store(&xas, NULL);
-               VM_BUG_ON_PAGE(entry != page + i, entry);
+               VM_BUG_ON_PAGE(entry != page, entry);
                set_page_private(page + i, 0);
                xas_next(&xas);
        }
index 98e9248..660717a 100644 (file)
@@ -11,6 +11,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/mm.h>
+#include <linux/highmem.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/sched/task.h>
@@ -227,7 +228,12 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
        if (!virt_addr_valid(ptr))
                return;
 
-       page = virt_to_head_page(ptr);
+       /*
+        * When CONFIG_HIGHMEM=y, kmap_to_page() will give either the
+        * highmem page or fallback to virt_to_page(). The following
+        * is effectively a highmem-aware virt_to_head_page().
+        */
+       page = compound_head(kmap_to_page((void *)ptr));
 
        if (PageSlab(page)) {
                /* Check slab allocator for flags and size. */
index e6351a8..3ad6db9 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
 #include <linux/hugetlb.h>
 #include <linux/vmalloc.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/elf.h>
+#include <linux/elf-randomize.h>
+#include <linux/personality.h>
+#include <linux/random.h>
+#include <linux/processor.h>
+#include <linux/sizes.h>
+#include <linux/compat.h>
 
 #include <linux/uaccess.h>
 
@@ -293,7 +300,105 @@ int vma_is_stack_for_current(struct vm_area_struct *vma)
        return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
 }
 
-#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
+#ifndef STACK_RND_MASK
+#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
+#endif
+
+unsigned long randomize_stack_top(unsigned long stack_top)
+{
+       unsigned long random_variable = 0;
+
+       if (current->flags & PF_RANDOMIZE) {
+               random_variable = get_random_long();
+               random_variable &= STACK_RND_MASK;
+               random_variable <<= PAGE_SHIFT;
+       }
+#ifdef CONFIG_STACK_GROWSUP
+       return PAGE_ALIGN(stack_top) + random_variable;
+#else
+       return PAGE_ALIGN(stack_top) - random_variable;
+#endif
+}
+
+#ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+       /* Is the current task 32bit ? */
+       if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task())
+               return randomize_page(mm->brk, SZ_32M);
+
+       return randomize_page(mm->brk, SZ_1G);
+}
+
+unsigned long arch_mmap_rnd(void)
+{
+       unsigned long rnd;
+
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+       if (is_compat_task())
+               rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
+       else
+#endif /* CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS */
+               rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
+
+       return rnd << PAGE_SHIFT;
+}
+
+static int mmap_is_legacy(struct rlimit *rlim_stack)
+{
+       if (current->personality & ADDR_COMPAT_LAYOUT)
+               return 1;
+
+       if (rlim_stack->rlim_cur == RLIM_INFINITY)
+               return 1;
+
+       return sysctl_legacy_va_layout;
+}
+
+/*
+ * Leave enough space between the mmap area and the stack to honour ulimit in
+ * the face of randomisation.
+ */
+#define MIN_GAP                (SZ_128M)
+#define MAX_GAP                (STACK_TOP / 6 * 5)
+
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
+{
+       unsigned long gap = rlim_stack->rlim_cur;
+       unsigned long pad = stack_guard_gap;
+
+       /* Account for stack randomization if necessary */
+       if (current->flags & PF_RANDOMIZE)
+               pad += (STACK_RND_MASK << PAGE_SHIFT);
+
+       /* Values close to RLIM_INFINITY can overflow. */
+       if (gap + pad > gap)
+               gap += pad;
+
+       if (gap < MIN_GAP)
+               gap = MIN_GAP;
+       else if (gap > MAX_GAP)
+               gap = MAX_GAP;
+
+       return PAGE_ALIGN(STACK_TOP - gap - rnd);
+}
+
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
+{
+       unsigned long random_factor = 0UL;
+
+       if (current->flags & PF_RANDOMIZE)
+               random_factor = arch_mmap_rnd();
+
+       if (mmap_is_legacy(rlim_stack)) {
+               mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
+               mm->get_unmapped_area = arch_get_unmapped_area;
+       } else {
+               mm->mmap_base = mmap_base(random_factor, rlim_stack);
+               mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+       }
+}
+#elif defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
 void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
        mm->mmap_base = TASK_UNMAPPED_BASE;
@@ -521,7 +626,7 @@ bool page_mapped(struct page *page)
                return true;
        if (PageHuge(page))
                return false;
-       for (i = 0; i < (1 << compound_order(page)); i++) {
+       for (i = 0; i < compound_nr(page); i++) {
                if (atomic_read(&page[i]._mapcount) >= 0)
                        return true;
        }
@@ -783,3 +888,16 @@ out_mm:
 out:
        return res;
 }
+
+int memcmp_pages(struct page *page1, struct page *page2)
+{
+       char *addr1, *addr2;
+       int ret;
+
+       addr1 = kmap_atomic(page1);
+       addr2 = kmap_atomic(page2);
+       ret = memcmp(addr1, addr2, PAGE_SIZE);
+       kunmap_atomic(addr2);
+       kunmap_atomic(addr1);
+       return ret;
+}
index c1246d7..a3c70e2 100644 (file)
@@ -329,8 +329,6 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
 #define DEBUG_AUGMENT_PROPAGATE_CHECK 0
 #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0
 
-#define VM_LAZY_FREE   0x02
-#define VM_VM_AREA     0x04
 
 static DEFINE_SPINLOCK(vmap_area_lock);
 /* Export for kexec only */
@@ -398,9 +396,8 @@ compute_subtree_max_size(struct vmap_area *va)
                get_subtree_max_size(va->rb_node.rb_right));
 }
 
-RB_DECLARE_CALLBACKS(static, free_vmap_area_rb_augment_cb,
-       struct vmap_area, rb_node, unsigned long, subtree_max_size,
-       compute_subtree_max_size)
+RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
+       struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)
 
 static void purge_vmap_area_lazy(void);
 static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
@@ -1116,7 +1113,7 @@ retry:
 
        va->va_start = addr;
        va->va_end = addr + size;
-       va->flags = 0;
+       va->vm = NULL;
        insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
 
        spin_unlock(&vmap_area_lock);
@@ -1282,7 +1279,14 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
        llist_for_each_entry_safe(va, n_va, valist, purge_list) {
                unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
 
-               __free_vmap_area(va);
+               /*
+                * Finally insert or merge lazily-freed area. It is
+                * detached and there is no need to "unlink" it from
+                * anything.
+                */
+               merge_or_add_vmap_area(va,
+                       &free_vmap_area_root, &free_vmap_area_list);
+
                atomic_long_sub(nr, &vmap_lazy_nr);
 
                if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
@@ -1324,6 +1328,10 @@ static void free_vmap_area_noflush(struct vmap_area *va)
 {
        unsigned long nr_lazy;
 
+       spin_lock(&vmap_area_lock);
+       unlink_va(va, &vmap_area_root);
+       spin_unlock(&vmap_area_lock);
+
        nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >>
                                PAGE_SHIFT, &vmap_lazy_nr);
 
@@ -1918,7 +1926,6 @@ void __init vmalloc_init(void)
                if (WARN_ON_ONCE(!va))
                        continue;
 
-               va->flags = VM_VM_AREA;
                va->va_start = (unsigned long)tmp->addr;
                va->va_end = va->va_start + tmp->size;
                va->vm = tmp;
@@ -2016,7 +2023,6 @@ static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
        vm->size = va->va_end - va->va_start;
        vm->caller = caller;
        va->vm = vm;
-       va->flags |= VM_VM_AREA;
        spin_unlock(&vmap_area_lock);
 }
 
@@ -2121,10 +2127,10 @@ struct vm_struct *find_vm_area(const void *addr)
        struct vmap_area *va;
 
        va = find_vmap_area((unsigned long)addr);
-       if (va && va->flags & VM_VM_AREA)
-               return va->vm;
+       if (!va)
+               return NULL;
 
-       return NULL;
+       return va->vm;
 }
 
 /**
@@ -2143,14 +2149,12 @@ struct vm_struct *remove_vm_area(const void *addr)
 
        might_sleep();
 
-       va = find_vmap_area((unsigned long)addr);
-       if (va && va->flags & VM_VM_AREA) {
+       spin_lock(&vmap_area_lock);
+       va = __find_vmap_area((unsigned long)addr);
+       if (va && va->vm) {
                struct vm_struct *vm = va->vm;
 
-               spin_lock(&vmap_area_lock);
                va->vm = NULL;
-               va->flags &= ~VM_VM_AREA;
-               va->flags |= VM_LAZY_FREE;
                spin_unlock(&vmap_area_lock);
 
                kasan_free_shadow(vm);
@@ -2158,6 +2162,8 @@ struct vm_struct *remove_vm_area(const void *addr)
 
                return vm;
        }
+
+       spin_unlock(&vmap_area_lock);
        return NULL;
 }
 
@@ -2402,7 +2408,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
        nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
        array_size = (nr_pages * sizeof(struct page *));
 
-       area->nr_pages = nr_pages;
        /* Please note that the recursion is strictly bounded. */
        if (array_size > PAGE_SIZE) {
                pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask,
@@ -2410,13 +2415,16 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
        } else {
                pages = kmalloc_node(array_size, nested_gfp, node);
        }
-       area->pages = pages;
-       if (!area->pages) {
+
+       if (!pages) {
                remove_vm_area(area->addr);
                kfree(area);
                return NULL;
        }
 
+       area->pages = pages;
+       area->nr_pages = nr_pages;
+
        for (i = 0; i < area->nr_pages; i++) {
                struct page *page;
 
@@ -2851,7 +2859,7 @@ long vread(char *buf, char *addr, unsigned long count)
                if (!count)
                        break;
 
-               if (!(va->flags & VM_VM_AREA))
+               if (!va->vm)
                        continue;
 
                vm = va->vm;
@@ -2931,7 +2939,7 @@ long vwrite(char *buf, char *addr, unsigned long count)
                if (!count)
                        break;
 
-               if (!(va->flags & VM_VM_AREA))
+               if (!va->vm)
                        continue;
 
                vm = va->vm;
@@ -3450,6 +3458,22 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
        }
 }
 
+static void show_purge_info(struct seq_file *m)
+{
+       struct llist_node *head;
+       struct vmap_area *va;
+
+       head = READ_ONCE(vmap_purge_list.first);
+       if (head == NULL)
+               return;
+
+       llist_for_each_entry(va, head, purge_list) {
+               seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
+                       (void *)va->va_start, (void *)va->va_end,
+                       va->va_end - va->va_start);
+       }
+}
+
 static int s_show(struct seq_file *m, void *p)
 {
        struct vmap_area *va;
@@ -3458,14 +3482,13 @@ static int s_show(struct seq_file *m, void *p)
        va = list_entry(p, struct vmap_area, list);
 
        /*
-        * s_show can encounter race with remove_vm_area, !VM_VM_AREA on
-        * behalf of vmap area is being tear down or vm_map_ram allocation.
+        * s_show can race with remove_vm_area(); !vm means the vmap area is
+        * being torn down or it is a vm_map_ram allocation.
         */
-       if (!(va->flags & VM_VM_AREA)) {
-               seq_printf(m, "0x%pK-0x%pK %7ld %s\n",
+       if (!va->vm) {
+               seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
                        (void *)va->va_start, (void *)va->va_end,
-                       va->va_end - va->va_start,
-                       va->flags & VM_LAZY_FREE ? "unpurged vm_area" : "vm_map_ram");
+                       va->va_end - va->va_start);
 
                return 0;
        }
@@ -3504,6 +3527,16 @@ static int s_show(struct seq_file *m, void *p)
 
        show_numa_info(m, v);
        seq_putc(m, '\n');
+
+       /*
+        * As a final step, dump "unpurged" areas. Note,
+        * that entire "/proc/vmallocinfo" output will not
+        * be address sorted, because the purge list is not
+        * sorted.
+        */
+       if (list_is_last(&va->list, &vmap_area_list))
+               show_purge_info(m);
+
        return 0;
 }
 
index a6c5d0b..e5d52d6 100644 (file)
@@ -171,11 +171,22 @@ int vm_swappiness = 60;
  */
 unsigned long vm_total_pages;
 
+static void set_task_reclaim_state(struct task_struct *task,
+                                  struct reclaim_state *rs)
+{
+       /* Check for an overwrite */
+       WARN_ON_ONCE(rs && task->reclaim_state);
+
+       /* Check for the nulling of an already-nulled member */
+       WARN_ON_ONCE(!rs && !task->reclaim_state);
+
+       task->reclaim_state = rs;
+}
+
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
 
-#ifdef CONFIG_MEMCG_KMEM
-
+#ifdef CONFIG_MEMCG
 /*
  * We allow subsystems to populate their shrinker-related
  * LRU lists before register_shrinker_prepared() is called
@@ -227,30 +238,7 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker)
        idr_remove(&shrinker_idr, id);
        up_write(&shrinker_rwsem);
 }
-#else /* CONFIG_MEMCG_KMEM */
-static int prealloc_memcg_shrinker(struct shrinker *shrinker)
-{
-       return 0;
-}
-
-static void unregister_memcg_shrinker(struct shrinker *shrinker)
-{
-}
-#endif /* CONFIG_MEMCG_KMEM */
 
-static void set_task_reclaim_state(struct task_struct *task,
-                                  struct reclaim_state *rs)
-{
-       /* Check for an overwrite */
-       WARN_ON_ONCE(rs && task->reclaim_state);
-
-       /* Check for the nulling of an already-nulled member */
-       WARN_ON_ONCE(!rs && !task->reclaim_state);
-
-       task->reclaim_state = rs;
-}
-
-#ifdef CONFIG_MEMCG
 static bool global_reclaim(struct scan_control *sc)
 {
        return !sc->target_mem_cgroup;
@@ -305,6 +293,15 @@ static bool memcg_congested(pg_data_t *pgdat,
 
 }
 #else
+static int prealloc_memcg_shrinker(struct shrinker *shrinker)
+{
+       return 0;
+}
+
+static void unregister_memcg_shrinker(struct shrinker *shrinker)
+{
+}
+
 static bool global_reclaim(struct scan_control *sc)
 {
        return true;
@@ -591,7 +588,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
        return freed;
 }
 
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
 static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
                        struct mem_cgroup *memcg, int priority)
 {
@@ -599,7 +596,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
        unsigned long ret, freed = 0;
        int i;
 
-       if (!memcg_kmem_enabled() || !mem_cgroup_online(memcg))
+       if (!mem_cgroup_online(memcg))
                return 0;
 
        if (!down_read_trylock(&shrinker_rwsem))
@@ -625,6 +622,11 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
                        continue;
                }
 
+               /* Call non-slab shrinkers even though kmem is disabled */
+               if (!memcg_kmem_enabled() &&
+                   !(shrinker->flags & SHRINKER_NONSLAB))
+                       continue;
+
                ret = do_shrink_slab(&sc, shrinker, priority);
                if (ret == SHRINK_EMPTY) {
                        clear_bit(i, map->map);
@@ -661,13 +663,13 @@ unlock:
        up_read(&shrinker_rwsem);
        return freed;
 }
-#else /* CONFIG_MEMCG_KMEM */
+#else /* CONFIG_MEMCG */
 static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
                        struct mem_cgroup *memcg, int priority)
 {
        return 0;
 }
-#endif /* CONFIG_MEMCG_KMEM */
+#endif /* CONFIG_MEMCG */
 
 /**
  * shrink_slab - shrink slab caches
@@ -1121,7 +1123,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                                      struct scan_control *sc,
                                      enum ttu_flags ttu_flags,
                                      struct reclaim_stat *stat,
-                                     bool force_reclaim)
+                                     bool ignore_references)
 {
        LIST_HEAD(ret_pages);
        LIST_HEAD(free_pages);
@@ -1135,7 +1137,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                struct address_space *mapping;
                struct page *page;
                int may_enter_fs;
-               enum page_references references = PAGEREF_RECLAIM_CLEAN;
+               enum page_references references = PAGEREF_RECLAIM;
                bool dirty, writeback;
                unsigned int nr_pages;
 
@@ -1149,7 +1151,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
                VM_BUG_ON_PAGE(PageActive(page), page);
 
-               nr_pages = 1 << compound_order(page);
+               nr_pages = compound_nr(page);
 
                /* Account the number of base pages even though THP */
                sc->nr_scanned += nr_pages;
@@ -1266,7 +1268,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                        }
                }
 
-               if (!force_reclaim)
+               if (!ignore_references)
                        references = page_check_references(page, sc);
 
                switch (references) {
@@ -1487,10 +1489,9 @@ free_it:
                 * Is there need to periodically free_page_list? It would
                 * appear not as the counts should be low
                 */
-               if (unlikely(PageTransHuge(page))) {
-                       mem_cgroup_uncharge(page);
+               if (unlikely(PageTransHuge(page)))
                        (*get_compound_page_dtor(page))(page);
-               } else
+               else
                        list_add(&page->lru, &free_pages);
                continue;
 
@@ -1705,7 +1706,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
                VM_BUG_ON_PAGE(!PageLRU(page), page);
 
-               nr_pages = 1 << compound_order(page);
+               nr_pages = compound_nr(page);
                total_scan += nr_pages;
 
                if (page_zonenum(page) > sc->reclaim_idx) {
@@ -1911,7 +1912,6 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
 
                        if (unlikely(PageCompound(page))) {
                                spin_unlock_irq(&pgdat->lru_lock);
-                               mem_cgroup_uncharge(page);
                                (*get_compound_page_dtor(page))(page);
                                spin_lock_irq(&pgdat->lru_lock);
                        } else
@@ -2145,6 +2145,62 @@ static void shrink_active_list(unsigned long nr_to_scan,
                        nr_deactivate, nr_rotated, sc->priority, file);
 }
 
+unsigned long reclaim_pages(struct list_head *page_list)
+{
+       int nid = -1;
+       unsigned long nr_reclaimed = 0;
+       LIST_HEAD(node_page_list);
+       struct reclaim_stat dummy_stat;
+       struct page *page;
+       struct scan_control sc = {
+               .gfp_mask = GFP_KERNEL,
+               .priority = DEF_PRIORITY,
+               .may_writepage = 1,
+               .may_unmap = 1,
+               .may_swap = 1,
+       };
+
+       while (!list_empty(page_list)) {
+               page = lru_to_page(page_list);
+               if (nid == -1) {
+                       nid = page_to_nid(page);
+                       INIT_LIST_HEAD(&node_page_list);
+               }
+
+               if (nid == page_to_nid(page)) {
+                       ClearPageActive(page);
+                       list_move(&page->lru, &node_page_list);
+                       continue;
+               }
+
+               nr_reclaimed += shrink_page_list(&node_page_list,
+                                               NODE_DATA(nid),
+                                               &sc, 0,
+                                               &dummy_stat, false);
+               while (!list_empty(&node_page_list)) {
+                       page = lru_to_page(&node_page_list);
+                       list_del(&page->lru);
+                       putback_lru_page(page);
+               }
+
+               nid = -1;
+       }
+
+       if (!list_empty(&node_page_list)) {
+               nr_reclaimed += shrink_page_list(&node_page_list,
+                                               NODE_DATA(nid),
+                                               &sc, 0,
+                                               &dummy_stat, false);
+               while (!list_empty(&node_page_list)) {
+                       page = lru_to_page(&node_page_list);
+                       list_del(&page->lru);
+                       putback_lru_page(page);
+               }
+       }
+
+       return nr_reclaimed;
+}
+
 /*
  * The inactive anon list should be small enough that the VM never has
  * to do too much work.
@@ -2586,7 +2642,6 @@ static bool in_reclaim_compaction(struct scan_control *sc)
  */
 static inline bool should_continue_reclaim(struct pglist_data *pgdat,
                                        unsigned long nr_reclaimed,
-                                       unsigned long nr_scanned,
                                        struct scan_control *sc)
 {
        unsigned long pages_for_compaction;
@@ -2597,40 +2652,18 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
        if (!in_reclaim_compaction(sc))
                return false;
 
-       /* Consider stopping depending on scan and reclaim activity */
-       if (sc->gfp_mask & __GFP_RETRY_MAYFAIL) {
-               /*
-                * For __GFP_RETRY_MAYFAIL allocations, stop reclaiming if the
-                * full LRU list has been scanned and we are still failing
-                * to reclaim pages. This full LRU scan is potentially
-                * expensive but a __GFP_RETRY_MAYFAIL caller really wants to succeed
-                */
-               if (!nr_reclaimed && !nr_scanned)
-                       return false;
-       } else {
-               /*
-                * For non-__GFP_RETRY_MAYFAIL allocations which can presumably
-                * fail without consequence, stop if we failed to reclaim
-                * any pages from the last SWAP_CLUSTER_MAX number of
-                * pages that were scanned. This will return to the
-                * caller faster at the risk reclaim/compaction and
-                * the resulting allocation attempt fails
-                */
-               if (!nr_reclaimed)
-                       return false;
-       }
-
        /*
-        * If we have not reclaimed enough pages for compaction and the
-        * inactive lists are large enough, continue reclaiming
+        * Stop if we failed to reclaim any pages from the last SWAP_CLUSTER_MAX
+        * number of pages that were scanned. This will return to the caller
+        * with the risk that reclaim/compaction and the resulting allocation
+        * attempt fail. In the past we tried harder for __GFP_RETRY_MAYFAIL
+        * allocations by requiring that the full LRU list had been scanned
+        * first, assuming that a zero delta of sc->nr_scanned meant a full LRU
+        * scan, but that approximation was wrong, and there were corner cases
+        * where a non-zero number of pages was always scanned.
         */
-       pages_for_compaction = compact_gap(sc->order);
-       inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
-       if (get_nr_swap_pages() > 0)
-               inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
-       if (sc->nr_reclaimed < pages_for_compaction &&
-                       inactive_lru_pages > pages_for_compaction)
-               return true;
+       if (!nr_reclaimed)
+               return false;
 
        /* If compaction would go ahead or the allocation would succeed, stop */
        for (z = 0; z <= sc->reclaim_idx; z++) {
@@ -2647,7 +2680,17 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
                        ;
                }
        }
-       return true;
+
+       /*
+        * If we have not reclaimed enough pages for compaction and the
+        * inactive lists are large enough, continue reclaiming
+        */
+       pages_for_compaction = compact_gap(sc->order);
+       inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
+       if (get_nr_swap_pages() > 0)
+               inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
+
+       return inactive_lru_pages > pages_for_compaction;
 }
 
 static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
@@ -2664,10 +2707,6 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 
        do {
                struct mem_cgroup *root = sc->target_mem_cgroup;
-               struct mem_cgroup_reclaim_cookie reclaim = {
-                       .pgdat = pgdat,
-                       .priority = sc->priority,
-               };
                unsigned long node_lru_pages = 0;
                struct mem_cgroup *memcg;
 
@@ -2676,7 +2715,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                nr_reclaimed = sc->nr_reclaimed;
                nr_scanned = sc->nr_scanned;
 
-               memcg = mem_cgroup_iter(root, NULL, &reclaim);
+               memcg = mem_cgroup_iter(root, NULL, NULL);
                do {
                        unsigned long lru_pages;
                        unsigned long reclaimed;
@@ -2719,21 +2758,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                                   sc->nr_scanned - scanned,
                                   sc->nr_reclaimed - reclaimed);
 
-                       /*
-                        * Kswapd have to scan all memory cgroups to fulfill
-                        * the overall scan target for the node.
-                        *
-                        * Limit reclaim, on the other hand, only cares about
-                        * nr_to_reclaim pages to be reclaimed and it will
-                        * retry with decreasing priority if one round over the
-                        * whole hierarchy is not sufficient.
-                        */
-                       if (!current_is_kswapd() &&
-                                       sc->nr_reclaimed >= sc->nr_to_reclaim) {
-                               mem_cgroup_iter_break(root, memcg);
-                               break;
-                       }
-               } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
+               } while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
 
                if (reclaim_state) {
                        sc->nr_reclaimed += reclaim_state->reclaimed_slab;
@@ -2810,7 +2835,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                        wait_iff_congested(BLK_RW_ASYNC, HZ/10);
 
        } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
-                                        sc->nr_scanned - nr_scanned, sc));
+                                        sc));
 
        /*
         * Kswapd gives up on balancing particular nodes after too
index fd7e16c..6afc892 100644 (file)
@@ -1158,6 +1158,8 @@ const char * const vmstat_text[] = {
        "nr_shmem",
        "nr_shmem_hugepages",
        "nr_shmem_pmdmapped",
+       "nr_file_hugepages",
+       "nr_file_pmdmapped",
        "nr_anon_transparent_hugepages",
        "nr_unstable",
        "nr_vmscan_write",
index 75b7962..05bdf90 100644 (file)
@@ -41,7 +41,6 @@
 #include <linux/workqueue.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/wait.h>
 #include <linux/zpool.h>
 #include <linux/magic.h>
 
@@ -146,8 +145,6 @@ struct z3fold_header {
  * @release_wq:        workqueue for safe page release
  * @work:      work_struct for safe page release
  * @inode:     inode for z3fold pseudo filesystem
- * @destroying: bool to stop migration once we start destruction
- * @isolated: int to count the number of pages currently in isolation
  *
  * This structure is allocated at pool creation time and maintains metadata
  * pertaining to a particular z3fold pool.
@@ -166,11 +163,8 @@ struct z3fold_pool {
        const struct zpool_ops *zpool_ops;
        struct workqueue_struct *compact_wq;
        struct workqueue_struct *release_wq;
-       struct wait_queue_head isolate_wait;
        struct work_struct work;
        struct inode *inode;
-       bool destroying;
-       int isolated;
 };
 
 /*
@@ -301,14 +295,11 @@ static void z3fold_unregister_migration(struct z3fold_pool *pool)
  }
 
 /* Initializes the z3fold header of a newly allocated z3fold page */
-static struct z3fold_header *init_z3fold_page(struct page *page,
+static struct z3fold_header *init_z3fold_page(struct page *page, bool headless,
                                        struct z3fold_pool *pool, gfp_t gfp)
 {
        struct z3fold_header *zhdr = page_address(page);
-       struct z3fold_buddy_slots *slots = alloc_slots(pool, gfp);
-
-       if (!slots)
-               return NULL;
+       struct z3fold_buddy_slots *slots;
 
        INIT_LIST_HEAD(&page->lru);
        clear_bit(PAGE_HEADLESS, &page->private);
@@ -316,6 +307,12 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
        clear_bit(NEEDS_COMPACTING, &page->private);
        clear_bit(PAGE_STALE, &page->private);
        clear_bit(PAGE_CLAIMED, &page->private);
+       if (headless)
+               return zhdr;
+
+       slots = alloc_slots(pool, gfp);
+       if (!slots)
+               return NULL;
 
        spin_lock_init(&zhdr->page_lock);
        kref_init(&zhdr->refcount);
@@ -372,9 +369,10 @@ static inline int __idx(struct z3fold_header *zhdr, enum buddy bud)
  * Encodes the handle of a particular buddy within a z3fold page
  * Pool lock should be held as this function accesses first_num
  */
-static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
+static unsigned long __encode_handle(struct z3fold_header *zhdr,
+                               struct z3fold_buddy_slots *slots,
+                               enum buddy bud)
 {
-       struct z3fold_buddy_slots *slots;
        unsigned long h = (unsigned long)zhdr;
        int idx = 0;
 
@@ -391,11 +389,15 @@ static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
        if (bud == LAST)
                h |= (zhdr->last_chunks << BUDDY_SHIFT);
 
-       slots = zhdr->slots;
        slots->slot[idx] = h;
        return (unsigned long)&slots->slot[idx];
 }
 
+static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
+{
+       return __encode_handle(zhdr, zhdr->slots, bud);
+}
+
 /* Returns the z3fold page where a given handle is stored */
 static inline struct z3fold_header *handle_to_z3fold_header(unsigned long h)
 {
@@ -630,6 +632,7 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked)
        }
 
        if (unlikely(PageIsolated(page) ||
+                    test_bit(PAGE_CLAIMED, &page->private) ||
                     test_bit(PAGE_STALE, &page->private))) {
                z3fold_page_unlock(zhdr);
                return;
@@ -775,7 +778,6 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
                goto out_c;
        spin_lock_init(&pool->lock);
        spin_lock_init(&pool->stale_lock);
-       init_waitqueue_head(&pool->isolate_wait);
        pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
        if (!pool->unbuddied)
                goto out_pool;
@@ -815,15 +817,6 @@ out:
        return NULL;
 }
 
-static bool pool_isolated_are_drained(struct z3fold_pool *pool)
-{
-       bool ret;
-
-       spin_lock(&pool->lock);
-       ret = pool->isolated == 0;
-       spin_unlock(&pool->lock);
-       return ret;
-}
 /**
  * z3fold_destroy_pool() - destroys an existing z3fold pool
  * @pool:      the z3fold pool to be destroyed
@@ -833,22 +826,6 @@ static bool pool_isolated_are_drained(struct z3fold_pool *pool)
 static void z3fold_destroy_pool(struct z3fold_pool *pool)
 {
        kmem_cache_destroy(pool->c_handle);
-       /*
-        * We set pool-> destroying under lock to ensure that
-        * z3fold_page_isolate() sees any changes to destroying. This way we
-        * avoid the need for any memory barriers.
-        */
-
-       spin_lock(&pool->lock);
-       pool->destroying = true;
-       spin_unlock(&pool->lock);
-
-       /*
-        * We need to ensure that no pages are being migrated while we destroy
-        * these workqueues, as migration can queue work on either of the
-        * workqueues.
-        */
-       wait_event(pool->isolate_wait, !pool_isolated_are_drained(pool));
 
        /*
         * We need to destroy pool->compact_wq before pool->release_wq,
@@ -956,7 +933,7 @@ retry:
        if (!page)
                return -ENOMEM;
 
-       zhdr = init_z3fold_page(page, pool, gfp);
+       zhdr = init_z3fold_page(page, bud == HEADLESS, pool, gfp);
        if (!zhdr) {
                __free_page(page);
                return -ENOMEM;
@@ -1132,6 +1109,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
        struct z3fold_header *zhdr = NULL;
        struct page *page = NULL;
        struct list_head *pos;
+       struct z3fold_buddy_slots slots;
        unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
 
        spin_lock(&pool->lock);
@@ -1150,16 +1128,22 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
                        /* this bit could have been set by free, in which case
                         * we pass over to the next page in the pool.
                         */
-                       if (test_and_set_bit(PAGE_CLAIMED, &page->private))
+                       if (test_and_set_bit(PAGE_CLAIMED, &page->private)) {
+                               page = NULL;
                                continue;
+                       }
 
-                       if (unlikely(PageIsolated(page)))
+                       if (unlikely(PageIsolated(page))) {
+                               clear_bit(PAGE_CLAIMED, &page->private);
+                               page = NULL;
                                continue;
+                       }
+                       zhdr = page_address(page);
                        if (test_bit(PAGE_HEADLESS, &page->private))
                                break;
 
-                       zhdr = page_address(page);
                        if (!z3fold_page_trylock(zhdr)) {
+                               clear_bit(PAGE_CLAIMED, &page->private);
                                zhdr = NULL;
                                continue; /* can't evict at this point */
                        }
@@ -1177,26 +1161,30 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 
                if (!test_bit(PAGE_HEADLESS, &page->private)) {
                        /*
-                        * We need encode the handles before unlocking, since
-                        * we can race with free that will set
-                        * (first|last)_chunks to 0
+                        * We need to encode the handles before unlocking, and
+                        * use our local slots structure because z3fold_free
+                        * can zero out zhdr->slots and we can't do much
+                        * about that
                         */
                        first_handle = 0;
                        last_handle = 0;
                        middle_handle = 0;
                        if (zhdr->first_chunks)
-                               first_handle = encode_handle(zhdr, FIRST);
+                               first_handle = __encode_handle(zhdr, &slots,
+                                                               FIRST);
                        if (zhdr->middle_chunks)
-                               middle_handle = encode_handle(zhdr, MIDDLE);
+                               middle_handle = __encode_handle(zhdr, &slots,
+                                                               MIDDLE);
                        if (zhdr->last_chunks)
-                               last_handle = encode_handle(zhdr, LAST);
+                               last_handle = __encode_handle(zhdr, &slots,
+                                                               LAST);
                        /*
                         * it's safe to unlock here because we hold a
                         * reference to this page
                         */
                        z3fold_page_unlock(zhdr);
                } else {
-                       first_handle = encode_handle(zhdr, HEADLESS);
+                       first_handle = __encode_handle(zhdr, &slots, HEADLESS);
                        last_handle = middle_handle = 0;
                }
 
@@ -1226,9 +1214,9 @@ next:
                        spin_lock(&pool->lock);
                        list_add(&page->lru, &pool->lru);
                        spin_unlock(&pool->lock);
+                       clear_bit(PAGE_CLAIMED, &page->private);
                } else {
                        z3fold_page_lock(zhdr);
-                       clear_bit(PAGE_CLAIMED, &page->private);
                        if (kref_put(&zhdr->refcount,
                                        release_z3fold_page_locked)) {
                                atomic64_dec(&pool->pages_nr);
@@ -1243,6 +1231,7 @@ next:
                        list_add(&page->lru, &pool->lru);
                        spin_unlock(&pool->lock);
                        z3fold_page_unlock(zhdr);
+                       clear_bit(PAGE_CLAIMED, &page->private);
                }
 
                /* We started off locked to we need to lock the pool back */
@@ -1339,28 +1328,6 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
        return atomic64_read(&pool->pages_nr);
 }
 
-/*
- * z3fold_dec_isolated() expects to be called while pool->lock is held.
- */
-static void z3fold_dec_isolated(struct z3fold_pool *pool)
-{
-       assert_spin_locked(&pool->lock);
-       VM_BUG_ON(pool->isolated <= 0);
-       pool->isolated--;
-
-       /*
-        * If we have no more isolated pages, we have to see if
-        * z3fold_destroy_pool() is waiting for a signal.
-        */
-       if (pool->isolated == 0 && waitqueue_active(&pool->isolate_wait))
-               wake_up_all(&pool->isolate_wait);
-}
-
-static void z3fold_inc_isolated(struct z3fold_pool *pool)
-{
-       pool->isolated++;
-}
-
 static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
 {
        struct z3fold_header *zhdr;
@@ -1369,7 +1336,8 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
        VM_BUG_ON_PAGE(!PageMovable(page), page);
        VM_BUG_ON_PAGE(PageIsolated(page), page);
 
-       if (test_bit(PAGE_HEADLESS, &page->private))
+       if (test_bit(PAGE_HEADLESS, &page->private) ||
+           test_bit(PAGE_CLAIMED, &page->private))
                return false;
 
        zhdr = page_address(page);
@@ -1387,34 +1355,6 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
                spin_lock(&pool->lock);
                if (!list_empty(&page->lru))
                        list_del(&page->lru);
-               /*
-                * We need to check for destruction while holding pool->lock, as
-                * otherwise destruction could see 0 isolated pages, and
-                * proceed.
-                */
-               if (unlikely(pool->destroying)) {
-                       spin_unlock(&pool->lock);
-                       /*
-                        * If this page isn't stale, somebody else holds a
-                        * reference to it. Let't drop our refcount so that they
-                        * can call the release logic.
-                        */
-                       if (unlikely(kref_put(&zhdr->refcount,
-                                             release_z3fold_page_locked))) {
-                               /*
-                                * If we get here we have kref problems, so we
-                                * should freak out.
-                                */
-                               WARN(1, "Z3fold is experiencing kref problems\n");
-                               z3fold_page_unlock(zhdr);
-                               return false;
-                       }
-                       z3fold_page_unlock(zhdr);
-                       return false;
-               }
-
-
-               z3fold_inc_isolated(pool);
                spin_unlock(&pool->lock);
                z3fold_page_unlock(zhdr);
                return true;
@@ -1483,10 +1423,6 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
 
        queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
 
-       spin_lock(&pool->lock);
-       z3fold_dec_isolated(pool);
-       spin_unlock(&pool->lock);
-
        page_mapcount_reset(page);
        put_page(page);
        return 0;
@@ -1506,14 +1442,10 @@ static void z3fold_page_putback(struct page *page)
        INIT_LIST_HEAD(&page->lru);
        if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
                atomic64_dec(&pool->pages_nr);
-               spin_lock(&pool->lock);
-               z3fold_dec_isolated(pool);
-               spin_unlock(&pool->lock);
                return;
        }
        spin_lock(&pool->lock);
        list_add(&page->lru, &pool->lru);
-       z3fold_dec_isolated(pool);
        spin_unlock(&pool->lock);
        z3fold_page_unlock(zhdr);
 }
index a2dd910..8636692 100644 (file)
@@ -239,6 +239,22 @@ const char *zpool_get_type(struct zpool *zpool)
 }
 
 /**
+ * zpool_malloc_support_movable() - Check if the zpool supports
+ * allocating movable memory
+ * @zpool:     The zpool to check
+ *
+ * This returns if the zpool supports allocating movable memory.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: true if the zpool supports allocating movable memory, false if not
+ */
+bool zpool_malloc_support_movable(struct zpool *zpool)
+{
+       return zpool->driver->malloc_support_movable;
+}
+
+/**
  * zpool_malloc() - Allocate memory
  * @zpool:     The zpool to allocate from.
  * @size:      The amount of memory to allocate.
index e98bb6a..2b2b9aa 100644 (file)
@@ -443,15 +443,16 @@ static u64 zs_zpool_total_size(void *pool)
 }
 
 static struct zpool_driver zs_zpool_driver = {
-       .type =         "zsmalloc",
-       .owner =        THIS_MODULE,
-       .create =       zs_zpool_create,
-       .destroy =      zs_zpool_destroy,
-       .malloc =       zs_zpool_malloc,
-       .free =         zs_zpool_free,
-       .map =          zs_zpool_map,
-       .unmap =        zs_zpool_unmap,
-       .total_size =   zs_zpool_total_size,
+       .type =                   "zsmalloc",
+       .owner =                  THIS_MODULE,
+       .create =                 zs_zpool_create,
+       .destroy =                zs_zpool_destroy,
+       .malloc_support_movable = true,
+       .malloc =                 zs_zpool_malloc,
+       .free =                   zs_zpool_free,
+       .map =                    zs_zpool_map,
+       .unmap =                  zs_zpool_unmap,
+       .total_size =             zs_zpool_total_size,
 };
 
 MODULE_ALIAS("zpool-zsmalloc");
@@ -476,10 +477,6 @@ static inline int get_zspage_inuse(struct zspage *zspage)
        return zspage->inuse;
 }
 
-static inline void set_zspage_inuse(struct zspage *zspage, int val)
-{
-       zspage->inuse = val;
-}
 
 static inline void mod_zspage_inuse(struct zspage *zspage, int val)
 {
index 0e22744..46a3223 100644 (file)
@@ -856,7 +856,6 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
        /* extract swpentry from data */
        zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
        swpentry = zhdr->swpentry; /* here */
-       zpool_unmap_handle(pool, handle);
        tree = zswap_trees[swp_type(swpentry)];
        offset = swp_offset(swpentry);
 
@@ -866,6 +865,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
        if (!entry) {
                /* entry was invalidated */
                spin_unlock(&tree->lock);
+               zpool_unmap_handle(pool, handle);
                return 0;
        }
        spin_unlock(&tree->lock);
@@ -886,15 +886,13 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
        case ZSWAP_SWAPCACHE_NEW: /* page is locked */
                /* decompress */
                dlen = PAGE_SIZE;
-               src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
-                               ZPOOL_MM_RO) + sizeof(struct zswap_header);
+               src = (u8 *)zhdr + sizeof(struct zswap_header);
                dst = kmap_atomic(page);
                tfm = *get_cpu_ptr(entry->pool->tfm);
                ret = crypto_comp_decompress(tfm, src, entry->length,
                                             dst, &dlen);
                put_cpu_ptr(entry->pool->tfm);
                kunmap_atomic(dst);
-               zpool_unmap_handle(entry->pool->zpool, entry->handle);
                BUG_ON(ret);
                BUG_ON(dlen != PAGE_SIZE);
 
@@ -940,6 +938,7 @@ fail:
        spin_unlock(&tree->lock);
 
 end:
+       zpool_unmap_handle(pool, handle);
        return ret;
 }
 
@@ -997,6 +996,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
        char *buf;
        u8 *src, *dst;
        struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
+       gfp_t gfp;
 
        /* THP isn't supported */
        if (PageTransHuge(page)) {
@@ -1070,9 +1070,10 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 
        /* store */
        hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
-       ret = zpool_malloc(entry->pool->zpool, hlen + dlen,
-                          __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM,
-                          &handle);
+       gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
+       if (zpool_malloc_support_movable(entry->pool->zpool))
+               gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
+       ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
        if (ret == -ENOSPC) {
                zswap_reject_compress_poor++;
                goto put_dstmem;
index 9622f3e..1d48afc 100644 (file)
@@ -281,6 +281,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
 
        p9pdu_reset(&req->tc);
        p9pdu_reset(&req->rc);
+       req->t_err = 0;
        req->status = REQ_STATUS_ALLOC;
        init_waitqueue_head(&req->wq);
        INIT_LIST_HEAD(&req->req_list);
index bac8dad..b21c3c2 100644 (file)
@@ -685,9 +685,9 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
                goto error;
 
        /* Create the Completion Queue */
-       rdma->cq = ib_alloc_cq(rdma->cm_id->device, client,
-                       opts.sq_depth + opts.rq_depth + 1,
-                       0, IB_POLL_SOFTIRQ);
+       rdma->cq = ib_alloc_cq_any(rdma->cm_id->device, client,
+                                  opts.sq_depth + opts.rq_depth + 1,
+                                  IB_POLL_SOFTIRQ);
        if (IS_ERR(rdma->cq))
                goto error;
 
index 4eeea4d..2d56824 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/nsproxy.h>
 #include <linux/parser.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/statfs.h>
@@ -185,18 +186,34 @@ int ceph_compare_options(struct ceph_options *new_opt,
 }
 EXPORT_SYMBOL(ceph_compare_options);
 
+/*
+ * kvmalloc() doesn't fall back to the vmalloc allocator unless flags are
+ * compatible with (a superset of) GFP_KERNEL.  This is because while the
+ * actual pages are allocated with the specified flags, the page table pages
+ * are always allocated with GFP_KERNEL.  map_vm_area() doesn't even take
+ * flags because GFP_KERNEL is hard-coded in {p4d,pud,pmd,pte}_alloc().
+ *
+ * ceph_kvmalloc() may be called with GFP_KERNEL, GFP_NOFS or GFP_NOIO.
+ */
 void *ceph_kvmalloc(size_t size, gfp_t flags)
 {
-       if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
-               void *ptr = kmalloc(size, flags | __GFP_NOWARN);
-               if (ptr)
-                       return ptr;
+       void *p;
+
+       if ((flags & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) {
+               p = kvmalloc(size, flags);
+       } else if ((flags & (__GFP_IO | __GFP_FS)) == __GFP_IO) {
+               unsigned int nofs_flag = memalloc_nofs_save();
+               p = kvmalloc(size, GFP_KERNEL);
+               memalloc_nofs_restore(nofs_flag);
+       } else {
+               unsigned int noio_flag = memalloc_noio_save();
+               p = kvmalloc(size, GFP_KERNEL);
+               memalloc_noio_restore(noio_flag);
        }
 
-       return __vmalloc(size, flags, PAGE_KERNEL);
+       return p;
 }
 
-
 static int parse_fsid(const char *str, struct ceph_fsid *fsid)
 {
        int i = 0;
@@ -694,6 +711,14 @@ void ceph_destroy_client(struct ceph_client *client)
 }
 EXPORT_SYMBOL(ceph_destroy_client);
 
+void ceph_reset_client_addr(struct ceph_client *client)
+{
+       ceph_messenger_reset_nonce(&client->msgr);
+       ceph_monc_reopen_session(&client->monc);
+       ceph_osdc_reopen_osds(&client->osdc);
+}
+EXPORT_SYMBOL(ceph_reset_client_addr);
+
 /*
  * true if we have the mon map (and have thus joined the cluster)
  */
index 962f521..e4cb3db 100644 (file)
@@ -3031,6 +3031,12 @@ static void con_fault(struct ceph_connection *con)
 }
 
 
+void ceph_messenger_reset_nonce(struct ceph_messenger *msgr)
+{
+       u32 nonce = le32_to_cpu(msgr->inst.addr.nonce) + 1000000;
+       msgr->inst.addr.nonce = cpu_to_le32(nonce);
+       encode_my_addr(msgr);
+}
 
 /*
  * initialize a new messenger instance
index 0520bf9..7256c40 100644 (file)
@@ -213,6 +213,13 @@ static void reopen_session(struct ceph_mon_client *monc)
        __open_session(monc);
 }
 
+void ceph_monc_reopen_session(struct ceph_mon_client *monc)
+{
+       mutex_lock(&monc->mutex);
+       reopen_session(monc);
+       mutex_unlock(&monc->mutex);
+}
+
 static void un_backoff(struct ceph_mon_client *monc)
 {
        monc->hunt_mult /= 2; /* reduce by 50% */
index 78ae6e8..ba45b07 100644 (file)
@@ -841,6 +841,7 @@ int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
        struct ceph_pagelist *pagelist;
        size_t payload_len = 0;
        size_t size;
+       int ret;
 
        op = _osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0);
 
@@ -852,20 +853,27 @@ int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
        size = strlen(class);
        BUG_ON(size > (size_t) U8_MAX);
        op->cls.class_len = size;
-       ceph_pagelist_append(pagelist, class, size);
+       ret = ceph_pagelist_append(pagelist, class, size);
+       if (ret)
+               goto err_pagelist_free;
        payload_len += size;
 
        op->cls.method_name = method;
        size = strlen(method);
        BUG_ON(size > (size_t) U8_MAX);
        op->cls.method_len = size;
-       ceph_pagelist_append(pagelist, method, size);
+       ret = ceph_pagelist_append(pagelist, method, size);
+       if (ret)
+               goto err_pagelist_free;
        payload_len += size;
 
        osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist);
-
        op->indata_len = payload_len;
        return 0;
+
+err_pagelist_free:
+       ceph_pagelist_release(pagelist);
+       return ret;
 }
 EXPORT_SYMBOL(osd_req_op_cls_init);
 
@@ -877,6 +885,7 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
                                                      opcode, 0);
        struct ceph_pagelist *pagelist;
        size_t payload_len;
+       int ret;
 
        BUG_ON(opcode != CEPH_OSD_OP_SETXATTR && opcode != CEPH_OSD_OP_CMPXATTR);
 
@@ -886,10 +895,14 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
 
        payload_len = strlen(name);
        op->xattr.name_len = payload_len;
-       ceph_pagelist_append(pagelist, name, payload_len);
+       ret = ceph_pagelist_append(pagelist, name, payload_len);
+       if (ret)
+               goto err_pagelist_free;
 
        op->xattr.value_len = size;
-       ceph_pagelist_append(pagelist, value, size);
+       ret = ceph_pagelist_append(pagelist, value, size);
+       if (ret)
+               goto err_pagelist_free;
        payload_len += size;
 
        op->xattr.cmp_op = cmp_op;
@@ -898,6 +911,10 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
        ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist);
        op->indata_len = payload_len;
        return 0;
+
+err_pagelist_free:
+       ceph_pagelist_release(pagelist);
+       return ret;
 }
 EXPORT_SYMBOL(osd_req_op_xattr_init);
 
@@ -1488,7 +1505,6 @@ enum calc_target_result {
 
 static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
                                           struct ceph_osd_request_target *t,
-                                          struct ceph_connection *con,
                                           bool any_change)
 {
        struct ceph_pg_pool_info *pi;
@@ -2272,7 +2288,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
        dout("%s req %p wrlocked %d\n", __func__, req, wrlocked);
 
 again:
-       ct_res = calc_target(osdc, &req->r_t, NULL, false);
+       ct_res = calc_target(osdc, &req->r_t, false);
        if (ct_res == CALC_TARGET_POOL_DNE && !wrlocked)
                goto promote;
 
@@ -2476,6 +2492,14 @@ void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err)
 }
 EXPORT_SYMBOL(ceph_osdc_abort_requests);
 
+void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc)
+{
+       down_write(&osdc->lock);
+       osdc->abort_err = 0;
+       up_write(&osdc->lock);
+}
+EXPORT_SYMBOL(ceph_osdc_clear_abort_err);
+
 static void update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb)
 {
        if (likely(eb > osdc->epoch_barrier)) {
@@ -3087,7 +3111,7 @@ static void linger_submit(struct ceph_osd_linger_request *lreq)
                lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id;
        }
 
-       calc_target(osdc, &lreq->t, NULL, false);
+       calc_target(osdc, &lreq->t, false);
        osd = lookup_create_osd(osdc, lreq->t.osd, true);
        link_linger(osd, lreq);
 
@@ -3704,7 +3728,7 @@ recalc_linger_target(struct ceph_osd_linger_request *lreq)
        struct ceph_osd_client *osdc = lreq->osdc;
        enum calc_target_result ct_res;
 
-       ct_res = calc_target(osdc, &lreq->t, NULL, true);
+       ct_res = calc_target(osdc, &lreq->t, true);
        if (ct_res == CALC_TARGET_NEED_RESEND) {
                struct ceph_osd *osd;
 
@@ -3776,8 +3800,7 @@ static void scan_requests(struct ceph_osd *osd,
                n = rb_next(n); /* unlink_request(), check_pool_dne() */
 
                dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
-               ct_res = calc_target(osdc, &req->r_t, &req->r_osd->o_con,
-                                    false);
+               ct_res = calc_target(osdc, &req->r_t, false);
                switch (ct_res) {
                case CALC_TARGET_NO_ACTION:
                        force_resend_writes = cleared_full ||
@@ -3886,7 +3909,7 @@ static void kick_requests(struct ceph_osd_client *osdc,
                n = rb_next(n);
 
                if (req->r_t.epoch < osdc->osdmap->epoch) {
-                       ct_res = calc_target(osdc, &req->r_t, NULL, false);
+                       ct_res = calc_target(osdc, &req->r_t, false);
                        if (ct_res == CALC_TARGET_POOL_DNE) {
                                erase_request(need_resend, req);
                                check_pool_dne(req);
@@ -5087,6 +5110,24 @@ out_put_req:
 EXPORT_SYMBOL(ceph_osdc_call);
 
 /*
+ * reset all osd connections
+ */
+void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc)
+{
+       struct rb_node *n;
+
+       down_write(&osdc->lock);
+       for (n = rb_first(&osdc->osds); n; ) {
+               struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
+
+               n = rb_next(n);
+               if (!reopen_osd(osd))
+                       kick_osd_requests(osd);
+       }
+       up_write(&osdc->lock);
+}
+
+/*
  * init, shutdown
  */
 int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
index 9043790..4e0de14 100644 (file)
@@ -973,11 +973,11 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
                                 struct ceph_pg_pool_info, node);
                __remove_pg_pool(&map->pg_pools, pi);
        }
-       kfree(map->osd_state);
-       kfree(map->osd_weight);
-       kfree(map->osd_addr);
-       kfree(map->osd_primary_affinity);
-       kfree(map->crush_workspace);
+       kvfree(map->osd_state);
+       kvfree(map->osd_weight);
+       kvfree(map->osd_addr);
+       kvfree(map->osd_primary_affinity);
+       kvfree(map->crush_workspace);
        kfree(map);
 }
 
@@ -986,28 +986,41 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
  *
  * The new elements are properly initialized.
  */
-static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
+static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max)
 {
        u32 *state;
        u32 *weight;
        struct ceph_entity_addr *addr;
+       u32 to_copy;
        int i;
 
-       state = krealloc(map->osd_state, max*sizeof(*state), GFP_NOFS);
-       if (!state)
-               return -ENOMEM;
-       map->osd_state = state;
+       dout("%s old %u new %u\n", __func__, map->max_osd, max);
+       if (max == map->max_osd)
+               return 0;
 
-       weight = krealloc(map->osd_weight, max*sizeof(*weight), GFP_NOFS);
-       if (!weight)
+       state = ceph_kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS);
+       weight = ceph_kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS);
+       addr = ceph_kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS);
+       if (!state || !weight || !addr) {
+               kvfree(state);
+               kvfree(weight);
+               kvfree(addr);
                return -ENOMEM;
-       map->osd_weight = weight;
+       }
 
-       addr = krealloc(map->osd_addr, max*sizeof(*addr), GFP_NOFS);
-       if (!addr)
-               return -ENOMEM;
-       map->osd_addr = addr;
+       to_copy = min(map->max_osd, max);
+       if (map->osd_state) {
+               memcpy(state, map->osd_state, to_copy * sizeof(*state));
+               memcpy(weight, map->osd_weight, to_copy * sizeof(*weight));
+               memcpy(addr, map->osd_addr, to_copy * sizeof(*addr));
+               kvfree(map->osd_state);
+               kvfree(map->osd_weight);
+               kvfree(map->osd_addr);
+       }
 
+       map->osd_state = state;
+       map->osd_weight = weight;
+       map->osd_addr = addr;
        for (i = map->max_osd; i < max; i++) {
                map->osd_state[i] = 0;
                map->osd_weight[i] = CEPH_OSD_OUT;
@@ -1017,12 +1030,16 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
        if (map->osd_primary_affinity) {
                u32 *affinity;
 
-               affinity = krealloc(map->osd_primary_affinity,
-                                   max*sizeof(*affinity), GFP_NOFS);
+               affinity = ceph_kvmalloc(array_size(max, sizeof(*affinity)),
+                                        GFP_NOFS);
                if (!affinity)
                        return -ENOMEM;
-               map->osd_primary_affinity = affinity;
 
+               memcpy(affinity, map->osd_primary_affinity,
+                      to_copy * sizeof(*affinity));
+               kvfree(map->osd_primary_affinity);
+
+               map->osd_primary_affinity = affinity;
                for (i = map->max_osd; i < max; i++)
                        map->osd_primary_affinity[i] =
                            CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
@@ -1043,7 +1060,7 @@ static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush)
 
        work_size = crush_work_size(crush, CEPH_PG_MAX_SIZE);
        dout("%s work_size %zu bytes\n", __func__, work_size);
-       workspace = kmalloc(work_size, GFP_NOIO);
+       workspace = ceph_kvmalloc(work_size, GFP_NOIO);
        if (!workspace) {
                crush_destroy(crush);
                return -ENOMEM;
@@ -1052,7 +1069,7 @@ static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush)
 
        if (map->crush)
                crush_destroy(map->crush);
-       kfree(map->crush_workspace);
+       kvfree(map->crush_workspace);
        map->crush = crush;
        map->crush_workspace = workspace;
        return 0;
@@ -1298,9 +1315,9 @@ static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff)
        if (!map->osd_primary_affinity) {
                int i;
 
-               map->osd_primary_affinity = kmalloc_array(map->max_osd,
-                                                         sizeof(u32),
-                                                         GFP_NOFS);
+               map->osd_primary_affinity = ceph_kvmalloc(
+                   array_size(map->max_osd, sizeof(*map->osd_primary_affinity)),
+                   GFP_NOFS);
                if (!map->osd_primary_affinity)
                        return -ENOMEM;
 
@@ -1321,7 +1338,7 @@ static int decode_primary_affinity(void **p, void *end,
 
        ceph_decode_32_safe(p, end, len, e_inval);
        if (len == 0) {
-               kfree(map->osd_primary_affinity);
+               kvfree(map->osd_primary_affinity);
                map->osd_primary_affinity = NULL;
                return 0;
        }
index 4ce42c6..d75fddc 100644 (file)
@@ -1960,7 +1960,7 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred,
 
        if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len))
                goto unwrap_failed;
-       if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset))
+       if (xdr_buf_read_mic(rcv_buf, &mic, mic_offset))
                goto unwrap_failed;
        maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
        if (maj_stat == GSS_S_CONTEXT_EXPIRED)
index 6f1528f..a349094 100644 (file)
@@ -373,7 +373,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd)
        spin_lock(&cache_list_lock);
        cd->nextcheck = 0;
        cd->entries = 0;
-       atomic_set(&cd->readers, 0);
+       atomic_set(&cd->writers, 0);
        cd->last_close = 0;
        cd->last_warn = -1;
        list_add(&cd->others, &cache_list);
@@ -1029,11 +1029,13 @@ static int cache_open(struct inode *inode, struct file *filp,
                }
                rp->offset = 0;
                rp->q.reader = 1;
-               atomic_inc(&cd->readers);
+
                spin_lock(&queue_lock);
                list_add(&rp->q.list, &cd->queue);
                spin_unlock(&queue_lock);
        }
+       if (filp->f_mode & FMODE_WRITE)
+               atomic_inc(&cd->writers);
        filp->private_data = rp;
        return 0;
 }
@@ -1062,8 +1064,10 @@ static int cache_release(struct inode *inode, struct file *filp,
                filp->private_data = NULL;
                kfree(rp);
 
+       }
+       if (filp->f_mode & FMODE_WRITE) {
+               atomic_dec(&cd->writers);
                cd->last_close = seconds_since_boot();
-               atomic_dec(&cd->readers);
        }
        module_put(cd->owner);
        return 0;
@@ -1171,7 +1175,7 @@ static void warn_no_listener(struct cache_detail *detail)
 
 static bool cache_listeners_exist(struct cache_detail *detail)
 {
-       if (atomic_read(&detail->readers))
+       if (atomic_read(&detail->writers))
                return true;
        if (detail->last_close == 0)
                /* This cache was never opened */
@@ -1520,6 +1524,9 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
        cd->nextcheck = now;
        cache_flush();
 
+       if (cd->flush)
+               cd->flush();
+
        *ppos += count;
        return count;
 }
index a07b516..f7f7856 100644 (file)
@@ -1837,7 +1837,7 @@ call_allocate(struct rpc_task *task)
                return;
        }
 
-       rpc_exit(task, -ERESTARTSYS);
+       rpc_call_rpcerror(task, -ERESTARTSYS);
 }
 
 static int
@@ -1862,6 +1862,7 @@ rpc_xdr_encode(struct rpc_task *task)
                     req->rq_rbuffer,
                     req->rq_rcvsize);
 
+       req->rq_reply_bytes_recvd = 0;
        req->rq_snd_buf.head[0].iov_len = 0;
        xdr_init_encode(&xdr, &req->rq_snd_buf,
                        req->rq_snd_buf.head[0].iov_base, req);
@@ -1881,6 +1882,8 @@ call_encode(struct rpc_task *task)
        if (!rpc_task_need_encode(task))
                goto out;
        dprint_status(task);
+       /* Dequeue task from the receive queue while we're encoding */
+       xprt_request_dequeue_xprt(task);
        /* Encode here so that rpcsec_gss can use correct sequence number. */
        rpc_xdr_encode(task);
        /* Did the encode result in an error condition? */
@@ -2479,6 +2482,7 @@ call_decode(struct rpc_task *task)
        struct rpc_clnt *clnt = task->tk_client;
        struct rpc_rqst *req = task->tk_rqstp;
        struct xdr_stream xdr;
+       int err;
 
        dprint_status(task);
 
@@ -2501,6 +2505,15 @@ call_decode(struct rpc_task *task)
         * before it changed req->rq_reply_bytes_recvd.
         */
        smp_rmb();
+
+       /*
+        * Did we ever call xprt_complete_rqst()? If not, we should assume
+        * the message is incomplete.
+        */
+       err = -EAGAIN;
+       if (!req->rq_reply_bytes_recvd)
+               goto out;
+
        req->rq_rcv_buf.len = req->rq_private_buf.len;
 
        /* Check that the softirq receive buffer is valid */
@@ -2509,7 +2522,9 @@ call_decode(struct rpc_task *task)
 
        xdr_init_decode(&xdr, &req->rq_rcv_buf,
                        req->rq_rcv_buf.head[0].iov_base, req);
-       switch (rpc_decode_header(task, &xdr)) {
+       err = rpc_decode_header(task, &xdr);
+out:
+       switch (err) {
        case 0:
                task->tk_action = rpc_exit_task;
                task->tk_status = rpcauth_unwrap_resp(task, &xdr);
@@ -2518,9 +2533,6 @@ call_decode(struct rpc_task *task)
                return;
        case -EAGAIN:
                task->tk_status = 0;
-               xdr_free_bvec(&req->rq_rcv_buf);
-               req->rq_reply_bytes_recvd = 0;
-               req->rq_rcv_buf.len = 0;
                if (task->tk_client->cl_discrtry)
                        xprt_conditional_disconnect(req->rq_xprt,
                                                    req->rq_connect_cookie);
@@ -2561,7 +2573,7 @@ rpc_encode_header(struct rpc_task *task, struct xdr_stream *xdr)
        return 0;
 out_fail:
        trace_rpc_bad_callhdr(task);
-       rpc_exit(task, error);
+       rpc_call_rpcerror(task, error);
        return error;
 }
 
@@ -2628,7 +2640,7 @@ out_garbage:
                return -EAGAIN;
        }
 out_err:
-       rpc_exit(task, error);
+       rpc_call_rpcerror(task, error);
        return error;
 
 out_unparsable:
index 1f275ab..360afe1 100644 (file)
@@ -541,33 +541,14 @@ rpc_wake_up_task_on_wq_queue_action_locked(struct workqueue_struct *wq,
        return NULL;
 }
 
-static void
-rpc_wake_up_task_on_wq_queue_locked(struct workqueue_struct *wq,
-               struct rpc_wait_queue *queue, struct rpc_task *task)
-{
-       rpc_wake_up_task_on_wq_queue_action_locked(wq, queue, task, NULL, NULL);
-}
-
 /*
  * Wake up a queued task while the queue lock is being held
  */
-static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task)
-{
-       rpc_wake_up_task_on_wq_queue_locked(rpciod_workqueue, queue, task);
-}
-
-/*
- * Wake up a task on a specific queue
- */
-void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
-               struct rpc_wait_queue *queue,
-               struct rpc_task *task)
+static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue,
+                                         struct rpc_task *task)
 {
-       if (!RPC_IS_QUEUED(task))
-               return;
-       spin_lock(&queue->lock);
-       rpc_wake_up_task_on_wq_queue_locked(wq, queue, task);
-       spin_unlock(&queue->lock);
+       rpc_wake_up_task_on_wq_queue_action_locked(rpciod_workqueue, queue,
+                                                  task, NULL, NULL);
 }
 
 /*
@@ -930,8 +911,10 @@ static void __rpc_execute(struct rpc_task *task)
                /*
                 * Signalled tasks should exit rather than sleep.
                 */
-               if (RPC_SIGNALLED(task))
+               if (RPC_SIGNALLED(task)) {
+                       task->tk_rpc_status = -ERESTARTSYS;
                        rpc_exit(task, -ERESTARTSYS);
+               }
 
                /*
                 * The queue->lock protects against races with
@@ -967,6 +950,7 @@ static void __rpc_execute(struct rpc_task *task)
                         */
                        dprintk("RPC: %5u got signal\n", task->tk_pid);
                        set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
+                       task->tk_rpc_status = -ERESTARTSYS;
                        rpc_exit(task, -ERESTARTSYS);
                }
                dprintk("RPC: %5u sync task resuming\n", task->tk_pid);
index 220b799..d11b705 100644 (file)
@@ -1233,8 +1233,8 @@ svc_generic_init_request(struct svc_rqst *rqstp,
 
        if (rqstp->rq_vers >= progp->pg_nvers )
                goto err_bad_vers;
-         versp = progp->pg_vers[rqstp->rq_vers];
-         if (!versp)
+       versp = progp->pg_vers[rqstp->rq_vers];
+       if (!versp)
                goto err_bad_vers;
 
        /*
index 48c93b9..14ba9e7 100644 (file)
@@ -560,7 +560,7 @@ EXPORT_SYMBOL_GPL(xdr_init_encode);
  * required at the end of encoding, or any other time when the xdr_buf
  * data might be read.
  */
-void xdr_commit_encode(struct xdr_stream *xdr)
+inline void xdr_commit_encode(struct xdr_stream *xdr)
 {
        int shift = xdr->scratch.iov_len;
        void *page;
@@ -1236,43 +1236,60 @@ xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
 }
 EXPORT_SYMBOL_GPL(xdr_encode_word);
 
-/* If the netobj starting offset bytes from the start of xdr_buf is contained
- * entirely in the head or the tail, set object to point to it; otherwise
- * try to find space for it at the end of the tail, copy it there, and
- * set obj to point to it. */
-int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, unsigned int offset)
+/**
+ * xdr_buf_read_mic() - obtain the address of the GSS mic from xdr buf
+ * @buf: pointer to buffer containing a mic
+ * @mic: on success, returns the address of the mic
+ * @offset: the offset in buf where mic may be found
+ *
+ * This function may modify the xdr buf if the mic is found to be straddling
+ * a boundary between head, pages, and tail.  On success the mic can be read
+ * from the address returned.  There is no need to free the mic.
+ *
+ * Return: Success returns 0, otherwise an integer error.
+ */
+int xdr_buf_read_mic(struct xdr_buf *buf, struct xdr_netobj *mic, unsigned int offset)
 {
        struct xdr_buf subbuf;
+       unsigned int boundary;
 
-       if (xdr_decode_word(buf, offset, &obj->len))
+       if (xdr_decode_word(buf, offset, &mic->len))
                return -EFAULT;
-       if (xdr_buf_subsegment(buf, &subbuf, offset + 4, obj->len))
+       offset += 4;
+
+       /* Is the mic partially in the head? */
+       boundary = buf->head[0].iov_len;
+       if (offset < boundary && (offset + mic->len) > boundary)
+               xdr_shift_buf(buf, boundary - offset);
+
+       /* Is the mic partially in the pages? */
+       boundary += buf->page_len;
+       if (offset < boundary && (offset + mic->len) > boundary)
+               xdr_shrink_pagelen(buf, boundary - offset);
+
+       if (xdr_buf_subsegment(buf, &subbuf, offset, mic->len))
                return -EFAULT;
 
-       /* Is the obj contained entirely in the head? */
-       obj->data = subbuf.head[0].iov_base;
-       if (subbuf.head[0].iov_len == obj->len)
+       /* Is the mic contained entirely in the head? */
+       mic->data = subbuf.head[0].iov_base;
+       if (subbuf.head[0].iov_len == mic->len)
                return 0;
-       /* ..or is the obj contained entirely in the tail? */
-       obj->data = subbuf.tail[0].iov_base;
-       if (subbuf.tail[0].iov_len == obj->len)
+       /* ..or is the mic contained entirely in the tail? */
+       mic->data = subbuf.tail[0].iov_base;
+       if (subbuf.tail[0].iov_len == mic->len)
                return 0;
 
-       /* use end of tail as storage for obj:
-        * (We don't copy to the beginning because then we'd have
-        * to worry about doing a potentially overlapping copy.
-        * This assumes the object is at most half the length of the
-        * tail.) */
-       if (obj->len > buf->buflen - buf->len)
+       /* Find a contiguous area in @buf to hold all of @mic */
+       if (mic->len > buf->buflen - buf->len)
                return -ENOMEM;
        if (buf->tail[0].iov_len != 0)
-               obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len;
+               mic->data = buf->tail[0].iov_base + buf->tail[0].iov_len;
        else
-               obj->data = buf->head[0].iov_base + buf->head[0].iov_len;
-       __read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len);
+               mic->data = buf->head[0].iov_base + buf->head[0].iov_len;
+       __read_bytes_from_xdr_buf(&subbuf, mic->data, mic->len);
        return 0;
 }
-EXPORT_SYMBOL_GPL(xdr_buf_read_netobj);
+EXPORT_SYMBOL_GPL(xdr_buf_read_mic);
 
 /* Returns 0 on success, or else a negative error code. */
 static int
index 2e71f54..8a45b3c 100644 (file)
@@ -456,6 +456,12 @@ void xprt_release_rqst_cong(struct rpc_task *task)
 }
 EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
 
+static void xprt_clear_congestion_window_wait_locked(struct rpc_xprt *xprt)
+{
+       if (test_and_clear_bit(XPRT_CWND_WAIT, &xprt->state))
+               __xprt_lock_write_next_cong(xprt);
+}
+
 /*
  * Clear the congestion window wait flag and wake up the next
  * entry on xprt->sending
@@ -671,6 +677,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt)
        spin_lock(&xprt->transport_lock);
        xprt_clear_connected(xprt);
        xprt_clear_write_space_locked(xprt);
+       xprt_clear_congestion_window_wait_locked(xprt);
        xprt_wake_pending_tasks(xprt, -ENOTCONN);
        spin_unlock(&xprt->transport_lock);
 }
@@ -1324,6 +1331,36 @@ xprt_request_dequeue_transmit(struct rpc_task *task)
 }
 
 /**
+ * xprt_request_dequeue_xprt - remove a task from the transmit+receive queue
+ * @task: pointer to rpc_task
+ *
+ * Remove a task from the transmit and receive queues, and ensure that
+ * it is not pinned by the receive work item.
+ */
+void
+xprt_request_dequeue_xprt(struct rpc_task *task)
+{
+       struct rpc_rqst *req = task->tk_rqstp;
+       struct rpc_xprt *xprt = req->rq_xprt;
+
+       if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) ||
+           test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) ||
+           xprt_is_pinned_rqst(req)) {
+               spin_lock(&xprt->queue_lock);
+               xprt_request_dequeue_transmit_locked(task);
+               xprt_request_dequeue_receive_locked(task);
+               while (xprt_is_pinned_rqst(req)) {
+                       set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
+                       spin_unlock(&xprt->queue_lock);
+                       xprt_wait_on_pinned_rqst(req);
+                       spin_lock(&xprt->queue_lock);
+                       clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
+               }
+               spin_unlock(&xprt->queue_lock);
+       }
+}
+
+/**
  * xprt_request_prepare - prepare an encoded request for transport
  * @req: pointer to rpc_rqst
  *
@@ -1747,28 +1784,6 @@ void xprt_retry_reserve(struct rpc_task *task)
        xprt_do_reserve(xprt, task);
 }
 
-static void
-xprt_request_dequeue_all(struct rpc_task *task, struct rpc_rqst *req)
-{
-       struct rpc_xprt *xprt = req->rq_xprt;
-
-       if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) ||
-           test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) ||
-           xprt_is_pinned_rqst(req)) {
-               spin_lock(&xprt->queue_lock);
-               xprt_request_dequeue_transmit_locked(task);
-               xprt_request_dequeue_receive_locked(task);
-               while (xprt_is_pinned_rqst(req)) {
-                       set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
-                       spin_unlock(&xprt->queue_lock);
-                       xprt_wait_on_pinned_rqst(req);
-                       spin_lock(&xprt->queue_lock);
-                       clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
-               }
-               spin_unlock(&xprt->queue_lock);
-       }
-}
-
 /**
  * xprt_release - release an RPC request slot
  * @task: task which is finished with the slot
@@ -1788,7 +1803,7 @@ void xprt_release(struct rpc_task *task)
        }
 
        xprt = req->rq_xprt;
-       xprt_request_dequeue_all(task, req);
+       xprt_request_dequeue_xprt(task);
        spin_lock(&xprt->transport_lock);
        xprt->ops->release_xprt(xprt, task);
        if (xprt->ops->release_request)
index 59e624b..50e075f 100644 (file)
@@ -54,9 +54,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
 
 unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *xprt)
 {
-       struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
-
-       return r_xprt->rx_buf.rb_bc_srv_max_requests;
+       return RPCRDMA_BACKWARD_WRS >> 1;
 }
 
 static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
index 0b6dad7..30065a2 100644 (file)
@@ -7,67 +7,37 @@
 /* Lightweight memory registration using Fast Registration Work
  * Requests (FRWR).
  *
- * FRWR features ordered asynchronous registration and deregistration
- * of arbitrarily sized memory regions. This is the fastest and safest
+ * FRWR features ordered asynchronous registration and invalidation
+ * of arbitrarily-sized memory regions. This is the fastest and safest
  * but most complex memory registration mode.
  */
 
 /* Normal operation
  *
- * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
+ * A Memory Region is prepared for RDMA Read or Write using a FAST_REG
  * Work Request (frwr_map). When the RDMA operation is finished, this
  * Memory Region is invalidated using a LOCAL_INV Work Request
- * (frwr_unmap_sync).
+ * (frwr_unmap_async and frwr_unmap_sync).
  *
- * Typically these Work Requests are not signaled, and neither are RDMA
- * SEND Work Requests (with the exception of signaling occasionally to
- * prevent provider work queue overflows). This greatly reduces HCA
+ * Typically FAST_REG Work Requests are not signaled, and neither are
+ * RDMA Send Work Requests (with the exception of signaling occasionally
+ * to prevent provider work queue overflows). This greatly reduces HCA
  * interrupt workload.
- *
- * As an optimization, frwr_unmap marks MRs INVALID before the
- * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
- * rb_mrs immediately so that no work (like managing a linked list
- * under a spinlock) is needed in the completion upcall.
- *
- * But this means that frwr_map() can occasionally encounter an MR
- * that is INVALID but the LOCAL_INV WR has not completed. Work Queue
- * ordering prevents a subsequent FAST_REG WR from executing against
- * that MR while it is still being invalidated.
  */
 
 /* Transport recovery
  *
- * ->op_map and the transport connect worker cannot run at the same
- * time, but ->op_unmap can fire while the transport connect worker
- * is running. Thus MR recovery is handled in ->op_map, to guarantee
- * that recovered MRs are owned by a sending RPC, and not one where
- * ->op_unmap could fire at the same time transport reconnect is
- * being done.
- *
- * When the underlying transport disconnects, MRs are left in one of
- * four states:
- *
- * INVALID:    The MR was not in use before the QP entered ERROR state.
- *
- * VALID:      The MR was registered before the QP entered ERROR state.
- *
- * FLUSHED_FR: The MR was being registered when the QP entered ERROR
- *             state, and the pending WR was flushed.
- *
- * FLUSHED_LI: The MR was being invalidated when the QP entered ERROR
- *             state, and the pending WR was flushed.
- *
- * When frwr_map encounters FLUSHED and VALID MRs, they are recovered
- * with ib_dereg_mr and then are re-initialized. Because MR recovery
- * allocates fresh resources, it is deferred to a workqueue, and the
- * recovered MRs are placed back on the rb_mrs list when recovery is
- * complete. frwr_map allocates another MR for the current RPC while
- * the broken MR is reset.
- *
- * To ensure that frwr_map doesn't encounter an MR that is marked
- * INVALID but that is about to be flushed due to a previous transport
- * disconnect, the transport connect worker attempts to drain all
- * pending send queue WRs before the transport is reconnected.
+ * frwr_map and frwr_unmap_* cannot run at the same time the transport
+ * connect worker is running. The connect worker holds the transport
+ * send lock, just as ->send_request does. This prevents frwr_map and
+ * the connect worker from running concurrently. When a connection is
+ * closed, the Receive completion queue is drained before allowing
+ * the connect worker to get control. This prevents frwr_unmap and the
+ * connect worker from running concurrently.
+ *
+ * When the underlying transport disconnects, MRs that are in flight
+ * are flushed and are likely unusable. Thus all flushed MRs are
+ * destroyed. New MRs are created on demand.
  */
 
 #include <linux/sunrpc/rpc_rdma.h>
@@ -118,15 +88,8 @@ void frwr_release_mr(struct rpcrdma_mr *mr)
        kfree(mr);
 }
 
-/* MRs are dynamically allocated, so simply clean up and release the MR.
- * A replacement MR will subsequently be allocated on demand.
- */
-static void
-frwr_mr_recycle_worker(struct work_struct *work)
+static void frwr_mr_recycle(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
 {
-       struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
-       struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-
        trace_xprtrdma_mr_recycle(mr);
 
        if (mr->mr_dir != DMA_NONE) {
@@ -136,14 +99,40 @@ frwr_mr_recycle_worker(struct work_struct *work)
                mr->mr_dir = DMA_NONE;
        }
 
-       spin_lock(&r_xprt->rx_buf.rb_mrlock);
+       spin_lock(&r_xprt->rx_buf.rb_lock);
        list_del(&mr->mr_all);
        r_xprt->rx_stats.mrs_recycled++;
-       spin_unlock(&r_xprt->rx_buf.rb_mrlock);
+       spin_unlock(&r_xprt->rx_buf.rb_lock);
 
        frwr_release_mr(mr);
 }
 
+/* MRs are dynamically allocated, so simply clean up and release the MR.
+ * A replacement MR will subsequently be allocated on demand.
+ */
+static void
+frwr_mr_recycle_worker(struct work_struct *work)
+{
+       struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr,
+                                            mr_recycle);
+
+       frwr_mr_recycle(mr->mr_xprt, mr);
+}
+
+/* frwr_recycle - Discard MRs
+ * @req: request to reset
+ *
+ * Used after a reconnect. These MRs could be in flight, we can't
+ * tell. Safe thing to do is release them.
+ */
+void frwr_recycle(struct rpcrdma_req *req)
+{
+       struct rpcrdma_mr *mr;
+
+       while ((mr = rpcrdma_mr_pop(&req->rl_registered)))
+               frwr_mr_recycle(mr->mr_xprt, mr);
+}
+
 /* frwr_reset - Place MRs back on the free list
  * @req: request to reset
  *
@@ -156,12 +145,10 @@ frwr_mr_recycle_worker(struct work_struct *work)
  */
 void frwr_reset(struct rpcrdma_req *req)
 {
-       while (!list_empty(&req->rl_registered)) {
-               struct rpcrdma_mr *mr;
+       struct rpcrdma_mr *mr;
 
-               mr = rpcrdma_mr_pop(&req->rl_registered);
-               rpcrdma_mr_unmap_and_put(mr);
-       }
+       while ((mr = rpcrdma_mr_pop(&req->rl_registered)))
+               rpcrdma_mr_put(mr);
 }
 
 /**
@@ -179,11 +166,14 @@ int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
        struct ib_mr *frmr;
        int rc;
 
+       /* NB: ib_alloc_mr and device drivers typically allocate
+        *     memory with GFP_KERNEL.
+        */
        frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
        if (IS_ERR(frmr))
                goto out_mr_err;
 
-       sg = kcalloc(depth, sizeof(*sg), GFP_KERNEL);
+       sg = kcalloc(depth, sizeof(*sg), GFP_NOFS);
        if (!sg)
                goto out_list_err;
 
@@ -203,8 +193,6 @@ out_mr_err:
        return rc;
 
 out_list_err:
-       dprintk("RPC:       %s: sg allocation failure\n",
-               __func__);
        ib_dereg_mr(frmr);
        return -ENOMEM;
 }
@@ -290,8 +278,8 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep)
        ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
        ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
 
-       ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
-                               ia->ri_max_frwr_depth);
+       ia->ri_max_segs =
+               DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, ia->ri_max_frwr_depth);
        /* Reply chunks require segments for head and tail buffers */
        ia->ri_max_segs += 2;
        if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS)
@@ -323,31 +311,25 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
  * @nsegs: number of segments remaining
  * @writing: true when RDMA Write will be used
  * @xid: XID of RPC using the registered memory
- * @out: initialized MR
+ * @mr: MR to fill in
  *
  * Prepare a REG_MR Work Request to register a memory region
  * for remote access via RDMA READ or RDMA WRITE.
  *
  * Returns the next segment or a negative errno pointer.
- * On success, the prepared MR is planted in @out.
+ * On success, @mr is filled in.
  */
 struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
                                struct rpcrdma_mr_seg *seg,
                                int nsegs, bool writing, __be32 xid,
-                               struct rpcrdma_mr **out)
+                               struct rpcrdma_mr *mr)
 {
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-       bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
-       struct rpcrdma_mr *mr;
-       struct ib_mr *ibmr;
        struct ib_reg_wr *reg_wr;
+       struct ib_mr *ibmr;
        int i, n;
        u8 key;
 
-       mr = rpcrdma_mr_get(r_xprt);
-       if (!mr)
-               goto out_getmr_err;
-
        if (nsegs > ia->ri_max_frwr_depth)
                nsegs = ia->ri_max_frwr_depth;
        for (i = 0; i < nsegs;) {
@@ -362,7 +344,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
 
                ++seg;
                ++i;
-               if (holes_ok)
+               if (ia->ri_mrtype == IB_MR_TYPE_SG_GAPS)
                        continue;
                if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
                    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
@@ -397,22 +379,15 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
        mr->mr_offset = ibmr->iova;
        trace_xprtrdma_mr_map(mr);
 
-       *out = mr;
        return seg;
 
-out_getmr_err:
-       xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
-       return ERR_PTR(-EAGAIN);
-
 out_dmamap_err:
        mr->mr_dir = DMA_NONE;
        trace_xprtrdma_frwr_sgerr(mr, i);
-       rpcrdma_mr_put(mr);
        return ERR_PTR(-EIO);
 
 out_mapmr_err:
        trace_xprtrdma_frwr_maperr(mr, n);
-       rpcrdma_mr_recycle(mr);
        return ERR_PTR(-EIO);
 }
 
@@ -485,7 +460,7 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
                if (mr->mr_handle == rep->rr_inv_rkey) {
                        list_del_init(&mr->mr_list);
                        trace_xprtrdma_mr_remoteinv(mr);
-                       rpcrdma_mr_unmap_and_put(mr);
+                       rpcrdma_mr_put(mr);
                        break;  /* only one invalidated MR per RPC */
                }
 }
@@ -495,7 +470,7 @@ static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr)
        if (wc->status != IB_WC_SUCCESS)
                rpcrdma_mr_recycle(mr);
        else
-               rpcrdma_mr_unmap_and_put(mr);
+               rpcrdma_mr_put(mr);
 }
 
 /**
@@ -532,8 +507,8 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
 
        /* WARNING: Only wr_cqe and status are reliable at this point */
        trace_xprtrdma_wc_li_wake(wc, frwr);
-       complete(&frwr->fr_linv_done);
        __frwr_release_mr(wc, mr);
+       complete(&frwr->fr_linv_done);
 }
 
 /**
@@ -562,8 +537,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
         */
        frwr = NULL;
        prev = &first;
-       while (!list_empty(&req->rl_registered)) {
-               mr = rpcrdma_mr_pop(&req->rl_registered);
+       while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
 
                trace_xprtrdma_mr_localinv(mr);
                r_xprt->rx_stats.local_inv_needed++;
@@ -632,11 +606,15 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
        struct rpcrdma_frwr *frwr =
                container_of(cqe, struct rpcrdma_frwr, fr_cqe);
        struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+       struct rpcrdma_rep *rep = mr->mr_req->rl_reply;
 
        /* WARNING: Only wr_cqe and status are reliable at this point */
        trace_xprtrdma_wc_li_done(wc, frwr);
-       rpcrdma_complete_rqst(frwr->fr_req->rl_reply);
        __frwr_release_mr(wc, mr);
+
+       /* Ensure @rep is generated before __frwr_release_mr */
+       smp_rmb();
+       rpcrdma_complete_rqst(rep);
 }
 
 /**
@@ -662,15 +640,13 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
         */
        frwr = NULL;
        prev = &first;
-       while (!list_empty(&req->rl_registered)) {
-               mr = rpcrdma_mr_pop(&req->rl_registered);
+       while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
 
                trace_xprtrdma_mr_localinv(mr);
                r_xprt->rx_stats.local_inv_needed++;
 
                frwr = &mr->frwr;
                frwr->fr_cqe.done = frwr_wc_localinv;
-               frwr->fr_req = req;
                last = &frwr->fr_invwr;
                last->next = NULL;
                last->wr_cqe = &frwr->fr_cqe;
index 4345e69..b86b5fd 100644 (file)
@@ -342,6 +342,32 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
        return 0;
 }
 
+static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
+                                                struct rpcrdma_req *req,
+                                                struct rpcrdma_mr_seg *seg,
+                                                int nsegs, bool writing,
+                                                struct rpcrdma_mr **mr)
+{
+       *mr = rpcrdma_mr_pop(&req->rl_free_mrs);
+       if (!*mr) {
+               *mr = rpcrdma_mr_get(r_xprt);
+               if (!*mr)
+                       goto out_getmr_err;
+               trace_xprtrdma_mr_get(req);
+               (*mr)->mr_req = req;
+       }
+
+       rpcrdma_mr_push(*mr, &req->rl_registered);
+       return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr);
+
+out_getmr_err:
+       trace_xprtrdma_nomrs(req);
+       xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
+       if (r_xprt->rx_ep.rep_connected != -ENODEV)
+               schedule_work(&r_xprt->rx_buf.rb_refresh_worker);
+       return ERR_PTR(-EAGAIN);
+}
+
 /* Register and XDR encode the Read list. Supports encoding a list of read
  * segments that belong to a single read chunk.
  *
@@ -356,9 +382,10 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
  *
  * Only a single @pos value is currently supported.
  */
-static noinline int
-rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-                        struct rpc_rqst *rqst, enum rpcrdma_chunktype rtype)
+static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
+                                   struct rpcrdma_req *req,
+                                   struct rpc_rqst *rqst,
+                                   enum rpcrdma_chunktype rtype)
 {
        struct xdr_stream *xdr = &req->rl_stream;
        struct rpcrdma_mr_seg *seg;
@@ -379,10 +406,9 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
                return nsegs;
 
        do {
-               seg = frwr_map(r_xprt, seg, nsegs, false, rqst->rq_xid, &mr);
+               seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, false, &mr);
                if (IS_ERR(seg))
                        return PTR_ERR(seg);
-               rpcrdma_mr_push(mr, &req->rl_registered);
 
                if (encode_read_segment(xdr, mr, pos) < 0)
                        return -EMSGSIZE;
@@ -411,9 +437,10 @@ done:
  *
  * Only a single Write chunk is currently supported.
  */
-static noinline int
-rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-                         struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype)
+static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt,
+                                    struct rpcrdma_req *req,
+                                    struct rpc_rqst *rqst,
+                                    enum rpcrdma_chunktype wtype)
 {
        struct xdr_stream *xdr = &req->rl_stream;
        struct rpcrdma_mr_seg *seg;
@@ -440,10 +467,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 
        nchunks = 0;
        do {
-               seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr);
+               seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
                if (IS_ERR(seg))
                        return PTR_ERR(seg);
-               rpcrdma_mr_push(mr, &req->rl_registered);
 
                if (encode_rdma_segment(xdr, mr) < 0)
                        return -EMSGSIZE;
@@ -474,9 +500,10 @@ done:
  * Returns zero on success, or a negative errno if a failure occurred.
  * @xdr is advanced to the next position in the stream.
  */
-static noinline int
-rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-                          struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype)
+static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
+                                     struct rpcrdma_req *req,
+                                     struct rpc_rqst *rqst,
+                                     enum rpcrdma_chunktype wtype)
 {
        struct xdr_stream *xdr = &req->rl_stream;
        struct rpcrdma_mr_seg *seg;
@@ -501,10 +528,9 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 
        nchunks = 0;
        do {
-               seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr);
+               seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
                if (IS_ERR(seg))
                        return PTR_ERR(seg);
-               rpcrdma_mr_push(mr, &req->rl_registered);
 
                if (encode_rdma_segment(xdr, mr) < 0)
                        return -EMSGSIZE;
@@ -841,12 +867,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
         * chunks. Very likely the connection has been replaced,
         * so these registrations are invalid and unusable.
         */
-       while (unlikely(!list_empty(&req->rl_registered))) {
-               struct rpcrdma_mr *mr;
-
-               mr = rpcrdma_mr_pop(&req->rl_registered);
-               rpcrdma_mr_recycle(mr);
-       }
+       frwr_recycle(req);
 
        /* This implementation supports the following combinations
         * of chunk lists in one RPC-over-RDMA Call message:
@@ -1240,8 +1261,6 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
        struct rpc_rqst *rqst = rep->rr_rqst;
        int status;
 
-       xprt->reestablish_timeout = 0;
-
        switch (rep->rr_proc) {
        case rdma_msg:
                status = rpcrdma_decode_msg(r_xprt, rep, rqst);
@@ -1300,6 +1319,12 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
        u32 credits;
        __be32 *p;
 
+       /* Any data means we had a useful conversation, so
+        * we don't need to delay the next reconnect.
+        */
+       if (xprt->reestablish_timeout)
+               xprt->reestablish_timeout = 0;
+
        /* Fixed transport header fields */
        xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
                        rep->rr_hdrbuf.head[0].iov_base, NULL);
index abdb300..97bca50 100644 (file)
@@ -73,8 +73,6 @@ atomic_t rdma_stat_rq_prod;
 atomic_t rdma_stat_sq_poll;
 atomic_t rdma_stat_sq_prod;
 
-struct workqueue_struct *svc_rdma_wq;
-
 /*
  * This function implements reading and resetting an atomic_t stat
  * variable through read/write to a proc file. Any write to the file
@@ -230,7 +228,6 @@ static struct ctl_table svcrdma_root_table[] = {
 void svc_rdma_cleanup(void)
 {
        dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
-       destroy_workqueue(svc_rdma_wq);
        if (svcrdma_table_header) {
                unregister_sysctl_table(svcrdma_table_header);
                svcrdma_table_header = NULL;
@@ -246,10 +243,6 @@ int svc_rdma_init(void)
        dprintk("\tmax_bc_requests  : %u\n", svcrdma_max_bc_requests);
        dprintk("\tmax_inline       : %d\n", svcrdma_max_req_size);
 
-       svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
-       if (!svc_rdma_wq)
-               return -ENOMEM;
-
        if (!svcrdma_table_header)
                svcrdma_table_header =
                        register_sysctl_table(svcrdma_root_table);
index 65e2fb9..96bccd3 100644 (file)
@@ -172,9 +172,10 @@ static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
 void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
 {
        struct svc_rdma_recv_ctxt *ctxt;
+       struct llist_node *node;
 
-       while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts))) {
-               list_del(&ctxt->rc_list);
+       while ((node = llist_del_first(&rdma->sc_recv_ctxts))) {
+               ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
                svc_rdma_recv_ctxt_destroy(rdma, ctxt);
        }
 }
@@ -183,21 +184,18 @@ static struct svc_rdma_recv_ctxt *
 svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
 {
        struct svc_rdma_recv_ctxt *ctxt;
+       struct llist_node *node;
 
-       spin_lock(&rdma->sc_recv_lock);
-       ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts);
-       if (!ctxt)
+       node = llist_del_first(&rdma->sc_recv_ctxts);
+       if (!node)
                goto out_empty;
-       list_del(&ctxt->rc_list);
-       spin_unlock(&rdma->sc_recv_lock);
+       ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
 
 out:
        ctxt->rc_page_count = 0;
        return ctxt;
 
 out_empty:
-       spin_unlock(&rdma->sc_recv_lock);
-
        ctxt = svc_rdma_recv_ctxt_alloc(rdma);
        if (!ctxt)
                return NULL;
@@ -218,11 +216,9 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
        for (i = 0; i < ctxt->rc_page_count; i++)
                put_page(ctxt->rc_pages[i]);
 
-       if (!ctxt->rc_temp) {
-               spin_lock(&rdma->sc_recv_lock);
-               list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);
-               spin_unlock(&rdma->sc_recv_lock);
-       } else
+       if (!ctxt->rc_temp)
+               llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
+       else
                svc_rdma_recv_ctxt_destroy(rdma, ctxt);
 }
 
index 3fe6651..145a361 100644 (file)
@@ -140,14 +140,13 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
        INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
        INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
        INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
-       INIT_LIST_HEAD(&cma_xprt->sc_recv_ctxts);
+       init_llist_head(&cma_xprt->sc_recv_ctxts);
        INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
        init_waitqueue_head(&cma_xprt->sc_send_wait);
 
        spin_lock_init(&cma_xprt->sc_lock);
        spin_lock_init(&cma_xprt->sc_rq_dto_lock);
        spin_lock_init(&cma_xprt->sc_send_lock);
-       spin_lock_init(&cma_xprt->sc_recv_lock);
        spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
 
        /*
@@ -454,14 +453,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
                dprintk("svcrdma: error creating PD for connect request\n");
                goto errout;
        }
-       newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
-                                       0, IB_POLL_WORKQUEUE);
+       newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth,
+                                           IB_POLL_WORKQUEUE);
        if (IS_ERR(newxprt->sc_sq_cq)) {
                dprintk("svcrdma: error creating SQ CQ for connect request\n");
                goto errout;
        }
-       newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, rq_depth,
-                                       0, IB_POLL_WORKQUEUE);
+       newxprt->sc_rq_cq =
+               ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE);
        if (IS_ERR(newxprt->sc_rq_cq)) {
                dprintk("svcrdma: error creating RQ CQ for connect request\n");
                goto errout;
@@ -630,8 +629,9 @@ static void svc_rdma_free(struct svc_xprt *xprt)
 {
        struct svcxprt_rdma *rdma =
                container_of(xprt, struct svcxprt_rdma, sc_xprt);
+
        INIT_WORK(&rdma->sc_work, __svc_rdma_free);
-       queue_work(svc_rdma_wq, &rdma->sc_work);
+       schedule_work(&rdma->sc_work);
 }
 
 static int svc_rdma_has_wspace(struct svc_xprt *xprt)
index 2ec349e..160558b 100644 (file)
@@ -423,8 +423,6 @@ void xprt_rdma_close(struct rpc_xprt *xprt)
 
        if (ep->rep_connected == -ENODEV)
                return;
-       if (ep->rep_connected > 0)
-               xprt->reestablish_timeout = 0;
        rpcrdma_ep_disconnect(ep, ia);
 
        /* Prepare @xprt for the next connection by reinitializing
@@ -434,6 +432,7 @@ void xprt_rdma_close(struct rpc_xprt *xprt)
        xprt->cwnd = RPC_CWNDSHIFT;
 
 out:
+       xprt->reestablish_timeout = 0;
        ++xprt->connect_cookie;
        xprt_disconnect_done(xprt);
 }
@@ -494,9 +493,9 @@ xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
  * @reconnect_timeout: reconnect timeout after server disconnects
  *
  */
-static void xprt_rdma_tcp_set_connect_timeout(struct rpc_xprt *xprt,
-                                             unsigned long connect_timeout,
-                                             unsigned long reconnect_timeout)
+static void xprt_rdma_set_connect_timeout(struct rpc_xprt *xprt,
+                                         unsigned long connect_timeout,
+                                         unsigned long reconnect_timeout)
 {
        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 
@@ -571,6 +570,7 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
        return;
 
 out_sleep:
+       set_bit(XPRT_CONGESTED, &xprt->state);
        rpc_sleep_on(&xprt->backlog, task, NULL);
        task->tk_status = -EAGAIN;
 }
@@ -589,7 +589,8 @@ xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
 
        memset(rqst, 0, sizeof(*rqst));
        rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
-       rpc_wake_up_next(&xprt->backlog);
+       if (unlikely(!rpc_wake_up_next(&xprt->backlog)))
+               clear_bit(XPRT_CONGESTED, &xprt->state);
 }
 
 static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt,
@@ -803,7 +804,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = {
        .send_request           = xprt_rdma_send_request,
        .close                  = xprt_rdma_close,
        .destroy                = xprt_rdma_destroy,
-       .set_connect_timeout    = xprt_rdma_tcp_set_connect_timeout,
+       .set_connect_timeout    = xprt_rdma_set_connect_timeout,
        .print_stats            = xprt_rdma_print_stats,
        .enable_swap            = xprt_rdma_enable_swap,
        .disable_swap           = xprt_rdma_disable_swap,
index 805b1f3..3a90753 100644 (file)
@@ -53,6 +53,7 @@
 #include <linux/slab.h>
 #include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/svc_rdma.h>
+#include <linux/log2.h>
 
 #include <asm-generic/barrier.h>
 #include <asm/bitops.h>
  * internal functions
  */
 static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
+static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf);
 static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
 static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
+static void rpcrdma_mr_free(struct rpcrdma_mr *mr);
 static struct rpcrdma_regbuf *
 rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
                     gfp_t flags);
@@ -405,9 +408,8 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
        struct rpcrdma_ep *ep = &r_xprt->rx_ep;
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
        struct rpcrdma_req *req;
-       struct rpcrdma_rep *rep;
 
-       cancel_delayed_work_sync(&buf->rb_refresh_worker);
+       cancel_work_sync(&buf->rb_refresh_worker);
 
        /* This is similar to rpcrdma_ep_destroy, but:
         * - Don't cancel the connect worker.
@@ -429,8 +431,7 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
        /* The ULP is responsible for ensuring all DMA
         * mappings and MRs are gone.
         */
-       list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
-               rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
+       rpcrdma_reps_destroy(buf);
        list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
                rpcrdma_regbuf_dma_unmap(req->rl_rdmabuf);
                rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
@@ -521,18 +522,17 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
        init_waitqueue_head(&ep->rep_connect_wait);
        ep->rep_receive_count = 0;
 
-       sendcq = ib_alloc_cq(ia->ri_id->device, NULL,
-                            ep->rep_attr.cap.max_send_wr + 1,
-                            ia->ri_id->device->num_comp_vectors > 1 ? 1 : 0,
-                            IB_POLL_WORKQUEUE);
+       sendcq = ib_alloc_cq_any(ia->ri_id->device, NULL,
+                                ep->rep_attr.cap.max_send_wr + 1,
+                                IB_POLL_WORKQUEUE);
        if (IS_ERR(sendcq)) {
                rc = PTR_ERR(sendcq);
                goto out1;
        }
 
-       recvcq = ib_alloc_cq(ia->ri_id->device, NULL,
-                            ep->rep_attr.cap.max_recv_wr + 1,
-                            0, IB_POLL_WORKQUEUE);
+       recvcq = ib_alloc_cq_any(ia->ri_id->device, NULL,
+                                ep->rep_attr.cap.max_recv_wr + 1,
+                                IB_POLL_WORKQUEUE);
        if (IS_ERR(recvcq)) {
                rc = PTR_ERR(recvcq);
                goto out2;
@@ -605,10 +605,10 @@ void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
  * Unlike a normal reconnection, a fresh PD and a new set
  * of MRs and buffers is needed.
  */
-static int
-rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
-                        struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+static int rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
+                                   struct ib_qp_init_attr *qp_init_attr)
 {
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        int rc, err;
 
        trace_xprtrdma_reinsert(r_xprt);
@@ -625,7 +625,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
        }
 
        rc = -ENETUNREACH;
-       err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+       err = rdma_create_qp(ia->ri_id, ia->ri_pd, qp_init_attr);
        if (err) {
                pr_err("rpcrdma: rdma_create_qp returned %d\n", err);
                goto out3;
@@ -642,16 +642,16 @@ out1:
        return rc;
 }
 
-static int
-rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
-                    struct rpcrdma_ia *ia)
+static int rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt,
+                               struct ib_qp_init_attr *qp_init_attr)
 {
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        struct rdma_cm_id *id, *old;
        int err, rc;
 
        trace_xprtrdma_reconnect(r_xprt);
 
-       rpcrdma_ep_disconnect(ep, ia);
+       rpcrdma_ep_disconnect(&r_xprt->rx_ep, ia);
 
        rc = -EHOSTUNREACH;
        id = rpcrdma_create_id(r_xprt, ia);
@@ -673,7 +673,7 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
                goto out_destroy;
        }
 
-       err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
+       err = rdma_create_qp(id, ia->ri_pd, qp_init_attr);
        if (err)
                goto out_destroy;
 
@@ -698,25 +698,27 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
        struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
                                                   rx_ia);
        struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+       struct ib_qp_init_attr qp_init_attr;
        int rc;
 
 retry:
+       memcpy(&qp_init_attr, &ep->rep_attr, sizeof(qp_init_attr));
        switch (ep->rep_connected) {
        case 0:
                dprintk("RPC:       %s: connecting...\n", __func__);
-               rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+               rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &qp_init_attr);
                if (rc) {
                        rc = -ENETUNREACH;
                        goto out_noupdate;
                }
                break;
        case -ENODEV:
-               rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia);
+               rc = rpcrdma_ep_recreate_xprt(r_xprt, &qp_init_attr);
                if (rc)
                        goto out_noupdate;
                break;
        default:
-               rc = rpcrdma_ep_reconnect(r_xprt, ep, ia);
+               rc = rpcrdma_ep_reconnect(r_xprt, &qp_init_attr);
                if (rc)
                        goto out;
        }
@@ -730,6 +732,8 @@ retry:
        if (rc)
                goto out;
 
+       if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
+               xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
        wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
        if (ep->rep_connected <= 0) {
                if (ep->rep_connected == -EAGAIN)
@@ -943,14 +947,12 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        unsigned int count;
-       LIST_HEAD(free);
-       LIST_HEAD(all);
 
        for (count = 0; count < ia->ri_max_segs; count++) {
                struct rpcrdma_mr *mr;
                int rc;
 
-               mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+               mr = kzalloc(sizeof(*mr), GFP_NOFS);
                if (!mr)
                        break;
 
@@ -962,15 +964,13 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
 
                mr->mr_xprt = r_xprt;
 
-               list_add(&mr->mr_list, &free);
-               list_add(&mr->mr_all, &all);
+               spin_lock(&buf->rb_lock);
+               list_add(&mr->mr_list, &buf->rb_mrs);
+               list_add(&mr->mr_all, &buf->rb_all_mrs);
+               spin_unlock(&buf->rb_lock);
        }
 
-       spin_lock(&buf->rb_mrlock);
-       list_splice(&free, &buf->rb_mrs);
-       list_splice(&all, &buf->rb_all);
        r_xprt->rx_stats.mrs_allocated += count;
-       spin_unlock(&buf->rb_mrlock);
        trace_xprtrdma_createmrs(r_xprt, count);
 }
 
@@ -978,7 +978,7 @@ static void
 rpcrdma_mr_refresh_worker(struct work_struct *work)
 {
        struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
-                                                 rb_refresh_worker.work);
+                                                 rb_refresh_worker);
        struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
                                                   rx_buf);
 
@@ -1000,12 +1000,18 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
        struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
        struct rpcrdma_regbuf *rb;
        struct rpcrdma_req *req;
+       size_t maxhdrsize;
 
        req = kzalloc(sizeof(*req), flags);
        if (req == NULL)
                goto out1;
 
-       rb = rpcrdma_regbuf_alloc(RPCRDMA_HDRBUF_SIZE, DMA_TO_DEVICE, flags);
+       /* Compute maximum header buffer size in bytes */
+       maxhdrsize = rpcrdma_fixed_maxsz + 3 +
+                    r_xprt->rx_ia.ri_max_segs * rpcrdma_readchunk_maxsz;
+       maxhdrsize *= sizeof(__be32);
+       rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize),
+                                 DMA_TO_DEVICE, flags);
        if (!rb)
                goto out2;
        req->rl_rdmabuf = rb;
@@ -1019,6 +1025,7 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
        if (!req->rl_recvbuf)
                goto out4;
 
+       INIT_LIST_HEAD(&req->rl_free_mrs);
        INIT_LIST_HEAD(&req->rl_registered);
        spin_lock(&buffer->rb_lock);
        list_add(&req->rl_all, &buffer->rb_allreqs);
@@ -1066,6 +1073,40 @@ out:
        return NULL;
 }
 
+static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+{
+       rpcrdma_regbuf_free(rep->rr_rdmabuf);
+       kfree(rep);
+}
+
+static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
+{
+       struct llist_node *node;
+
+       /* Calls to llist_del_first are required to be serialized */
+       node = llist_del_first(&buf->rb_free_reps);
+       if (!node)
+               return NULL;
+       return llist_entry(node, struct rpcrdma_rep, rr_node);
+}
+
+static void rpcrdma_rep_put(struct rpcrdma_buffer *buf,
+                           struct rpcrdma_rep *rep)
+{
+       if (!rep->rr_temp)
+               llist_add(&rep->rr_node, &buf->rb_free_reps);
+       else
+               rpcrdma_rep_destroy(rep);
+}
+
+static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
+{
+       struct rpcrdma_rep *rep;
+
+       while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
+               rpcrdma_rep_destroy(rep);
+}
+
 /**
  * rpcrdma_buffer_create - Create initial set of req/rep objects
  * @r_xprt: transport instance to (re)initialize
@@ -1079,12 +1120,10 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 
        buf->rb_max_requests = r_xprt->rx_ep.rep_max_requests;
        buf->rb_bc_srv_max_requests = 0;
-       spin_lock_init(&buf->rb_mrlock);
        spin_lock_init(&buf->rb_lock);
        INIT_LIST_HEAD(&buf->rb_mrs);
-       INIT_LIST_HEAD(&buf->rb_all);
-       INIT_DELAYED_WORK(&buf->rb_refresh_worker,
-                         rpcrdma_mr_refresh_worker);
+       INIT_LIST_HEAD(&buf->rb_all_mrs);
+       INIT_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker);
 
        rpcrdma_mrs_create(r_xprt);
 
@@ -1103,7 +1142,7 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
        }
 
        buf->rb_credits = 1;
-       INIT_LIST_HEAD(&buf->rb_recv_bufs);
+       init_llist_head(&buf->rb_free_reps);
 
        rc = rpcrdma_sendctxs_create(r_xprt);
        if (rc)
@@ -1115,12 +1154,6 @@ out:
        return rc;
 }
 
-static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
-{
-       rpcrdma_regbuf_free(rep->rr_rdmabuf);
-       kfree(rep);
-}
-
 /**
  * rpcrdma_req_destroy - Destroy an rpcrdma_req object
  * @req: unused object to be destroyed
@@ -1128,11 +1161,13 @@ static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
  * This function assumes that the caller prevents concurrent device
  * unload and transport tear-down.
  */
-void
-rpcrdma_req_destroy(struct rpcrdma_req *req)
+void rpcrdma_req_destroy(struct rpcrdma_req *req)
 {
        list_del(&req->rl_all);
 
+       while (!list_empty(&req->rl_free_mrs))
+               rpcrdma_mr_free(rpcrdma_mr_pop(&req->rl_free_mrs));
+
        rpcrdma_regbuf_free(req->rl_recvbuf);
        rpcrdma_regbuf_free(req->rl_sendbuf);
        rpcrdma_regbuf_free(req->rl_rdmabuf);
@@ -1148,25 +1183,19 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
        unsigned int count;
 
        count = 0;
-       spin_lock(&buf->rb_mrlock);
-       while (!list_empty(&buf->rb_all)) {
-               mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all);
+       spin_lock(&buf->rb_lock);
+       while ((mr = list_first_entry_or_null(&buf->rb_all_mrs,
+                                             struct rpcrdma_mr,
+                                             mr_all)) != NULL) {
                list_del(&mr->mr_all);
-
-               spin_unlock(&buf->rb_mrlock);
-
-               /* Ensure MW is not on any rl_registered list */
-               if (!list_empty(&mr->mr_list))
-                       list_del(&mr->mr_list);
+               spin_unlock(&buf->rb_lock);
 
                frwr_release_mr(mr);
                count++;
-               spin_lock(&buf->rb_mrlock);
+               spin_lock(&buf->rb_lock);
        }
-       spin_unlock(&buf->rb_mrlock);
+       spin_unlock(&buf->rb_lock);
        r_xprt->rx_stats.mrs_allocated = 0;
-
-       dprintk("RPC:       %s: released %u MRs\n", __func__, count);
 }
 
 /**
@@ -1180,18 +1209,10 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
 void
 rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 {
-       cancel_delayed_work_sync(&buf->rb_refresh_worker);
+       cancel_work_sync(&buf->rb_refresh_worker);
 
        rpcrdma_sendctxs_destroy(buf);
-
-       while (!list_empty(&buf->rb_recv_bufs)) {
-               struct rpcrdma_rep *rep;
-
-               rep = list_first_entry(&buf->rb_recv_bufs,
-                                      struct rpcrdma_rep, rr_list);
-               list_del(&rep->rr_list);
-               rpcrdma_rep_destroy(rep);
-       }
+       rpcrdma_reps_destroy(buf);
 
        while (!list_empty(&buf->rb_send_bufs)) {
                struct rpcrdma_req *req;
@@ -1216,54 +1237,20 @@ struct rpcrdma_mr *
 rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
 {
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-       struct rpcrdma_mr *mr = NULL;
-
-       spin_lock(&buf->rb_mrlock);
-       if (!list_empty(&buf->rb_mrs))
-               mr = rpcrdma_mr_pop(&buf->rb_mrs);
-       spin_unlock(&buf->rb_mrlock);
+       struct rpcrdma_mr *mr;
 
-       if (!mr)
-               goto out_nomrs;
+       spin_lock(&buf->rb_lock);
+       mr = rpcrdma_mr_pop(&buf->rb_mrs);
+       spin_unlock(&buf->rb_lock);
        return mr;
-
-out_nomrs:
-       trace_xprtrdma_nomrs(r_xprt);
-       if (r_xprt->rx_ep.rep_connected != -ENODEV)
-               schedule_delayed_work(&buf->rb_refresh_worker, 0);
-
-       /* Allow the reply handler and refresh worker to run */
-       cond_resched();
-
-       return NULL;
-}
-
-static void
-__rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr)
-{
-       spin_lock(&buf->rb_mrlock);
-       rpcrdma_mr_push(mr, &buf->rb_mrs);
-       spin_unlock(&buf->rb_mrlock);
-}
-
-/**
- * rpcrdma_mr_put - Release an rpcrdma_mr object
- * @mr: object to release
- *
- */
-void
-rpcrdma_mr_put(struct rpcrdma_mr *mr)
-{
-       __rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr);
 }
 
 /**
- * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it
- * @mr: object to release
+ * rpcrdma_mr_put - DMA unmap an MR and release it
+ * @mr: MR to release
  *
  */
-void
-rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
+void rpcrdma_mr_put(struct rpcrdma_mr *mr)
 {
        struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
 
@@ -1273,7 +1260,19 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
                                mr->mr_sg, mr->mr_nents, mr->mr_dir);
                mr->mr_dir = DMA_NONE;
        }
-       __rpcrdma_mr_put(&r_xprt->rx_buf, mr);
+
+       rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
+}
+
+static void rpcrdma_mr_free(struct rpcrdma_mr *mr)
+{
+       struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
+       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+
+       mr->mr_req = NULL;
+       spin_lock(&buf->rb_lock);
+       rpcrdma_mr_push(mr, &buf->rb_mrs);
+       spin_unlock(&buf->rb_lock);
 }
 
 /**
@@ -1304,39 +1303,24 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
  */
 void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
 {
-       struct rpcrdma_rep *rep = req->rl_reply;
-
+       if (req->rl_reply)
+               rpcrdma_rep_put(buffers, req->rl_reply);
        req->rl_reply = NULL;
 
        spin_lock(&buffers->rb_lock);
        list_add(&req->rl_list, &buffers->rb_send_bufs);
-       if (rep) {
-               if (!rep->rr_temp) {
-                       list_add(&rep->rr_list, &buffers->rb_recv_bufs);
-                       rep = NULL;
-               }
-       }
        spin_unlock(&buffers->rb_lock);
-       if (rep)
-               rpcrdma_rep_destroy(rep);
 }
 
-/*
- * Put reply buffers back into pool when not attached to
- * request. This happens in error conditions.
+/**
+ * rpcrdma_recv_buffer_put - Release rpcrdma_rep back to free list
+ * @rep: rep to release
+ *
+ * Used after error conditions.
  */
-void
-rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
+void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
 {
-       struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
-
-       if (!rep->rr_temp) {
-               spin_lock(&buffers->rb_lock);
-               list_add(&rep->rr_list, &buffers->rb_recv_bufs);
-               spin_unlock(&buffers->rb_lock);
-       } else {
-               rpcrdma_rep_destroy(rep);
-       }
+       rpcrdma_rep_put(&rep->rr_rxprt->rx_buf, rep);
 }
 
 /* Returns a pointer to a rpcrdma_regbuf object, or NULL.
@@ -1484,7 +1468,7 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
        count = 0;
 
        needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
-       if (ep->rep_receive_count > needed)
+       if (likely(ep->rep_receive_count > needed))
                goto out;
        needed -= ep->rep_receive_count;
        if (!temp)
@@ -1492,22 +1476,10 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
 
        /* fast path: all needed reps can be found on the free list */
        wr = NULL;
-       spin_lock(&buf->rb_lock);
        while (needed) {
-               rep = list_first_entry_or_null(&buf->rb_recv_bufs,
-                                              struct rpcrdma_rep, rr_list);
+               rep = rpcrdma_rep_get_locked(buf);
                if (!rep)
-                       break;
-
-               list_del(&rep->rr_list);
-               rep->rr_recv_wr.next = wr;
-               wr = &rep->rr_recv_wr;
-               --needed;
-       }
-       spin_unlock(&buf->rb_lock);
-
-       while (needed) {
-               rep = rpcrdma_rep_create(r_xprt, temp);
+                       rep = rpcrdma_rep_create(r_xprt, temp);
                if (!rep)
                        break;
 
@@ -1524,7 +1496,7 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
                if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
                        goto release_wrs;
 
-               trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
+               trace_xprtrdma_post_recv(rep);
                ++count;
        }
 
index 92ce09f..65e6b0e 100644 (file)
@@ -47,6 +47,7 @@
 #include <linux/atomic.h>              /* atomic_t, etc */
 #include <linux/kref.h>                        /* struct kref */
 #include <linux/workqueue.h>           /* struct work_struct */
+#include <linux/llist.h>
 
 #include <rdma/rdma_cm.h>              /* RDMA connection api */
 #include <rdma/ib_verbs.h>             /* RDMA verbs api */
@@ -117,9 +118,6 @@ struct rpcrdma_ep {
 #endif
 
 /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
- *
- * The below structure appears at the front of a large region of kmalloc'd
- * memory, which always starts on a good alignment boundary.
  */
 
 struct rpcrdma_regbuf {
@@ -158,25 +156,22 @@ static inline void *rdmab_data(const struct rpcrdma_regbuf *rb)
 
 /* To ensure a transport can always make forward progress,
  * the number of RDMA segments allowed in header chunk lists
- * is capped at 8. This prevents less-capable devices and
- * memory registrations from overrunning the Send buffer
- * while building chunk lists.
+ * is capped at 16. This prevents less-capable devices from
+ * overrunning the Send buffer while building chunk lists.
  *
  * Elements of the Read list take up more room than the
- * Write list or Reply chunk. 8 read segments means the Read
- * list (or Write list or Reply chunk) cannot consume more
- * than
- *
- * ((8 + 2) * read segment size) + 1 XDR words, or 244 bytes.
+ * Write list or Reply chunk. 16 read segments means the
+ * chunk lists cannot consume more than
  *
- * And the fixed part of the header is another 24 bytes.
+ * ((16 + 2) * read segment size) + 1 XDR words,
  *
- * The smallest inline threshold is 1024 bytes, ensuring that
- * at least 750 bytes are available for RPC messages.
+ * or about 400 bytes. The fixed part of the header is
+ * another 24 bytes. Thus when the inline threshold is
+ * 1024 bytes, at least 600 bytes are available for RPC
+ * message bodies.
  */
 enum {
-       RPCRDMA_MAX_HDR_SEGS = 8,
-       RPCRDMA_HDRBUF_SIZE = 256,
+       RPCRDMA_MAX_HDR_SEGS = 16,
 };
 
 /*
@@ -206,7 +201,7 @@ struct rpcrdma_rep {
        struct rpc_rqst         *rr_rqst;
        struct xdr_buf          rr_hdrbuf;
        struct xdr_stream       rr_stream;
-       struct list_head        rr_list;
+       struct llist_node       rr_node;
        struct ib_recv_wr       rr_recv_wr;
 };
 
@@ -240,20 +235,20 @@ struct rpcrdma_sendctx {
  * An external memory region is any buffer or page that is registered
  * on the fly (ie, not pre-registered).
  */
-struct rpcrdma_req;
 struct rpcrdma_frwr {
        struct ib_mr                    *fr_mr;
        struct ib_cqe                   fr_cqe;
        struct completion               fr_linv_done;
-       struct rpcrdma_req              *fr_req;
        union {
                struct ib_reg_wr        fr_regwr;
                struct ib_send_wr       fr_invwr;
        };
 };
 
+struct rpcrdma_req;
 struct rpcrdma_mr {
        struct list_head        mr_list;
+       struct rpcrdma_req      *mr_req;
        struct scatterlist      *mr_sg;
        int                     mr_nents;
        enum dma_data_direction mr_dir;
@@ -331,7 +326,8 @@ struct rpcrdma_req {
        struct list_head        rl_all;
        struct kref             rl_kref;
 
-       struct list_head        rl_registered;  /* registered segments */
+       struct list_head        rl_free_mrs;
+       struct list_head        rl_registered;
        struct rpcrdma_mr_seg   rl_segments[RPCRDMA_MAX_SEGS];
 };
 
@@ -344,7 +340,7 @@ rpcr_to_rdmar(const struct rpc_rqst *rqst)
 static inline void
 rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list)
 {
-       list_add_tail(&mr->mr_list, list);
+       list_add(&mr->mr_list, list);
 }
 
 static inline struct rpcrdma_mr *
@@ -352,8 +348,9 @@ rpcrdma_mr_pop(struct list_head *list)
 {
        struct rpcrdma_mr *mr;
 
-       mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
-       list_del_init(&mr->mr_list);
+       mr = list_first_entry_or_null(list, struct rpcrdma_mr, mr_list);
+       if (mr)
+               list_del_init(&mr->mr_list);
        return mr;
 }
 
@@ -364,19 +361,19 @@ rpcrdma_mr_pop(struct list_head *list)
  * One of these is associated with a transport instance
  */
 struct rpcrdma_buffer {
-       spinlock_t              rb_mrlock;      /* protect rb_mrs list */
+       spinlock_t              rb_lock;
+       struct list_head        rb_send_bufs;
        struct list_head        rb_mrs;
-       struct list_head        rb_all;
 
        unsigned long           rb_sc_head;
        unsigned long           rb_sc_tail;
        unsigned long           rb_sc_last;
        struct rpcrdma_sendctx  **rb_sc_ctxs;
 
-       spinlock_t              rb_lock;        /* protect buf lists */
-       struct list_head        rb_send_bufs;
-       struct list_head        rb_recv_bufs;
        struct list_head        rb_allreqs;
+       struct list_head        rb_all_mrs;
+
+       struct llist_head       rb_free_reps;
 
        u32                     rb_max_requests;
        u32                     rb_credits;     /* most recent credit grant */
@@ -384,7 +381,7 @@ struct rpcrdma_buffer {
        u32                     rb_bc_srv_max_requests;
        u32                     rb_bc_max_requests;
 
-       struct delayed_work     rb_refresh_worker;
+       struct work_struct      rb_refresh_worker;
 };
 
 /*
@@ -490,7 +487,6 @@ struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt);
 
 struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
 void rpcrdma_mr_put(struct rpcrdma_mr *mr);
-void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);
 
 static inline void
 rpcrdma_mr_recycle(struct rpcrdma_mr *mr)
@@ -546,6 +542,7 @@ rpcrdma_data_dir(bool writing)
 /* Memory registration calls xprtrdma/frwr_ops.c
  */
 bool frwr_is_supported(struct ib_device *device);
+void frwr_recycle(struct rpcrdma_req *req);
 void frwr_reset(struct rpcrdma_req *req);
 int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep);
 int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);
@@ -554,7 +551,7 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
 struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
                                struct rpcrdma_mr_seg *seg,
                                int nsegs, bool writing, __be32 xid,
-                               struct rpcrdma_mr **mr);
+                               struct rpcrdma_mr *mr);
 int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req);
 void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
 void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
index e2176c1..9ac8872 100644 (file)
@@ -562,10 +562,14 @@ xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
                printk(KERN_WARNING "Callback slot table overflowed\n");
                return -ESHUTDOWN;
        }
+       if (transport->recv.copied && !req->rq_private_buf.len)
+               return -ESHUTDOWN;
 
        ret = xs_read_stream_request(transport, msg, flags, req);
        if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
                xprt_complete_bc_request(req, transport->recv.copied);
+       else
+               req->rq_private_buf.len = transport->recv.copied;
 
        return ret;
 }
@@ -587,7 +591,7 @@ xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
        /* Look up and lock the request corresponding to the given XID */
        spin_lock(&xprt->queue_lock);
        req = xprt_lookup_rqst(xprt, transport->recv.xid);
-       if (!req) {
+       if (!req || (transport->recv.copied && !req->rq_private_buf.len)) {
                msg->msg_flags |= MSG_TRUNC;
                goto out;
        }
@@ -599,6 +603,8 @@ xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
        spin_lock(&xprt->queue_lock);
        if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
                xprt_complete_rqst(req->rq_task, transport->recv.copied);
+       else
+               req->rq_private_buf.len = transport->recv.copied;
        xprt_unpin_rqst(req);
 out:
        spin_unlock(&xprt->queue_lock);
index 947b8ff..bba3104 100644 (file)
@@ -206,14 +206,7 @@ static int xdp_umem_map_pages(struct xdp_umem *umem)
 
 static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 {
-       unsigned int i;
-
-       for (i = 0; i < umem->npgs; i++) {
-               struct page *page = umem->pgs[i];
-
-               set_page_dirty_lock(page);
-               put_page(page);
-       }
+       put_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
 
        kfree(umem->pgs);
        umem->pgs = NULL;
index c2f1af3..fa8fbb8 100644 (file)
@@ -977,7 +977,7 @@ static int xsk_mmap(struct file *file, struct socket *sock,
        /* Matches the smp_wmb() in xsk_init_queue */
        smp_rmb();
        qpg = virt_to_head_page(q->ring);
-       if (size > (PAGE_SIZE << compound_order(qpg)))
+       if (size > page_size(qpg))
                return -EINVAL;
 
        pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
index 2a06955..ae50816 100644 (file)
@@ -14,7 +14,9 @@
 #include <linux/rpmsg.h>
 
 #define MSG            "hello world!"
-#define MSG_LIMIT      100
+
+static int count = 100;
+module_param(count, int, 0644);
 
 struct instance_data {
        int rx_count;
@@ -29,11 +31,11 @@ static int rpmsg_sample_cb(struct rpmsg_device *rpdev, void *data, int len,
        dev_info(&rpdev->dev, "incoming msg %d (src: 0x%x)\n",
                 ++idata->rx_count, src);
 
-       print_hex_dump(KERN_DEBUG, __func__, DUMP_PREFIX_NONE, 16, 1,
-                      data, len,  true);
+       print_hex_dump_debug(__func__, DUMP_PREFIX_NONE, 16, 1, data, len,
+                            true);
 
        /* samples should not live forever */
-       if (idata->rx_count >= MSG_LIMIT) {
+       if (idata->rx_count >= count) {
                dev_info(&rpdev->dev, "goodbye!\n");
                return 0;
        }
index 92e770a..ce84a30 100644 (file)
@@ -152,20 +152,9 @@ static const struct file_operations vd_fops = {
 
 /* function prototypes */
 
-static int mtty_trigger_interrupt(const guid_t *uuid);
+static int mtty_trigger_interrupt(struct mdev_state *mdev_state);
 
 /* Helper functions */
-static struct mdev_state *find_mdev_state_by_uuid(const guid_t *uuid)
-{
-       struct mdev_state *mds;
-
-       list_for_each_entry(mds, &mdev_devices_list, next) {
-               if (guid_equal(mdev_uuid(mds->mdev), uuid))
-                       return mds;
-       }
-
-       return NULL;
-}
 
 static void dump_buffer(u8 *buf, uint32_t count)
 {
@@ -337,8 +326,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
                                pr_err("Serial port %d: Fifo level trigger\n",
                                        index);
 #endif
-                               mtty_trigger_interrupt(
-                                               mdev_uuid(mdev_state->mdev));
+                               mtty_trigger_interrupt(mdev_state);
                        }
                } else {
 #if defined(DEBUG_INTR)
@@ -352,8 +340,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
                         */
                        if (mdev_state->s[index].uart_reg[UART_IER] &
                                                                UART_IER_RLSI)
-                               mtty_trigger_interrupt(
-                                               mdev_uuid(mdev_state->mdev));
+                               mtty_trigger_interrupt(mdev_state);
                }
                mutex_unlock(&mdev_state->rxtx_lock);
                break;
@@ -372,8 +359,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
                                pr_err("Serial port %d: IER_THRI write\n",
                                        index);
 #endif
-                               mtty_trigger_interrupt(
-                                               mdev_uuid(mdev_state->mdev));
+                               mtty_trigger_interrupt(mdev_state);
                        }
 
                        mutex_unlock(&mdev_state->rxtx_lock);
@@ -444,7 +430,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
 #if defined(DEBUG_INTR)
                        pr_err("Serial port %d: MCR_OUT2 write\n", index);
 #endif
-                       mtty_trigger_interrupt(mdev_uuid(mdev_state->mdev));
+                       mtty_trigger_interrupt(mdev_state);
                }
 
                if ((mdev_state->s[index].uart_reg[UART_IER] & UART_IER_MSI) &&
@@ -452,7 +438,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
 #if defined(DEBUG_INTR)
                        pr_err("Serial port %d: MCR RTS/DTR write\n", index);
 #endif
-                       mtty_trigger_interrupt(mdev_uuid(mdev_state->mdev));
+                       mtty_trigger_interrupt(mdev_state);
                }
                break;
 
@@ -503,8 +489,7 @@ static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state,
 #endif
                        if (mdev_state->s[index].uart_reg[UART_IER] &
                                                         UART_IER_THRI)
-                               mtty_trigger_interrupt(
-                                       mdev_uuid(mdev_state->mdev));
+                               mtty_trigger_interrupt(mdev_state);
                }
                mutex_unlock(&mdev_state->rxtx_lock);
 
@@ -1028,17 +1013,9 @@ static int mtty_set_irqs(struct mdev_device *mdev, uint32_t flags,
        return ret;
 }
 
-static int mtty_trigger_interrupt(const guid_t *uuid)
+static int mtty_trigger_interrupt(struct mdev_state *mdev_state)
 {
        int ret = -1;
-       struct mdev_state *mdev_state;
-
-       mdev_state = find_mdev_state_by_uuid(uuid);
-
-       if (!mdev_state) {
-               pr_info("%s: mdev not found\n", __func__);
-               return -EINVAL;
-       }
 
        if ((mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX) &&
            (!mdev_state->msi_evtfd))
index c42891e..3e86b30 100644 (file)
@@ -17,7 +17,7 @@ hostprogs-$(CONFIG_VT)           += conmakehash
 hostprogs-$(BUILD_C_RECORDMCOUNT) += recordmcount
 hostprogs-$(CONFIG_BUILDTIME_EXTABLE_SORT) += sortextable
 hostprogs-$(CONFIG_ASN1)        += asn1_compiler
-hostprogs-$(CONFIG_MODULE_SIG) += sign-file
+hostprogs-$(CONFIG_MODULE_SIG_FORMAT) += sign-file
 hostprogs-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += extract-cert
 hostprogs-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert
 
index 9800a39..952fff4 100644 (file)
@@ -54,7 +54,8 @@ MODPOST = scripts/mod/modpost                                         \
        $(if $(KBUILD_EXTMOD),$(addprefix -e ,$(KBUILD_EXTRA_SYMBOLS))) \
        $(if $(KBUILD_EXTMOD),-o $(modulesymfile))                      \
        $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E)                  \
-       $(if $(KBUILD_MODPOST_WARN),-w)
+       $(if $(KBUILD_MODPOST_WARN),-w)                                 \
+       $(if $(filter nsdeps,$(MAKECMDGOALS)),-d)
 
 ifdef MODPOST_VMLINUX
 
@@ -95,6 +96,8 @@ ifneq ($(KBUILD_MODPOST_NOFINAL),1)
        $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modfinal
 endif
 
+nsdeps: __modpost
+
 endif
 
 .PHONY: $(PHONY)
index 93a7edf..6fcc66a 100755 (executable)
@@ -62,6 +62,8 @@ my $conststructsfile = "$D/const_structs.checkpatch";
 my $typedefsfile = "";
 my $color = "auto";
 my $allow_c99_comments = 1; # Can be overridden by --ignore C99_COMMENT_TOLERANCE
+# git output parsing needs US English output, so first set backtick child process LANGUAGE
+my $git_command ='export LANGUAGE=en_US.UTF-8; git';
 
 sub help {
        my ($exitcode) = @_;
@@ -904,7 +906,7 @@ sub seed_camelcase_includes {
        $camelcase_seeded = 1;
 
        if (-e ".git") {
-               my $git_last_include_commit = `git log --no-merges --pretty=format:"%h%n" -1 -- include`;
+               my $git_last_include_commit = `${git_command} log --no-merges --pretty=format:"%h%n" -1 -- include`;
                chomp $git_last_include_commit;
                $camelcase_cache = ".checkpatch-camelcase.git.$git_last_include_commit";
        } else {
@@ -932,7 +934,7 @@ sub seed_camelcase_includes {
        }
 
        if (-e ".git") {
-               $files = `git ls-files "include/*.h"`;
+               $files = `${git_command} ls-files "include/*.h"`;
                @include_files = split('\n', $files);
        }
 
@@ -956,13 +958,13 @@ sub git_commit_info {
 
        return ($id, $desc) if ((which("git") eq "") || !(-e ".git"));
 
-       my $output = `git log --no-color --format='%H %s' -1 $commit 2>&1`;
+       my $output = `${git_command} log --no-color --format='%H %s' -1 $commit 2>&1`;
        $output =~ s/^\s*//gm;
        my @lines = split("\n", $output);
 
        return ($id, $desc) if ($#lines < 0);
 
-       if ($lines[0] =~ /^error: short SHA1 $commit is ambiguous\./) {
+       if ($lines[0] =~ /^error: short SHA1 $commit is ambiguous/) {
 # Maybe one day convert this block of bash into something that returns
 # all matching commit ids, but it's very slow...
 #
@@ -1006,7 +1008,7 @@ if ($git) {
                } else {
                        $git_range = "-1 $commit_expr";
                }
-               my $lines = `git log --no-color --no-merges --pretty=format:'%H %s' $git_range`;
+               my $lines = `${git_command} log --no-color --no-merges --pretty=format:'%H %s' $git_range`;
                foreach my $line (split(/\n/, $lines)) {
                        $line =~ /^([0-9a-fA-F]{40,40}) (.*)$/;
                        next if (!defined($1) || !defined($2));
@@ -2725,8 +2727,10 @@ sub process {
                    ($line =~ /^\s*(?:WARNING:|BUG:)/ ||
                     $line =~ /^\s*\[\s*\d+\.\d{6,6}\s*\]/ ||
                                        # timestamp
-                    $line =~ /^\s*\[\<[0-9a-fA-F]{8,}\>\]/)) {
-                                       # stack dump address
+                    $line =~ /^\s*\[\<[0-9a-fA-F]{8,}\>\]/) ||
+                    $line =~ /^(?:\s+\w+:\s+[0-9a-fA-F]+){3,3}/ ||
+                    $line =~ /^\s*\#\d+\s*\[[0-9a-fA-F]+\]\s*\w+ at [0-9a-fA-F]+/) {
+                                       # stack dump address styles
                        $commit_log_possible_stack_dump = 1;
                }
 
@@ -2898,6 +2902,17 @@ sub process {
                        }
                }
 
+# check for invalid commit id
+               if ($in_commit_log && $line =~ /(^fixes:|\bcommit)\s+([0-9a-f]{6,40})\b/i) {
+                       my $id;
+                       my $description;
+                       ($id, $description) = git_commit_info($2, undef, undef);
+                       if (!defined($id)) {
+                               WARN("UNKNOWN_COMMIT_ID",
+                                    "Unknown commit id '$2', maybe rebased or not pulled?\n" . $herecurr);
+                       }
+               }
+
 # ignore non-hunk lines and lines being removed
                next if (!$hunk_line || $line =~ /^-/);
 
@@ -3069,21 +3084,21 @@ sub process {
 # check SPDX comment style for .[chsS] files
                                if ($realfile =~ /\.[chsS]$/ &&
                                    $rawline =~ /SPDX-License-Identifier:/ &&
-                                   $rawline !~ /^\+\s*\Q$comment\E\s*/) {
+                                   $rawline !~ m@^\+\s*\Q$comment\E\s*@) {
                                        WARN("SPDX_LICENSE_TAG",
                                             "Improper SPDX comment style for '$realfile', please use '$comment' instead\n" . $herecurr);
                                }
 
                                if ($comment !~ /^$/ &&
-                                   $rawline !~ /^\+\Q$comment\E SPDX-License-Identifier: /) {
-                                        WARN("SPDX_LICENSE_TAG",
-                                             "Missing or malformed SPDX-License-Identifier tag in line $checklicenseline\n" . $herecurr);
+                                   $rawline !~ m@^\+\Q$comment\E SPDX-License-Identifier: @) {
+                                       WARN("SPDX_LICENSE_TAG",
+                                            "Missing or malformed SPDX-License-Identifier tag in line $checklicenseline\n" . $herecurr);
                                } elsif ($rawline =~ /(SPDX-License-Identifier: .*)/) {
-                                        my $spdx_license = $1;
-                                        if (!is_SPDX_License_valid($spdx_license)) {
-                                                 WARN("SPDX_LICENSE_TAG",
-                                                      "'$spdx_license' is not supported in LICENSES/...\n" . $herecurr);
-                                        }
+                                       my $spdx_license = $1;
+                                       if (!is_SPDX_License_valid($spdx_license)) {
+                                               WARN("SPDX_LICENSE_TAG",
+                                                    "'$spdx_license' is not supported in LICENSES/...\n" . $herecurr);
+                                       }
                                }
                        }
                }
@@ -4660,7 +4675,7 @@ sub process {
 
 # closing brace should have a space following it when it has anything
 # on the line
-               if ($line =~ /}(?!(?:,|;|\)))\S/) {
+               if ($line =~ /}(?!(?:,|;|\)|\}))\S/) {
                        if (ERROR("SPACING",
                                  "space required after that close brace '}'\n" . $herecurr) &&
                            $fix) {
@@ -5191,7 +5206,7 @@ sub process {
                                next if ($arg =~ /\.\.\./);
                                next if ($arg =~ /^type$/i);
                                my $tmp_stmt = $define_stmt;
-                               $tmp_stmt =~ s/\b(typeof|__typeof__|__builtin\w+|typecheck\s*\(\s*$Type\s*,|\#+)\s*\(*\s*$arg\s*\)*\b//g;
+                               $tmp_stmt =~ s/\b(sizeof|typeof|__typeof__|__builtin\w+|typecheck\s*\(\s*$Type\s*,|\#+)\s*\(*\s*$arg\s*\)*\b//g;
                                $tmp_stmt =~ s/\#+\s*$arg\b//g;
                                $tmp_stmt =~ s/\b$arg\s*\#\#//g;
                                my $use_cnt = () = $tmp_stmt =~ /\b$arg\b/g;
@@ -5873,6 +5888,18 @@ sub process {
                             "__aligned(size) is preferred over __attribute__((aligned(size)))\n" . $herecurr);
                }
 
+# Check for __attribute__ section, prefer __section
+               if ($realfile !~ m@\binclude/uapi/@ &&
+                   $line =~ /\b__attribute__\s*\(\s*\(.*_*section_*\s*\(\s*("[^"]*")/) {
+                       my $old = substr($rawline, $-[1], $+[1] - $-[1]);
+                       my $new = substr($old, 1, -1);
+                       if (WARN("PREFER_SECTION",
+                                "__section($new) is preferred over __attribute__((section($old)))\n" . $herecurr) &&
+                           $fix) {
+                               $fixed[$fixlinenr] =~ s/\b__attribute__\s*\(\s*\(\s*_*section_*\s*\(\s*\Q$old\E\s*\)\s*\)\s*\)/__section($new)/;
+                       }
+               }
+
 # Check for __attribute__ format(printf, prefer __printf
                if ($realfile !~ m@\binclude/uapi/@ &&
                    $line =~ /\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf/) {
@@ -6480,6 +6507,12 @@ sub process {
                             "Using $1 should generally have parentheses around the comparison\n" . $herecurr);
                }
 
+# nested likely/unlikely calls
+               if ($line =~ /\b(?:(?:un)?likely)\s*\(\s*!?\s*(IS_ERR(?:_OR_NULL|_VALUE)?|WARN)/) {
+                       WARN("LIKELY_MISUSE",
+                            "nested (un)?likely() calls, $1 already uses unlikely() internally\n" . $herecurr);
+               }
+
 # whine mightly about in_atomic
                if ($line =~ /\bin_atomic\s*\(/) {
                        if ($realfile =~ m@^drivers/@) {
diff --git a/scripts/coccinelle/misc/add_namespace.cocci b/scripts/coccinelle/misc/add_namespace.cocci
new file mode 100644 (file)
index 0000000..c832bb6
--- /dev/null
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+/// Adds missing MODULE_IMPORT_NS statements to source files
+///
+/// This script is usually called from scripts/nsdeps with -D ns=<namespace> to
+/// add a missing namespace tag to a module source file.
+///
+
+@has_ns_import@
+declarer name MODULE_IMPORT_NS;
+identifier virtual.ns;
+@@
+MODULE_IMPORT_NS(ns);
+
+// Add missing imports, but only adjacent to a MODULE_LICENSE statement.
+// That ensures we are adding it only to the main module source file.
+@do_import depends on !has_ns_import@
+declarer name MODULE_LICENSE;
+expression license;
+identifier virtual.ns;
+@@
+MODULE_LICENSE(license);
++ MODULE_IMPORT_NS(ns);
index 7d3030d..548330e 100755 (executable)
@@ -94,7 +94,7 @@ if (defined $opt{'o'}) {
 #
 while ( <$module_symvers> ) {
        chomp;
-       my (undef, $symbol, $module, $gpl) = split;
+       my (undef, $symbol, $namespace, $module, $gpl) = split('\t');
        $SYMBOL { $symbol } =  [ $module , "0" , $symbol, $gpl];
 }
 close($module_symvers);
index 6d5bbd3..bd29e4e 100644 (file)
@@ -443,13 +443,13 @@ static int is_pure_ops_struct(const_tree node)
                if (node == fieldtype)
                        continue;
 
-               if (!is_fptr(fieldtype))
-                       return 0;
-
-               if (code != RECORD_TYPE && code != UNION_TYPE)
+               if (code == RECORD_TYPE || code == UNION_TYPE) {
+                       if (!is_pure_ops_struct(fieldtype))
+                               return 0;
                        continue;
+               }
 
-               if (!is_pure_ops_struct(fieldtype))
+               if (!is_fptr(fieldtype))
                        return 0;
        }
 
index 2f5b95f..34e40e9 100644 (file)
@@ -77,12 +77,12 @@ lx-symbols command."""
             gdb.write("scanning for modules in {0}\n".format(path))
             for root, dirs, files in os.walk(path):
                 for name in files:
-                    if name.endswith(".ko"):
+                    if name.endswith(".ko") or name.endswith(".ko.debug"):
                         self.module_files.append(root + "/" + name)
         self.module_files_updated = True
 
     def _get_module_file(self, module_name):
-        module_pattern = ".*/{0}\.ko$".format(
+        module_pattern = ".*/{0}\.ko(?:.debug)?$".format(
             module_name.replace("_", r"[_\-]"))
         for name in self.module_files:
             if re.match(module_pattern, name) and os.path.exists(name):
index 820eed8..3961941 100644 (file)
@@ -38,6 +38,8 @@ static int sec_mismatch_count = 0;
 static int sec_mismatch_fatal = 0;
 /* ignore missing files */
 static int ignore_missing_files;
+/* write namespace dependencies */
+static int write_namespace_deps;
 
 enum export {
        export_plain,      export_unused,     export_gpl,
@@ -164,6 +166,7 @@ struct symbol {
        struct module *module;
        unsigned int crc;
        int crc_valid;
+       const char *namespace;
        unsigned int weak:1;
        unsigned int vmlinux:1;    /* 1 if symbol is defined in vmlinux */
        unsigned int kernel:1;     /* 1 if symbol is from kernel
@@ -235,6 +238,37 @@ static struct symbol *find_symbol(const char *name)
        return NULL;
 }
 
+/*
+ * Walk the singly linked @list and report whether any entry is named
+ * exactly @namespace (compared with strcmp()).
+ */
+static bool contains_namespace(struct namespace_list *list,
+                              const char *namespace)
+{
+       struct namespace_list *cur = list;
+
+       while (cur) {
+               if (!strcmp(cur->namespace, namespace))
+                       return true;
+               cur = cur->next;
+       }
+
+       return false;
+}
+
+/*
+ * Push @namespace onto the front of *@list unless an entry with the same
+ * name is already present. The node and its copy of the name share a single
+ * allocation.
+ */
+static void add_namespace(struct namespace_list **list, const char *namespace)
+{
+       struct namespace_list *node;
+
+       if (contains_namespace(*list, namespace))
+               return;
+
+       node = NOFAIL(malloc(sizeof(*node) + strlen(namespace) + 1));
+       strcpy(node->namespace, namespace);
+       node->next = *list;
+       *list = node;
+}
+
+/* True if @namespace is on @module's list of imported namespaces. */
+static bool module_imports_namespace(struct module *module,
+                                    const char *namespace)
+{
+       struct namespace_list *imports = module->imported_namespaces;
+
+       return contains_namespace(imports, namespace);
+}
+
 static const struct {
        const char *str;
        enum export export;
@@ -314,23 +348,39 @@ static enum export export_from_sec(struct elf_info *elf, unsigned int sec)
                return export_unknown;
 }
 
+/*
+ * Split a "<symbol>.<namespace>" string.
+ *
+ * If *@symname contains a '.' that is followed by at least one character,
+ * the string is duplicated and cut at the first '.': *@symname is redirected
+ * to the bare symbol name in the duplicate and the returned pointer refers
+ * to the namespace part of that same duplicate. The duplicate is not freed
+ * here because both results point into it.
+ *
+ * Otherwise *@symname is left untouched and NULL is returned.
+ */
+static const char *sym_extract_namespace(const char **symname)
+{
+       const char *dot = strchr(*symname, '.');
+       char *dupsymname;
+       size_t n;
+
+       /*
+        * Look at the character after the '.' instead of computing
+        * strlen() - 1: that subtraction wraps around for an empty string
+        * and would let the check pass with nothing to split.
+        */
+       if (!dot || dot[1] == '\0')
+               return NULL;
+
+       n = dot - *symname;
+       dupsymname = NOFAIL(strdup(*symname));
+       dupsymname[n] = '\0';
+       *symname = dupsymname;
+       return dupsymname + n + 1;
+}
+
 /**
  * Add an exported symbol - it may have already been added without a
  * CRC, in this case just update the CRC
  **/
-static struct symbol *sym_add_exported(const char *name, struct module *mod,
-                                      enum export export)
+static struct symbol *sym_add_exported(const char *name, const char *namespace,
+                                      struct module *mod, enum export export)
 {
        struct symbol *s = find_symbol(name);
 
        if (!s) {
                s = new_symbol(name, mod, export);
+               s->namespace = namespace;
        } else {
                if (!s->preloaded) {
-                       warn("%s: '%s' exported twice. Previous export "
-                            "was in %s%s\n", mod->name, name,
-                            s->module->name,
-                            is_vmlinux(s->module->name) ?"":".ko");
+                       warn("%s: '%s' exported twice. Previous export was in %s%s\n",
+                            mod->name, name, s->module->name,
+                            is_vmlinux(s->module->name) ? "" : ".ko");
                } else {
                        /* In case Module.symvers was out of date */
                        s->module = mod;
@@ -622,6 +672,7 @@ static void handle_modversions(struct module *mod, struct elf_info *info,
        unsigned int crc;
        enum export export;
        bool is_crc = false;
+       const char *name, *namespace;
 
        if ((!is_vmlinux(mod->name) || mod->is_dot_o) &&
            strstarts(symname, "__ksymtab"))
@@ -693,8 +744,9 @@ static void handle_modversions(struct module *mod, struct elf_info *info,
        default:
                /* All exported symbols */
                if (strstarts(symname, "__ksymtab_")) {
-                       sym_add_exported(symname + strlen("__ksymtab_"), mod,
-                                       export);
+                       name = symname + strlen("__ksymtab_");
+                       namespace = sym_extract_namespace(&name);
+                       sym_add_exported(name, namespace, mod, export);
                }
                if (strcmp(symname, "init_module") == 0)
                        mod->has_init = 1;
@@ -1945,6 +1997,7 @@ static void read_symbols(const char *modname)
        const char *symname;
        char *version;
        char *license;
+       char *namespace;
        struct module *mod;
        struct elf_info info = { };
        Elf_Sym *sym;
@@ -1976,6 +2029,12 @@ static void read_symbols(const char *modname)
                license = get_next_modinfo(&info, "license", license);
        }
 
+       namespace = get_modinfo(&info, "import_ns");
+       while (namespace) {
+               add_namespace(&mod->imported_namespaces, namespace);
+               namespace = get_next_modinfo(&info, "import_ns", namespace);
+       }
+
        for (sym = info.symtab_start; sym < info.symtab_stop; sym++) {
                symname = remove_dot(info.strtab + sym->st_name);
 
@@ -2135,6 +2194,18 @@ static int check_exports(struct module *mod)
                        basename++;
                else
                        basename = mod->name;
+
+               if (exp->namespace) {
+                       add_namespace(&mod->required_namespaces,
+                                     exp->namespace);
+
+                       if (!write_namespace_deps &&
+                           !module_imports_namespace(mod, exp->namespace)) {
+                               warn("module %s uses symbol %s from namespace %s, but does not import it.\n",
+                                    basename, exp->name, exp->namespace);
+                       }
+               }
+
                if (!mod->gpl_compatible)
                        check_for_gpl_usage(exp->export, basename, exp->name);
                check_for_unused(exp->export, basename, exp->name);
@@ -2354,7 +2425,7 @@ static void read_dump(const char *fname, unsigned int kernel)
                return;
 
        while ((line = get_next_line(&pos, file, size))) {
-               char *symname, *modname, *d, *export, *end;
+               char *symname, *namespace, *modname, *d, *export, *end;
                unsigned int crc;
                struct module *mod;
                struct symbol *s;
@@ -2362,7 +2433,10 @@ static void read_dump(const char *fname, unsigned int kernel)
                if (!(symname = strchr(line, '\t')))
                        goto fail;
                *symname++ = '\0';
-               if (!(modname = strchr(symname, '\t')))
+               if (!(namespace = strchr(symname, '\t')))
+                       goto fail;
+               *namespace++ = '\0';
+               if (!(modname = strchr(namespace, '\t')))
                        goto fail;
                *modname++ = '\0';
                if ((export = strchr(modname, '\t')) != NULL)
@@ -2379,7 +2453,8 @@ static void read_dump(const char *fname, unsigned int kernel)
                        mod = new_module(modname);
                        mod->skip = 1;
                }
-               s = sym_add_exported(symname, mod, export_no(export));
+               /*
+                * namespace points into the line returned by get_next_line()
+                * and is "" for symbols that write_dump() stored without a
+                * namespace. check_exports() treats any non-NULL namespace as
+                * a real one, so pass NULL for the empty field, and pass a
+                * private copy otherwise rather than relying on the line
+                * buffer staying intact for the lifetime of the symbol.
+                */
+               s = sym_add_exported(symname,
+                                    namespace[0] ?
+                                    NOFAIL(strdup(namespace)) : NULL,
+                                    mod, export_no(export));
                s->kernel    = kernel;
                s->preloaded = 1;
                s->is_static = 0;
@@ -2409,16 +2484,20 @@ static void write_dump(const char *fname)
 {
        struct buffer buf = { };
        struct symbol *symbol;
+       const char *namespace;
        int n;
 
        for (n = 0; n < SYMBOL_HASH_SIZE ; n++) {
                symbol = symbolhash[n];
                while (symbol) {
-                       if (dump_sym(symbol))
-                               buf_printf(&buf, "0x%08x\t%s\t%s\t%s\n",
-                                       symbol->crc, symbol->name,
-                                       symbol->module->name,
-                                       export_str(symbol->export));
+                       if (dump_sym(symbol)) {
+                               namespace = symbol->namespace;
+                               buf_printf(&buf, "0x%08x\t%s\t%s\t%s\t%s\n",
+                                          symbol->crc, symbol->name,
+                                          namespace ? namespace : "",
+                                          symbol->module->name,
+                                          export_str(symbol->export));
+                       }
                        symbol = symbol->next;
                }
        }
@@ -2426,6 +2505,31 @@ static void write_dump(const char *fname)
        free(buf.p);
 }
 
+/*
+ * For every module that is not marked ->skip, write its required namespaces
+ * (one per line) to "<module name>.ns_deps". Modules without required
+ * namespaces get no file.
+ */
+static void write_namespace_deps_files(void)
+{
+       struct module *mod;
+       struct namespace_list *ns;
+       struct buffer ns_deps_buf = {};
+
+       for (mod = modules; mod; mod = mod->next) {
+               char fname[PATH_MAX];
+               int len;
+
+               if (mod->skip)
+                       continue;
+
+               /* Reuse the same buffer for each module. */
+               ns_deps_buf.pos = 0;
+
+               for (ns = mod->required_namespaces; ns; ns = ns->next)
+                       buf_printf(&ns_deps_buf, "%s\n", ns->namespace);
+
+               if (ns_deps_buf.pos == 0)
+                       continue;
+
+               /* Bounded formatting: never write past the end of fname. */
+               len = snprintf(fname, sizeof(fname), "%s.ns_deps", mod->name);
+               if (len < 0 || (size_t)len >= sizeof(fname)) {
+                       warn("%s: file name too long, skipping .ns_deps\n",
+                            mod->name);
+                       continue;
+               }
+               write_if_changed(&ns_deps_buf, fname);
+       }
+
+       /* Release the scratch buffer, as write_dump() does with its own. */
+       free(ns_deps_buf.p);
+}
+
 struct ext_sym_list {
        struct ext_sym_list *next;
        const char *file;
@@ -2443,7 +2547,7 @@ int main(int argc, char **argv)
        struct ext_sym_list *extsym_iter;
        struct ext_sym_list *extsym_start = NULL;
 
-       while ((opt = getopt(argc, argv, "i:I:e:mnsT:o:awE")) != -1) {
+       while ((opt = getopt(argc, argv, "i:I:e:mnsT:o:awEd")) != -1) {
                switch (opt) {
                case 'i':
                        kernel_read = optarg;
@@ -2484,6 +2588,9 @@ int main(int argc, char **argv)
                case 'E':
                        sec_mismatch_fatal = 1;
                        break;
+               case 'd':
+                       write_namespace_deps = 1;
+                       break;
                default:
                        exit(1);
                }
@@ -2518,6 +2625,9 @@ int main(int argc, char **argv)
 
                err |= check_modname_len(mod);
                err |= check_exports(mod);
+               if (write_namespace_deps)
+                       continue;
+
                add_header(&buf, mod);
                add_intree_flag(&buf, !external_module);
                add_retpoline(&buf);
@@ -2530,6 +2640,12 @@ int main(int argc, char **argv)
                sprintf(fname, "%s.mod.c", mod->name);
                write_if_changed(&buf, fname);
        }
+
+       if (write_namespace_deps) {
+               write_namespace_deps_files();
+               return 0;
+       }
+
        if (dump_write)
                write_dump(dump_write);
        if (sec_mismatch_count && sec_mismatch_fatal)
index 8453d6a..92a926d 100644 (file)
@@ -109,6 +109,11 @@ buf_printf(struct buffer *buf, const char *fmt, ...);
 void
 buf_write(struct buffer *buf, const char *s, int len);
 
+/*
+ * Node of a singly linked list of namespace names. The name is stored
+ * inline, directly after the link pointer, so a node must be allocated with
+ * room for the string and its terminating NUL.
+ */
+struct namespace_list {
+       struct namespace_list *next;
+       char namespace[];       /* C99 flexible array member */
+};
+
 struct module {
        struct module *next;
        const char *name;
@@ -121,6 +126,10 @@ struct module {
        struct buffer dev_table_buf;
        char         srcversion[25];
        int is_dot_o;
+       // Required namespace dependencies
+       struct namespace_list *required_namespaces;
+       // Actual imported namespaces
+       struct namespace_list *imported_namespaces;
 };
 
 struct elf_info {
diff --git a/scripts/nsdeps b/scripts/nsdeps
new file mode 100644 (file)
index 0000000..ac2b603
--- /dev/null
@@ -0,0 +1,58 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Linux kernel symbol namespace import generator
+#
+# This script requires a minimum spatch version.
+SPATCH_REQ_VERSION="1.0.4"
+
+DIR="$(dirname $(readlink -f $0))/.."
+SPATCH="`which ${SPATCH:=spatch}`"
+if [ ! -x "$SPATCH" ]; then
+       echo 'spatch is part of the Coccinelle project and is available at http://coccinelle.lip6.fr/'
+       exit 1
+fi
+
+SPATCH_REQ_VERSION_NUM=$(echo $SPATCH_REQ_VERSION | ${DIR}/scripts/ld-version.sh)
+SPATCH_VERSION=$($SPATCH --version | head -1 | awk '{print $3}')
+SPATCH_VERSION_NUM=$(echo $SPATCH_VERSION | ${DIR}/scripts/ld-version.sh)
+
+if [ "$SPATCH_VERSION_NUM" -lt "$SPATCH_REQ_VERSION_NUM" ] ; then
+       echo "spatch needs to be version $SPATCH_REQ_VERSION or higher"
+       exit 1
+fi
+
+generate_deps_for_ns() {
+       $SPATCH --very-quiet --in-place --sp-file \
+               $srctree/scripts/coccinelle/misc/add_namespace.cocci -D ns=$1 $2
+}
+
+generate_deps() {
+       local mod_name=`basename $@ .ko`
+       local mod_file=`echo $@ | sed -e 's/\.ko/\.mod/'`
+       local ns_deps_file=`echo $@ | sed -e 's/\.ko/\.ns_deps/'`
+       if [ ! -f "$ns_deps_file" ]; then return; fi
+       local mod_source_files=`cat $mod_file | sed -n 1p                      \
+                                             | sed -e 's/\.o/\.c/g'           \
+                                             | sed "s/[^ ]* */${srctree}\/&/g"`
+       for ns in `cat $ns_deps_file`; do
+               echo "Adding namespace $ns to module $mod_name (if needed)."
+               generate_deps_for_ns $ns $mod_source_files
+               # sort the imports
+               for source_file in $mod_source_files; do
+                       sed '/MODULE_IMPORT_NS/Q' $source_file > ${source_file}.tmp
+                       offset=$(wc -l ${source_file}.tmp | awk '{print $1;}')
+                       cat $source_file | grep MODULE_IMPORT_NS | sort -u >> ${source_file}.tmp
+                       tail -n +$((offset +1)) ${source_file} | grep -v MODULE_IMPORT_NS >> ${source_file}.tmp
+                       if ! diff -q ${source_file} ${source_file}.tmp; then
+                               mv ${source_file}.tmp ${source_file}
+                       else
+                               rm ${source_file}.tmp
+                       fi
+               done
+       done
+}
+
+for f in `cat $objtree/modules.order`; do
+       generate_deps $f
+done
+
index 0d65594..2a1a2d3 100644 (file)
@@ -237,6 +237,7 @@ source "security/apparmor/Kconfig"
 source "security/loadpin/Kconfig"
 source "security/yama/Kconfig"
 source "security/safesetid/Kconfig"
+source "security/lockdown/Kconfig"
 
 source "security/integrity/Kconfig"
 
@@ -276,11 +277,11 @@ endchoice
 
 config LSM
        string "Ordered list of enabled LSMs"
-       default "yama,loadpin,safesetid,integrity,smack,selinux,tomoyo,apparmor" if DEFAULT_SECURITY_SMACK
-       default "yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo" if DEFAULT_SECURITY_APPARMOR
-       default "yama,loadpin,safesetid,integrity,tomoyo" if DEFAULT_SECURITY_TOMOYO
-       default "yama,loadpin,safesetid,integrity" if DEFAULT_SECURITY_DAC
-       default "yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
+       default "lockdown,yama,loadpin,safesetid,integrity,smack,selinux,tomoyo,apparmor" if DEFAULT_SECURITY_SMACK
+       default "lockdown,yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo" if DEFAULT_SECURITY_APPARMOR
+       default "lockdown,yama,loadpin,safesetid,integrity,tomoyo" if DEFAULT_SECURITY_TOMOYO
+       default "lockdown,yama,loadpin,safesetid,integrity" if DEFAULT_SECURITY_DAC
+       default "lockdown,yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
        help
          A comma-separated list of LSMs, in initialization order.
          Any LSMs left off this list will be ignored. This can be
index c598b90..be1dd9d 100644 (file)
@@ -11,6 +11,7 @@ subdir-$(CONFIG_SECURITY_APPARMOR)    += apparmor
 subdir-$(CONFIG_SECURITY_YAMA)         += yama
 subdir-$(CONFIG_SECURITY_LOADPIN)      += loadpin
 subdir-$(CONFIG_SECURITY_SAFESETID)    += safesetid
+subdir-$(CONFIG_SECURITY_LOCKDOWN_LSM) += lockdown
 
 # always enable default capabilities
 obj-y                                  += commoncap.o
@@ -27,6 +28,7 @@ obj-$(CONFIG_SECURITY_APPARMOR)               += apparmor/
 obj-$(CONFIG_SECURITY_YAMA)            += yama/
 obj-$(CONFIG_SECURITY_LOADPIN)         += loadpin/
 obj-$(CONFIG_SECURITY_SAFESETID)       += safesetid/
+obj-$(CONFIG_SECURITY_LOCKDOWN_LSM)    += lockdown/
 obj-$(CONFIG_CGROUP_DEVICE)            += device_cgroup.o
 
 # Object integrity file lists
index c352532..0bae6ad 100644 (file)
@@ -18,8 +18,8 @@ if INTEGRITY
 
 config INTEGRITY_SIGNATURE
        bool "Digital signature verification using multiple keyrings"
-       depends on KEYS
        default n
+       select KEYS
        select SIGNATURE
        help
          This option enables digital signature verification support
index 868ade3..ea1aae3 100644 (file)
@@ -39,11 +39,10 @@ static const char * const keyring_name[INTEGRITY_KEYRING_MAX] = {
 #define restrict_link_to_ima restrict_link_by_builtin_trusted
 #endif
 
-int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
-                           const char *digest, int digestlen)
+static struct key *integrity_keyring_from_id(const unsigned int id)
 {
-       if (id >= INTEGRITY_KEYRING_MAX || siglen < 2)
-               return -EINVAL;
+       if (id >= INTEGRITY_KEYRING_MAX)
+               return ERR_PTR(-EINVAL);
 
        if (!keyring[id]) {
                keyring[id] =
@@ -52,23 +51,49 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
                        int err = PTR_ERR(keyring[id]);
                        pr_err("no %s keyring: %d\n", keyring_name[id], err);
                        keyring[id] = NULL;
-                       return err;
+                       return ERR_PTR(err);
                }
        }
 
+       return keyring[id];
+}
+
+int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
+                           const char *digest, int digestlen)
+{
+       struct key *keyring;
+
+       if (siglen < 2)
+               return -EINVAL;
+
+       keyring = integrity_keyring_from_id(id);
+       if (IS_ERR(keyring))
+               return PTR_ERR(keyring);
+
        switch (sig[1]) {
        case 1:
                /* v1 API expect signature without xattr type */
-               return digsig_verify(keyring[id], sig + 1, siglen - 1,
-                                    digest, digestlen);
+               return digsig_verify(keyring, sig + 1, siglen - 1, digest,
+                                    digestlen);
        case 2:
-               return asymmetric_verify(keyring[id], sig, siglen,
-                                        digest, digestlen);
+               return asymmetric_verify(keyring, sig, siglen, digest,
+                                        digestlen);
        }
 
        return -EOPNOTSUPP;
 }
 
+/*
+ * Verify @modsig against the integrity keyring selected by @id.
+ *
+ * Returns the error from integrity_keyring_from_id() when no keyring can be
+ * obtained, otherwise whatever ima_modsig_verify() returns.
+ */
+int integrity_modsig_verify(const unsigned int id, const struct modsig *modsig)
+{
+       struct key *ring = integrity_keyring_from_id(id);
+
+       if (IS_ERR(ring))
+               return PTR_ERR(ring);
+
+       return ima_modsig_verify(ring, modsig);
+}
+
 static int __init __integrity_init_keyring(const unsigned int id,
                                           key_perm_t perm,
                                           struct key_restriction *restriction)
index 2ced99d..838476d 100644 (file)
@@ -160,7 +160,7 @@ config IMA_APPRAISE
 
 config IMA_ARCH_POLICY
         bool "Enable loading an IMA architecture specific policy"
-        depends on (KEXEC_VERIFY_SIG && IMA) || IMA_APPRAISE \
+        depends on (KEXEC_SIG && IMA) || IMA_APPRAISE \
                   && INTEGRITY_ASYMMETRIC_KEYS
         default n
         help
@@ -233,6 +233,19 @@ config IMA_APPRAISE_BOOTPARAM
          This option enables the different "ima_appraise=" modes
          (eg. fix, log) from the boot command line.
 
+config IMA_APPRAISE_MODSIG
+       bool "Support module-style signatures for appraisal"
+       depends on IMA_APPRAISE
+       depends on INTEGRITY_ASYMMETRIC_KEYS
+       select PKCS7_MESSAGE_PARSER
+       select MODULE_SIG_FORMAT
+       default n
+       help
+          Adds support for signatures appended to files. The format of the
+          appended signature is the same used for signed kernel modules.
+          The modsig keyword can be used in the IMA policy to allow a hook
+          to accept such signatures.
+
 config IMA_TRUSTED_KEYRING
        bool "Require all keys on the .ima keyring be signed (deprecated)"
        depends on IMA_APPRAISE && SYSTEM_TRUSTED_KEYRING
index d921dc4..31d57cd 100644 (file)
@@ -9,5 +9,6 @@ obj-$(CONFIG_IMA) += ima.o
 ima-y := ima_fs.o ima_queue.o ima_init.o ima_main.o ima_crypto.o ima_api.o \
         ima_policy.o ima_template.o ima_template_lib.o
 ima-$(CONFIG_IMA_APPRAISE) += ima_appraise.o
+ima-$(CONFIG_IMA_APPRAISE_MODSIG) += ima_modsig.o
 ima-$(CONFIG_HAVE_IMA_KEXEC) += ima_kexec.o
 obj-$(CONFIG_IMA_BLACKLIST_KEYRING) += ima_mok.o
index 011b91c..3689081 100644 (file)
@@ -60,6 +60,7 @@ struct ima_event_data {
        const unsigned char *filename;
        struct evm_ima_xattr_data *xattr_value;
        int xattr_len;
+       const struct modsig *modsig;
        const char *violation;
        const void *buf;
        int buf_len;
@@ -113,6 +114,8 @@ struct ima_kexec_hdr {
        u64 count;
 };
 
+extern const int read_idmap[];
+
 #ifdef CONFIG_HAVE_IMA_KEXEC
 void ima_load_kexec_buffer(void);
 #else
@@ -149,6 +152,7 @@ int template_desc_init_fields(const char *template_fmt,
                              int *num_fields);
 struct ima_template_desc *ima_template_desc_current(void);
 struct ima_template_desc *lookup_template_desc(const char *name);
+bool ima_template_has_modsig(const struct ima_template_desc *ima_template);
 int ima_restore_measurement_entry(struct ima_template_entry *entry);
 int ima_restore_measurement_list(loff_t bufsize, void *buf);
 int ima_measurements_show(struct seq_file *m, void *v);
@@ -196,6 +200,10 @@ enum ima_hooks {
        __ima_hooks(__ima_hook_enumify)
 };
 
+extern const char *const func_tokens[];
+
+struct modsig;
+
 /* LIM API function definitions */
 int ima_get_action(struct inode *inode, const struct cred *cred, u32 secid,
                   int mask, enum ima_hooks func, int *pcr,
@@ -203,11 +211,11 @@ int ima_get_action(struct inode *inode, const struct cred *cred, u32 secid,
 int ima_must_measure(struct inode *inode, int mask, enum ima_hooks func);
 int ima_collect_measurement(struct integrity_iint_cache *iint,
                            struct file *file, void *buf, loff_t size,
-                           enum hash_algo algo);
+                           enum hash_algo algo, struct modsig *modsig);
 void ima_store_measurement(struct integrity_iint_cache *iint, struct file *file,
                           const unsigned char *filename,
                           struct evm_ima_xattr_data *xattr_value,
-                          int xattr_len, int pcr,
+                          int xattr_len, const struct modsig *modsig, int pcr,
                           struct ima_template_desc *template_desc);
 void ima_audit_measurement(struct integrity_iint_cache *iint,
                           const unsigned char *filename);
@@ -249,7 +257,7 @@ int ima_appraise_measurement(enum ima_hooks func,
                             struct integrity_iint_cache *iint,
                             struct file *file, const unsigned char *filename,
                             struct evm_ima_xattr_data *xattr_value,
-                            int xattr_len);
+                            int xattr_len, const struct modsig *modsig);
 int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func);
 void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file);
 enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
@@ -265,7 +273,8 @@ static inline int ima_appraise_measurement(enum ima_hooks func,
                                           struct file *file,
                                           const unsigned char *filename,
                                           struct evm_ima_xattr_data *xattr_value,
-                                          int xattr_len)
+                                          int xattr_len,
+                                          const struct modsig *modsig)
 {
        return INTEGRITY_UNKNOWN;
 }
@@ -302,6 +311,51 @@ static inline int ima_read_xattr(struct dentry *dentry,
 
 #endif /* CONFIG_IMA_APPRAISE */
 
+#ifdef CONFIG_IMA_APPRAISE_MODSIG /* implemented in ima_modsig.c */
+bool ima_hook_supports_modsig(enum ima_hooks func);
+int ima_read_modsig(enum ima_hooks func, const void *buf, loff_t buf_len,
+                   struct modsig **modsig);
+void ima_collect_modsig(struct modsig *modsig, const void *buf, loff_t size);
+int ima_get_modsig_digest(const struct modsig *modsig, enum hash_algo *algo,
+                         const u8 **digest, u32 *digest_size);
+int ima_get_raw_modsig(const struct modsig *modsig, const void **data,
+                      u32 *data_len);
+void ima_free_modsig(struct modsig *modsig);
+#else /* !CONFIG_IMA_APPRAISE_MODSIG: inline stubs so callers need no #ifdef */
+static inline bool ima_hook_supports_modsig(enum ima_hooks func)
+{
+       return false; /* no hook accepts a modsig when support is compiled out */
+}
+
+static inline int ima_read_modsig(enum ima_hooks func, const void *buf,
+                                 loff_t buf_len, struct modsig **modsig)
+{
+       return -EOPNOTSUPP; /* *modsig is left untouched */
+}
+
+static inline void ima_collect_modsig(struct modsig *modsig, const void *buf,
+                                     loff_t size)
+{
+}
+
+static inline int ima_get_modsig_digest(const struct modsig *modsig,
+                                       enum hash_algo *algo, const u8 **digest,
+                                       u32 *digest_size)
+{
+       return -EOPNOTSUPP; /* output parameters are not written */
+}
+
+static inline int ima_get_raw_modsig(const struct modsig *modsig,
+                                    const void **data, u32 *data_len)
+{
+       return -EOPNOTSUPP; /* output parameters are not written */
+}
+
+static inline void ima_free_modsig(struct modsig *modsig)
+{
+}
+#endif /* CONFIG_IMA_APPRAISE_MODSIG */
+
 /* LSM based policy rules require audit */
 #ifdef CONFIG_IMA_LSM_RULES
 
index f614e22..610759f 100644 (file)
@@ -45,8 +45,8 @@ int ima_alloc_init_template(struct ima_event_data *event_data,
        else
                template_desc = ima_template_desc_current();
 
-       *entry = kzalloc(sizeof(**entry) + template_desc->num_fields *
-                        sizeof(struct ima_field_data), GFP_NOFS);
+       *entry = kzalloc(struct_size(*entry, template_data,
+                                    template_desc->num_fields), GFP_NOFS);
        if (!*entry)
                return -ENOMEM;
 
@@ -205,7 +205,7 @@ int ima_get_action(struct inode *inode, const struct cred *cred, u32 secid,
  */
 int ima_collect_measurement(struct integrity_iint_cache *iint,
                            struct file *file, void *buf, loff_t size,
-                           enum hash_algo algo)
+                           enum hash_algo algo, struct modsig *modsig)
 {
        const char *audit_cause = "failed";
        struct inode *inode = file_inode(file);
@@ -219,6 +219,14 @@ int ima_collect_measurement(struct integrity_iint_cache *iint,
                char digest[IMA_MAX_DIGEST_SIZE];
        } hash;
 
+       /*
+        * Always collect the modsig, because IMA might have already collected
+        * the file digest without collecting the modsig in a previous
+        * measurement rule.
+        */
+       if (modsig)
+               ima_collect_modsig(modsig, buf, size);
+
        if (iint->flags & IMA_COLLECTED)
                goto out;
 
@@ -285,7 +293,7 @@ out:
 void ima_store_measurement(struct integrity_iint_cache *iint,
                           struct file *file, const unsigned char *filename,
                           struct evm_ima_xattr_data *xattr_value,
-                          int xattr_len, int pcr,
+                          int xattr_len, const struct modsig *modsig, int pcr,
                           struct ima_template_desc *template_desc)
 {
        static const char op[] = "add_template_measure";
@@ -297,10 +305,17 @@ void ima_store_measurement(struct integrity_iint_cache *iint,
                                             .file = file,
                                             .filename = filename,
                                             .xattr_value = xattr_value,
-                                            .xattr_len = xattr_len };
+                                            .xattr_len = xattr_len,
+                                            .modsig = modsig };
        int violation = 0;
 
-       if (iint->measured_pcrs & (0x1 << pcr))
+       /*
+        * We still need to store the measurement in the case of MODSIG because
+        * we only have its contents to put in the list at the time of
+        * appraisal, but a file measurement from earlier might already exist in
+        * the measurement list.
+        */
+       if (iint->measured_pcrs & (0x1 << pcr) && !modsig)
                return;
 
        result = ima_alloc_init_template(&event_data, &entry, template_desc);
index 89b8319..136ae4e 100644 (file)
@@ -200,6 +200,110 @@ int ima_read_xattr(struct dentry *dentry,
 }
 
 /*
+ * xattr_verify - verify xattr digest or signature
+ *
+ * Verify whether the hash or signature matches the file contents.
+ *
+ * Dispatches on xattr_value->type:
+ *  - IMA_XATTR_DIGEST / IMA_XATTR_DIGEST_NG: fails with
+ *    "IMA-signature-required" (rc stays -EINVAL) when IMA_DIGSIG_REQUIRED is
+ *    set in iint->flags; otherwise clears IMA_DIGSIG and memcmp()s the stored
+ *    digest against iint->ima_hash. For the _NG variant the digest starts one
+ *    byte later because the first data byte is the algorithm id.
+ *  - EVM_IMA_XATTR_DIGSIG: sets IMA_DIGSIG and verifies against
+ *    INTEGRITY_KEYRING_IMA. -EOPNOTSUPP yields INTEGRITY_UNKNOWN. Any other
+ *    failure is retried against INTEGRITY_KEYRING_PLATFORM when
+ *    CONFIG_INTEGRITY_PLATFORM_KEYRING is enabled and func is
+ *    KEXEC_KERNEL_CHECK.
+ *  - anything else: INTEGRITY_UNKNOWN with cause "unknown-ima-data"
+ *    (rc stays -EINVAL).
+ *
+ * *status is written on every path. *cause is written on every path except
+ * success and the -EOPNOTSUPP case, where the caller's value is kept.
+ *
+ * Return 0 on success, error code otherwise.
+ * NOTE(review): on a digest mismatch the value returned is the raw non-zero
+ * memcmp() result, which is not necessarily a negative errno.
+ */
+static int xattr_verify(enum ima_hooks func, struct integrity_iint_cache *iint,
+                       struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                       enum integrity_status *status, const char **cause)
+{
+       int rc = -EINVAL, hash_start = 0;
+
+       switch (xattr_value->type) {
+       case IMA_XATTR_DIGEST_NG:
+               /* first byte contains algorithm id */
+               hash_start = 1;
+               /* fall through */
+       case IMA_XATTR_DIGEST:
+               if (iint->flags & IMA_DIGSIG_REQUIRED) {
+                       *cause = "IMA-signature-required";
+                       *status = INTEGRITY_FAIL;
+                       break;
+               }
+               clear_bit(IMA_DIGSIG, &iint->atomic_flags);
+               if (xattr_len - sizeof(xattr_value->type) - hash_start >=
+                               iint->ima_hash->length)
+                       /*
+                        * xattr length may be longer. md5 hash in previous
+                        * version occupied 20 bytes in xattr, instead of 16
+                        */
+                       rc = memcmp(&xattr_value->data[hash_start],
+                                   iint->ima_hash->digest,
+                                   iint->ima_hash->length);
+               else
+                       rc = -EINVAL;
+               if (rc) {
+                       *cause = "invalid-hash";
+                       *status = INTEGRITY_FAIL;
+                       break;
+               }
+               *status = INTEGRITY_PASS;
+               break;
+       case EVM_IMA_XATTR_DIGSIG:
+               set_bit(IMA_DIGSIG, &iint->atomic_flags);
+               rc = integrity_digsig_verify(INTEGRITY_KEYRING_IMA,
+                                            (const char *)xattr_value,
+                                            xattr_len,
+                                            iint->ima_hash->digest,
+                                            iint->ima_hash->length);
+               if (rc == -EOPNOTSUPP) {
+                       *status = INTEGRITY_UNKNOWN;
+                       break;
+               }
+               if (IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING) && rc &&
+                   func == KEXEC_KERNEL_CHECK)
+                       rc = integrity_digsig_verify(INTEGRITY_KEYRING_PLATFORM,
+                                                    (const char *)xattr_value,
+                                                    xattr_len,
+                                                    iint->ima_hash->digest,
+                                                    iint->ima_hash->length);
+               if (rc) {
+                       *cause = "invalid-signature";
+                       *status = INTEGRITY_FAIL;
+               } else {
+                       *status = INTEGRITY_PASS;
+               }
+               break;
+       default:
+               *status = INTEGRITY_UNKNOWN;
+               *cause = "unknown-ima-data";
+               break;
+       }
+
+       return rc;
+}
+
+/*
+ * modsig_verify - verify modsig signature
+ *
+ * Verify whether the signature matches the file contents.
+ *
+ * The modsig is first checked against INTEGRITY_KEYRING_IMA. If that fails,
+ * CONFIG_INTEGRITY_PLATFORM_KEYRING is enabled and func is KEXEC_KERNEL_CHECK,
+ * it is checked again against INTEGRITY_KEYRING_PLATFORM and that second
+ * result replaces the first.
+ *
+ * *status is always set, to INTEGRITY_PASS or INTEGRITY_FAIL. *cause is only
+ * written (to "invalid-signature") on failure.
+ *
+ * Return 0 on success, error code otherwise.
+ */
+static int modsig_verify(enum ima_hooks func, const struct modsig *modsig,
+                        enum integrity_status *status, const char **cause)
+{
+       int rc;
+
+       rc = integrity_modsig_verify(INTEGRITY_KEYRING_IMA, modsig);
+       if (IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING) && rc &&
+           func == KEXEC_KERNEL_CHECK)
+               rc = integrity_modsig_verify(INTEGRITY_KEYRING_PLATFORM,
+                                            modsig);
+       if (rc) {
+               *cause = "invalid-signature";
+               *status = INTEGRITY_FAIL;
+       } else {
+               *status = INTEGRITY_PASS;
+       }
+
+       return rc;
+}
+
+/*
  * ima_appraise_measurement - appraise file measurement
  *
  * Call evm_verifyxattr() to verify the integrity of 'security.ima'.
@@ -211,19 +315,22 @@ int ima_appraise_measurement(enum ima_hooks func,
                             struct integrity_iint_cache *iint,
                             struct file *file, const unsigned char *filename,
                             struct evm_ima_xattr_data *xattr_value,
-                            int xattr_len)
+                            int xattr_len, const struct modsig *modsig)
 {
        static const char op[] = "appraise_data";
        const char *cause = "unknown";
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_backing_inode(dentry);
        enum integrity_status status = INTEGRITY_UNKNOWN;
-       int rc = xattr_len, hash_start = 0;
+       int rc = xattr_len;
+       bool try_modsig = iint->flags & IMA_MODSIG_ALLOWED && modsig;
 
-       if (!(inode->i_opflags & IOP_XATTR))
+       /* If not appraising a modsig, we need an xattr. */
+       if (!(inode->i_opflags & IOP_XATTR) && !try_modsig)
                return INTEGRITY_UNKNOWN;
 
-       if (rc <= 0) {
+       /* If reading the xattr failed and there's no modsig, error out. */
+       if (rc <= 0 && !try_modsig) {
                if (rc && rc != -ENODATA)
                        goto out;
 
@@ -246,6 +353,10 @@ int ima_appraise_measurement(enum ima_hooks func,
        case INTEGRITY_UNKNOWN:
                break;
        case INTEGRITY_NOXATTRS:        /* No EVM protected xattrs. */
+               /* It's fine not to have xattrs when using a modsig. */
+               if (try_modsig)
+                       break;
+               /* fall through */
        case INTEGRITY_NOLABEL:         /* No security.evm xattr. */
                cause = "missing-HMAC";
                goto out;
@@ -256,65 +367,18 @@ int ima_appraise_measurement(enum ima_hooks func,
                WARN_ONCE(true, "Unexpected integrity status %d\n", status);
        }
 
-       switch (xattr_value->type) {
-       case IMA_XATTR_DIGEST_NG:
-               /* first byte contains algorithm id */
-               hash_start = 1;
-               /* fall through */
-       case IMA_XATTR_DIGEST:
-               if (iint->flags & IMA_DIGSIG_REQUIRED) {
-                       cause = "IMA-signature-required";
-                       status = INTEGRITY_FAIL;
-                       break;
-               }
-               clear_bit(IMA_DIGSIG, &iint->atomic_flags);
-               if (xattr_len - sizeof(xattr_value->type) - hash_start >=
-                               iint->ima_hash->length)
-                       /* xattr length may be longer. md5 hash in previous
-                          version occupied 20 bytes in xattr, instead of 16
-                        */
-                       rc = memcmp(&xattr_value->data[hash_start],
-                                   iint->ima_hash->digest,
-                                   iint->ima_hash->length);
-               else
-                       rc = -EINVAL;
-               if (rc) {
-                       cause = "invalid-hash";
-                       status = INTEGRITY_FAIL;
-                       break;
-               }
-               status = INTEGRITY_PASS;
-               break;
-       case EVM_IMA_XATTR_DIGSIG:
-               set_bit(IMA_DIGSIG, &iint->atomic_flags);
-               rc = integrity_digsig_verify(INTEGRITY_KEYRING_IMA,
-                                            (const char *)xattr_value,
-                                            xattr_len,
-                                            iint->ima_hash->digest,
-                                            iint->ima_hash->length);
-               if (rc == -EOPNOTSUPP) {
-                       status = INTEGRITY_UNKNOWN;
-                       break;
-               }
-               if (IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING) && rc &&
-                   func == KEXEC_KERNEL_CHECK)
-                       rc = integrity_digsig_verify(INTEGRITY_KEYRING_PLATFORM,
-                                                    (const char *)xattr_value,
-                                                    xattr_len,
-                                                    iint->ima_hash->digest,
-                                                    iint->ima_hash->length);
-               if (rc) {
-                       cause = "invalid-signature";
-                       status = INTEGRITY_FAIL;
-               } else {
-                       status = INTEGRITY_PASS;
-               }
-               break;
-       default:
-               status = INTEGRITY_UNKNOWN;
-               cause = "unknown-ima-data";
-               break;
-       }
+       if (xattr_value)
+               rc = xattr_verify(func, iint, xattr_value, xattr_len, &status,
+                                 &cause);
+
+       /*
+        * If we have a modsig and either no imasig or the imasig's key isn't
+        * known, then try verifying the modsig.
+        */
+       if (try_modsig &&
+           (!xattr_value || xattr_value->type == IMA_XATTR_DIGEST_NG ||
+            rc == -ENOKEY))
+               rc = modsig_verify(func, modsig, &status, &cause);
 
 out:
        /*
@@ -332,7 +396,7 @@ out:
                                    op, cause, rc, 0);
        } else if (status != INTEGRITY_PASS) {
                /* Fix mode, but don't replace file signatures. */
-               if ((ima_appraise & IMA_APPRAISE_FIX) &&
+               if ((ima_appraise & IMA_APPRAISE_FIX) && !try_modsig &&
                    (!xattr_value ||
                     xattr_value->type != EVM_IMA_XATTR_DIGSIG)) {
                        if (!ima_fix_xattr(dentry, iint))
@@ -371,7 +435,7 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file)
            !(iint->flags & IMA_HASH))
                return;
 
-       rc = ima_collect_measurement(iint, file, NULL, 0, ima_hash_algo);
+       rc = ima_collect_measurement(iint, file, NULL, 0, ima_hash_algo, NULL);
        if (rc < 0)
                return;
 
index d4c7b8e..73044fc 100644 (file)
@@ -268,8 +268,16 @@ static int ima_calc_file_hash_atfm(struct file *file,
                rbuf_len = min_t(loff_t, i_size - offset, rbuf_size[active]);
                rc = integrity_kernel_read(file, offset, rbuf[active],
                                           rbuf_len);
-               if (rc != rbuf_len)
+               if (rc != rbuf_len) {
+                       if (rc >= 0)
+                               rc = -EINVAL;
+                       /*
+                        * Forward current rc, do not overwrite with return value
+                        * from ahash_wait()
+                        */
+                       ahash_wait(ahash_rc, &wait);
                        goto out3;
+               }
 
                if (rbuf[1] && offset) {
                        /* Using two buffers, and it is not the first
index 5840197..60027c6 100644 (file)
@@ -202,6 +202,7 @@ static int process_measurement(struct file *file, const struct cred *cred,
        int rc = 0, action, must_appraise = 0;
        int pcr = CONFIG_IMA_MEASURE_PCR_IDX;
        struct evm_ima_xattr_data *xattr_value = NULL;
+       struct modsig *modsig = NULL;
        int xattr_len = 0;
        bool violation_check;
        enum hash_algo hash_algo;
@@ -302,13 +303,27 @@ static int process_measurement(struct file *file, const struct cred *cred,
        }
 
        if ((action & IMA_APPRAISE_SUBMASK) ||
-                   strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) != 0)
+           strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) != 0) {
                /* read 'security.ima' */
                xattr_len = ima_read_xattr(file_dentry(file), &xattr_value);
 
+               /*
+                * Read the appended modsig if allowed by the policy, and allow
+                * an additional measurement list entry, if needed, based on the
+                * template format and whether the file was already measured.
+                */
+               if (iint->flags & IMA_MODSIG_ALLOWED) {
+                       rc = ima_read_modsig(func, buf, size, &modsig);
+
+                       if (!rc && ima_template_has_modsig(template_desc) &&
+                           iint->flags & IMA_MEASURED)
+                               action |= IMA_MEASURE;
+               }
+       }
+
        hash_algo = ima_get_hash_algo(xattr_value, xattr_len);
 
-       rc = ima_collect_measurement(iint, file, buf, size, hash_algo);
+       rc = ima_collect_measurement(iint, file, buf, size, hash_algo, modsig);
        if (rc != 0 && rc != -EBADF && rc != -EINVAL)
                goto out_locked;
 
@@ -317,12 +332,12 @@ static int process_measurement(struct file *file, const struct cred *cred,
 
        if (action & IMA_MEASURE)
                ima_store_measurement(iint, file, pathname,
-                                     xattr_value, xattr_len, pcr,
+                                     xattr_value, xattr_len, modsig, pcr,
                                      template_desc);
        if (rc == 0 && (action & IMA_APPRAISE_SUBMASK)) {
                inode_lock(inode);
                rc = ima_appraise_measurement(func, iint, file, pathname,
-                                             xattr_value, xattr_len);
+                                             xattr_value, xattr_len, modsig);
                inode_unlock(inode);
                if (!rc)
                        rc = mmap_violation_check(func, file, &pathbuf,
@@ -339,6 +354,7 @@ out_locked:
                rc = -EACCES;
        mutex_unlock(&iint->mutex);
        kfree(xattr_value);
+       ima_free_modsig(modsig);
 out:
        if (pathbuf)
                __putname(pathbuf);
@@ -502,7 +518,7 @@ int ima_read_file(struct file *file, enum kernel_read_file_id read_id)
        return 0;
 }
 
-static const int read_idmap[READING_MAX_ID] = {
+const int read_idmap[READING_MAX_ID] = {
        [READING_FIRMWARE] = FIRMWARE_CHECK,
        [READING_FIRMWARE_PREALLOC_BUFFER] = FIRMWARE_CHECK,
        [READING_MODULE] = MODULE_CHECK,
@@ -574,7 +590,7 @@ int ima_load_data(enum kernel_load_data_id id)
 
        switch (id) {
        case LOADING_KEXEC_IMAGE:
-               if (IS_ENABLED(CONFIG_KEXEC_VERIFY_SIG)
+               if (IS_ENABLED(CONFIG_KEXEC_SIG)
                    && arch_ima_get_secureboot()) {
                        pr_err("impossible to appraise a kernel image without a file descriptor; try using kexec_file_load syscall.\n");
                        return -EACCES;
diff --git a/security/integrity/ima/ima_modsig.c b/security/integrity/ima/ima_modsig.c
new file mode 100644 (file)
index 0000000..d106885
--- /dev/null
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IMA support for appraising module-style appended signatures.
+ *
+ * Copyright (C) 2019  IBM Corporation
+ *
+ * Author:
+ * Thiago Jung Bauermann <bauerman@linux.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/module_signature.h>
+#include <keys/asymmetric-type.h>
+#include <crypto/pkcs7.h>
+
+#include "ima.h"
+
+struct modsig {
+       struct pkcs7_message *pkcs7_msg; /* from pkcs7_parse_message() */
+
+       enum hash_algo hash_algo; /* initialised to HASH_ALGO__LAST */
+
+       /*
+        * This digest will go in the 'd-modsig' field of the IMA template.
+        * Both fields are handed to pkcs7_get_digest() by ima_collect_modsig()
+        * and stay zero (the object is kzalloc'ed) until then.
+        */
+       const u8 *digest;
+       u32 digest_size;
+
+       /*
+        * This is what will go to the measurement list if the template requires
+        * storing the signature. raw_pkcs7 holds raw_pkcs7_len bytes copied
+        * from the file buffer by ima_read_modsig(); being a flexible array
+        * member it lives in the same allocation as the rest of the struct.
+        */
+       int raw_pkcs7_len;
+       u8 raw_pkcs7[];
+};
+
+/**
+ * ima_hook_supports_modsig - can the policy allow modsig for this hook?
+ * @func: IMA hook being considered.
+ *
+ * modsig is only supported by hooks using ima_post_read_file(), because only
+ * they preload the contents of the file in a buffer. FILE_CHECK does that in
+ * some cases, but not when reached from vfs_open(). POLICY_CHECK can support
+ * it, but it's not useful in practice because it's a text file so deny.
+ *
+ * Return: true for KEXEC_KERNEL_CHECK, KEXEC_INITRAMFS_CHECK and MODULE_CHECK,
+ * false for every other hook.
+ */
+bool ima_hook_supports_modsig(enum ima_hooks func)
+{
+       switch (func) {
+       case KEXEC_KERNEL_CHECK:
+       case KEXEC_INITRAMFS_CHECK:
+       case MODULE_CHECK:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/*
+ * ima_read_modsig - Read modsig from buf.
+ *
+ * Looks for MODULE_SIG_STRING at the very end of buf, validates the
+ * struct module_signature that precedes it with mod_check_sig(), then parses
+ * the PKCS#7 blob located in front of that header and keeps a raw copy of it.
+ *
+ * On success *modsig points to a newly allocated object that the caller must
+ * release with ima_free_modsig(). On failure *modsig is left untouched.
+ *
+ * Return: 0 on success, error code otherwise: -ENOENT when buf is too short
+ * or does not end with the marker, -ENOMEM when the allocation fails, or the
+ * error reported by mod_check_sig() / pkcs7_parse_message().
+ */
+int ima_read_modsig(enum ima_hooks func, const void *buf, loff_t buf_len,
+                   struct modsig **modsig)
+{
+       const size_t marker_len = strlen(MODULE_SIG_STRING);
+       const struct module_signature *sig;
+       struct modsig *hdr;
+       size_t sig_len;
+       const void *p;
+       int rc;
+
+       /* Too short to hold a marker, a signature header and any payload. */
+       if (buf_len <= marker_len + sizeof(*sig))
+               return -ENOENT;
+
+       p = buf + buf_len - marker_len;
+       if (memcmp(p, MODULE_SIG_STRING, marker_len))
+               return -ENOENT;
+
+       buf_len -= marker_len;
+       sig = (const struct module_signature *)(p - sizeof(*sig));
+
+       rc = mod_check_sig(sig, buf_len, func_tokens[func]);
+       if (rc)
+               return rc;
+
+       /* After this, buf + buf_len is the start of the PKCS#7 data. */
+       sig_len = be32_to_cpu(sig->sig_len);
+       buf_len -= sig_len + sizeof(*sig);
+
+       /*
+        * Allocate sig_len additional bytes to hold the raw PKCS#7 data.
+        * struct_size() saturates instead of wrapping should the sum overflow,
+        * in which case the allocation simply fails.
+        */
+       hdr = kzalloc(struct_size(hdr, raw_pkcs7, sig_len), GFP_KERNEL);
+       if (!hdr)
+               return -ENOMEM;
+
+       hdr->pkcs7_msg = pkcs7_parse_message(buf + buf_len, sig_len);
+       if (IS_ERR(hdr->pkcs7_msg)) {
+               rc = PTR_ERR(hdr->pkcs7_msg);
+               kfree(hdr);
+               return rc;
+       }
+
+       memcpy(hdr->raw_pkcs7, buf + buf_len, sig_len);
+       hdr->raw_pkcs7_len = sig_len;
+
+       /* We don't know the hash algorithm yet. */
+       hdr->hash_algo = HASH_ALGO__LAST;
+
+       *modsig = hdr;
+
+       return 0;
+}
+
+/**
+ * ima_collect_modsig - Calculate the file hash without the appended signature.
+ * @modsig: object produced by ima_read_modsig(); its digest, digest_size and
+ *          hash_algo fields are handed to pkcs7_get_digest() to be filled in.
+ * @buf: file contents, including the appended signature.
+ * @size: length of @buf in bytes.
+ *
+ * Since the modsig is part of the file contents, the hash used in its signature
+ * isn't the same one ordinarily calculated by IMA. Therefore PKCS7 code
+ * calculates a separate one for signature verification.
+ *
+ * Failures are not reported to the caller: if pkcs7_supply_detached_data()
+ * fails the function returns early, and the result of pkcs7_get_digest() is
+ * stored in a local but not acted upon.
+ */
+void ima_collect_modsig(struct modsig *modsig, const void *buf, loff_t size)
+{
+       int rc;
+
+       /*
+        * Provide the file contents (minus the appended sig) so that the PKCS7
+        * code can calculate the file hash.
+        */
+       size -= modsig->raw_pkcs7_len + strlen(MODULE_SIG_STRING) +
+               sizeof(struct module_signature);
+       rc = pkcs7_supply_detached_data(modsig->pkcs7_msg, buf, size);
+       if (rc)
+               return;
+
+       /* Ask the PKCS7 code to calculate the file hash. */
+       rc = pkcs7_get_digest(modsig->pkcs7_msg, &modsig->digest,
+                             &modsig->digest_size, &modsig->hash_algo);
+}
+
+int ima_modsig_verify(struct key *keyring, const struct modsig *modsig)
+{
+       return verify_pkcs7_message_sig(NULL, 0, modsig->pkcs7_msg, keyring,
+                                       VERIFYING_MODULE_SIGNATURE, NULL, NULL);
+}
+
+int ima_get_modsig_digest(const struct modsig *modsig, enum hash_algo *algo,
+                         const u8 **digest, u32 *digest_size)
+{
+       *algo = modsig->hash_algo; /* initialised to HASH_ALGO__LAST */
+       *digest = modsig->digest; /* pointer is handed out, bytes not copied */
+       *digest_size = modsig->digest_size;
+
+       return 0; /* unconditional: no validity check is made here */
+}
+
+int ima_get_raw_modsig(const struct modsig *modsig, const void **data,
+                      u32 *data_len)
+{
+       *data = &modsig->raw_pkcs7; /* points into @modsig, nothing is copied */
+       *data_len = modsig->raw_pkcs7_len;
+
+       return 0; /* unconditional */
+}
+
+void ima_free_modsig(struct modsig *modsig)
+{
+       if (!modsig) /* NULL is tolerated so callers can free unconditionally */
+               return;
+
+       pkcs7_free_message(modsig->pkcs7_msg); /* parsed message first */
+       kfree(modsig); /* also releases the trailing raw_pkcs7[] bytes */
+}
index 6df7f64..5380aca 100644 (file)
@@ -6,6 +6,9 @@
  * ima_policy.c
  *     - initialize default measure policy rules
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/fs.h>
@@ -491,6 +494,9 @@ int ima_match_policy(struct inode *inode, const struct cred *cred, u32 secid,
        struct ima_rule_entry *entry;
        int action = 0, actmask = flags | (flags << 1);
 
+       if (template_desc)
+               *template_desc = ima_template_desc_current();
+
        rcu_read_lock();
        list_for_each_entry_rcu(entry, ima_rules, list) {
 
@@ -510,6 +516,7 @@ int ima_match_policy(struct inode *inode, const struct cred *cred, u32 secid,
                                action |= IMA_FAIL_UNVERIFIABLE_SIGS;
                }
 
+
                if (entry->action & IMA_DO_MASK)
                        actmask &= ~(entry->action | entry->action << 1);
                else
@@ -520,8 +527,6 @@ int ima_match_policy(struct inode *inode, const struct cred *cred, u32 secid,
 
                if (template_desc && entry->template)
                        *template_desc = entry->template;
-               else if (template_desc)
-                       *template_desc = ima_template_desc_current();
 
                if (!actmask)
                        break;
@@ -843,6 +848,38 @@ static void ima_log_string(struct audit_buffer *ab, char *key, char *value)
        ima_log_string_op(ab, key, value, NULL);
 }
 
+/*
+ * Validating the appended signature included in the measurement list requires
+ * the file hash calculated without the appended signature (i.e., the 'd-modsig'
+ * field). Therefore, notify the user if they have the 'modsig' field but not
+ * the 'd-modsig' field in the template.
+ *
+ * The check is one-shot: the static 'checked' flag is set at the end of the
+ * first call, so only the template passed to that first call is inspected and
+ * later calls return immediately, even when given a different template.
+ */
+static void check_template_modsig(const struct ima_template_desc *template)
+{
+#define MSG "template with 'modsig' field also needs 'd-modsig' field\n"
+       bool has_modsig, has_dmodsig;
+       static bool checked;
+       int i;
+
+       /* We only need to notify the user once. */
+       if (checked)
+               return;
+
+       has_modsig = has_dmodsig = false;
+       for (i = 0; i < template->num_fields; i++) {
+               if (!strcmp(template->fields[i]->field_id, "modsig"))
+                       has_modsig = true;
+               else if (!strcmp(template->fields[i]->field_id, "d-modsig"))
+                       has_dmodsig = true;
+       }
+
+       if (has_modsig && !has_dmodsig)
+               pr_notice(MSG);
+
+       checked = true;
+#undef MSG
+}
+
 static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 {
        struct audit_buffer *ab;
@@ -1128,6 +1165,10 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
                        ima_log_string(ab, "appraise_type", args[0].from);
                        if ((strcmp(args[0].from, "imasig")) == 0)
                                entry->flags |= IMA_DIGSIG_REQUIRED;
+                       else if (ima_hook_supports_modsig(entry->func) &&
+                                strcmp(args[0].from, "imasig|modsig") == 0)
+                               entry->flags |= IMA_DIGSIG_REQUIRED |
+                                               IMA_MODSIG_ALLOWED;
                        else
                                result = -EINVAL;
                        break;
@@ -1181,6 +1222,12 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
        else if (entry->action == APPRAISE)
                temp_ima_appraise |= ima_appraise_flag(entry->func);
 
+       if (!result && entry->flags & IMA_MODSIG_ALLOWED) {
+               template_desc = entry->template ? entry->template :
+                                                 ima_template_desc_current();
+               check_template_modsig(template_desc);
+       }
+
        audit_log_format(ab, "res=%d", !result);
        audit_log_end(ab);
        return result;
@@ -1252,6 +1299,12 @@ void ima_delete_rules(void)
        }
 }
 
+#define __ima_hook_stringify(str)      (#str),
+
+const char *const func_tokens[] = {
+       __ima_hooks(__ima_hook_stringify)
+};
+
 #ifdef CONFIG_IMA_READ_POLICY
 enum {
        mask_exec = 0, mask_write, mask_read, mask_append
@@ -1264,12 +1317,6 @@ static const char *const mask_tokens[] = {
        "^MAY_APPEND"
 };
 
-#define __ima_hook_stringify(str)      (#str),
-
-static const char *const func_tokens[] = {
-       __ima_hooks(__ima_hook_stringify)
-};
-
 void *ima_policy_start(struct seq_file *m, loff_t *pos)
 {
        loff_t l = *pos;
@@ -1447,8 +1494,12 @@ int ima_policy_show(struct seq_file *m, void *v)
        }
        if (entry->template)
                seq_printf(m, "template=%s ", entry->template->name);
-       if (entry->flags & IMA_DIGSIG_REQUIRED)
-               seq_puts(m, "appraise_type=imasig ");
+       if (entry->flags & IMA_DIGSIG_REQUIRED) {
+               if (entry->flags & IMA_MODSIG_ALLOWED)
+                       seq_puts(m, "appraise_type=imasig|modsig ");
+               else
+                       seq_puts(m, "appraise_type=imasig ");
+       }
        if (entry->flags & IMA_PERMIT_DIRECTIO)
                seq_puts(m, "permit_directio ");
        rcu_read_unlock();
@@ -1456,3 +1507,53 @@ int ima_policy_show(struct seq_file *m, void *v)
        return 0;
 }
 #endif /* CONFIG_IMA_READ_POLICY */
+
+#if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING)
+/*
+ * ima_appraise_signature: whether IMA will appraise a given function using
+ * an IMA digital signature. This is restricted to cases where the kernel
+ * has a set of built-in trusted keys in order to avoid an attacker simply
+ * loading additional keys.
+ *
+ * @id: kind of kernel file read being queried; it is translated to an IMA
+ *      hook through read_idmap[].
+ *
+ * Only the first APPRAISE rule in ima_rules whose func is unset or equal to
+ * the translated hook is considered.
+ *
+ * Returns true if that rule has IMA_DIGSIG_REQUIRED set. Returns false if it
+ * does not, if no rule matches, or if @id >= READING_MAX_ID.
+ */
+bool ima_appraise_signature(enum kernel_read_file_id id)
+{
+       struct ima_rule_entry *entry;
+       bool found = false;
+       enum ima_hooks func;
+
+       if (id >= READING_MAX_ID)
+               return false;
+
+       /* Ids whose read_idmap[] entry is zero fall back to FILE_CHECK. */
+       func = read_idmap[id] ?: FILE_CHECK;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(entry, ima_rules, list) {
+               if (entry->action != APPRAISE)
+                       continue;
+
+               /*
+                * A generic entry will match, but otherwise require that it
+                * match the func we're looking for
+                */
+               if (entry->func && entry->func != func)
+                       continue;
+
+               /*
+                * We require this to be a digital signature, not a raw IMA
+                * hash.
+                */
+               if (entry->flags & IMA_DIGSIG_REQUIRED)
+                       found = true;
+
+               /*
+                * We've found a rule that matches, so break now even if it
+                * didn't require a digital signature - a later rule that does
+                * won't override it, so would be a false positive.
+                */
+               break;
+       }
+
+       rcu_read_unlock();
+       return found;
+}
+#endif /* CONFIG_IMA_APPRAISE && CONFIG_INTEGRITY_TRUSTED_KEYRING */
index cb349d7..6aa6408 100644 (file)
@@ -23,6 +23,7 @@ static struct ima_template_desc builtin_templates[] = {
        {.name = "ima-ng", .fmt = "d-ng|n-ng"},
        {.name = "ima-sig", .fmt = "d-ng|n-ng|sig"},
        {.name = "ima-buf", .fmt = "d-ng|n-ng|buf"},
+       {.name = "ima-modsig", .fmt = "d-ng|n-ng|sig|d-modsig|modsig"},
        {.name = "", .fmt = ""},        /* placeholder for a custom format */
 };
 
@@ -42,6 +43,10 @@ static const struct ima_template_field supported_fields[] = {
         .field_show = ima_show_template_sig},
        {.field_id = "buf", .field_init = ima_eventbuf_init,
         .field_show = ima_show_template_buf},
+       {.field_id = "d-modsig", .field_init = ima_eventdigest_modsig_init,
+        .field_show = ima_show_template_digest_ng},
+       {.field_id = "modsig", .field_init = ima_eventmodsig_init,
+        .field_show = ima_show_template_sig},
 };
 
 /*
@@ -49,10 +54,29 @@ static const struct ima_template_field supported_fields[] = {
  * need to be accounted for since they shouldn't be defined in the same template
  * description as 'd-ng' and 'n-ng' respectively.
  */
-#define MAX_TEMPLATE_NAME_LEN sizeof("d-ng|n-ng|sig|buf")
+#define MAX_TEMPLATE_NAME_LEN sizeof("d-ng|n-ng|sig|buf|d-modsig|modsig")
 
 static struct ima_template_desc *ima_template;
 
+/**
+ * ima_template_has_modsig - Report whether a template carries modsig fields.
+ * @ima_template: IMA template descriptor to inspect.
+ *
+ * Return: true if any field of @ima_template has the id "modsig" or
+ * "d-modsig", false otherwise.
+ */
+bool ima_template_has_modsig(const struct ima_template_desc *ima_template)
+{
+       int idx;
+
+       for (idx = 0; idx < ima_template->num_fields; idx++) {
+               const char *id = ima_template->fields[idx]->field_id;
+
+               if (strcmp(id, "modsig") == 0 || strcmp(id, "d-modsig") == 0)
+                       return true;
+       }
+
+       return false;
+}
+
 static int __init ima_template_setup(char *str)
 {
        struct ima_template_desc *template_desc;
@@ -282,9 +306,8 @@ static int ima_restore_template_data(struct ima_template_desc *template_desc,
        int ret = 0;
        int i;
 
-       *entry = kzalloc(sizeof(**entry) +
-                   template_desc->num_fields * sizeof(struct ima_field_data),
-                   GFP_NOFS);
+       *entry = kzalloc(struct_size(*entry, template_data,
+                                    template_desc->num_fields), GFP_NOFS);
        if (!*entry)
                return -ENOMEM;
 
index 2fb9a10..32ae05d 100644 (file)
@@ -225,7 +225,8 @@ int ima_parse_buf(void *bufstartp, void *bufendp, void **bufcurp,
        return 0;
 }
 
-static int ima_eventdigest_init_common(u8 *digest, u32 digestsize, u8 hash_algo,
+static int ima_eventdigest_init_common(const u8 *digest, u32 digestsize,
+                                      u8 hash_algo,
                                       struct ima_field_data *field_data)
 {
        /*
@@ -328,6 +329,41 @@ out:
                                           hash_algo, field_data);
 }
 
+/*
+ * This function writes the digest of the file which is expected to match the
+ * digest contained in the file's appended signature.
+ *
+ * @event_data: event being recorded; its modsig and violation members are
+ *              consulted.
+ * @field_data: template field passed on to ima_eventdigest_init_common().
+ *
+ * Returns 0 without touching @field_data when the event has no modsig.
+ * Otherwise returns the result of ima_eventdigest_init_common(), the error
+ * from ima_get_modsig_digest(), or -EINVAL when that call succeeds but
+ * reports HASH_ALGO__LAST or a zero-length digest.
+ */
+int ima_eventdigest_modsig_init(struct ima_event_data *event_data,
+                               struct ima_field_data *field_data)
+{
+       enum hash_algo hash_algo;
+       const u8 *cur_digest;
+       u32 cur_digestsize;
+
+       if (!event_data->modsig)
+               return 0;
+
+       if (event_data->violation) {
+               /* Recording a violation. */
+               hash_algo = HASH_ALGO_SHA1;
+               cur_digest = NULL;
+               cur_digestsize = 0;
+       } else {
+               int rc;
+
+               rc = ima_get_modsig_digest(event_data->modsig, &hash_algo,
+                                          &cur_digest, &cur_digestsize);
+               if (rc)
+                       return rc;
+               else if (hash_algo == HASH_ALGO__LAST || cur_digestsize == 0)
+                       /* There was some error collecting the digest. */
+                       return -EINVAL;
+       }
+
+       return ima_eventdigest_init_common(cur_digest, cur_digestsize,
+                                          hash_algo, field_data);
+}
+
 static int ima_eventname_init_common(struct ima_event_data *event_data,
                                     struct ima_field_data *field_data,
                                     bool size_limit)
@@ -406,3 +442,29 @@ int ima_eventbuf_init(struct ima_event_data *event_data,
                                             event_data->buf_len, DATA_FMT_HEX,
                                             field_data);
 }
+
+/*
+ *  ima_eventmodsig_init - store the file's appended signature in a template
+ *  field, written with DATA_FMT_HEX. Returns 0 and writes nothing when the
+ *  event carries no modsig.
+ */
+int ima_eventmodsig_init(struct ima_event_data *event_data,
+                        struct ima_field_data *field_data)
+{
+       const void *raw;
+       u32 raw_len;
+       int err;
+
+       if (event_data->modsig == NULL)
+               return 0;
+
+       /*
+        * The modsig object is a runtime structure holding pointers, so the
+        * raw signature bytes are fetched and recorded instead.
+        */
+       err = ima_get_raw_modsig(event_data->modsig, &raw, &raw_len);
+       if (err != 0)
+               return err;
+
+       return ima_write_template_field_data(raw, raw_len, DATA_FMT_HEX,
+                                            field_data);
+}
index 652aa5d..9a88c79 100644 (file)
@@ -36,10 +36,14 @@ int ima_eventname_init(struct ima_event_data *event_data,
                       struct ima_field_data *field_data);
 int ima_eventdigest_ng_init(struct ima_event_data *event_data,
                            struct ima_field_data *field_data);
+int ima_eventdigest_modsig_init(struct ima_event_data *event_data,
+                               struct ima_field_data *field_data);
 int ima_eventname_ng_init(struct ima_event_data *event_data,
                          struct ima_field_data *field_data);
 int ima_eventsig_init(struct ima_event_data *event_data,
                      struct ima_field_data *field_data);
 int ima_eventbuf_init(struct ima_event_data *event_data,
                      struct ima_field_data *field_data);
+int ima_eventmodsig_init(struct ima_event_data *event_data,
+                        struct ima_field_data *field_data);
 #endif /* __LINUX_IMA_TEMPLATE_LIB_H */
index ed12d8e..d9323d3 100644 (file)
@@ -31,6 +31,7 @@
 #define IMA_NEW_FILE           0x04000000
 #define EVM_IMMUTABLE_DIGSIG   0x08000000
 #define IMA_FAIL_UNVERIFIABLE_SIGS     0x10000000
+#define IMA_MODSIG_ALLOWED     0x20000000
 
 #define IMA_DO_MASK            (IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \
                                 IMA_HASH | IMA_APPRAISE_SUBMASK)
@@ -147,10 +148,13 @@ int integrity_kernel_read(struct file *file, loff_t offset,
 
 extern struct dentry *integrity_dir;
 
+struct modsig;
+
 #ifdef CONFIG_INTEGRITY_SIGNATURE
 
 int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
                            const char *digest, int digestlen);
+int integrity_modsig_verify(unsigned int id, const struct modsig *modsig);
 
 int __init integrity_init_keyring(const unsigned int id);
 int __init integrity_load_x509(const unsigned int id, const char *path);
@@ -165,6 +169,12 @@ static inline int integrity_digsig_verify(const unsigned int id,
        return -EOPNOTSUPP;
 }
 
+/* Stub variant: performs no verification and always reports -EOPNOTSUPP. */
+static inline int integrity_modsig_verify(unsigned int id,
+                                         const struct modsig *modsig)
+{
+       return -EOPNOTSUPP;
+}
+
 static inline int integrity_init_keyring(const unsigned int id)
 {
        return 0;
@@ -190,6 +200,16 @@ static inline int asymmetric_verify(struct key *keyring, const char *sig,
 }
 #endif
 
+/*
+ * ima_modsig_verify() is only declared as a real function when
+ * CONFIG_IMA_APPRAISE_MODSIG is set; otherwise callers get an inline stub
+ * that always reports -EOPNOTSUPP.
+ */
+#ifdef CONFIG_IMA_APPRAISE_MODSIG
+int ima_modsig_verify(struct key *keyring, const struct modsig *modsig);
+#else
+static inline int ima_modsig_verify(struct key *keyring,
+                                   const struct modsig *modsig)
+{
+       return -EOPNOTSUPP;
+}
+#endif
+
 #ifdef CONFIG_IMA_LOAD_X509
 void __init ima_load_x509(void);
 #else
index ade6991..1fbd778 100644 (file)
@@ -1228,11 +1228,16 @@ hashalg_fail:
 
 static int __init init_digests(void)
 {
+       int i;
+
        digests = kcalloc(chip->nr_allocated_banks, sizeof(*digests),
                          GFP_KERNEL);
        if (!digests)
                return -ENOMEM;
 
+       /* Tag each digest slot with the algorithm ID of its bank. */
+       for (i = 0; i < chip->nr_allocated_banks; i++)
+               digests[i].alg_id = chip->allocated_banks[i].alg_id;
+
        return 0;
 }
 
diff --git a/security/lockdown/Kconfig b/security/lockdown/Kconfig
new file mode 100644 (file)
index 0000000..e84ddf4
--- /dev/null
@@ -0,0 +1,47 @@
+config SECURITY_LOCKDOWN_LSM
+       bool "Basic module for enforcing kernel lockdown"
+       depends on SECURITY
+       select MODULE_SIG if MODULES
+       help
+         Build support for an LSM that enforces a coarse kernel lockdown
+         behaviour.
+
+config SECURITY_LOCKDOWN_LSM_EARLY
+       bool "Enable lockdown LSM early in init"
+       depends on SECURITY_LOCKDOWN_LSM
+       help
+         Enable the lockdown LSM early in boot. This is necessary in order
+         to ensure that lockdown enforcement can be carried out on kernel
+         boot parameters that are otherwise parsed before the security
+         subsystem is fully initialised. If enabled, lockdown will
+         unconditionally be called before any other LSMs.
+
+choice
+       prompt "Kernel default lockdown mode"
+       default LOCK_DOWN_KERNEL_FORCE_NONE
+       depends on SECURITY_LOCKDOWN_LSM
+       help
+         The kernel can be configured to default to differing levels of
+         lockdown.
+
+config LOCK_DOWN_KERNEL_FORCE_NONE
+       bool "None"
+       help
+         No lockdown functionality is enabled by default. Lockdown may be
+         enabled via the kernel commandline or /sys/kernel/security/lockdown.
+
+config LOCK_DOWN_KERNEL_FORCE_INTEGRITY
+       bool "Integrity"
+       help
+        The kernel runs in integrity mode by default. Features that allow
+        the kernel to be modified at runtime are disabled.
+
+config LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY
+       bool "Confidentiality"
+       help
+        The kernel runs in confidentiality mode by default. Features that
+        allow the kernel to be modified at runtime or that permit userland
+        code to read confidential material held inside the kernel are
+        disabled.
+
+endchoice
diff --git a/security/lockdown/Makefile b/security/lockdown/Makefile
new file mode 100644 (file)
index 0000000..e3634b9
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_SECURITY_LOCKDOWN_LSM) += lockdown.o
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
new file mode 100644 (file)
index 0000000..8a10b43
--- /dev/null
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Lock down the kernel
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/security.h>
+#include <linux/export.h>
+#include <linux/lsm_hooks.h>
+
+/* Current lockdown level; within this file only lock_kernel_down() sets it. */
+static enum lockdown_reason kernel_locked_down;
+
+/*
+ * Human-readable label for each lockdown_reason, indexed by the enum value.
+ * The labels are used in the restriction notice and, for the entries listed
+ * in lockdown_levels, as the level names read from and written to securityfs.
+ */
+static const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
+       [LOCKDOWN_NONE] = "none",
+       [LOCKDOWN_MODULE_SIGNATURE] = "unsigned module loading",
+       [LOCKDOWN_DEV_MEM] = "/dev/mem,kmem,port",
+       [LOCKDOWN_KEXEC] = "kexec of unsigned images",
+       [LOCKDOWN_HIBERNATION] = "hibernation",
+       [LOCKDOWN_PCI_ACCESS] = "direct PCI access",
+       [LOCKDOWN_IOPORT] = "raw io port access",
+       [LOCKDOWN_MSR] = "raw MSR access",
+       [LOCKDOWN_ACPI_TABLES] = "modifying ACPI tables",
+       [LOCKDOWN_PCMCIA_CIS] = "direct PCMCIA CIS storage",
+       [LOCKDOWN_TIOCSSERIAL] = "reconfiguration of serial port IO",
+       [LOCKDOWN_MODULE_PARAMETERS] = "unsafe module parameters",
+       [LOCKDOWN_MMIOTRACE] = "unsafe mmio",
+       [LOCKDOWN_DEBUGFS] = "debugfs access",
+       [LOCKDOWN_INTEGRITY_MAX] = "integrity",
+       [LOCKDOWN_KCORE] = "/proc/kcore access",
+       [LOCKDOWN_KPROBES] = "use of kprobes",
+       [LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
+       [LOCKDOWN_PERF] = "unsafe use of perf",
+       [LOCKDOWN_TRACEFS] = "use of tracefs",
+       [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
+};
+
+/* The levels listed by lockdown_read() and accepted by lockdown_write(). */
+static const enum lockdown_reason lockdown_levels[] = {LOCKDOWN_NONE,
+                                                LOCKDOWN_INTEGRITY_MAX,
+                                                LOCKDOWN_CONFIDENTIALITY_MAX};
+
+/*
+ * Raise the kernel lockdown level to @level and log where the request came
+ * from (@where). A request that would not raise the current level is
+ * refused with -EPERM; otherwise 0 is returned.
+ */
+static int lock_kernel_down(const char *where, enum lockdown_reason level)
+{
+       if (level <= kernel_locked_down)
+               return -EPERM;
+
+       kernel_locked_down = level;
+
+       pr_notice("Kernel is locked down from %s; see man kernel_lockdown.7\n",
+                 where);
+
+       return 0;
+}
+
+/*
+ * Handle the "lockdown=" early parameter. Accepts "integrity" or
+ * "confidentiality"; anything else, or a missing value, yields -EINVAL.
+ */
+static int __init lockdown_param(char *level)
+{
+       enum lockdown_reason requested;
+
+       if (level == NULL)
+               return -EINVAL;
+
+       if (!strcmp(level, "integrity"))
+               requested = LOCKDOWN_INTEGRITY_MAX;
+       else if (!strcmp(level, "confidentiality"))
+               requested = LOCKDOWN_CONFIDENTIALITY_MAX;
+       else
+               return -EINVAL;
+
+       /* The outcome of the request itself is not propagated. */
+       lock_kernel_down("command line", requested);
+       return 0;
+}
+
+early_param("lockdown", lockdown_param);
+
+/**
+ * lockdown_is_locked_down - Find out if the kernel is locked down
+ * @what: Tag to use in notice generated if lockdown is in effect
+ *
+ * A @what at or above LOCKDOWN_CONFIDENTIALITY_MAX triggers a WARN and is
+ * refused. Otherwise the operation is refused when the current lockdown
+ * level is at least @what, and a notice naming the current task and the
+ * reason label is logged if a label exists for @what.
+ *
+ * Return: -EPERM if the operation is refused, 0 if it is allowed.
+ */
+static int lockdown_is_locked_down(enum lockdown_reason what)
+{
+       if (WARN(what >= LOCKDOWN_CONFIDENTIALITY_MAX,
+                "Invalid lockdown reason"))
+               return -EPERM;
+
+       if (kernel_locked_down >= what) {
+               if (lockdown_reasons[what])
+                       pr_notice("Lockdown: %s: %s is restricted; see man kernel_lockdown.7\n",
+                                 current->comm, lockdown_reasons[what]);
+               return -EPERM;
+       }
+
+       return 0;
+}
+
+/* Hook table handed to security_add_hooks() by lockdown_lsm_init(). */
+static struct security_hook_list lockdown_hooks[] __lsm_ro_after_init = {
+       LSM_HOOK_INIT(locked_down, lockdown_is_locked_down),
+};
+
+/*
+ * LSM init callback: apply the build-time default lockdown level selected by
+ * CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY/CONFIDENTIALITY (if either is set),
+ * then register lockdown_hooks under the name "lockdown". Always returns 0.
+ */
+static int __init lockdown_lsm_init(void)
+{
+#if defined(CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY)
+       lock_kernel_down("Kernel configuration", LOCKDOWN_INTEGRITY_MAX);
+#elif defined(CONFIG_LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY)
+       lock_kernel_down("Kernel configuration", LOCKDOWN_CONFIDENTIALITY_MAX);
+#endif
+       security_add_hooks(lockdown_hooks, ARRAY_SIZE(lockdown_hooks),
+                          "lockdown");
+       return 0;
+}
+
+/*
+ * Read handler for the securityfs "lockdown" file.
+ *
+ * Builds a single line holding the label of every entry in lockdown_levels,
+ * separated by spaces, with the currently active level wrapped in square
+ * brackets, and hands the requested window of it to
+ * simple_read_from_buffer().
+ */
+static ssize_t lockdown_read(struct file *filp, char __user *buf, size_t count,
+                            loff_t *ppos)
+{
+       /* Zero-filled so the buffer is a valid string even if nothing is added. */
+       char temp[80] = "";
+       int i, offset = 0;
+
+       for (i = 0; i < ARRAY_SIZE(lockdown_levels); i++) {
+               enum lockdown_reason level = lockdown_levels[i];
+               const char *label = lockdown_reasons[level];
+
+               if (!label)
+                       continue;
+
+               /* Bounded writes: scnprintf() cannot run past temp[]. */
+               if (kernel_locked_down == level)
+                       offset += scnprintf(temp + offset,
+                                           sizeof(temp) - offset,
+                                           "[%s] ", label);
+               else
+                       offset += scnprintf(temp + offset,
+                                           sizeof(temp) - offset,
+                                           "%s ", label);
+       }
+
+       /* Convert the last space to a newline if needed. */
+       if (offset > 0)
+               temp[offset-1] = '\n';
+
+       /* offset is the number of bytes stored, so no strlen() is needed. */
+       return simple_read_from_buffer(buf, count, ppos, temp, offset);
+}
+
+/*
+ * Write handler for the securityfs "lockdown" file.
+ *
+ * Copies the user buffer into a NUL-terminated kernel string, strips one
+ * trailing newline, and compares it with the label of each entry in
+ * lockdown_levels. A match is passed to lock_kernel_down().
+ *
+ * Returns @n on success, the memdup_user_nul() error if the copy fails,
+ * -EINVAL if no label matched, or the error from lock_kernel_down().
+ */
+static ssize_t lockdown_write(struct file *file, const char __user *buf,
+                             size_t n, loff_t *ppos)
+{
+       char *state;
+       int i, len, err = -EINVAL;
+
+       state = memdup_user_nul(buf, n);
+       if (IS_ERR(state))
+               return PTR_ERR(state);
+
+       len = strlen(state);
+       if (len && state[len-1] == '\n') {
+               state[len-1] = '\0';
+               len--;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(lockdown_levels); i++) {
+               enum lockdown_reason level = lockdown_levels[i];
+               const char *label = lockdown_reasons[level];
+
+               if (label && !strcmp(state, label))
+                       err = lock_kernel_down("securityfs", level);
+       }
+
+       kfree(state);
+       return err ? err : n;
+}
+
+/* File operations for the securityfs "lockdown" file. */
+static const struct file_operations lockdown_ops = {
+       .read  = lockdown_read,
+       .write = lockdown_write,
+};
+
+/*
+ * Create the "lockdown" securityfs file (mode 0600) backed by lockdown_ops.
+ * Returns 0, or the error encoded in the pointer returned by
+ * securityfs_create_file().
+ */
+static int __init lockdown_secfs_init(void)
+{
+       struct dentry *dentry;
+
+       dentry = securityfs_create_file("lockdown", 0600, NULL, NULL,
+                                       &lockdown_ops);
+       return PTR_ERR_OR_ZERO(dentry);
+}
+
+core_initcall(lockdown_secfs_init);
+
+/*
+ * LSM descriptor for "lockdown". It is declared with DEFINE_EARLY_LSM() when
+ * CONFIG_SECURITY_LOCKDOWN_LSM_EARLY is set and with DEFINE_LSM() otherwise;
+ * both variants share the same initialiser.
+ */
+#ifdef CONFIG_SECURITY_LOCKDOWN_LSM_EARLY
+DEFINE_EARLY_LSM(lockdown) = {
+#else
+DEFINE_LSM(lockdown) = {
+#endif
+       .name = "lockdown",
+       .init = lockdown_lsm_init,
+};
index d568e17..74a13d4 100644 (file)
@@ -187,7 +187,8 @@ out_free_rule:
 out_free_buf:
        kfree(buf);
 out_free_pol:
-       release_ruleset(pol);
+       if (pol)
+                release_ruleset(pol);
        return err;
 }
 
index 250ee2d..1bc000f 100644 (file)
@@ -33,6 +33,7 @@
 
 /* How many LSMs were built into the kernel? */
 #define LSM_COUNT (__end_lsm_info - __start_lsm_info)
+#define EARLY_LSM_COUNT (__end_early_lsm_info - __start_early_lsm_info)
 
 struct security_hook_heads security_hook_heads __lsm_ro_after_init;
 static BLOCKING_NOTIFIER_HEAD(blocking_lsm_notifier_chain);
@@ -277,6 +278,8 @@ static void __init ordered_lsm_parse(const char *order, const char *origin)
 static void __init lsm_early_cred(struct cred *cred);
 static void __init lsm_early_task(struct task_struct *task);
 
+static int lsm_append(const char *new, char **result);
+
 static void __init ordered_lsm_init(void)
 {
        struct lsm_info **lsm;
@@ -323,6 +326,26 @@ static void __init ordered_lsm_init(void)
        kfree(ordered_lsms);
 }
 
+/*
+ * early_security_init - set up the hook lists and bring up the early LSMs.
+ *
+ * Initialises every hlist_head in security_hook_heads, then walks the entries
+ * between __start_early_lsm_info and __end_early_lsm_info. An entry with no
+ * ->enabled pointer is pointed at lsm_enabled_true, and each entry is passed
+ * to prepare_lsm() and initialize_lsm(). Always returns 0.
+ */
+int __init early_security_init(void)
+{
+       int i;
+       struct hlist_head *list = (struct hlist_head *) &security_hook_heads;
+       struct lsm_info *lsm;
+
+       for (i = 0; i < sizeof(security_hook_heads) / sizeof(struct hlist_head);
+            i++)
+               INIT_HLIST_HEAD(&list[i]);
+
+       for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) {
+               if (!lsm->enabled)
+                       lsm->enabled = &lsm_enabled_true;
+               prepare_lsm(lsm);
+               initialize_lsm(lsm);
+       }
+
+       return 0;
+}
+
 /**
  * security_init - initializes the security framework
  *
@@ -330,14 +353,18 @@ static void __init ordered_lsm_init(void)
  */
 int __init security_init(void)
 {
-       int i;
-       struct hlist_head *list = (struct hlist_head *) &security_hook_heads;
+       struct lsm_info *lsm;
 
        pr_info("Security Framework initializing\n");
 
-       for (i = 0; i < sizeof(security_hook_heads) / sizeof(struct hlist_head);
-            i++)
-               INIT_HLIST_HEAD(&list[i]);
+       /*
+        * Append the names of the early LSM modules now that kmalloc() is
+        * available
+        */
+       for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) {
+               if (lsm->enabled)
+                       lsm_append(lsm->name, &lsm_names);
+       }
 
        /* Load LSMs in specified order. */
        ordered_lsm_init();
@@ -384,7 +411,7 @@ static bool match_last_lsm(const char *list, const char *lsm)
        return !strcmp(last, lsm);
 }
 
-static int lsm_append(char *new, char **result)
+static int lsm_append(const char *new, char **result)
 {
        char *cp;
 
@@ -422,8 +449,15 @@ void __init security_add_hooks(struct security_hook_list *hooks, int count,
                hooks[i].lsm = lsm;
                hlist_add_tail_rcu(&hooks[i].list, hooks[i].head);
        }
-       if (lsm_append(lsm, &lsm_names) < 0)
-               panic("%s - Cannot get early memory.\n", __func__);
+
+       /*
+        * Don't try to append during early_security_init(), we'll come back
+        * and fix this up afterwards.
+        */
+       if (slab_is_available()) {
+               if (lsm_append(lsm, &lsm_names) < 0)
+                       panic("%s - Cannot get early memory.\n", __func__);
+       }
 }
 
 int call_blocking_lsm_notifier(enum lsm_event event, void *data)
@@ -870,6 +904,12 @@ int security_move_mount(const struct path *from_path, const struct path *to_path
        return call_int_hook(move_mount, 0, from_path, to_path);
 }
 
+/*
+ * Dispatch the path_notify hook with @path, @mask and @obj_type through
+ * call_int_hook(), passing 0 as its second argument.
+ */
+int security_path_notify(const struct path *path, u64 mask,
+                               unsigned int obj_type)
+{
+       return call_int_hook(path_notify, 0, path, mask, obj_type);
+}
+
 int security_inode_alloc(struct inode *inode)
 {
        int rc = lsm_inode_alloc(inode);
@@ -2358,3 +2398,9 @@ void security_bpf_prog_free(struct bpf_prog_aux *aux)
        call_void_hook(bpf_prog_free_security, aux);
 }
 #endif /* CONFIG_BPF_SYSCALL */
+
+/*
+ * Dispatch the locked_down hook for @what through call_int_hook(), passing 0
+ * as its second argument. Exported for use by modules.
+ */
+int security_locked_down(enum lockdown_reason what)
+{
+       return call_int_hook(locked_down, 0, what);
+}
+EXPORT_SYMBOL(security_locked_down);
index 74dd46d..9625b99 100644 (file)
@@ -89,6 +89,8 @@
 #include <linux/kernfs.h>
 #include <linux/stringhash.h>  /* for hashlen_string() */
 #include <uapi/linux/mount.h>
+#include <linux/fsnotify.h>
+#include <linux/fanotify.h>
 
 #include "avc.h"
 #include "objsec.h"
@@ -3275,6 +3277,50 @@ static int selinux_inode_removexattr(struct dentry *dentry, const char *name)
        return -EACCES;
 }
 
+/*
+ * selinux_path_notify - check permission to set a notification mark on @path.
+ * @path: object the mark is being placed on.
+ * @mask: event mask requested for the mark.
+ * @obj_type: FSNOTIFY_OBJ_TYPE_* value naming the kind of mark.
+ *
+ * The base permission is chosen from @obj_type. Superblock marks are first
+ * checked for FILESYSTEM__WATCH on the path's superblock.
+ * FILE__WATCH_WITH_PERM is added when @mask contains any of
+ * ALL_FSNOTIFY_PERM_EVENTS, and FILE__WATCH_READS when it contains
+ * FS_ACCESS, FS_ACCESS_PERM or FS_CLOSE_NOWRITE.
+ *
+ * Returns the result of path_has_perm() for the combined permissions, the
+ * error from the superblock check if that fails, or -EINVAL for an
+ * unrecognised @obj_type.
+ */
+static int selinux_path_notify(const struct path *path, u64 mask,
+                                               unsigned int obj_type)
+{
+       int ret;
+       u32 perm;
+
+       struct common_audit_data ad;
+
+       ad.type = LSM_AUDIT_DATA_PATH;
+       ad.u.path = *path;
+
+       /*
+        * Set permission needed based on the type of mark being set.
+        * Performs an additional check for sb watches.
+        */
+       switch (obj_type) {
+       case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
+               perm = FILE__WATCH_MOUNT;
+               break;
+       case FSNOTIFY_OBJ_TYPE_SB:
+               perm = FILE__WATCH_SB;
+               ret = superblock_has_perm(current_cred(), path->dentry->d_sb,
+                                               FILESYSTEM__WATCH, &ad);
+               if (ret)
+                       return ret;
+               break;
+       case FSNOTIFY_OBJ_TYPE_INODE:
+               perm = FILE__WATCH;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /* blocking watches require the file:watch_with_perm permission */
+       if (mask & (ALL_FSNOTIFY_PERM_EVENTS))
+               perm |= FILE__WATCH_WITH_PERM;
+
+       /* watches on read-like events need the file:watch_reads permission */
+       if (mask & (FS_ACCESS | FS_ACCESS_PERM | FS_CLOSE_NOWRITE))
+               perm |= FILE__WATCH_READS;
+
+       return path_has_perm(current_cred(), path, perm);
+}
+
 /*
  * Copy the inode security context value to the user.
  *
@@ -3403,7 +3449,7 @@ static int selinux_inode_copy_up_xattr(const char *name)
 static int selinux_kernfs_init_security(struct kernfs_node *kn_dir,
                                        struct kernfs_node *kn)
 {
-       const struct task_security_struct *tsec = current_security();
+       const struct task_security_struct *tsec = selinux_cred(current_cred());
        u32 parent_sid, newsid, clen;
        int rc;
        char *context;
@@ -6818,6 +6864,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
        LSM_HOOK_INIT(inode_getsecid, selinux_inode_getsecid),
        LSM_HOOK_INIT(inode_copy_up, selinux_inode_copy_up),
        LSM_HOOK_INIT(inode_copy_up_xattr, selinux_inode_copy_up_xattr),
+       LSM_HOOK_INIT(path_notify, selinux_path_notify),
 
        LSM_HOOK_INIT(kernfs_init_security, selinux_kernfs_init_security),
 
index 201f7e5..32e9b03 100644 (file)
@@ -7,7 +7,8 @@
 
 #define COMMON_FILE_PERMS COMMON_FILE_SOCK_PERMS, "unlink", "link", \
     "rename", "execute", "quotaon", "mounton", "audit_access", \
-    "open", "execmod"
+       "open", "execmod", "watch", "watch_mount", "watch_sb", \
+       "watch_with_perm", "watch_reads"
 
 #define COMMON_SOCK_PERMS COMMON_FILE_SOCK_PERMS, "bind", "connect", \
     "listen", "accept", "getopt", "setopt", "shutdown", "recvfrom",  \
@@ -60,7 +61,7 @@ struct security_class_mapping secclass_map[] = {
        { "filesystem",
          { "mount", "remount", "unmount", "getattr",
            "relabelfrom", "relabelto", "associate", "quotamod",
-           "quotaget", NULL } },
+           "quotaget", "watch", NULL } },
        { "file",
          { COMMON_FILE_PERMS,
            "execute_no_trans", "entrypoint", NULL } },
index 91c5395..586b7ab 100644 (file)
@@ -37,16 +37,6 @@ struct task_security_struct {
        u32 sockcreate_sid;     /* fscreate SID */
 };
 
-/*
- * get the subjective security ID of the current task
- */
-static inline u32 current_sid(void)
-{
-       const struct task_security_struct *tsec = current_security();
-
-       return tsec->sid;
-}
-
 enum label_initialized {
        LABEL_INVALID,          /* invalid or not initialized */
        LABEL_INITIALIZED,      /* initialized */
@@ -185,4 +175,14 @@ static inline struct ipc_security_struct *selinux_ipc(
        return ipc->security + selinux_blob_sizes.lbs_ipc;
 }
 
+/*
+ * get the subjective security ID of the current task
+ *
+ * The SID is read from the task_security_struct that selinux_cred() returns
+ * for current_cred().
+ */
+static inline u32 current_sid(void)
+{
+       const struct task_security_struct *tsec = selinux_cred(current_cred());
+
+       return tsec->sid;
+}
+
 #endif /* _SELINUX_OBJSEC_H_ */
index 9cb83ee..e40fecd 100644 (file)
@@ -132,9 +132,9 @@ static void sel_netif_destroy(struct sel_netif *netif)
  */
 static int sel_netif_sid_slow(struct net *ns, int ifindex, u32 *sid)
 {
-       int ret;
+       int ret = 0;
        struct sel_netif *netif;
-       struct sel_netif *new = NULL;
+       struct sel_netif *new;
        struct net_device *dev;
 
        /* NOTE: we always use init's network namespace since we don't
@@ -151,32 +151,27 @@ static int sel_netif_sid_slow(struct net *ns, int ifindex, u32 *sid)
        netif = sel_netif_find(ns, ifindex);
        if (netif != NULL) {
                *sid = netif->nsec.sid;
-               ret = 0;
                goto out;
        }
-       new = kzalloc(sizeof(*new), GFP_ATOMIC);
-       if (new == NULL) {
-               ret = -ENOMEM;
-               goto out;
-       }
-       ret = security_netif_sid(&selinux_state, dev->name, &new->nsec.sid);
-       if (ret != 0)
-               goto out;
-       new->nsec.ns = ns;
-       new->nsec.ifindex = ifindex;
-       ret = sel_netif_insert(new);
+
+       ret = security_netif_sid(&selinux_state, dev->name, sid);
        if (ret != 0)
                goto out;
-       *sid = new->nsec.sid;
+       new = kzalloc(sizeof(*new), GFP_ATOMIC);
+       if (new) {
+               new->nsec.ns = ns;
+               new->nsec.ifindex = ifindex;
+               new->nsec.sid = *sid;
+               if (sel_netif_insert(new))
+                       kfree(new);
+       }
 
 out:
        spin_unlock_bh(&sel_netif_lock);
        dev_put(dev);
-       if (unlikely(ret)) {
+       if (unlikely(ret))
                pr_warn("SELinux: failure in %s(), unable to determine network interface label (%d)\n",
                        __func__, ifindex);
-               kfree(new);
-       }
        return ret;
 }
 
index cae1fca..9ab84ef 100644 (file)
@@ -189,9 +189,9 @@ static void sel_netnode_insert(struct sel_netnode *node)
  */
 static int sel_netnode_sid_slow(void *addr, u16 family, u32 *sid)
 {
-       int ret = -ENOMEM;
+       int ret;
        struct sel_netnode *node;
-       struct sel_netnode *new = NULL;
+       struct sel_netnode *new;
 
        spin_lock_bh(&sel_netnode_lock);
        node = sel_netnode_find(addr, family);
@@ -200,38 +200,36 @@ static int sel_netnode_sid_slow(void *addr, u16 family, u32 *sid)
                spin_unlock_bh(&sel_netnode_lock);
                return 0;
        }
+
        new = kzalloc(sizeof(*new), GFP_ATOMIC);
-       if (new == NULL)
-               goto out;
        switch (family) {
        case PF_INET:
                ret = security_node_sid(&selinux_state, PF_INET,
                                        addr, sizeof(struct in_addr), sid);
-               new->nsec.addr.ipv4 = *(__be32 *)addr;
+               if (new)
+                       new->nsec.addr.ipv4 = *(__be32 *)addr;
                break;
        case PF_INET6:
                ret = security_node_sid(&selinux_state, PF_INET6,
                                        addr, sizeof(struct in6_addr), sid);
-               new->nsec.addr.ipv6 = *(struct in6_addr *)addr;
+               if (new)
+                       new->nsec.addr.ipv6 = *(struct in6_addr *)addr;
                break;
        default:
                BUG();
                ret = -EINVAL;
        }
-       if (ret != 0)
-               goto out;
-
-       new->nsec.family = family;
-       new->nsec.sid = *sid;
-       sel_netnode_insert(new);
+       if (ret == 0 && new) {
+               new->nsec.family = family;
+               new->nsec.sid = *sid;
+               sel_netnode_insert(new);
+       } else
+               kfree(new);
 
-out:
        spin_unlock_bh(&sel_netnode_lock);
-       if (unlikely(ret)) {
+       if (unlikely(ret))
                pr_warn("SELinux: failure in %s(), unable to determine network node label\n",
                        __func__);
-               kfree(new);
-       }
        return ret;
 }
 
index 364b6d5..3f8b2c0 100644 (file)
@@ -137,9 +137,9 @@ static void sel_netport_insert(struct sel_netport *port)
  */
 static int sel_netport_sid_slow(u8 protocol, u16 pnum, u32 *sid)
 {
-       int ret = -ENOMEM;
+       int ret;
        struct sel_netport *port;
-       struct sel_netport *new = NULL;
+       struct sel_netport *new;
 
        spin_lock_bh(&sel_netport_lock);
        port = sel_netport_find(protocol, pnum);
@@ -148,25 +148,23 @@ static int sel_netport_sid_slow(u8 protocol, u16 pnum, u32 *sid)
                spin_unlock_bh(&sel_netport_lock);
                return 0;
        }
-       new = kzalloc(sizeof(*new), GFP_ATOMIC);
-       if (new == NULL)
-               goto out;
+
        ret = security_port_sid(&selinux_state, protocol, pnum, sid);
        if (ret != 0)
                goto out;
-
-       new->psec.port = pnum;
-       new->psec.protocol = protocol;
-       new->psec.sid = *sid;
-       sel_netport_insert(new);
+       new = kzalloc(sizeof(*new), GFP_ATOMIC);
+       if (new) {
+               new->psec.port = pnum;
+               new->psec.protocol = protocol;
+               new->psec.sid = *sid;
+               sel_netport_insert(new);
+       }
 
 out:
        spin_unlock_bh(&sel_netport_lock);
-       if (unlikely(ret)) {
+       if (unlikely(ret))
                pr_warn("SELinux: failure in %s(), unable to determine network port label\n",
                        __func__);
-               kfree(new);
-       }
        return ret;
 }
 
index f8efaa9..1260f5f 100644 (file)
@@ -177,6 +177,195 @@ static struct policydb_compat_info *policydb_lookup_compat(int version)
 }
 
 /*
+ * The following *_destroy functions are used to
+ * free any memory allocated for each kind of
+ * symbol data in the policy database.
+ */
+
+static int perm_destroy(void *key, void *datum, void *p)
+{
+       kfree(key);
+       kfree(datum);
+       return 0;
+}
+
+static int common_destroy(void *key, void *datum, void *p)
+{
+       struct common_datum *comdatum;
+
+       kfree(key);
+       if (datum) {
+               comdatum = datum;
+               hashtab_map(comdatum->permissions.table, perm_destroy, NULL);
+               hashtab_destroy(comdatum->permissions.table);
+       }
+       kfree(datum);
+       return 0;
+}
+
+static void constraint_expr_destroy(struct constraint_expr *expr)
+{
+       if (expr) {
+               ebitmap_destroy(&expr->names);
+               if (expr->type_names) {
+                       ebitmap_destroy(&expr->type_names->types);
+                       ebitmap_destroy(&expr->type_names->negset);
+                       kfree(expr->type_names);
+               }
+               kfree(expr);
+       }
+}
+
+static int cls_destroy(void *key, void *datum, void *p)
+{
+       struct class_datum *cladatum;
+       struct constraint_node *constraint, *ctemp;
+       struct constraint_expr *e, *etmp;
+
+       kfree(key);
+       if (datum) {
+               cladatum = datum;
+               hashtab_map(cladatum->permissions.table, perm_destroy, NULL);
+               hashtab_destroy(cladatum->permissions.table);
+               constraint = cladatum->constraints;
+               while (constraint) {
+                       e = constraint->expr;
+                       while (e) {
+                               etmp = e;
+                               e = e->next;
+                               constraint_expr_destroy(etmp);
+                       }
+                       ctemp = constraint;
+                       constraint = constraint->next;
+                       kfree(ctemp);
+               }
+
+               constraint = cladatum->validatetrans;
+               while (constraint) {
+                       e = constraint->expr;
+                       while (e) {
+                               etmp = e;
+                               e = e->next;
+                               constraint_expr_destroy(etmp);
+                       }
+                       ctemp = constraint;
+                       constraint = constraint->next;
+                       kfree(ctemp);
+               }
+               kfree(cladatum->comkey);
+       }
+       kfree(datum);
+       return 0;
+}
+
+static int role_destroy(void *key, void *datum, void *p)
+{
+       struct role_datum *role;
+
+       kfree(key);
+       if (datum) {
+               role = datum;
+               ebitmap_destroy(&role->dominates);
+               ebitmap_destroy(&role->types);
+       }
+       kfree(datum);
+       return 0;
+}
+
+static int type_destroy(void *key, void *datum, void *p)
+{
+       kfree(key);
+       kfree(datum);
+       return 0;
+}
+
+static int user_destroy(void *key, void *datum, void *p)
+{
+       struct user_datum *usrdatum;
+
+       kfree(key);
+       if (datum) {
+               usrdatum = datum;
+               ebitmap_destroy(&usrdatum->roles);
+               ebitmap_destroy(&usrdatum->range.level[0].cat);
+               ebitmap_destroy(&usrdatum->range.level[1].cat);
+               ebitmap_destroy(&usrdatum->dfltlevel.cat);
+       }
+       kfree(datum);
+       return 0;
+}
+
+static int sens_destroy(void *key, void *datum, void *p)
+{
+       struct level_datum *levdatum;
+
+       kfree(key);
+       if (datum) {
+               levdatum = datum;
+               if (levdatum->level)
+                       ebitmap_destroy(&levdatum->level->cat);
+               kfree(levdatum->level);
+       }
+       kfree(datum);
+       return 0;
+}
+
+static int cat_destroy(void *key, void *datum, void *p)
+{
+       kfree(key);
+       kfree(datum);
+       return 0;
+}
+
+static int (*destroy_f[SYM_NUM]) (void *key, void *datum, void *datap) =
+{
+       common_destroy,
+       cls_destroy,
+       role_destroy,
+       type_destroy,
+       user_destroy,
+       cond_destroy_bool,
+       sens_destroy,
+       cat_destroy,
+};
+
+static int filenametr_destroy(void *key, void *datum, void *p)
+{
+       struct filename_trans *ft = key;
+
+       kfree(ft->name);
+       kfree(key);
+       kfree(datum);
+       cond_resched();
+       return 0;
+}
+
+static int range_tr_destroy(void *key, void *datum, void *p)
+{
+       struct mls_range *rt = datum;
+
+       kfree(key);
+       ebitmap_destroy(&rt->level[0].cat);
+       ebitmap_destroy(&rt->level[1].cat);
+       kfree(datum);
+       cond_resched();
+       return 0;
+}
+
+static void ocontext_destroy(struct ocontext *c, int i)
+{
+       if (!c)
+               return;
+
+       context_destroy(&c->context[0]);
+       context_destroy(&c->context[1]);
+       if (i == OCON_ISID || i == OCON_FS ||
+           i == OCON_NETIF || i == OCON_FSUSE)
+               kfree(c->u.name);
+       kfree(c);
+}
+
+/*
  * Initialize the role table.
  */
 static int roles_init(struct policydb *p)
@@ -250,6 +439,7 @@ static int filenametr_cmp(struct hashtab *h, const void *k1, const void *k2)
 static u32 rangetr_hash(struct hashtab *h, const void *k)
 {
        const struct range_trans *key = k;
+
        return (key->source_type + (key->target_type << 3) +
                (key->target_class << 5)) & (h->size - 1);
 }
@@ -272,8 +462,6 @@ static int rangetr_cmp(struct hashtab *h, const void *k1, const void *k2)
        return v;
 }
 
-static int (*destroy_f[SYM_NUM]) (void *key, void *datum, void *datap);
-
 /*
  * Initialize a policy database structure.
  */
@@ -301,7 +489,8 @@ static int policydb_init(struct policydb *p)
        if (rc)
                goto out;
 
-       p->filename_trans = hashtab_create(filenametr_hash, filenametr_cmp, (1 << 10));
+       p->filename_trans = hashtab_create(filenametr_hash, filenametr_cmp,
+                                          (1 << 10));
        if (!p->filename_trans) {
                rc = -ENOMEM;
                goto out;
@@ -399,7 +588,7 @@ static int type_index(void *key, void *datum, void *datap)
                    || typdatum->bounds > p->p_types.nprim)
                        return -EINVAL;
                p->sym_val_to_name[SYM_TYPES][typdatum->value - 1] = key;
-               p->type_val_to_struct_array[typdatum->value - 1] = typdatum;
+               p->type_val_to_struct[typdatum->value - 1] = typdatum;
        }
 
        return 0;
@@ -477,9 +666,9 @@ static void hash_eval(struct hashtab *h, const char *hash_name)
        struct hashtab_info info;
 
        hashtab_stat(h, &info);
-       pr_debug("SELinux: %s:  %d entries and %d/%d buckets used, "
-              "longest chain length %d\n", hash_name, h->nel,
-              info.slots_used, h->size, info.max_chain_len);
+       pr_debug("SELinux: %s:  %d entries and %d/%d buckets used, longest chain length %d\n",
+                hash_name, h->nel, info.slots_used, h->size,
+                info.max_chain_len);
 }
 
 static void symtab_hash_eval(struct symtab *s)
@@ -541,10 +730,10 @@ static int policydb_index(struct policydb *p)
        if (!p->user_val_to_struct)
                return -ENOMEM;
 
-       p->type_val_to_struct_array = kvcalloc(p->p_types.nprim,
-                                              sizeof(*p->type_val_to_struct_array),
-                                              GFP_KERNEL);
-       if (!p->type_val_to_struct_array)
+       p->type_val_to_struct = kvcalloc(p->p_types.nprim,
+                                        sizeof(*p->type_val_to_struct),
+                                        GFP_KERNEL);
+       if (!p->type_val_to_struct)
                return -ENOMEM;
 
        rc = cond_init_bool_indexes(p);
@@ -568,193 +757,6 @@ out:
 }
 
 /*
- * The following *_destroy functions are used to
- * free any memory allocated for each kind of
- * symbol data in the policy database.
- */
-
-static int perm_destroy(void *key, void *datum, void *p)
-{
-       kfree(key);
-       kfree(datum);
-       return 0;
-}
-
-static int common_destroy(void *key, void *datum, void *p)
-{
-       struct common_datum *comdatum;
-
-       kfree(key);
-       if (datum) {
-               comdatum = datum;
-               hashtab_map(comdatum->permissions.table, perm_destroy, NULL);
-               hashtab_destroy(comdatum->permissions.table);
-       }
-       kfree(datum);
-       return 0;
-}
-
-static void constraint_expr_destroy(struct constraint_expr *expr)
-{
-       if (expr) {
-               ebitmap_destroy(&expr->names);
-               if (expr->type_names) {
-                       ebitmap_destroy(&expr->type_names->types);
-                       ebitmap_destroy(&expr->type_names->negset);
-                       kfree(expr->type_names);
-               }
-               kfree(expr);
-       }
-}
-
-static int cls_destroy(void *key, void *datum, void *p)
-{
-       struct class_datum *cladatum;
-       struct constraint_node *constraint, *ctemp;
-       struct constraint_expr *e, *etmp;
-
-       kfree(key);
-       if (datum) {
-               cladatum = datum;
-               hashtab_map(cladatum->permissions.table, perm_destroy, NULL);
-               hashtab_destroy(cladatum->permissions.table);
-               constraint = cladatum->constraints;
-               while (constraint) {
-                       e = constraint->expr;
-                       while (e) {
-                               etmp = e;
-                               e = e->next;
-                               constraint_expr_destroy(etmp);
-                       }
-                       ctemp = constraint;
-                       constraint = constraint->next;
-                       kfree(ctemp);
-               }
-
-               constraint = cladatum->validatetrans;
-               while (constraint) {
-                       e = constraint->expr;
-                       while (e) {
-                               etmp = e;
-                               e = e->next;
-                               constraint_expr_destroy(etmp);
-                       }
-                       ctemp = constraint;
-                       constraint = constraint->next;
-                       kfree(ctemp);
-               }
-               kfree(cladatum->comkey);
-       }
-       kfree(datum);
-       return 0;
-}
-
-static int role_destroy(void *key, void *datum, void *p)
-{
-       struct role_datum *role;
-
-       kfree(key);
-       if (datum) {
-               role = datum;
-               ebitmap_destroy(&role->dominates);
-               ebitmap_destroy(&role->types);
-       }
-       kfree(datum);
-       return 0;
-}
-
-static int type_destroy(void *key, void *datum, void *p)
-{
-       kfree(key);
-       kfree(datum);
-       return 0;
-}
-
-static int user_destroy(void *key, void *datum, void *p)
-{
-       struct user_datum *usrdatum;
-
-       kfree(key);
-       if (datum) {
-               usrdatum = datum;
-               ebitmap_destroy(&usrdatum->roles);
-               ebitmap_destroy(&usrdatum->range.level[0].cat);
-               ebitmap_destroy(&usrdatum->range.level[1].cat);
-               ebitmap_destroy(&usrdatum->dfltlevel.cat);
-       }
-       kfree(datum);
-       return 0;
-}
-
-static int sens_destroy(void *key, void *datum, void *p)
-{
-       struct level_datum *levdatum;
-
-       kfree(key);
-       if (datum) {
-               levdatum = datum;
-               if (levdatum->level)
-                       ebitmap_destroy(&levdatum->level->cat);
-               kfree(levdatum->level);
-       }
-       kfree(datum);
-       return 0;
-}
-
-static int cat_destroy(void *key, void *datum, void *p)
-{
-       kfree(key);
-       kfree(datum);
-       return 0;
-}
-
-static int (*destroy_f[SYM_NUM]) (void *key, void *datum, void *datap) =
-{
-       common_destroy,
-       cls_destroy,
-       role_destroy,
-       type_destroy,
-       user_destroy,
-       cond_destroy_bool,
-       sens_destroy,
-       cat_destroy,
-};
-
-static int filenametr_destroy(void *key, void *datum, void *p)
-{
-       struct filename_trans *ft = key;
-       kfree(ft->name);
-       kfree(key);
-       kfree(datum);
-       cond_resched();
-       return 0;
-}
-
-static int range_tr_destroy(void *key, void *datum, void *p)
-{
-       struct mls_range *rt = datum;
-       kfree(key);
-       ebitmap_destroy(&rt->level[0].cat);
-       ebitmap_destroy(&rt->level[1].cat);
-       kfree(datum);
-       cond_resched();
-       return 0;
-}
-
-static void ocontext_destroy(struct ocontext *c, int i)
-{
-       if (!c)
-               return;
-
-       context_destroy(&c->context[0]);
-       context_destroy(&c->context[1]);
-       if (i == OCON_ISID || i == OCON_FS ||
-           i == OCON_NETIF || i == OCON_FSUSE)
-               kfree(c->u.name);
-       kfree(c);
-}
-
-/*
  * Free any memory allocated by a policy database structure.
  */
 void policydb_destroy(struct policydb *p)
@@ -777,7 +779,7 @@ void policydb_destroy(struct policydb *p)
        kfree(p->class_val_to_struct);
        kfree(p->role_val_to_struct);
        kfree(p->user_val_to_struct);
-       kvfree(p->type_val_to_struct_array);
+       kvfree(p->type_val_to_struct);
 
        avtab_destroy(&p->te_avtab);
 
@@ -1722,7 +1724,7 @@ static int type_bounds_sanity_check(void *key, void *datum, void *datap)
                        return -EINVAL;
                }
 
-               upper = p->type_val_to_struct_array[upper->bounds - 1];
+               upper = p->type_val_to_struct[upper->bounds - 1];
                BUG_ON(!upper);
 
                if (upper->attribute) {
index fcc6366..162d0e7 100644 (file)
@@ -253,7 +253,7 @@ struct policydb {
        struct class_datum **class_val_to_struct;
        struct role_datum **role_val_to_struct;
        struct user_datum **user_val_to_struct;
-       struct type_datum **type_val_to_struct_array;
+       struct type_datum **type_val_to_struct;
 
        /* type enforcement access vectors and transitions */
        struct avtab te_avtab;
index d61563a..3a29e7c 100644 (file)
@@ -542,13 +542,13 @@ static void type_attribute_bounds_av(struct policydb *policydb,
        struct type_datum *target;
        u32 masked = 0;
 
-       source = policydb->type_val_to_struct_array[scontext->type - 1];
+       source = policydb->type_val_to_struct[scontext->type - 1];
        BUG_ON(!source);
 
        if (!source->bounds)
                return;
 
-       target = policydb->type_val_to_struct_array[tcontext->type - 1];
+       target = policydb->type_val_to_struct[tcontext->type - 1];
        BUG_ON(!target);
 
        memset(&lo_avd, 0, sizeof(lo_avd));
@@ -891,7 +891,7 @@ int security_bounded_transition(struct selinux_state *state,
 
        index = new_context->type;
        while (true) {
-               type = policydb->type_val_to_struct_array[index - 1];
+               type = policydb->type_val_to_struct[index - 1];
                BUG_ON(!type);
 
                /* not bounded anymore */
index 1f0a6ea..7d49994 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/spinlock.h>
-#include <linux/atomic.h>
+#include <asm/barrier.h>
 #include "flask.h"
 #include "security.h"
 #include "sidtab.h"
@@ -23,14 +23,14 @@ int sidtab_init(struct sidtab *s)
 
        memset(s->roots, 0, sizeof(s->roots));
 
+       /* max count is SIDTAB_MAX so valid index is always < SIDTAB_MAX */
        for (i = 0; i < SIDTAB_RCACHE_SIZE; i++)
-               atomic_set(&s->rcache[i], -1);
+               s->rcache[i] = SIDTAB_MAX;
 
        for (i = 0; i < SECINITSID_NUM; i++)
                s->isids[i].set = 0;
 
-       atomic_set(&s->count, 0);
-
+       s->count = 0;
        s->convert = NULL;
 
        spin_lock_init(&s->lock);
@@ -130,14 +130,12 @@ static struct context *sidtab_do_lookup(struct sidtab *s, u32 index, int alloc)
 
 static struct context *sidtab_lookup(struct sidtab *s, u32 index)
 {
-       u32 count = (u32)atomic_read(&s->count);
+       /* read entries only after reading count */
+       u32 count = smp_load_acquire(&s->count);
 
        if (index >= count)
                return NULL;
 
-       /* read entries after reading count */
-       smp_rmb();
-
        return sidtab_do_lookup(s, index, 0);
 }
 
@@ -210,10 +208,10 @@ static int sidtab_find_context(union sidtab_entry_inner entry,
 static void sidtab_rcache_update(struct sidtab *s, u32 index, u32 pos)
 {
        while (pos > 0) {
-               atomic_set(&s->rcache[pos], atomic_read(&s->rcache[pos - 1]));
+               WRITE_ONCE(s->rcache[pos], READ_ONCE(s->rcache[pos - 1]));
                --pos;
        }
-       atomic_set(&s->rcache[0], (int)index);
+       WRITE_ONCE(s->rcache[0], index);
 }
 
 static void sidtab_rcache_push(struct sidtab *s, u32 index)
@@ -227,14 +225,14 @@ static int sidtab_rcache_search(struct sidtab *s, struct context *context,
        u32 i;
 
        for (i = 0; i < SIDTAB_RCACHE_SIZE; i++) {
-               int v = atomic_read(&s->rcache[i]);
+               u32 v = READ_ONCE(s->rcache[i]);
 
-               if (v < 0)
+               if (v >= SIDTAB_MAX)
                        continue;
 
-               if (context_cmp(sidtab_do_lookup(s, (u32)v, 0), context)) {
-                       sidtab_rcache_update(s, (u32)v, i);
-                       *index = (u32)v;
+               if (context_cmp(sidtab_do_lookup(s, v, 0), context)) {
+                       sidtab_rcache_update(s, v, i);
+                       *index = v;
                        return 0;
                }
        }
@@ -245,8 +243,7 @@ static int sidtab_reverse_lookup(struct sidtab *s, struct context *context,
                                 u32 *index)
 {
        unsigned long flags;
-       u32 count = (u32)atomic_read(&s->count);
-       u32 count_locked, level, pos;
+       u32 count, count_locked, level, pos;
        struct sidtab_convert_params *convert;
        struct context *dst, *dst_convert;
        int rc;
@@ -255,11 +252,10 @@ static int sidtab_reverse_lookup(struct sidtab *s, struct context *context,
        if (rc == 0)
                return 0;
 
+       /* read entries only after reading count */
+       count = smp_load_acquire(&s->count);
        level = sidtab_level_from_count(count);
 
-       /* read entries after reading count */
-       smp_rmb();
-
        pos = 0;
        rc = sidtab_find_context(s->roots[level], &pos, count, level,
                                 context, index);
@@ -272,7 +268,7 @@ static int sidtab_reverse_lookup(struct sidtab *s, struct context *context,
        spin_lock_irqsave(&s->lock, flags);
 
        convert = s->convert;
-       count_locked = (u32)atomic_read(&s->count);
+       count_locked = s->count;
        level = sidtab_level_from_count(count_locked);
 
        /* if count has changed before we acquired the lock, then catch up */
@@ -320,7 +316,7 @@ static int sidtab_reverse_lookup(struct sidtab *s, struct context *context,
                }
 
                /* at this point we know the insert won't fail */
-               atomic_set(&convert->target->count, count + 1);
+               convert->target->count = count + 1;
        }
 
        if (context->len)
@@ -331,9 +327,7 @@ static int sidtab_reverse_lookup(struct sidtab *s, struct context *context,
        *index = count;
 
        /* write entries before writing new count */
-       smp_wmb();
-
-       atomic_set(&s->count, count + 1);
+       smp_store_release(&s->count, count + 1);
 
        rc = 0;
 out_unlock:
@@ -423,7 +417,7 @@ int sidtab_convert(struct sidtab *s, struct sidtab_convert_params *params)
                return -EBUSY;
        }
 
-       count = (u32)atomic_read(&s->count);
+       count = s->count;
        level = sidtab_level_from_count(count);
 
        /* allocate last leaf in the new sidtab (to avoid race with
@@ -436,7 +430,7 @@ int sidtab_convert(struct sidtab *s, struct sidtab_convert_params *params)
        }
 
        /* set count in case no new entries are added during conversion */
-       atomic_set(&params->target->count, count);
+       params->target->count = count;
 
        /* enable live convert of new entries */
        s->convert = params;
index bbd5c0d..1f47631 100644 (file)
@@ -40,8 +40,8 @@ union sidtab_entry_inner {
 #define SIDTAB_LEAF_ENTRIES \
        (SIDTAB_NODE_ALLOC_SIZE / sizeof(struct sidtab_entry_leaf))
 
-#define SIDTAB_MAX_BITS 31 /* limited to INT_MAX due to atomic_t range */
-#define SIDTAB_MAX (((u32)1 << SIDTAB_MAX_BITS) - 1)
+#define SIDTAB_MAX_BITS 32
+#define SIDTAB_MAX U32_MAX
 /* ensure enough tree levels for SIDTAB_MAX entries */
 #define SIDTAB_MAX_LEVEL \
        DIV_ROUND_UP(SIDTAB_MAX_BITS - size_to_shift(SIDTAB_LEAF_ENTRIES), \
@@ -69,13 +69,22 @@ struct sidtab_convert_params {
 #define SIDTAB_RCACHE_SIZE 3
 
 struct sidtab {
+       /*
+        * lock-free read access only for as many items as a prior read of
+        * 'count'
+        */
        union sidtab_entry_inner roots[SIDTAB_MAX_LEVEL + 1];
-       atomic_t count;
+       /*
+        * access atomically via {READ|WRITE}_ONCE(); only increment under
+        * spinlock
+        */
+       u32 count;
+       /* access only under spinlock */
        struct sidtab_convert_params *convert;
        spinlock_t lock;
 
-       /* reverse lookup cache */
-       atomic_t rcache[SIDTAB_RCACHE_SIZE];
+       /* reverse lookup cache - access atomically via {READ|WRITE}_ONCE() */
+       u32 rcache[SIDTAB_RCACHE_SIZE];
 
        /* index == SID - 1 (no entry for SECSID_NULL) */
        struct sidtab_isid_entry isids[SECINITSID_NUM];
index f1c93a7..38ac3da 100644 (file)
@@ -465,7 +465,7 @@ char *smk_parse_smack(const char *string, int len)
        if (i == 0 || i >= SMK_LONGLABEL)
                return ERR_PTR(-EINVAL);
 
-       smack = kzalloc(i + 1, GFP_KERNEL);
+       smack = kzalloc(i + 1, GFP_NOFS);
        if (smack == NULL)
                return ERR_PTR(-ENOMEM);
 
@@ -500,7 +500,7 @@ int smk_netlbl_mls(int level, char *catset, struct netlbl_lsm_secattr *sap,
                        if ((m & *cp) == 0)
                                continue;
                        rc = netlbl_catmap_setbit(&sap->attr.mls.cat,
-                                                 cat, GFP_KERNEL);
+                                                 cat, GFP_NOFS);
                        if (rc < 0) {
                                netlbl_catmap_free(sap->attr.mls.cat);
                                return rc;
@@ -536,7 +536,7 @@ struct smack_known *smk_import_entry(const char *string, int len)
        if (skp != NULL)
                goto freeout;
 
-       skp = kzalloc(sizeof(*skp), GFP_KERNEL);
+       skp = kzalloc(sizeof(*skp), GFP_NOFS);
        if (skp == NULL) {
                skp = ERR_PTR(-ENOMEM);
                goto freeout;
index 4c5e5a4..abeb09c 100644 (file)
@@ -288,7 +288,7 @@ static struct smack_known *smk_fetch(const char *name, struct inode *ip,
        if (!(ip->i_opflags & IOP_XATTR))
                return ERR_PTR(-EOPNOTSUPP);
 
-       buffer = kzalloc(SMK_LONGLABEL, GFP_KERNEL);
+       buffer = kzalloc(SMK_LONGLABEL, GFP_NOFS);
        if (buffer == NULL)
                return ERR_PTR(-ENOMEM);
 
@@ -307,7 +307,7 @@ static struct smack_known *smk_fetch(const char *name, struct inode *ip,
 
 /**
  * init_inode_smack - initialize an inode security blob
- * @isp: the blob to initialize
+ * @inode: inode to extract the info from
  * @skp: a pointer to the Smack label entry to use in the blob
  *
  */
@@ -509,7 +509,7 @@ static int smack_ptrace_traceme(struct task_struct *ptp)
 
 /**
  * smack_syslog - Smack approval on syslog
- * @type: message type
+ * @typefrom_file: unused
  *
  * Returns 0 on success, error code otherwise.
  */
@@ -765,7 +765,7 @@ static int smack_sb_eat_lsm_opts(char *options, void **mnt_opts)
 /**
  * smack_set_mnt_opts - set Smack specific mount options
  * @sb: the file system superblock
- * @opts: Smack mount options
+ * @mnt_opts: Smack mount options
  * @kern_flags: mount option from kernel space or user space
  * @set_kern_flags: where to store converted mount opts
  *
@@ -937,7 +937,8 @@ static int smack_bprm_set_creds(struct linux_binprm *bprm)
 
                if (rc != 0)
                        return rc;
-       } else if (bprm->unsafe)
+       }
+       if (bprm->unsafe & ~LSM_UNSAFE_PTRACE)
                return -EPERM;
 
        bsp->smk_task = isp->smk_task;
@@ -958,7 +959,7 @@ static int smack_bprm_set_creds(struct linux_binprm *bprm)
  * smack_inode_alloc_security - allocate an inode blob
  * @inode: the inode in need of a blob
  *
- * Returns 0 if it gets a blob, -ENOMEM otherwise
+ * Returns 0
  */
 static int smack_inode_alloc_security(struct inode *inode)
 {
@@ -1164,7 +1165,7 @@ static int smack_inode_rename(struct inode *old_inode,
  *
  * This is the important Smack hook.
  *
- * Returns 0 if access is permitted, -EACCES otherwise
+ * Returns 0 if access is permitted, an error code otherwise
  */
 static int smack_inode_permission(struct inode *inode, int mask)
 {
@@ -1222,8 +1223,7 @@ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr)
 
 /**
  * smack_inode_getattr - Smack check for getting attributes
- * @mnt: vfsmount of the object
- * @dentry: the object
+ * @path: path to extract the info from
  *
  * Returns 0 if access is permitted, an error code otherwise
  */
@@ -1870,14 +1870,13 @@ static int smack_file_receive(struct file *file)
 /**
  * smack_file_open - Smack dentry open processing
  * @file: the object
- * @cred: task credential
  *
  * Set the security blob in the file structure.
  * Allow the open only if the task has read access. There are
  * many read operations (e.g. fstat) that you can do with an
  * fd even if you have the file open write-only.
  *
- * Returns 0
+ * Returns 0 if current has access, error code otherwise
  */
 static int smack_file_open(struct file *file)
 {
@@ -1900,7 +1899,7 @@ static int smack_file_open(struct file *file)
 
 /**
  * smack_cred_alloc_blank - "allocate" blank task-level security credentials
- * @new: the new credentials
+ * @cred: the new credentials
  * @gfp: the atomicity of any memory allocations
  *
  * Prepare a blank set of credentials for modification.  This must allocate all
@@ -1983,7 +1982,7 @@ static void smack_cred_transfer(struct cred *new, const struct cred *old)
 
 /**
  * smack_cred_getsecid - get the secid corresponding to a creds structure
- * @c: the object creds
+ * @cred: the object creds
  * @secid: where to put the result
  *
  * Sets the secid to contain a u32 version of the smack label.
@@ -2140,8 +2139,6 @@ static int smack_task_getioprio(struct task_struct *p)
 /**
  * smack_task_setscheduler - Smack check on setting scheduler
  * @p: the task object
- * @policy: unused
- * @lp: unused
  *
  * Return 0 if read access is permitted
  */
@@ -2611,8 +2608,9 @@ static void smk_ipv6_port_label(struct socket *sock, struct sockaddr *address)
 
 /**
  * smk_ipv6_port_check - check Smack port access
- * @sock: socket
+ * @sk: socket
  * @address: address
+ * @act: the action being taken
  *
  * Create or update the port list entry
  */
@@ -2782,7 +2780,7 @@ static int smack_socket_post_create(struct socket *sock, int family,
  *
  * Cross reference the peer labels for SO_PEERSEC
  *
- * Returns 0 on success, and error code otherwise
+ * Returns 0
  */
 static int smack_socket_socketpair(struct socket *socka,
                                   struct socket *sockb)
@@ -3014,13 +3012,13 @@ static int smack_shm_shmctl(struct kern_ipc_perm *isp, int cmd)
  *
  * Returns 0 if current has the requested access, error code otherwise
  */
-static int smack_shm_shmat(struct kern_ipc_perm *ipc, char __user *shmaddr,
+static int smack_shm_shmat(struct kern_ipc_perm *isp, char __user *shmaddr,
                           int shmflg)
 {
        int may;
 
        may = smack_flags_to_may(shmflg);
-       return smk_curacc_shm(ipc, may);
+       return smk_curacc_shm(isp, may);
 }
 
 /**
@@ -3925,6 +3923,8 @@ access_check:
                        skp = smack_ipv6host_label(&sadd);
                if (skp == NULL)
                        skp = smack_net_ambient;
+               if (skb == NULL)
+                       break;
 #ifdef CONFIG_AUDIT
                smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net);
                ad.a.u.net->family = family;
@@ -4762,7 +4762,7 @@ static __init void init_smack_known_list(void)
 /**
  * smack_init - initialize the smack system
  *
- * Returns 0
+ * Returns 0 on success, -ENOMEM if there's no memory
  */
 static __init int smack_init(void)
 {
index 218292b..f5b3252 100644 (file)
@@ -15,7 +15,7 @@ alesis_io14_tx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT] = {
 
 static const unsigned int
 alesis_io26_tx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT] = {
-       {10, 10, 8},    /* Tx0 = Analog + S/PDIF. */
+       {10, 10, 4},    /* Tx0 = Analog + S/PDIF. */
        {16, 8, 0},     /* Tx1 = ADAT1 + ADAT2. */
 };
 
index 91e71be..240f4ca 100644 (file)
@@ -2485,8 +2485,7 @@ static const struct pci_device_id azx_ids[] = {
                         AZX_DCAPS_PM_RUNTIME },
        /* AMD Raven */
        { PCI_DEVICE(0x1022, 0x15e3),
-         .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB |
-                        AZX_DCAPS_PM_RUNTIME },
+         .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_AMD_SB },
        /* ATI HDMI */
        { PCI_DEVICE(0x1002, 0x0002),
          .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
index e283966..bc9dd8e 100644 (file)
@@ -357,6 +357,7 @@ static const struct hda_fixup ad1986a_fixups[] = {
 
 static const struct snd_pci_quirk ad1986a_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x30af, "HP B2800", AD1986A_FIXUP_LAPTOP_IMIC),
+       SND_PCI_QUIRK(0x1043, 0x1153, "ASUS M9V", AD1986A_FIXUP_LAPTOP_IMIC),
        SND_PCI_QUIRK(0x1043, 0x1443, "ASUS Z99He", AD1986A_FIXUP_EAPD),
        SND_PCI_QUIRK(0x1043, 0x1447, "ASUS A8JN", AD1986A_FIXUP_EAPD),
        SND_PCI_QUIRK_MASK(0x1043, 0xff00, 0x8100, "ASUS P5", AD1986A_FIXUP_3STACK),
index da16954..b000b36 100644 (file)
@@ -5817,6 +5817,7 @@ enum {
        ALC292_FIXUP_DELL_E7X,
        ALC292_FIXUP_DISABLE_AAMIX,
        ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK,
+       ALC298_FIXUP_ALIENWARE_MIC_NO_PRESENCE,
        ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
        ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
        ALC275_FIXUP_DELL_XPS,
@@ -5871,6 +5872,7 @@ enum {
        ALC256_FIXUP_ASUS_MIC_NO_PRESENCE,
        ALC299_FIXUP_PREDATOR_SPK,
        ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC,
+       ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -6506,6 +6508,15 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC292_FIXUP_DISABLE_AAMIX
        },
+       [ALC298_FIXUP_ALIENWARE_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x18, 0x01a1913c }, /* headset mic w/o jack detect */
+                       { }
+               },
+               .chained_before = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE,
+       },
        [ALC298_FIXUP_DELL1_MIC_NO_PRESENCE] = {
                .type = HDA_FIXUP_PINS,
                .v.pins = (const struct hda_pintbl[]) {
@@ -6927,6 +6938,16 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
        },
+       [ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x04a11040 },
+                       { 0x21, 0x04211020 },
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -7190,6 +7211,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
        SND_PCI_QUIRK(0x19e5, 0x3204, "Huawei MACH-WX9", ALC256_FIXUP_HUAWEI_MACH_WX9_PINS),
        SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */
+       SND_PCI_QUIRK(0x10ec, 0x118c, "Medion EE4254 MD62100", ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE),
 
 #if 0
        /* Below is a quirk table taken from the old code.
@@ -7358,6 +7380,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
        {.id = ALC295_FIXUP_CHROME_BOOK, .name = "alc-chrome-book"},
        {.id = ALC299_FIXUP_PREDATOR_SPK, .name = "predator-spk"},
        {.id = ALC298_FIXUP_HUAWEI_MBX_STEREO, .name = "huawei-mbx-stereo"},
+       {.id = ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, .name = "alc256-medion-headset"},
        {}
 };
 #define ALC225_STANDARD_PINS \
@@ -7770,6 +7793,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x17, 0x90170110},
                {0x1a, 0x03011020},
                {0x21, 0x03211030}),
+       SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_ALIENWARE_MIC_NO_PRESENCE,
+               {0x12, 0xb7a60140},
+               {0x17, 0x90170110},
+               {0x1a, 0x03a11030},
+               {0x21, 0x03211020}),
        SND_HDA_PIN_QUIRK(0x10ec0299, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
                ALC225_STANDARD_PINS,
                {0x12, 0xb7a60130},
index 48e9eef..ca60339 100644 (file)
@@ -116,19 +116,16 @@ static struct atmel_pcm_dma_params ssc_dma_params[NUM_SSC_DEVICES][2] = {
 static struct atmel_ssc_info ssc_info[NUM_SSC_DEVICES] = {
        {
        .name           = "ssc0",
-       .lock           = __SPIN_LOCK_UNLOCKED(ssc_info[0].lock),
        .dir_mask       = SSC_DIR_MASK_UNUSED,
        .initialized    = 0,
        },
        {
        .name           = "ssc1",
-       .lock           = __SPIN_LOCK_UNLOCKED(ssc_info[1].lock),
        .dir_mask       = SSC_DIR_MASK_UNUSED,
        .initialized    = 0,
        },
        {
        .name           = "ssc2",
-       .lock           = __SPIN_LOCK_UNLOCKED(ssc_info[2].lock),
        .dir_mask       = SSC_DIR_MASK_UNUSED,
        .initialized    = 0,
        },
@@ -317,13 +314,10 @@ static int atmel_ssc_startup(struct snd_pcm_substream *substream,
 
        snd_soc_dai_set_dma_data(dai, substream, dma_params);
 
-       spin_lock_irq(&ssc_p->lock);
-       if (ssc_p->dir_mask & dir_mask) {
-               spin_unlock_irq(&ssc_p->lock);
+       if (ssc_p->dir_mask & dir_mask)
                return -EBUSY;
-       }
+
        ssc_p->dir_mask |= dir_mask;
-       spin_unlock_irq(&ssc_p->lock);
 
        return 0;
 }
@@ -355,7 +349,6 @@ static void atmel_ssc_shutdown(struct snd_pcm_substream *substream,
 
        dir_mask = 1 << dir;
 
-       spin_lock_irq(&ssc_p->lock);
        ssc_p->dir_mask &= ~dir_mask;
        if (!ssc_p->dir_mask) {
                if (ssc_p->initialized) {
@@ -369,7 +362,6 @@ static void atmel_ssc_shutdown(struct snd_pcm_substream *substream,
                ssc_p->cmr_div = ssc_p->tcmr_period = ssc_p->rcmr_period = 0;
                ssc_p->forced_divider = 0;
        }
-       spin_unlock_irq(&ssc_p->lock);
 
        /* Shutdown the SSC clock. */
        pr_debug("atmel_ssc_dai: Stopping clock\n");
index ae764cb..3470b96 100644 (file)
@@ -93,7 +93,6 @@ struct atmel_ssc_state {
 struct atmel_ssc_info {
        char *name;
        struct ssc_device *ssc;
-       spinlock_t lock;        /* lock for dir_mask */
        unsigned short dir_mask;        /* 0=unused, 1=playback, 2=capture */
        unsigned short initialized;     /* true if SSC has been initialized */
        unsigned short daifmt;
index 50ed86d..88b7569 100644 (file)
@@ -21,8 +21,7 @@
 
 #define PCM3168A_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | \
                         SNDRV_PCM_FMTBIT_S24_3LE | \
-                        SNDRV_PCM_FMTBIT_S24_LE | \
-                        SNDRV_PCM_FMTBIT_S32_LE)
+                        SNDRV_PCM_FMTBIT_S24_LE)
 
 #define PCM3168A_FMT_I2S               0x0
 #define PCM3168A_FMT_LEFT_J            0x1
index ef0b746..b517e4b 100644 (file)
@@ -628,6 +628,16 @@ static int fsl_sai_startup(struct snd_pcm_substream *substream,
                           FSL_SAI_CR3_TRCE_MASK,
                           FSL_SAI_CR3_TRCE);
 
+       /*
+        * EDMA controller needs period size to be a multiple of
+        * tx/rx maxburst
+        */
+       if (sai->soc_data->use_edma)
+               snd_pcm_hw_constraint_step(substream->runtime, 0,
+                                          SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
+                                          tx ? sai->dma_params_tx.maxburst :
+                                          sai->dma_params_rx.maxburst);
+
        ret = snd_pcm_hw_constraint_list(substream->runtime, 0,
                        SNDRV_PCM_HW_PARAM_RATE, &fsl_sai_rate_constraints);
 
@@ -1026,30 +1036,35 @@ static int fsl_sai_remove(struct platform_device *pdev)
 
 static const struct fsl_sai_soc_data fsl_sai_vf610_data = {
        .use_imx_pcm = false,
+       .use_edma = false,
        .fifo_depth = 32,
        .reg_offset = 0,
 };
 
 static const struct fsl_sai_soc_data fsl_sai_imx6sx_data = {
        .use_imx_pcm = true,
+       .use_edma = false,
        .fifo_depth = 32,
        .reg_offset = 0,
 };
 
 static const struct fsl_sai_soc_data fsl_sai_imx7ulp_data = {
        .use_imx_pcm = true,
+       .use_edma = false,
        .fifo_depth = 16,
        .reg_offset = 8,
 };
 
 static const struct fsl_sai_soc_data fsl_sai_imx8mq_data = {
        .use_imx_pcm = true,
+       .use_edma = false,
        .fifo_depth = 128,
        .reg_offset = 8,
 };
 
 static const struct fsl_sai_soc_data fsl_sai_imx8qm_data = {
        .use_imx_pcm = true,
+       .use_edma = true,
        .fifo_depth = 64,
        .reg_offset = 0,
 };
index b12cb57..76b15de 100644 (file)
 
 struct fsl_sai_soc_data {
        bool use_imx_pcm;
+       bool use_edma;
        unsigned int fifo_depth;
        unsigned int reg_offset;
 };
index 6b75716..e72f826 100644 (file)
@@ -1,30 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0-only
-config SND_JZ4740_SOC
-       tristate "SoC Audio for Ingenic JZ4740 SoC"
-       depends on MIPS || COMPILE_TEST
-       select SND_SOC_GENERIC_DMAENGINE_PCM
-       help
-         Say Y or M if you want to add support for codecs attached to
-         the JZ4740 I2S interface. You will also need to select the audio
-         interfaces to support below.
-
-if SND_JZ4740_SOC
-
 config SND_JZ4740_SOC_I2S
        tristate "SoC Audio (I2S protocol) for Ingenic JZ4740 SoC"
+       depends on MIPS || COMPILE_TEST
        depends on HAS_IOMEM
+       select SND_SOC_GENERIC_DMAENGINE_PCM
        help
          Say Y if you want to use I2S protocol and I2S codec on Ingenic JZ4740
          based boards.
-
-config SND_JZ4740_SOC_QI_LB60
-       tristate "SoC Audio support for Qi LB60"
-       depends on HAS_IOMEM
-       depends on JZ4740_QI_LB60 || COMPILE_TEST
-       select SND_JZ4740_SOC_I2S
-    select SND_SOC_JZ4740_CODEC
-       help
-         Say Y if you want to add support for ASoC audio on the Qi LB60 board
-         a.k.a Qi Ben NanoNote.
-
-endif
index fb10e9a..f8701c9 100644 (file)
@@ -5,8 +5,3 @@
 snd-soc-jz4740-i2s-objs := jz4740-i2s.o
 
 obj-$(CONFIG_SND_JZ4740_SOC_I2S) += snd-soc-jz4740-i2s.o
-
-# Jz4740 Machine Support
-snd-soc-qi-lb60-objs := qi_lb60.o
-
-obj-$(CONFIG_SND_JZ4740_SOC_QI_LB60) += snd-soc-qi-lb60.o
diff --git a/sound/soc/jz4740/qi_lb60.c b/sound/soc/jz4740/qi_lb60.c
deleted file mode 100644 (file)
index 8ef6f41..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2009, Lars-Peter Clausen <lars@metafoo.de>
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/timer.h>
-#include <linux/interrupt.h>
-#include <linux/platform_device.h>
-#include <sound/core.h>
-#include <sound/pcm.h>
-#include <sound/soc.h>
-#include <linux/gpio/consumer.h>
-
-struct qi_lb60 {
-       struct gpio_desc *snd_gpio;
-       struct gpio_desc *amp_gpio;
-};
-
-static int qi_lb60_spk_event(struct snd_soc_dapm_widget *widget,
-                            struct snd_kcontrol *ctrl, int event)
-{
-       struct qi_lb60 *qi_lb60 = snd_soc_card_get_drvdata(widget->dapm->card);
-       int on = !SND_SOC_DAPM_EVENT_OFF(event);
-
-       gpiod_set_value_cansleep(qi_lb60->snd_gpio, on);
-       gpiod_set_value_cansleep(qi_lb60->amp_gpio, on);
-
-       return 0;
-}
-
-static const struct snd_soc_dapm_widget qi_lb60_widgets[] = {
-       SND_SOC_DAPM_SPK("Speaker", qi_lb60_spk_event),
-       SND_SOC_DAPM_MIC("Mic", NULL),
-};
-
-static const struct snd_soc_dapm_route qi_lb60_routes[] = {
-       {"Mic", NULL, "MIC"},
-       {"Speaker", NULL, "LOUT"},
-       {"Speaker", NULL, "ROUT"},
-};
-
-SND_SOC_DAILINK_DEFS(hifi,
-       DAILINK_COMP_ARRAY(COMP_CPU("jz4740-i2s")),
-       DAILINK_COMP_ARRAY(COMP_CODEC("jz4740-codec", "jz4740-hifi")),
-       DAILINK_COMP_ARRAY(COMP_PLATFORM("jz4740-i2s")));
-
-static struct snd_soc_dai_link qi_lb60_dai = {
-       .name = "jz4740",
-       .stream_name = "jz4740",
-       .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF |
-               SND_SOC_DAIFMT_CBM_CFM,
-       SND_SOC_DAILINK_REG(hifi),
-};
-
-static struct snd_soc_card qi_lb60_card = {
-       .name = "QI LB60",
-       .owner = THIS_MODULE,
-       .dai_link = &qi_lb60_dai,
-       .num_links = 1,
-
-       .dapm_widgets = qi_lb60_widgets,
-       .num_dapm_widgets = ARRAY_SIZE(qi_lb60_widgets),
-       .dapm_routes = qi_lb60_routes,
-       .num_dapm_routes = ARRAY_SIZE(qi_lb60_routes),
-       .fully_routed = true,
-};
-
-static int qi_lb60_probe(struct platform_device *pdev)
-{
-       struct qi_lb60 *qi_lb60;
-       struct snd_soc_card *card = &qi_lb60_card;
-
-       qi_lb60 = devm_kzalloc(&pdev->dev, sizeof(*qi_lb60), GFP_KERNEL);
-       if (!qi_lb60)
-               return -ENOMEM;
-
-       qi_lb60->snd_gpio = devm_gpiod_get(&pdev->dev, "snd", GPIOD_OUT_LOW);
-       if (IS_ERR(qi_lb60->snd_gpio))
-               return PTR_ERR(qi_lb60->snd_gpio);
-
-       qi_lb60->amp_gpio = devm_gpiod_get(&pdev->dev, "amp", GPIOD_OUT_LOW);
-       if (IS_ERR(qi_lb60->amp_gpio))
-               return PTR_ERR(qi_lb60->amp_gpio);
-
-       card->dev = &pdev->dev;
-
-       snd_soc_card_set_drvdata(card, qi_lb60);
-
-       return devm_snd_soc_register_card(&pdev->dev, card);
-}
-
-static struct platform_driver qi_lb60_driver = {
-       .driver         = {
-               .name   = "qi-lb60-audio",
-       },
-       .probe          = qi_lb60_probe,
-};
-
-module_platform_driver(qi_lb60_driver);
-
-MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
-MODULE_DESCRIPTION("ALSA SoC QI LB60 Audio support");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("platform:qi-lb60-audio");
index f6a7466..fc5d089 100644 (file)
@@ -286,6 +286,11 @@ static int rsnd_ssi_master_clk_start(struct rsnd_mod *mod,
        if (rsnd_ssi_is_multi_slave(mod, io))
                return 0;
 
+       if (rsnd_runtime_is_tdm_split(io))
+               chan = rsnd_io_converted_chan(io);
+
+       chan = rsnd_channel_normalization(chan);
+
        if (ssi->usrcnt > 0) {
                if (ssi->rate != rate) {
                        dev_err(dev, "SSI parent/child should use same rate\n");
@@ -300,11 +305,6 @@ static int rsnd_ssi_master_clk_start(struct rsnd_mod *mod,
                return 0;
        }
 
-       if (rsnd_runtime_is_tdm_split(io))
-               chan = rsnd_io_converted_chan(io);
-
-       chan = rsnd_channel_normalization(chan);
-
        main_rate = rsnd_ssi_clk_query(rdai, rate, chan, &idx);
        if (!main_rate) {
                dev_err(dev, "unsupported clock rate\n");
index 35f48e9..88978a3 100644 (file)
@@ -978,7 +978,7 @@ static void soc_cleanup_component(struct snd_soc_component *component)
        /* For framework level robustness */
        snd_soc_component_set_jack(component, NULL, NULL);
 
-       list_del(&component->card_list);
+       list_del_init(&component->card_list);
        snd_soc_dapm_free(snd_soc_component_get_dapm(component));
        soc_cleanup_component_debugfs(component);
        component->card = NULL;
index 87a9b9d..29f6105 100644 (file)
@@ -200,11 +200,18 @@ config SND_SOC_DM365_AIC3X_CODEC
 
 config SND_SOC_DM365_VOICE_CODEC
        bool "Voice Codec - CQ93VC"
-       select MFD_DAVINCI_VOICECODEC
-       select SND_SOC_CQ0093VC
        help
          Say Y if you want to add support for SoC On-chip voice codec
 endchoice
 
+config SND_SOC_DM365_VOICE_CODEC_MODULE
+       def_tristate y
+       depends on SND_SOC_DM365_VOICE_CODEC && SND_SOC
+       select MFD_DAVINCI_VOICECODEC
+       select SND_SOC_CQ0093VC
+       help
+         This is an internal symbol needed to ensure that the codec
+         and MFD driver can be built as loadable modules if necessary.
+
 endmenu
 
index 25faf2d..fbfde99 100644 (file)
@@ -1658,6 +1658,8 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
        case 0x25ce:  /* Mytek devices */
        case 0x278b:  /* Rotel? */
        case 0x2ab6:  /* T+A devices */
+       case 0x3842:  /* EVGA */
+       case 0xc502:  /* HiBy devices */
                if (fp->dsd_raw)
                        return SNDRV_PCM_FMTBIT_DSD_U32_BE;
                break;
index 5171b9c..0652d3e 100644 (file)
 #define X86_FEATURE_VMMCALL            ( 8*32+15) /* Prefer VMMCALL to VMCALL */
 #define X86_FEATURE_XENPV              ( 8*32+16) /* "" Xen paravirtual guest */
 #define X86_FEATURE_EPT_AD             ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
+#define X86_FEATURE_VMCALL             ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
+#define X86_FEATURE_VMW_VMMCALL                ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE           ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
 #define X86_FEATURE_AVX512_4VNNIW      (18*32+ 2) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS      (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
 #define X86_FEATURE_MD_CLEAR           (18*32+10) /* VERW clears CPU buffers */
 #define X86_FEATURE_TSX_FORCE_ABORT    (18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_PCONFIG            (18*32+18) /* Intel PCONFIG */
index 30d7d04..196fdd0 100644 (file)
@@ -3,7 +3,7 @@
 #define _UAPI_ASM_X86_UNISTD_H
 
 /* x32 syscall flag bit */
-#define __X32_SYSCALL_BIT      0x40000000
+#define __X32_SYSCALL_BIT      0x40000000UL
 
 #ifndef __KERNEL__
 # ifdef __i386__
diff --git a/tools/hv/Build b/tools/hv/Build
new file mode 100644 (file)
index 0000000..6cf51fa
--- /dev/null
@@ -0,0 +1,3 @@
+hv_kvp_daemon-y += hv_kvp_daemon.o
+hv_vss_daemon-y += hv_vss_daemon.o
+hv_fcopy_daemon-y += hv_fcopy_daemon.o
index 5db5e62..b57143d 100644 (file)
@@ -1,28 +1,55 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for Hyper-V tools
-
-WARNINGS = -Wall -Wextra
-CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS)
-
-CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+include ../scripts/Makefile.include
 
 sbindir ?= /usr/sbin
 libexecdir ?= /usr/libexec
 sharedstatedir ?= /var/lib
 
-ALL_PROGRAMS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour).
+MAKEFLAGS += -r
+
+override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+
+ALL_TARGETS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
 
 ALL_SCRIPTS := hv_get_dhcp_info.sh hv_get_dns_info.sh hv_set_ifconfig.sh
 
 all: $(ALL_PROGRAMS)
 
-%: %.c
-       $(CC) $(CFLAGS) -o $@ $^
+export srctree OUTPUT CC LD CFLAGS
+include $(srctree)/tools/build/Makefile.include
+
+HV_KVP_DAEMON_IN := $(OUTPUT)hv_kvp_daemon-in.o
+$(HV_KVP_DAEMON_IN): FORCE
+       $(Q)$(MAKE) $(build)=hv_kvp_daemon
+$(OUTPUT)hv_kvp_daemon: $(HV_KVP_DAEMON_IN)
+       $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+HV_VSS_DAEMON_IN := $(OUTPUT)hv_vss_daemon-in.o
+$(HV_VSS_DAEMON_IN): FORCE
+       $(Q)$(MAKE) $(build)=hv_vss_daemon
+$(OUTPUT)hv_vss_daemon: $(HV_VSS_DAEMON_IN)
+       $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+HV_FCOPY_DAEMON_IN := $(OUTPUT)hv_fcopy_daemon-in.o
+$(HV_FCOPY_DAEMON_IN): FORCE
+       $(Q)$(MAKE) $(build)=hv_fcopy_daemon
+$(OUTPUT)hv_fcopy_daemon: $(HV_FCOPY_DAEMON_IN)
+       $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
 
 clean:
-       $(RM) hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+       rm -f $(ALL_PROGRAMS)
+       find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
 
-install: all
+install: $(ALL_PROGRAMS)
        install -d -m 755 $(DESTDIR)$(sbindir); \
        install -d -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd; \
        install -d -m 755 $(DESTDIR)$(sharedstatedir); \
@@ -33,3 +60,7 @@ install: all
        for script in $(ALL_SCRIPTS); do \
                install $$script -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd/$${script%.sh}; \
        done
+
+FORCE:
+
+.PHONY: all install clean FORCE prepare
index bbd75ac..550223f 100644 (file)
@@ -3,6 +3,7 @@
 #define _TOOLS_ASM_BUG_H
 
 #include <linux/compiler.h>
+#include <stdio.h>
 
 #define __WARN_printf(arg...)  do { fprintf(stderr, arg); } while (0)
 
index d83763a..e03b1ea 100644 (file)
@@ -31,25 +31,9 @@ struct rb_root {
        struct rb_node *rb_node;
 };
 
-/*
- * Leftmost-cached rbtrees.
- *
- * We do not cache the rightmost node based on footprint
- * size vs number of potential users that could benefit
- * from O(1) rb_last(). Just not worth it, users that want
- * this feature can always implement the logic explicitly.
- * Furthermore, users that want to cache both pointers may
- * find it a bit asymmetric, but that's ok.
- */
-struct rb_root_cached {
-       struct rb_root rb_root;
-       struct rb_node *rb_leftmost;
-};
-
 #define rb_parent(r)   ((struct rb_node *)((r)->__rb_parent_color & ~3))
 
 #define RB_ROOT        (struct rb_root) { NULL, }
-#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }
 #define        rb_entry(ptr, type, member) container_of(ptr, type, member)
 
 #define RB_EMPTY_ROOT(root)  (READ_ONCE((root)->rb_node) == NULL)
@@ -71,12 +55,6 @@ extern struct rb_node *rb_prev(const struct rb_node *);
 extern struct rb_node *rb_first(const struct rb_root *);
 extern struct rb_node *rb_last(const struct rb_root *);
 
-extern void rb_insert_color_cached(struct rb_node *,
-                                  struct rb_root_cached *, bool);
-extern void rb_erase_cached(struct rb_node *node, struct rb_root_cached *);
-/* Same as rb_first(), but O(1) */
-#define rb_first_cached(root) (root)->rb_leftmost
-
 /* Postorder iteration - always visit the parent after its children */
 extern struct rb_node *rb_first_postorder(const struct rb_root *);
 extern struct rb_node *rb_next_postorder(const struct rb_node *);
@@ -84,8 +62,6 @@ extern struct rb_node *rb_next_postorder(const struct rb_node *);
 /* Fast replacement of a single node without remove/rebalance/add/rebalance */
 extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
                            struct rb_root *root);
-extern void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new,
-                                  struct rb_root_cached *root);
 
 static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
                                struct rb_node **rb_link)
@@ -129,4 +105,51 @@ static inline void rb_erase_init(struct rb_node *n, struct rb_root *root)
        rb_erase(n, root);
        RB_CLEAR_NODE(n);
 }
+
+/*
+ * Leftmost-cached rbtrees.
+ *
+ * We do not cache the rightmost node based on footprint
+ * size vs number of potential users that could benefit
+ * from O(1) rb_last(). Just not worth it, users that want
+ * this feature can always implement the logic explicitly.
+ * Furthermore, users that want to cache both pointers may
+ * find it a bit asymmetric, but that's ok.
+ */
+struct rb_root_cached {
+       struct rb_root rb_root;
+       struct rb_node *rb_leftmost;
+};
+
+#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }
+
+/* Same as rb_first(), but O(1) */
+#define rb_first_cached(root) (root)->rb_leftmost
+
+static inline void rb_insert_color_cached(struct rb_node *node,
+                                         struct rb_root_cached *root,
+                                         bool leftmost)
+{
+       if (leftmost)
+               root->rb_leftmost = node;
+       rb_insert_color(node, &root->rb_root);
+}
+
+static inline void rb_erase_cached(struct rb_node *node,
+                                  struct rb_root_cached *root)
+{
+       if (root->rb_leftmost == node)
+               root->rb_leftmost = rb_next(node);
+       rb_erase(node, &root->rb_root);
+}
+
+static inline void rb_replace_node_cached(struct rb_node *victim,
+                                         struct rb_node *new,
+                                         struct rb_root_cached *root)
+{
+       if (root->rb_leftmost == victim)
+               root->rb_leftmost = new;
+       rb_replace_node(victim, new, &root->rb_root);
+}
+
 #endif /* __TOOLS_LINUX_PERF_RBTREE_H */
index ddd0100..381aa94 100644 (file)
@@ -32,17 +32,16 @@ struct rb_augment_callbacks {
        void (*rotate)(struct rb_node *old, struct rb_node *new);
 };
 
-extern void __rb_insert_augmented(struct rb_node *node,
-                                 struct rb_root *root,
-                                 bool newleft, struct rb_node **leftmost,
+extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
        void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+
 /*
  * Fixup the rbtree and update the augmented information when rebalancing.
  *
  * On insertion, the user must update the augmented information on the path
  * leading to the inserted node, then call rb_link_node() as usual and
- * rb_augment_inserted() instead of the usual rb_insert_color() call.
- * If rb_augment_inserted() rebalances the rbtree, it will callback into
+ * rb_insert_augmented() instead of the usual rb_insert_color() call.
+ * If rb_insert_augmented() rebalances the rbtree, it will callback into
  * a user provided function to update the augmented information on the
  * affected subtrees.
  */
@@ -50,7 +49,7 @@ static inline void
 rb_insert_augmented(struct rb_node *node, struct rb_root *root,
                    const struct rb_augment_callbacks *augment)
 {
-       __rb_insert_augmented(node, root, false, NULL, augment->rotate);
+       __rb_insert_augmented(node, root, augment->rotate);
 }
 
 static inline void
@@ -58,45 +57,92 @@ rb_insert_augmented_cached(struct rb_node *node,
                           struct rb_root_cached *root, bool newleft,
                           const struct rb_augment_callbacks *augment)
 {
-       __rb_insert_augmented(node, &root->rb_root,
-                             newleft, &root->rb_leftmost, augment->rotate);
+       if (newleft)
+               root->rb_leftmost = node;
+       rb_insert_augmented(node, &root->rb_root, augment);
 }
 
-#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield,      \
-                            rbtype, rbaugmented, rbcompute)            \
+/*
+ * Template for declaring augmented rbtree callbacks (generic case)
+ *
+ * RBSTATIC:    'static' or empty
+ * RBNAME:      name of the rb_augment_callbacks structure
+ * RBSTRUCT:    struct type of the tree nodes
+ * RBFIELD:     name of struct rb_node field within RBSTRUCT
+ * RBAUGMENTED: name of field within RBSTRUCT holding data for subtree
+ * RBCOMPUTE:   name of function that recomputes the RBAUGMENTED data
+ */
+
+#define RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME,                         \
+                            RBSTRUCT, RBFIELD, RBAUGMENTED, RBCOMPUTE) \
 static inline void                                                     \
-rbname ## _propagate(struct rb_node *rb, struct rb_node *stop)         \
+RBNAME ## _propagate(struct rb_node *rb, struct rb_node *stop)         \
 {                                                                      \
        while (rb != stop) {                                            \
-               rbstruct *node = rb_entry(rb, rbstruct, rbfield);       \
-               rbtype augmented = rbcompute(node);                     \
-               if (node->rbaugmented == augmented)                     \
+               RBSTRUCT *node = rb_entry(rb, RBSTRUCT, RBFIELD);       \
+               if (RBCOMPUTE(node, true))                              \
                        break;                                          \
-               node->rbaugmented = augmented;                          \
-               rb = rb_parent(&node->rbfield);                         \
+               rb = rb_parent(&node->RBFIELD);                         \
        }                                                               \
 }                                                                      \
 static inline void                                                     \
-rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)                \
+RBNAME ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)                \
 {                                                                      \
-       rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);            \
-       rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);            \
-       new->rbaugmented = old->rbaugmented;                            \
+       RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD);            \
+       RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD);            \
+       new->RBAUGMENTED = old->RBAUGMENTED;                            \
 }                                                                      \
 static void                                                            \
-rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)      \
+RBNAME ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)      \
 {                                                                      \
-       rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);            \
-       rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);            \
-       new->rbaugmented = old->rbaugmented;                            \
-       old->rbaugmented = rbcompute(old);                              \
+       RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD);            \
+       RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD);            \
+       new->RBAUGMENTED = old->RBAUGMENTED;                            \
+       RBCOMPUTE(old, false);                                          \
 }                                                                      \
-rbstatic const struct rb_augment_callbacks rbname = {                  \
-       .propagate = rbname ## _propagate,                              \
-       .copy = rbname ## _copy,                                        \
-       .rotate = rbname ## _rotate                                     \
+RBSTATIC const struct rb_augment_callbacks RBNAME = {                  \
+       .propagate = RBNAME ## _propagate,                              \
+       .copy = RBNAME ## _copy,                                        \
+       .rotate = RBNAME ## _rotate                                     \
 };
 
+/*
+ * Template for declaring augmented rbtree callbacks,
+ * computing RBAUGMENTED scalar as max(RBCOMPUTE(node)) for all subtree nodes.
+ *
+ * RBSTATIC:    'static' or empty
+ * RBNAME:      name of the rb_augment_callbacks structure
+ * RBSTRUCT:    struct type of the tree nodes
+ * RBFIELD:     name of struct rb_node field within RBSTRUCT
+ * RBTYPE:      type of the RBAUGMENTED field
+ * RBAUGMENTED: name of RBTYPE field within RBSTRUCT holding data for subtree
+ * RBCOMPUTE:   name of function that returns the per-node RBTYPE scalar
+ */
+
+#define RB_DECLARE_CALLBACKS_MAX(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD,        \
+                                RBTYPE, RBAUGMENTED, RBCOMPUTE)              \
+static inline bool RBNAME ## _compute_max(RBSTRUCT *node, bool exit)         \
+{                                                                            \
+       RBSTRUCT *child;                                                      \
+       RBTYPE max = RBCOMPUTE(node);                                         \
+       if (node->RBFIELD.rb_left) {                                          \
+               child = rb_entry(node->RBFIELD.rb_left, RBSTRUCT, RBFIELD);   \
+               if (child->RBAUGMENTED > max)                                 \
+                       max = child->RBAUGMENTED;                             \
+       }                                                                     \
+       if (node->RBFIELD.rb_right) {                                         \
+               child = rb_entry(node->RBFIELD.rb_right, RBSTRUCT, RBFIELD);  \
+               if (child->RBAUGMENTED > max)                                 \
+                       max = child->RBAUGMENTED;                             \
+       }                                                                     \
+       if (exit && node->RBAUGMENTED == max)                                 \
+               return true;                                                  \
+       node->RBAUGMENTED = max;                                              \
+       return false;                                                         \
+}                                                                            \
+RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME,                                       \
+                    RBSTRUCT, RBFIELD, RBAUGMENTED, RBNAME ## _compute_max)
+
 
 #define        RB_RED          0
 #define        RB_BLACK        1
@@ -139,7 +185,6 @@ extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 
 static __always_inline struct rb_node *
 __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
-                    struct rb_node **leftmost,
                     const struct rb_augment_callbacks *augment)
 {
        struct rb_node *child = node->rb_right;
@@ -147,9 +192,6 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
        struct rb_node *parent, *rebalance;
        unsigned long pc;
 
-       if (leftmost && node == *leftmost)
-               *leftmost = rb_next(node);
-
        if (!tmp) {
                /*
                 * Case 1: node to erase has no more than 1 child (easy!)
@@ -249,8 +291,7 @@ static __always_inline void
 rb_erase_augmented(struct rb_node *node, struct rb_root *root,
                   const struct rb_augment_callbacks *augment)
 {
-       struct rb_node *rebalance = __rb_erase_augmented(node, root,
-                                                        NULL, augment);
+       struct rb_node *rebalance = __rb_erase_augmented(node, root, augment);
        if (rebalance)
                __rb_erase_color(rebalance, root, augment->rotate);
 }
@@ -259,11 +300,9 @@ static __always_inline void
 rb_erase_augmented_cached(struct rb_node *node, struct rb_root_cached *root,
                          const struct rb_augment_callbacks *augment)
 {
-       struct rb_node *rebalance = __rb_erase_augmented(node, &root->rb_root,
-                                                        &root->rb_leftmost,
-                                                        augment);
-       if (rebalance)
-               __rb_erase_color(rebalance, &root->rb_root, augment->rotate);
+       if (root->rb_leftmost == node)
+               root->rb_leftmost = rb_next(node);
+       rb_erase_augmented(node, &root->rb_root, augment);
 }
 
 #endif /* _TOOLS_LINUX_RBTREE_AUGMENTED_H */
index 1be0e79..1fc8faa 100644 (file)
@@ -569,7 +569,7 @@ __SYSCALL(__NR_semget, sys_semget)
 __SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl)
 #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_semtimedop 192
-__SC_COMP(__NR_semtimedop, sys_semtimedop, sys_semtimedop_time32)
+__SC_3264(__NR_semtimedop, sys_semtimedop_time32, sys_semtimedop)
 #endif
 #define __NR_semop 193
 __SYSCALL(__NR_semop, sys_semop)
index 59c71fa..2a616aa 100644 (file)
@@ -311,6 +311,7 @@ struct fscrypt_key {
 #define FS_NOCOW_FL                    0x00800000 /* Do not cow file */
 #define FS_INLINE_DATA_FL              0x10000000 /* Reserved for ext4 */
 #define FS_PROJINHERIT_FL              0x20000000 /* Create with parents projid */
+#define FS_CASEFOLD_FL                 0x40000000 /* Folder is case insensitive */
 #define FS_RESERVED_FL                 0x80000000 /* reserved for ext2 lib */
 
 #define FS_FL_USER_VISIBLE             0x0003DFFF /* User visible flags */
index 094bb03..7da1b37 100644 (file)
@@ -181,7 +181,7 @@ struct prctl_mm_map {
 #define PR_GET_THP_DISABLE     42
 
 /*
- * Tell the kernel to start/stop helping userspace manage bounds tables.
+ * No longer implemented, but left here to ensure the numbers stay reserved:
  */
 #define PR_MPX_ENABLE_MANAGEMENT  43
 #define PR_MPX_DISABLE_MANAGEMENT 44
@@ -229,4 +229,9 @@ struct prctl_mm_map {
 # define PR_PAC_APDBKEY                        (1UL << 3)
 # define PR_PAC_APGAKEY                        (1UL << 4)
 
+/* Tagged user address controls for arm64 */
+#define PR_SET_TAGGED_ADDR_CTRL                55
+#define PR_GET_TAGGED_ADDR_CTRL                56
+# define PR_TAGGED_ADDR_ENABLE         (1UL << 0)
+
 #endif /* _LINUX_PRCTL_H */
index 804f145..2548ff8 100644 (file)
@@ -83,14 +83,10 @@ __rb_rotate_set_parents(struct rb_node *old, struct rb_node *new,
 
 static __always_inline void
 __rb_insert(struct rb_node *node, struct rb_root *root,
-           bool newleft, struct rb_node **leftmost,
            void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
 {
        struct rb_node *parent = rb_red_parent(node), *gparent, *tmp;
 
-       if (newleft)
-               *leftmost = node;
-
        while (true) {
                /*
                 * Loop invariant: node is red.
@@ -436,34 +432,17 @@ static const struct rb_augment_callbacks dummy_callbacks = {
 
 void rb_insert_color(struct rb_node *node, struct rb_root *root)
 {
-       __rb_insert(node, root, false, NULL, dummy_rotate);
+       __rb_insert(node, root, dummy_rotate);
 }
 
 void rb_erase(struct rb_node *node, struct rb_root *root)
 {
        struct rb_node *rebalance;
-       rebalance = __rb_erase_augmented(node, root,
-                                        NULL, &dummy_callbacks);
+       rebalance = __rb_erase_augmented(node, root, &dummy_callbacks);
        if (rebalance)
                ____rb_erase_color(rebalance, root, dummy_rotate);
 }
 
-void rb_insert_color_cached(struct rb_node *node,
-                           struct rb_root_cached *root, bool leftmost)
-{
-       __rb_insert(node, &root->rb_root, leftmost,
-                   &root->rb_leftmost, dummy_rotate);
-}
-
-void rb_erase_cached(struct rb_node *node, struct rb_root_cached *root)
-{
-       struct rb_node *rebalance;
-       rebalance = __rb_erase_augmented(node, &root->rb_root,
-                                        &root->rb_leftmost, &dummy_callbacks);
-       if (rebalance)
-               ____rb_erase_color(rebalance, &root->rb_root, dummy_rotate);
-}
-
 /*
  * Augmented rbtree manipulation functions.
  *
@@ -472,10 +451,9 @@ void rb_erase_cached(struct rb_node *node, struct rb_root_cached *root)
  */
 
 void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
-                          bool newleft, struct rb_node **leftmost,
        void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
 {
-       __rb_insert(node, root, newleft, leftmost, augment_rotate);
+       __rb_insert(node, root, augment_rotate);
 }
 
 /*
@@ -580,15 +558,6 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new,
        __rb_change_child(victim, new, parent, root);
 }
 
-void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new,
-                           struct rb_root_cached *root)
-{
-       rb_replace_node(victim, new, &root->rb_root);
-
-       if (root->rb_leftmost == victim)
-               root->rb_leftmost = new;
-}
-
 static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
 {
        for (;;) {
index ba54bfc..f9a5d79 100644 (file)
@@ -6,14 +6,3 @@ libtraceevent-y += parse-utils.o
 libtraceevent-y += kbuffer-parse.o
 libtraceevent-y += tep_strerror.o
 libtraceevent-y += event-parse-api.o
-
-plugin_jbd2-y         += plugin_jbd2.o
-plugin_hrtimer-y      += plugin_hrtimer.o
-plugin_kmem-y         += plugin_kmem.o
-plugin_kvm-y          += plugin_kvm.o
-plugin_mac80211-y     += plugin_mac80211.o
-plugin_sched_switch-y += plugin_sched_switch.o
-plugin_function-y     += plugin_function.o
-plugin_xen-y          += plugin_xen.o
-plugin_scsi-y         += plugin_scsi.o
-plugin_cfg80211-y     += plugin_cfg80211.o
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt
new file mode 100644 (file)
index 0000000..2c6a618
--- /dev/null
@@ -0,0 +1,130 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_print_event - Writes event information into a trace sequence.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+*#include <trace-seq.h>*
+
+void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._)
+--
+
+DESCRIPTION
+-----------
+
+The _tep_print_event()_ function parses the event information of the given
+_record_ and writes it into the trace sequence _s_, according to the format
+string _fmt_. The desired information is specified after the format string.
+The _fmt_ is a printf-like format string. These arguments are supported:
+[verse]
+--
+       TEP_PRINT_PID, "%d"  - PID of the event.
+       TEP_PRINT_CPU, "%d"  - Event CPU.
+       TEP_PRINT_COMM, "%s" - Event command string.
+       TEP_PRINT_NAME, "%s" - Event name.
+       TEP_PRINT_LATENCY, "%s" - Latency of the event. It prints 4 or more
+                       fields - interrupt state, scheduling state,
+                       current context, and preemption count.
+                       Field 1 is the interrupt enabled state:
+                               d : Interrupts are disabled
+                               . : Interrupts are enabled
+                               X : The architecture does not support this
+                                   information
+                       Field 2 is the "need resched" state.
+                               N : The task is set to call the scheduler when
+                                   possible, as another higher priority task
+                                   may need to be scheduled in.
+                               . : The task is not set to call the scheduler.
+                       Field 3 is the context state.
+                               . : Normal context
+                               s : Soft interrupt context
+                               h : Hard interrupt context
+                               H : Hard interrupt context which triggered
+                                   during soft interrupt context.
+                               z : NMI context
+                               Z : NMI context which triggered during hard
+                                   interrupt context
+                       Field 4 is the preemption count.
+                               . : The preempt count is zero.
+                       On preemptible kernels (where the task can be scheduled
+                       out in arbitrary locations while in kernel context), the
+                       preempt count, when non zero, will prevent the kernel
+                       from scheduling out the current task. The preempt count
+                       number is displayed when it is not zero.
+                       Depending on the kernel, it may show other fields
+                       (lock depth, or migration disabled, which are unique to
+                       specialized kernels).
+       TEP_PRINT_TIME, "%d" - event time stamp. A divisor and precision can be
+                       specified as part of this format string:
+                       "%precision.divisord". Example:
+                       "%3.1000d" - divide the time by 1000 and print the first
+                       3 digits before the dot. Thus, the time stamp
+                       "123456000" will be printed as "123.456"
+       TEP_PRINT_INFO, "%s" - event information.
+       TEP_PRINT_INFO_RAW, "%s" - event information, in raw format.
+
+--
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+#include <trace-seq.h>
+...
+struct trace_seq seq;
+trace_seq_init(&seq);
+struct tep_handle *tep = tep_alloc();
+...
+void print_my_event(struct tep_record *record)
+{
+       trace_seq_reset(&seq);
+       tep_print_event(tep, &seq, record, "%16s-%-5d [%03d] %s %6.1000d %s %s",
+                       TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_CPU,
+                       TEP_PRINT_LATENCY, TEP_PRINT_TIME, TEP_PRINT_NAME,
+                       TEP_PRINT_INFO);
+}
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+       Header file to include in order to have access to the library APIs.
+*trace-seq.h*
+       Header file to include in order to have access to trace sequences related APIs.
+       Trace sequences are used to allow a function to call several other functions
+       to create a string of data to use.
+*-ltraceevent*
+       Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
index 38bfea3..f6aca0d 100644 (file)
@@ -59,12 +59,12 @@ parser context.
 
 The _tep_register_function()_ function registers a function name mapped to an
 address and (optional) module. This mapping is used in case the function tracer
-or events have "%pF" or "%pS" parameter in its format string. It is common to
-pass in the kallsyms function names with their corresponding addresses with this
+or events have "%pS" parameter in its format string. It is common to pass in
+the kallsyms function names with their corresponding addresses with this
 function. The _tep_ argument is the trace event parser context. The _name_ is
-the name of the function, the string is copied internally. The _addr_ is
-the start address of the function. The _mod_ is the kernel module
-the function may be in (NULL for none).
+the name of the function, the string is copied internally. The _addr_ is the
+start address of the function. The _mod_ is the kernel module the function may
+be in (NULL for none).
 
 The _tep_register_print_string()_ function  registers a string by the address
 it was stored in the kernel. Some strings internal to the kernel with static
index 8d56831..45b2017 100644 (file)
@@ -3,7 +3,7 @@ libtraceevent(3)
 
 NAME
 ----
-tep_alloc, tep_free,tep_ref, tep_unref,tep_ref_get - Create, destroy, manage
+tep_alloc, tep_free, tep_ref, tep_unref, tep_get_ref - Create, destroy, manage
 references of trace event parser context.
 
 SYNOPSIS
@@ -16,7 +16,7 @@ struct tep_handle pass:[*]*tep_alloc*(void);
 void *tep_free*(struct tep_handle pass:[*]_tep_);
 void *tep_ref*(struct tep_handle pass:[*]_tep_);
 void *tep_unref*(struct tep_handle pass:[*]_tep_);
-int *tep_ref_get*(struct tep_handle pass:[*]_tep_);
+int *tep_get_ref*(struct tep_handle pass:[*]_tep_);
 --
 
 DESCRIPTION
@@ -57,9 +57,9 @@ EXAMPLE
 ...
 struct tep_handle *tep = tep_alloc();
 ...
-int ref = tep_ref_get(tep);
+int ref = tep_get_ref(tep);
 tep_ref(tep);
-if ( (ref+1) != tep_ref_get(tep)) {
+if ( (ref+1) != tep_get_ref(tep)) {
        /* Something wrong happened, the counter is not incremented by 1 */
 }
 tep_unref(tep);
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
new file mode 100644 (file)
index 0000000..596032a
--- /dev/null
@@ -0,0 +1,99 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_load_plugins, tep_unload_plugins - Load / unload traceevent plugins.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_);
+void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_);
+--
+
+DESCRIPTION
+-----------
+The _tep_load_plugins()_ function loads all plugins located in the plugin
+directories. The _tep_ argument is the trace event parser context.
+The plugin directories are:
+[verse]
+--
+       - System's plugin directory, defined at the library compile time. It
+         depends on the library installation prefix and usually is
+         _(install_prefix)/lib/traceevent/plugins_
+       - Directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_
+       - User's plugin directory, located at _~/.local/lib/traceevent/plugins_
+--
+Loading of plugins can be controlled by the _tep_flags_, using the
+_tep_set_flag()_ API:
+[verse]
+--
+       _TEP_DISABLE_SYS_PLUGINS_       - do not load plugins, located in
+                                       the system's plugin directory.
+       _TEP_DISABLE_PLUGINS_           - do not load any plugins.
+--
+The _tep_set_flag()_ API needs to be called before _tep_load_plugins()_, if
+loading of all plugins is not the desired case.
+
+The _tep_unload_plugins()_ function unloads the plugins previously loaded by
+_tep_load_plugins()_. The _tep_ argument is the trace event parser context.
+The _plugin_list_ is the list of loaded plugins, returned by the
+_tep_load_plugins()_ function.
+
+RETURN VALUE
+------------
+The _tep_load_plugins()_ function returns a list of successfully loaded plugins,
+or NULL in case no plugins are loaded.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+struct tep_plugin_list *plugins = tep_load_plugins(tep);
+if (plugins == NULL) {
+       /* no plugins are loaded */
+}
+...
+tep_unload_plugins(plugins, tep);
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+       Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+       Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_, _tep_set_flag(3)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
index fbd977b..d530a7c 100644 (file)
@@ -16,7 +16,7 @@ Management of tep handler data structure and access of its members:
        void *tep_free*(struct tep_handle pass:[*]_tep_);
        void *tep_ref*(struct tep_handle pass:[*]_tep_);
        void *tep_unref*(struct tep_handle pass:[*]_tep_);
-       int *tep_ref_get*(struct tep_handle pass:[*]_tep_);
+       int *tep_get_ref*(struct tep_handle pass:[*]_tep_);
        void *tep_set_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
        void *tep_clear_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
        bool *tep_test_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flags_);
@@ -26,15 +26,12 @@ Management of tep handler data structure and access of its members:
        void *tep_set_long_size*(struct tep_handle pass:[*]_tep_, int _long_size_);
        int *tep_get_page_size*(struct tep_handle pass:[*]_tep_);
        void *tep_set_page_size*(struct tep_handle pass:[*]_tep_, int _page_size_);
-       bool *tep_is_latency_format*(struct tep_handle pass:[*]_tep_);
-       void *tep_set_latency_format*(struct tep_handle pass:[*]_tep_, int _lat_);
        int *tep_get_header_page_size*(struct tep_handle pass:[*]_tep_);
        int *tep_get_header_timestamp_size*(struct tep_handle pass:[*]_tep_);
        bool *tep_is_old_format*(struct tep_handle pass:[*]_tep_);
        int *tep_strerror*(struct tep_handle pass:[*]_tep_, enum tep_errno _errnum_, char pass:[*]_buf_, size_t _buflen_);
 
 Register / unregister APIs:
-       int *tep_register_trace_clock*(struct tep_handle pass:[*]_tep_, const char pass:[*]_trace_clock_);
        int *tep_register_function*(struct tep_handle pass:[*]_tep_, char pass:[*]_name_, unsigned long long _addr_, char pass:[*]_mod_);
        int *tep_register_event_handler*(struct tep_handle pass:[*]_tep_, int _id_, const char pass:[*]_sys_name_, const char pass:[*]_event_name_, tep_event_handler_func _func_, void pass:[*]_context_);
        int *tep_unregister_event_handler*(struct tep_handle pass:[*]tep, int id, const char pass:[*]sys_name, const char pass:[*]event_name, tep_event_handler_func func, void pass:[*]_context_);
@@ -57,14 +54,7 @@ Event related APIs:
        int *tep_get_events_count*(struct tep_handle pass:[*]_tep_);
        struct tep_event pass:[*]pass:[*]*tep_list_events*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_);
        struct tep_event pass:[*]pass:[*]*tep_list_events_copy*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_);
-
-Event printing:
-       void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_, bool _use_trace_clock_);
-       void *tep_print_event_data*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, struct tep_record pass:[*]_record_);
-       void *tep_event_info*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, struct tep_record pass:[*]_record_);
-       void *tep_print_event_task*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, struct tep_record pass:[*]_record_);
-       void *tep_print_event_time*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, struct tep_record pass:[*]record, bool _use_trace_clock_);
-       void *tep_set_print_raw*(struct tep_handle pass:[*]_tep_, int _print_raw_);
+       void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._);
 
 Event finding:
        struct tep_event pass:[*]*tep_find_event*(struct tep_handle pass:[*]_tep_, int _id_);
@@ -116,7 +106,6 @@ Filter management:
        int *tep_filter_compare*(struct tep_event_filter pass:[*]_filter1_, struct tep_event_filter pass:[*]_filter2_);
 
 Parsing various data from the records:
-       void *tep_data_latency_format*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_);
        int *tep_data_type*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
        int *tep_data_pid*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
        int *tep_data_preempt_count*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
index a39cdd0..5315f37 100644 (file)
@@ -58,30 +58,6 @@ export man_dir man_dir_SQ INSTALL
 export DESTDIR DESTDIR_SQ
 export EVENT_PARSE_VERSION
 
-set_plugin_dir := 1
-
-# Set plugin_dir to preffered global plugin location
-# If we install under $HOME directory we go under
-# $(HOME)/.local/lib/traceevent/plugins
-#
-# We dont set PLUGIN_DIR in case we install under $HOME
-# directory, because by default the code looks under:
-# $(HOME)/.local/lib/traceevent/plugins by default.
-#
-ifeq ($(plugin_dir),)
-ifeq ($(prefix),$(HOME))
-override plugin_dir = $(HOME)/.local/lib/traceevent/plugins
-set_plugin_dir := 0
-else
-override plugin_dir = $(libdir)/traceevent/plugins
-endif
-endif
-
-ifeq ($(set_plugin_dir),1)
-PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)"
-PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))'
-endif
-
 include ../../scripts/Makefile.include
 
 # copy a bit from Linux kbuild
@@ -105,7 +81,6 @@ export prefix libdir src obj
 # Shell quotes
 libdir_SQ = $(subst ','\'',$(libdir))
 libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
-plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
 
 CONFIG_INCLUDES = 
 CONFIG_LIBS    =
@@ -151,29 +126,14 @@ MAKEOVERRIDES=
 export srctree OUTPUT CC LD CFLAGS V
 build := -f $(srctree)/tools/build/Makefile.build dir=. obj
 
-PLUGINS  = plugin_jbd2.so
-PLUGINS += plugin_hrtimer.so
-PLUGINS += plugin_kmem.so
-PLUGINS += plugin_kvm.so
-PLUGINS += plugin_mac80211.so
-PLUGINS += plugin_sched_switch.so
-PLUGINS += plugin_function.so
-PLUGINS += plugin_xen.so
-PLUGINS += plugin_scsi.so
-PLUGINS += plugin_cfg80211.so
-
-PLUGINS    := $(addprefix $(OUTPUT),$(PLUGINS))
-PLUGINS_IN := $(PLUGINS:.so=-in.o)
-
 TE_IN      := $(OUTPUT)libtraceevent-in.o
 LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
-DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list
 
-CMD_TARGETS = $(LIB_TARGET) $(PLUGINS) $(DYNAMIC_LIST_FILE)
+CMD_TARGETS = $(LIB_TARGET)
 
 TARGETS = $(CMD_TARGETS)
 
-all: all_cmd
+all: all_cmd plugins
 
 all_cmd: $(CMD_TARGETS)
 
@@ -188,17 +148,6 @@ $(OUTPUT)libtraceevent.so.$(EVENT_PARSE_VERSION): $(TE_IN)
 $(OUTPUT)libtraceevent.a: $(TE_IN)
        $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
 
-$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS)
-       $(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@)
-
-plugins: $(PLUGINS)
-
-__plugin_obj = $(notdir $@)
-  plugin_obj = $(__plugin_obj:-in.o=)
-
-$(PLUGINS_IN): force
-       $(Q)$(MAKE) $(build)=$(plugin_obj)
-
 $(OUTPUT)%.so: $(OUTPUT)%-in.o
        $(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^
 
@@ -258,25 +207,6 @@ define do_install
        $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
 endef
 
-define do_install_plugins
-       for plugin in $1; do                            \
-         $(call do_install,$$plugin,$(plugin_dir_SQ)); \
-       done
-endef
-
-define do_generate_dynamic_list_file
-       symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \
-       xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\
-       if [ "$$symbol_type" = "U W" ];then                             \
-               (echo '{';                                              \
-               $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;\
-               echo '};';                                              \
-               ) > $2;                                                 \
-       else                                                            \
-               (echo Either missing one of [$1] or bad version of $(NM)) 1>&2;\
-       fi
-endef
-
 PKG_CONFIG_FILE = libtraceevent.pc
 define do_install_pkgconfig_file
        if [ -n "${pkgconfig_dir}" ]; then                                      \
@@ -296,10 +226,6 @@ install_lib: all_cmd install_plugins install_headers install_pkgconfig
                $(call do_install_mkdir,$(libdir_SQ)); \
                cp -fpR $(LIB_INSTALL) $(DESTDIR)$(libdir_SQ)
 
-install_plugins: $(PLUGINS)
-       $(call QUIET_INSTALL, trace_plugins) \
-               $(call do_install_plugins, $(PLUGINS))
-
 install_pkgconfig:
        $(call QUIET_INSTALL, $(PKG_CONFIG_FILE)) \
                $(call do_install_pkgconfig_file,$(prefix))
@@ -313,7 +239,7 @@ install_headers:
 
 install: install_lib
 
-clean:
+clean: clean_plugins
        $(call QUIET_CLEAN, libtraceevent) \
                $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \
                $(RM) TRACEEVENT-CFLAGS tags TAGS; \
@@ -351,7 +277,19 @@ help:
        @echo '  doc-install         - install the man pages'
        @echo '  doc-uninstall       - uninstall the man pages'
        @echo''
-PHONY += force plugins
+
+PHONY += plugins
+plugins:
+       $(call descend,plugins)
+
+PHONY += install_plugins
+install_plugins:
+       $(call descend,plugins,install)
+
+PHONY += clean_plugins
+clean_plugins:
+       $(call descend,plugins,clean)
+
 force:
 
 # Declare the contents of the .PHONY variable as phony.  We keep that
index bb22238..d948475 100644 (file)
@@ -4367,10 +4367,20 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s
                                        switch (*ptr) {
                                        case 's':
                                        case 'S':
-                                       case 'f':
-                                       case 'F':
                                        case 'x':
                                                break;
+                                       case 'f':
+                                       case 'F':
+                                               /*
+                                                * Pre-5.5 kernels use %pf and
+                                                * %pF for printing symbols
+                                                * while kernels since 5.5 use
+                                                * %pfw for fwnodes. So check
+                                                * %p[fF] isn't followed by 'w'.
+                                                */
+                                               if (ptr[1] != 'w')
+                                                       break;
+                                               /* fall through */
                                        default:
                                                /*
                                                 * Older kernels do not process
@@ -4487,12 +4497,12 @@ get_bprint_format(void *data, int size __maybe_unused,
 
        printk = find_printk(tep, addr);
        if (!printk) {
-               if (asprintf(&format, "%%pf: (NO FORMAT FOUND at %llx)\n", addr) < 0)
+               if (asprintf(&format, "%%ps: (NO FORMAT FOUND at %llx)\n", addr) < 0)
                        return NULL;
                return format;
        }
 
-       if (asprintf(&format, "%s: %s", "%pf", printk->printk) < 0)
+       if (asprintf(&format, "%s: %s", "%ps", printk->printk) < 0)
                return NULL;
 
        return format;
@@ -5517,8 +5527,10 @@ static void print_event_time(struct tep_handle *tep, struct trace_seq *s,
        if (divstr && isdigit(*(divstr + 1)))
                div = atoi(divstr + 1);
        time = record->ts;
-       if (div)
+       if (div) {
+               time += div / 2;
                time /= div;
+       }
        pr = prec;
        while (pr--)
                p10 *= 10;
index d438ee4..b77837f 100644 (file)
@@ -441,6 +441,8 @@ int tep_register_print_string(struct tep_handle *tep, const char *fmt,
                              unsigned long long addr);
 bool tep_is_pid_registered(struct tep_handle *tep, int pid);
 
+struct tep_event *tep_get_event(struct tep_handle *tep, int index);
+
 #define TEP_PRINT_INFO         "INFO"
 #define TEP_PRINT_INFO_RAW     "INFO_RAW"
 #define TEP_PRINT_COMM         "COMM"
diff --git a/tools/lib/traceevent/plugins/Build b/tools/lib/traceevent/plugins/Build
new file mode 100644 (file)
index 0000000..210d269
--- /dev/null
@@ -0,0 +1,10 @@
+plugin_jbd2-y         += plugin_jbd2.o
+plugin_hrtimer-y      += plugin_hrtimer.o
+plugin_kmem-y         += plugin_kmem.o
+plugin_kvm-y          += plugin_kvm.o
+plugin_mac80211-y     += plugin_mac80211.o
+plugin_sched_switch-y += plugin_sched_switch.o
+plugin_function-y     += plugin_function.o
+plugin_xen-y          += plugin_xen.o
+plugin_scsi-y         += plugin_scsi.o
+plugin_cfg80211-y     += plugin_cfg80211.o
diff --git a/tools/lib/traceevent/plugins/Makefile b/tools/lib/traceevent/plugins/Makefile
new file mode 100644 (file)
index 0000000..f440989
--- /dev/null
@@ -0,0 +1,222 @@
+# SPDX-License-Identifier: GPL-2.0
+
+#MAKEFLAGS += --no-print-directory
+
+
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+  $(if $(or $(findstring environment,$(origin $(1))),\
+            $(findstring command line,$(origin $(1)))),,\
+    $(eval $(1) = $(2)))
+endef
+
+# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,NM,$(CROSS_COMPILE)nm)
+$(call allow-override,PKG_CONFIG,pkg-config)
+
+EXT = -std=gnu99
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
+ifeq ($(LP64), 1)
+  libdir_relative = lib64
+else
+  libdir_relative = lib
+endif
+
+prefix ?= /usr/local
+libdir = $(prefix)/$(libdir_relative)
+
+set_plugin_dir := 1
+
+# Set plugin_dir to the preferred global plugin location.
+# If we install under the $HOME directory we go under
+# $(HOME)/.local/lib/traceevent/plugins
+#
+# We don't set PLUGIN_DIR in case we install under the $HOME
+# directory, because the code already looks under
+# $(HOME)/.local/lib/traceevent/plugins by default.
+#
+ifeq ($(plugin_dir),)
+ifeq ($(prefix),$(HOME))
+override plugin_dir = $(HOME)/.local/lib/traceevent/plugins
+set_plugin_dir := 0
+else
+override plugin_dir = $(libdir)/traceevent/plugins
+endif
+endif
+
+ifeq ($(set_plugin_dir),1)
+PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)"
+PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))'
+endif
+
+include ../../../scripts/Makefile.include
+
+# copy a bit from Linux kbuild
+
+ifeq ("$(origin V)", "command line")
+  VERBOSE = $(V)
+endif
+ifndef VERBOSE
+  VERBOSE = 0
+endif
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+export prefix libdir src obj
+
+# Shell quotes
+plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
+
+CONFIG_INCLUDES =
+CONFIG_LIBS    =
+CONFIG_FLAGS   =
+
+OBJ            = $@
+N              =
+
+INCLUDES = -I. -I.. -I $(srctree)/tools/include $(CONFIG_INCLUDES)
+
+# Set compile option CFLAGS
+ifdef EXTRA_CFLAGS
+  CFLAGS := $(EXTRA_CFLAGS)
+else
+  CFLAGS := -g -Wall
+endif
+
+# Append required CFLAGS
+override CFLAGS += -fPIC
+override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
+override CFLAGS += $(udis86-flags) -D_GNU_SOURCE
+
+ifeq ($(VERBOSE),1)
+  Q =
+else
+  Q = @
+endif
+
+# Disable command line variables (CFLAGS) override from top
+# level Makefile (perf), otherwise build Makefile will get
+# the same command line setup.
+MAKEOVERRIDES=
+
+export srctree OUTPUT CC LD CFLAGS V
+
+build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+
+DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list
+
+PLUGINS  = plugin_jbd2.so
+PLUGINS += plugin_hrtimer.so
+PLUGINS += plugin_kmem.so
+PLUGINS += plugin_kvm.so
+PLUGINS += plugin_mac80211.so
+PLUGINS += plugin_sched_switch.so
+PLUGINS += plugin_function.so
+PLUGINS += plugin_xen.so
+PLUGINS += plugin_scsi.so
+PLUGINS += plugin_cfg80211.so
+
+PLUGINS    := $(addprefix $(OUTPUT),$(PLUGINS))
+PLUGINS_IN := $(PLUGINS:.so=-in.o)
+
+plugins: $(PLUGINS) $(DYNAMIC_LIST_FILE)
+
+__plugin_obj = $(notdir $@)
+  plugin_obj = $(__plugin_obj:-in.o=)
+
+$(PLUGINS_IN): force
+       $(Q)$(MAKE) $(build)=$(plugin_obj)
+
+$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS)
+       $(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@)
+
+$(OUTPUT)%.so: $(OUTPUT)%-in.o
+       $(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^
+
+define update_dir
+  (echo $1 > $@.tmp;                           \
+   if [ -r $@ ] && cmp -s $@ $@.tmp; then      \
+     rm -f $@.tmp;                             \
+   else                                                \
+     echo '  UPDATE                 $@';       \
+     mv -f $@.tmp $@;                          \
+   fi);
+endef
+
+tags:  force
+       $(RM) tags
+       find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
+       --regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/'
+
+TAGS:  force
+       $(RM) TAGS
+       find . -name '*.[ch]' | xargs etags \
+       --regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/'
+
+define do_install_mkdir
+       if [ ! -d '$(DESTDIR_SQ)$1' ]; then             \
+               $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+       fi
+endef
+
+define do_install
+       $(call do_install_mkdir,$2);                    \
+       $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
+endef
+
+define do_install_plugins
+       for plugin in $1; do                            \
+         $(call do_install,$$plugin,$(plugin_dir_SQ)); \
+       done
+endef
+
+define do_generate_dynamic_list_file
+       symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \
+       xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\
+       if [ "$$symbol_type" = "U W" ];then                             \
+               (echo '{';                                              \
+               $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;\
+               echo '};';                                              \
+               ) > $2;                                                 \
+       else                                                            \
+               (echo Either missing one of [$1] or bad version of $(NM)) 1>&2;\
+               fi
+endef
+
+install: $(PLUGINS)
+       $(call QUIET_INSTALL, trace_plugins) \
+       $(call do_install_plugins, $(PLUGINS))
+
+clean: # the three rm invocations below are ';'-separated shell commands
+       $(call QUIET_CLEAN, trace_plugins) \
+               $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \
+               $(RM) $(OUTPUT)libtraceevent-dynamic-list; \
+               $(RM) TRACEEVENT-CFLAGS tags TAGS;
+
+PHONY += force plugins
+force:
+
+# Declare the contents of the .PHONY variable as phony.  We keep that
+# information in a variable so we can use it in if_changed and friends.
+.PHONY: $(PHONY)
index 176f2f0..044c9a3 100644 (file)
@@ -138,7 +138,6 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
                "do_task_dead",
                "__module_put_and_exit",
                "complete_and_exit",
-               "kvm_spurious_fault",
                "__reiserfs_panic",
                "lbug_with_loc",
                "fortify_panic",
index a269d78..46f7fba 100644 (file)
@@ -924,7 +924,7 @@ ifndef NO_JVMTI
     JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
   else
     ifneq (,$(wildcard /usr/sbin/alternatives))
-      JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
+      JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed -e 's%/jre/bin/java.%%g' -e 's%/bin/java.%%g')
     endif
   endif
   ifndef JDIR
index f9807d8..902c792 100644 (file)
@@ -292,7 +292,7 @@ endif
 LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
 export LIBTRACEEVENT
 
-LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list
+LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)plugins/libtraceevent-dynamic-list
 
 #
 # The static build has no dynsym table, so this does not work for
@@ -567,7 +567,7 @@ all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
 # Create python binding output directory if not already present
 _dummy := $(shell [ -d '$(OUTPUT)python' ] || mkdir -p '$(OUTPUT)python')
 
-$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST)
+$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) $(LIBPERF)
        $(QUIET_GEN)LDSHARED="$(CC) -pthread -shared" \
         CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \
          $(PYTHON_WORD) util/setup.py \
@@ -737,7 +737,7 @@ libtraceevent_plugins: FORCE
        $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
 
 $(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
-       $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list
+       $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)plugins/libtraceevent-dynamic-list
 
 $(LIBTRACEEVENT)-clean:
        $(call QUIET_CLEAN, libtraceevent)
index c32db09..ede040c 100644 (file)
 #include "../../util/event.h"
 #include "../../util/evlist.h"
 #include "../../util/evsel.h"
+#include "../../util/evsel_config.h"
 #include "../../util/pmu.h"
 #include "../../util/cs-etm.h"
-#include "../../util/util.h"
+#include <internal/lib.h> // page_size
 #include "../../util/session.h"
 
 #include <errno.h>
@@ -416,7 +417,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
                if (err)
                        goto out;
 
-               tracking_evsel = perf_evlist__last(evlist);
+               tracking_evsel = evlist__last(evlist);
                perf_evlist__set_tracking_event(evlist, tracking_evsel);
 
                tracking_evsel->core.attr.freq = 0;
@@ -648,7 +649,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
        if (priv_size != cs_etm_info_priv_size(itr, session->evlist))
                return -EINVAL;
 
-       if (!session->evlist->nr_mmaps)
+       if (!session->evlist->core.nr_mmaps)
                return -EINVAL;
 
        /* If the cpu_map is empty all online CPUs are involved */
index 4b36469..eba6541 100644 (file)
@@ -16,7 +16,7 @@
 #include "../../util/evsel.h"
 #include "../../util/evlist.h"
 #include "../../util/session.h"
-#include "../../util/util.h"
+#include <internal/lib.h> // page_size
 #include "../../util/pmu.h"
 #include "../../util/debug.h"
 #include "../../util/auxtrace.h"
@@ -51,7 +51,7 @@ static int arm_spe_info_fill(struct auxtrace_record *itr,
        if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE)
                return -EINVAL;
 
-       if (!session->evlist->nr_mmaps)
+       if (!session->evlist->core.nr_mmaps)
                return -EINVAL;
 
        auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
@@ -129,7 +129,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
        if (err)
                return err;
 
-       tracking_evsel = perf_evlist__last(evlist);
+       tracking_evsel = evlist__last(evlist);
        perf_evlist__set_tracking_event(evlist, tracking_evsel);
 
        tracking_evsel->core.attr.freq = 0;
index b047b88..917b97d 100644 (file)
@@ -11,7 +11,6 @@
 #include <dwarf-regs.h>
 #include <linux/ptrace.h> /* for struct user_pt_regs */
 #include <linux/stringify.h>
-#include "util.h"
 
 struct pt_regs_dwarfnum {
        const char *name;
index e41defa..a32e4b7 100644 (file)
@@ -1,5 +1,7 @@
 #include <stdio.h>
 #include <stdlib.h>
+#include <perf/cpumap.h>
+#include <internal/cpumap.h>
 #include <api/fs/fs.h>
 #include "debug.h"
 #include "header.h"
@@ -29,7 +31,7 @@ char *get_cpuid_str(struct perf_pmu *pmu)
 
        /* read midr from list of cpus mapped to this pmu */
        cpus = perf_cpu_map__get(pmu->cpus);
-       for (cpu = 0; cpu < cpus->nr; cpu++) {
+       for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
                scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR,
                                sysfs, cpus->map[cpu]);
 
index 002520d..1495a95 100644 (file)
@@ -5,8 +5,8 @@
 #include <libunwind.h>
 #include "perf_regs.h"
 #include "../../util/unwind.h"
-#include "../../util/debug.h"
 #endif
+#include "../../util/debug.h"
 
 int LIBUNWIND__ARCH_REG_ID(int regnum)
 {
index 4952890..0c4f4ca 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/ptrace.h>
 #include <linux/kernel.h>
 #include <linux/stringify.h>
-#include "util.h"
 
 struct pt_regs_dwarfnum {
        const char *name;
index 0b24266..b6b7bc7 100644 (file)
@@ -6,7 +6,6 @@
 #include <string.h>
 #include <linux/stringify.h>
 #include "header.h"
-#include "util.h"
 
 #define mfspr(rn)       ({unsigned long rval; \
                         asm volatile("mfspr %0," __stringify(rn) \
index f0dbf7b..9cc1c4a 100644 (file)
@@ -5,9 +5,11 @@
 #include "util/debug.h"
 #include "util/evsel.h"
 #include "util/evlist.h"
+#include "util/pmu.h"
 
 #include "book3s_hv_exits.h"
 #include "book3s_hcalls.h"
+#include <subcmd/parse-options.h>
 
 #define NR_TPS 4
 
@@ -172,3 +174,46 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused)
 
        return ret;
 }
+
+/*
+ * In case of powerpc architecture, pmu registers are programmable
+ * by guest kernel. So monitoring guest via host may not provide
+ * valid samples with default 'cycles' event. It is better to use
+ * 'trace_imc/trace_cycles' event for guest profiling, since it
+ * can track the guest instruction pointer in the trace-record.
+ *
+ * Unless -e/--event is seen (parsed on a scratch copy of argv), append
+ * "-e trace_imc/trace_cycles/" at argv[*argc]; needs two spare slots.
+ * Returns 0, -ENOMEM (allocation) or -EINVAL (event unavailable).
+ */
+int kvm_add_default_arch_event(int *argc, const char **argv)
+{
+       const char **tmp;
+       char *opt, *name;
+       bool event = false;
+       int i, j = *argc;
+       const struct option event_options[] = {
+               OPT_BOOLEAN('e', "event", &event, NULL),
+               OPT_END()
+       };
+
+       tmp = calloc(j + 1, sizeof(char *));
+       if (!tmp)
+               return -ENOMEM;
+       for (i = 0; i < j; i++)
+               tmp[i] = argv[i];
+       parse_options(j, tmp, event_options, NULL, PARSE_OPT_KEEP_UNKNOWN);
+       free(tmp);
+       if (event)
+               return 0;
+       if (!pmu_have_event("trace_imc", "trace_cycles"))
+               return -EINVAL;
+       opt = strdup("-e");
+       name = opt ? strdup("trace_imc/trace_cycles/") : NULL;
+       if (!name) {
+               free(opt);
+               return -ENOMEM;
+       }
+       argv[(*argc)++] = opt;
+       argv[(*argc)++] = name;
+       return 0;
+}
index fc9c2f5..3018a05 100644 (file)
@@ -13,6 +13,7 @@
 #include "util/callchain.h"
 #include "util/debug.h"
 #include "util/dso.h"
+#include "util/event.h" // struct ip_callchain
 #include "util/map.h"
 #include "util/symbol.h"
 
index 8a4b717..abb7a12 100644 (file)
@@ -4,7 +4,6 @@
  * Copyright (C) 2015 Naveen N. Rao, IBM Corporation
  */
 
-#include "debug.h"
 #include "dso.h"
 #include "symbol.h"
 #include "map.h"
index cb19878..6ac8887 100644 (file)
@@ -4,6 +4,7 @@ PERF_HAVE_DWARF_REGS := 1
 endif
 HAVE_KVM_STAT_SUPPORT := 1
 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+PERF_HAVE_JITDUMP := 1
 
 #
 # Syscall table generation for perf
index b0fb70e..0db5c58 100644 (file)
@@ -1,4 +1,5 @@
 #include <stdbool.h>
+#include <stdlib.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/bitops.h>
index c8c86a0..724efb2 100644 (file)
@@ -2,7 +2,7 @@
 #include <unistd.h>
 #include <stdio.h>
 #include <string.h>
-#include "util.h"
+#include <internal/lib.h> // page_size
 #include "machine.h"
 #include "api/fs/fs.h"
 #include "debug.h"
index 3b5cc33..3ec562a 100644 (file)
@@ -5,7 +5,7 @@
 #include "evlist.h"
 #include "evsel.h"
 #include "arch-tests.h"
-#include "util.h"
+#include <internal/lib.h> // page_size
 
 #include <signal.h>
 #include <sys/mman.h>
@@ -63,9 +63,9 @@ int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subt
                goto out;
        }
 
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
        if (!evsel) {
-               pr_debug("perf_evlist__first failed\n");
+               pr_debug("evlist__first failed\n");
                goto out;
        }
 
index eb36359..fa94795 100644 (file)
@@ -15,9 +15,9 @@
 #include "evlist.h"
 #include "evsel.h"
 #include "thread_map.h"
-#include "cpumap.h"
 #include "record.h"
 #include "tsc.h"
+#include "util/mmap.h"
 #include "tests/tests.h"
 
 #include "arch-tests.h"
@@ -66,7 +66,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
        union perf_event *event;
        u64 test_tsc, comm1_tsc, comm2_tsc;
        u64 test_time, comm1_time = 0, comm2_time = 0;
-       struct perf_mmap *md;
+       struct mmap *md;
 
        threads = thread_map__new(-1, getpid(), UINT_MAX);
        CHECK_NOT_NULL__(threads);
@@ -83,7 +83,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
 
        perf_evlist__config(evlist, &opts, NULL);
 
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
 
        evsel->core.attr.comm = 1;
        evsel->core.attr.disabled = 1;
@@ -91,9 +91,9 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
 
        CHECK__(evlist__open(evlist));
 
-       CHECK__(perf_evlist__mmap(evlist, UINT_MAX));
+       CHECK__(evlist__mmap(evlist, UINT_MAX));
 
-       pc = evlist->mmap[0].base;
+       pc = evlist->mmap[0].core.base;
        ret = perf_read_tsc_conversion(pc, &tc);
        if (ret) {
                if (ret == -EOPNOTSUPP) {
@@ -115,7 +115,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
 
        evlist__disable(evlist);
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
                md = &evlist->mmap[i];
                if (perf_mmap__read_init(md) < 0)
                        continue;
index 6e67cee..1ea9166 100644 (file)
@@ -13,7 +13,7 @@
 #include "tests/tests.h"
 #include "cloexec.h"
 #include "event.h"
-#include "util.h"
+#include <internal/lib.h> // page_size
 #include "arch-tests.h"
 
 static u64 rdpmc(unsigned int counter)
index 9876c7a..3e67915 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "../../../../arch/x86/include/asm/insn.h"
 #include "archinsn.h"
+#include "event.h"
 #include "machine.h"
 #include "thread.h"
 #include "symbol.h"
index a3a0b68..d357c62 100644 (file)
@@ -3,6 +3,8 @@
 #include <linux/string.h>
 #include <linux/zalloc.h>
 
+#include "../../util/event.h"
+#include "../../util/synthetic-events.h"
 #include "../../util/machine.h"
 #include "../../util/tool.h"
 #include "../../util/map.h"
index d263430..f7f68a5 100644 (file)
@@ -15,6 +15,7 @@
 #include "../../util/event.h"
 #include "../../util/evsel.h"
 #include "../../util/evlist.h"
+#include "../../util/mmap.h"
 #include "../../util/session.h"
 #include "../../util/pmu.h"
 #include "../../util/debug.h"
@@ -22,7 +23,7 @@
 #include "../../util/tsc.h"
 #include "../../util/auxtrace.h"
 #include "../../util/intel-bts.h"
-#include "../../util/util.h"
+#include <internal/lib.h> // page_size
 
 #define KiB(x) ((x) * 1024)
 #define MiB(x) ((x) * 1024 * 1024)
@@ -74,10 +75,10 @@ static int intel_bts_info_fill(struct auxtrace_record *itr,
        if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE)
                return -EINVAL;
 
-       if (!session->evlist->nr_mmaps)
+       if (!session->evlist->core.nr_mmaps)
                return -EINVAL;
 
-       pc = session->evlist->mmap[0].base;
+       pc = session->evlist->mmap[0].core.base;
        if (pc) {
                err = perf_read_tsc_conversion(pc, &tc);
                if (err) {
@@ -230,7 +231,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
                if (err)
                        return err;
 
-               tracking_evsel = perf_evlist__last(evlist);
+               tracking_evsel = evlist__last(evlist);
 
                perf_evlist__set_tracking_event(evlist, tracking_evsel);
 
index cb7cf16..d6d2625 100644 (file)
@@ -18,6 +18,7 @@
 #include "../../util/evlist.h"
 #include "../../util/evsel.h"
 #include "../../util/cpumap.h"
+#include "../../util/mmap.h"
 #include <subcmd/parse-options.h>
 #include "../../util/parse-events.h"
 #include "../../util/pmu.h"
@@ -26,7 +27,7 @@
 #include "../../util/record.h"
 #include "../../util/target.h"
 #include "../../util/tsc.h"
-#include "../../util/util.h"
+#include <internal/lib.h> // page_size
 #include "../../util/intel-pt.h"
 
 #define KiB(x) ((x) * 1024)
@@ -351,10 +352,10 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
        filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu);
        filter_str_len = filter ? strlen(filter) : 0;
 
-       if (!session->evlist->nr_mmaps)
+       if (!session->evlist->core.nr_mmaps)
                return -EINVAL;
 
-       pc = session->evlist->mmap[0].base;
+       pc = session->evlist->mmap[0].core.base;
        if (pc) {
                err = perf_read_tsc_conversion(pc, &tc);
                if (err) {
@@ -416,12 +417,12 @@ static int intel_pt_track_switches(struct evlist *evlist)
                return err;
        }
 
-       evsel = perf_evlist__last(evlist);
+       evsel = evlist__last(evlist);
 
        perf_evsel__set_sample_bit(evsel, CPU);
        perf_evsel__set_sample_bit(evsel, TIME);
 
-       evsel->system_wide = true;
+       evsel->core.system_wide = true;
        evsel->no_aux_samples = true;
        evsel->immediate = true;
 
@@ -716,13 +717,13 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
                                if (err)
                                        return err;
 
-                               switch_evsel = perf_evlist__last(evlist);
+                               switch_evsel = evlist__last(evlist);
 
                                switch_evsel->core.attr.freq = 0;
                                switch_evsel->core.attr.sample_period = 1;
                                switch_evsel->core.attr.context_switch = 1;
 
-                               switch_evsel->system_wide = true;
+                               switch_evsel->core.system_wide = true;
                                switch_evsel->no_aux_samples = true;
                                switch_evsel->immediate = true;
 
@@ -774,7 +775,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
                if (err)
                        return err;
 
-               tracking_evsel = perf_evlist__last(evlist);
+               tracking_evsel = evlist__last(evlist);
 
                perf_evlist__set_tracking_event(evlist, tracking_evsel);
 
index 1e9ec78..e17e080 100644 (file)
@@ -1,9 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/types.h>
 #include <linux/string.h>
+#include <limits.h>
 #include <stdlib.h>
 
-#include "../../util/util.h"
+#include <internal/lib.h> // page_size
 #include "../../util/machine.h"
 #include "../../util/map.h"
 #include "../../util/symbol.h"
index c5197a1..2f55afb 100644 (file)
@@ -8,6 +8,8 @@
 #include <linux/types.h>
 #include <asm/barrier.h>
 #include "../../../util/debug.h"
+#include "../../../util/event.h"
+#include "../../../util/synthetic-events.h"
 #include "../../../util/tsc.h"
 
 int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
index 05920e3..4735797 100644 (file)
@@ -1,11 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include <errno.h>
+#include "../../util/debug.h"
 #ifndef REMOTE_UNWIND_LIBUNWIND
 #include <libunwind.h>
 #include "perf_regs.h"
 #include "../../util/unwind.h"
-#include "../../util/debug.h"
 #endif
 
 #ifdef HAVE_ARCH_X86_64_SUPPORT
index d1caa4a..bb617e5 100644 (file)
 #include <sys/resource.h>
 #include <sys/epoll.h>
 #include <sys/eventfd.h>
+#include <internal/cpumap.h>
 #include <perf/cpumap.h>
 
 #include "../util/stat.h"
 #include <subcmd/parse-options.h>
 #include "bench.h"
-#include "cpumap.h"
 
 #include <err.h>
 
index f6b4472..7af6944 100644 (file)
 #include <sys/epoll.h>
 #include <sys/eventfd.h>
 #include <sys/types.h>
+#include <internal/cpumap.h>
 #include <perf/cpumap.h>
 
 #include "../util/stat.h"
 #include <subcmd/parse-options.h>
 #include "bench.h"
-#include "cpumap.h"
 
 #include <err.h>
 
index 80e1389..8ba0c33 100644 (file)
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
 #include <sys/time.h>
+#include <internal/cpumap.h>
 #include <perf/cpumap.h>
 
 #include "../util/stat.h"
 #include <subcmd/parse-options.h>
 #include "bench.h"
 #include "futex.h"
-#include "cpumap.h"
 
 #include <err.h>
 
index c5d6d0a..d0cae81 100644 (file)
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
 #include <errno.h>
+#include <internal/cpumap.h>
 #include <perf/cpumap.h>
 #include "bench.h"
 #include "futex.h"
-#include "cpumap.h"
 
 #include <err.h>
 #include <stdlib.h>
index 75d3418..a00a689 100644 (file)
 #include <linux/kernel.h>
 #include <linux/time64.h>
 #include <errno.h>
+#include <internal/cpumap.h>
 #include <perf/cpumap.h>
 #include "bench.h"
 #include "futex.h"
-#include "cpumap.h"
 
 #include <err.h>
 #include <stdlib.h>
index 163fe16..a053cf2 100644 (file)
@@ -29,7 +29,8 @@ int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe
 #include <linux/time64.h>
 #include <errno.h>
 #include "futex.h"
-#include "cpumap.h"
+#include <internal/cpumap.h>
+#include <perf/cpumap.h>
 
 #include <err.h>
 #include <stdlib.h>
index 77dcdc1..df81009 100644 (file)
 #include <linux/kernel.h>
 #include <linux/time64.h>
 #include <errno.h>
+#include <internal/cpumap.h>
 #include <perf/cpumap.h>
 #include "bench.h"
 #include "futex.h"
-#include "cpumap.h"
 
 #include <err.h>
 #include <stdlib.h>
index 62b8ef4..5797253 100644 (file)
@@ -9,7 +9,6 @@
 /* For the CLR_() macros */
 #include <pthread.h>
 
-#include "../builtin.h"
 #include <subcmd/parse-options.h>
 #include "../util/cloexec.h"
 
index c63eb9a..97e4a4f 100644 (file)
@@ -10,9 +10,7 @@
  *
  */
 
-#include "../util/util.h"
 #include <subcmd/parse-options.h>
-#include "../builtin.h"
 #include "bench.h"
 
 /* Test groups of 20 processes spraying to 20 receivers */
index 35b07f1..3c88d1f 100644 (file)
@@ -9,9 +9,7 @@
  *  http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c
  * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
  */
-#include "../util/util.h"
 #include <subcmd/parse-options.h>
-#include "../builtin.h"
 #include "bench.h"
 
 #include <unistd.h>
index 4e4d2e7..8db8fc9 100644 (file)
@@ -27,6 +27,7 @@
 #include "util/sort.h"
 #include "util/hist.h"
 #include "util/dso.h"
+#include "util/machine.h"
 #include "util/map.h"
 #include "util/session.h"
 #include "util/tool.h"
@@ -39,6 +40,7 @@
 #include <dlfcn.h>
 #include <errno.h>
 #include <linux/bitmap.h>
+#include <linux/err.h>
 
 struct perf_annotate {
        struct perf_tool tool;
@@ -583,8 +585,8 @@ int cmd_annotate(int argc, const char **argv)
        data.path = input_name;
 
        annotate.session = perf_session__new(&data, false, &annotate.tool);
-       if (annotate.session == NULL)
-               return -1;
+       if (IS_ERR(annotate.session))
+               return PTR_ERR(annotate.session);
 
        annotate.has_br_stack = perf_header__has_feat(&annotate.session->header,
                                                      HEADER_BRANCH_STACK);
index 1a69eb5..39efa51 100644 (file)
@@ -28,6 +28,7 @@
 #include "util/util.h"
 #include "util/probe-file.h"
 #include <linux/string.h>
+#include <linux/err.h>
 
 static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
 {
@@ -422,8 +423,8 @@ int cmd_buildid_cache(int argc, const char **argv)
                data.force = force;
 
                session = perf_session__new(&data, false, NULL);
-               if (session == NULL)
-                       return -1;
+               if (IS_ERR(session))
+                       return PTR_ERR(session);
        }
 
        if (symbol__init(session ? &session->header.env : NULL) < 0)
index 5a0d8b3..e3ef755 100644 (file)
@@ -18,6 +18,7 @@
 #include "util/symbol.h"
 #include "util/data.h"
 #include <errno.h>
+#include <linux/err.h>
 
 static int sysfs__fprintf_build_id(FILE *fp)
 {
@@ -65,8 +66,8 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
                goto out;
 
        session = perf_session__new(&data, false, &build_id__mark_dso_hit_ops);
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        /*
         * We take all buildids when the file contains AUX area tracing data
index b09b12e..3542b6a 100644 (file)
@@ -13,6 +13,7 @@
 #include <errno.h>
 #include <inttypes.h>
 #include <linux/compiler.h>
+#include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/stringify.h>
 #include <linux/zalloc.h>
@@ -20,6 +21,7 @@
 #include <sys/param.h>
 #include "debug.h"
 #include "builtin.h"
+#include <perf/cpumap.h>
 #include <subcmd/pager.h>
 #include <subcmd/parse-options.h>
 #include "map_symbol.h"
@@ -2780,8 +2782,9 @@ static int perf_c2c__report(int argc, const char **argv)
        }
 
        session = perf_session__new(&data, 0, &c2c.tool);
-       if (session == NULL) {
-               pr_debug("No memory for session\n");
+       if (IS_ERR(session)) {
+               err = PTR_ERR(session);
+               pr_debug("Error creating perf session\n");
                goto out;
        }
 
index 42d8157..2603015 100644 (file)
@@ -9,7 +9,6 @@
 
 #include "util/cache.h"
 #include <subcmd/parse-options.h>
-#include "util/util.h"
 #include "util/debug.h"
 #include "util/config.h"
 #include <linux/string.h>
index 827e480..c37a786 100644 (file)
@@ -23,6 +23,7 @@
 #include "util/time-utils.h"
 #include "util/annotate.h"
 #include "util/map.h"
+#include <linux/err.h>
 #include <linux/zalloc.h>
 #include <subcmd/pager.h>
 #include <subcmd/parse-options.h>
@@ -1153,9 +1154,9 @@ static int check_file_brstack(void)
 
        data__for_each_file(i, d) {
                d->session = perf_session__new(&d->data, false, &pdiff.tool);
-               if (!d->session) {
+               if (IS_ERR(d->session)) {
                        pr_err("Failed to open %s\n", d->data.path);
-                       return -1;
+                       return PTR_ERR(d->session);
                }
 
                has_br_stack = perf_header__has_feat(&d->session->header,
@@ -1185,9 +1186,9 @@ static int __cmd_diff(void)
 
        data__for_each_file(i, d) {
                d->session = perf_session__new(&d->data, false, &pdiff.tool);
-               if (!d->session) {
+               if (IS_ERR(d->session)) {
+                       ret = PTR_ERR(d->session);
                        pr_err("Failed to open %s\n", d->data.path);
-                       ret = -1;
                        goto out_delete;
                }
 
index 238fa38..4405019 100644 (file)
@@ -5,18 +5,18 @@
  */
 #include "builtin.h"
 
-#include "util/util.h"
-
 #include <linux/list.h>
 
 #include "perf.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
+#include "util/evsel_fprintf.h"
 #include "util/parse-events.h"
 #include <subcmd/parse-options.h>
 #include "util/session.h"
 #include "util/data.h"
 #include "util/debug.h"
+#include <linux/err.h>
 
 static int __cmd_evlist(const char *file_name, struct perf_attr_details *details)
 {
@@ -30,8 +30,8 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
        bool has_tracepoint = false;
 
        session = perf_session__new(&data, 0, NULL);
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        evlist__for_each_entry(session->evlist, pos) {
                perf_evsel__fprintf(pos, details, stdout);
index c14f40b..372ecb3 100644 (file)
@@ -21,7 +21,9 @@
 #include "util/auxtrace.h"
 #include "util/jit.h"
 #include "util/symbol.h"
+#include "util/synthetic-events.h"
 #include "util/thread.h"
+#include <linux/err.h>
 
 #include <subcmd/parse-options.h>
 
@@ -834,8 +836,8 @@ int cmd_inject(int argc, const char **argv)
 
        data.path = inject.input_name;
        inject.session = perf_session__new(&data, true, &inject.tool);
-       if (inject.session == NULL)
-               return -1;
+       if (IS_ERR(inject.session))
+               return PTR_ERR(inject.session);
 
        if (zstd_init(&(inject.session->zstd_data), 0) < 0)
                pr_warning("Decompression initialization failed.\n");
index b5682be..1e61e35 100644 (file)
@@ -14,6 +14,7 @@
 #include "util/tool.h"
 #include "util/callchain.h"
 #include "util/time-utils.h"
+#include <linux/err.h>
 
 #include <subcmd/pager.h>
 #include <subcmd/parse-options.h>
@@ -1956,8 +1957,8 @@ int cmd_kmem(int argc, const char **argv)
        data.path = input_name;
 
        kmem_session = session = perf_session__new(&data, false, &perf_kmem);
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        ret = -1;
 
index 0a4fcbe..2227e2f 100644 (file)
@@ -5,6 +5,7 @@
 #include "util/build-id.h"
 #include "util/evsel.h"
 #include "util/evlist.h"
+#include "util/mmap.h"
 #include "util/term.h"
 #include "util/symbol.h"
 #include "util/thread.h"
 #include "util/debug.h"
 #include "util/tool.h"
 #include "util/stat.h"
+#include "util/synthetic-events.h"
 #include "util/top.h"
 #include "util/data.h"
 #include "util/ordered-events.h"
+#include "util/kvm-stat.h"
 #include "ui/ui.h"
 
 #include <sys/prctl.h>
@@ -31,6 +34,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 
+#include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/time64.h>
@@ -58,7 +62,6 @@ static const char *get_filename_for_perf_kvm(void)
 }
 
 #ifdef HAVE_KVM_STAT_SUPPORT
-#include "util/kvm-stat.h"
 
 void exit_event_get_key(struct evsel *evsel,
                        struct perf_sample *sample,
@@ -748,7 +751,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
 {
        struct evlist *evlist = kvm->evlist;
        union perf_event *event;
-       struct perf_mmap *md;
+       struct mmap *md;
        u64 timestamp;
        s64 n = 0;
        int err;
@@ -799,7 +802,7 @@ static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm)
        s64 n, ntotal = 0;
        u64 flush_time = ULLONG_MAX, mmap_time;
 
-       for (i = 0; i < kvm->evlist->nr_mmaps; i++) {
+       for (i = 0; i < kvm->evlist->core.nr_mmaps; i++) {
                n = perf_kvm__mmap_read_idx(kvm, i, &mmap_time);
                if (n < 0)
                        return -1;
@@ -964,10 +967,10 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm)
                goto out;
        }
 
-       if (perf_evlist__add_pollfd(kvm->evlist, kvm->timerfd) < 0)
+       if (evlist__add_pollfd(kvm->evlist, kvm->timerfd) < 0)
                goto out;
 
-       nr_stdin = perf_evlist__add_pollfd(kvm->evlist, fileno(stdin));
+       nr_stdin = evlist__add_pollfd(kvm->evlist, fileno(stdin));
        if (nr_stdin < 0)
                goto out;
 
@@ -978,7 +981,7 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm)
        evlist__enable(kvm->evlist);
 
        while (!done) {
-               struct fdarray *fda = &kvm->evlist->pollfd;
+               struct fdarray *fda = &kvm->evlist->core.pollfd;
                int rc;
 
                rc = perf_kvm__mmap_read(kvm);
@@ -1058,7 +1061,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
                goto out;
        }
 
-       if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages) < 0) {
+       if (evlist__mmap(evlist, kvm->opts.mmap_pages) < 0) {
                ui__error("Failed to mmap the events: %s\n",
                          str_error_r(errno, sbuf, sizeof(sbuf)));
                evlist__close(evlist);
@@ -1090,9 +1093,9 @@ static int read_events(struct perf_kvm_stat *kvm)
 
        kvm->tool = eops;
        kvm->session = perf_session__new(&file, false, &kvm->tool);
-       if (!kvm->session) {
+       if (IS_ERR(kvm->session)) {
                pr_err("Initializing perf session failed\n");
-               return -1;
+               return PTR_ERR(kvm->session);
        }
 
        symbol__init(&kvm->session->header.env);
@@ -1445,8 +1448,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
         * perf session
         */
        kvm->session = perf_session__new(&data, false, &kvm->tool);
-       if (kvm->session == NULL) {
-               err = -1;
+       if (IS_ERR(kvm->session)) {
+               err = PTR_ERR(kvm->session);
                goto out;
        }
        kvm->session->evlist = kvm->evlist;
@@ -1513,11 +1516,21 @@ perf_stat:
 }
 #endif /* HAVE_KVM_STAT_SUPPORT */
 
+int __weak kvm_add_default_arch_event(int *argc __maybe_unused,
+                                       const char **argv __maybe_unused)
+{      /* Weak fallback: adds nothing and never touches argc/argv. NOTE(review): presumably overridden by arch code - confirm. */
+       return 0; /* __cmd_record() bails out with -EINVAL on any non-zero return */
+}
+
 static int __cmd_record(const char *file_name, int argc, const char **argv)
 {
-       int rec_argc, i = 0, j;
+       int rec_argc, i = 0, j, ret;
        const char **rec_argv;
 
+       ret = kvm_add_default_arch_event(&argc, argv);
+       if (ret)
+               return -EINVAL;
+
        rec_argc = argc + 2;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
        rec_argv[i++] = strdup("record");
index e290f6b..08e62ae 100644 (file)
@@ -81,9 +81,9 @@ int cmd_list(int argc, const char **argv)
                                                long_desc_flag, details_flag);
                else if (strcmp(argv[i], "sdt") == 0)
                        print_sdt_events(NULL, NULL, raw_dump);
-               else if (strcmp(argv[i], "metric") == 0)
+               else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0)
                        metricgroup__print(true, false, NULL, raw_dump, details_flag);
-               else if (strcmp(argv[i], "metricgroup") == 0)
+               else if (strcmp(argv[i], "metricgroup") == 0 || strcmp(argv[i], "metricgroups") == 0)
                        metricgroup__print(false, true, NULL, raw_dump, details_flag);
                else if ((sep = strchr(argv[i], ':')) != NULL) {
                        int sep_idx;
index 4c2b7f4..474dfd5 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/hash.h>
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
+#include <linux/err.h>
 
 static struct perf_session *session;
 
@@ -872,9 +873,9 @@ static int __cmd_report(bool display_info)
        };
 
        session = perf_session__new(&data, false, &eops);
-       if (!session) {
+       if (IS_ERR(session)) {
                pr_err("Initializing perf session failed\n");
-               return -1;
+               return PTR_ERR(session);
        }
 
        symbol__init(&session->header.env);
index 27d2bde..a13f581 100644 (file)
@@ -17,6 +17,7 @@
 #include "util/dso.h"
 #include "util/map.h"
 #include "util/symbol.h"
+#include <linux/err.h>
 
 #define MEM_OPERATION_LOAD     0x1
 #define MEM_OPERATION_STORE    0x2
@@ -249,8 +250,8 @@ static int report_raw_events(struct perf_mem *mem)
        struct perf_session *session = perf_session__new(&data, false,
                                                         &mem->tool);
 
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        if (mem->cpu_list) {
                ret = perf_session__cpu_bitmap(session, mem->cpu_list,
index 1447004..2333286 100644 (file)
@@ -20,6 +20,7 @@
 #include "util/evlist.h"
 #include "util/evsel.h"
 #include "util/debug.h"
+#include "util/mmap.h"
 #include "util/target.h"
 #include "util/session.h"
 #include "util/tool.h"
@@ -38,6 +39,7 @@
 #include "util/trigger.h"
 #include "util/perf-hooks.h"
 #include "util/cpu-set-sched.h"
+#include "util/synthetic-events.h"
 #include "util/time-utils.h"
 #include "util/units.h"
 #include "util/bpf-event.h"
@@ -53,6 +55,7 @@
 #include <signal.h>
 #include <sys/mman.h>
 #include <sys/wait.h>
+#include <linux/err.h>
 #include <linux/string.h>
 #include <linux/time64.h>
 #include <linux/zalloc.h>
@@ -117,7 +120,7 @@ static bool switch_output_time(struct record *rec)
               trigger_is_ready(&switch_output_trigger);
 }
 
-static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
+static int record__write(struct record *rec, struct mmap *map __maybe_unused,
                         void *bf, size_t size)
 {
        struct perf_data_file *file = &rec->session->data->file;
@@ -166,7 +169,7 @@ static int record__aio_write(struct aiocb *cblock, int trace_fd,
        return rc;
 }
 
-static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
+static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
 {
        void *rem_buf;
        off_t rem_off;
@@ -212,7 +215,7 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
        return rc;
 }
 
-static int record__aio_sync(struct perf_mmap *md, bool sync_all)
+static int record__aio_sync(struct mmap *md, bool sync_all)
 {
        struct aiocb **aiocb = md->aio.aiocb;
        struct aiocb *cblocks = md->aio.cblocks;
@@ -253,12 +256,12 @@ struct record_aio {
        size_t          size;
 };
 
-static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t size)
+static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
 {
        struct record_aio *aio = to;
 
        /*
-        * map->base data pointed by buf is copied into free map->aio.data[] buffer
+        * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
         * to release space in the kernel buffer as fast as possible, calling
         * perf_mmap__consume() from perf_mmap__push() function.
         *
@@ -298,7 +301,7 @@ static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t
        return size;
 }
 
-static int record__aio_push(struct record *rec, struct perf_mmap *map, off_t *off)
+static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
 {
        int ret, idx;
        int trace_fd = rec->session->data->file.fd;
@@ -349,15 +352,15 @@ static void record__aio_mmap_read_sync(struct record *rec)
 {
        int i;
        struct evlist *evlist = rec->evlist;
-       struct perf_mmap *maps = evlist->mmap;
+       struct mmap *maps = evlist->mmap;
 
        if (!record__aio_enabled(rec))
                return;
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
-               struct perf_mmap *map = &maps[i];
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
+               struct mmap *map = &maps[i];
 
-               if (map->base)
+               if (map->core.base)
                        record__aio_sync(map, true);
        }
 }
@@ -385,7 +388,7 @@ static int record__aio_parse(const struct option *opt,
 #else /* HAVE_AIO_SUPPORT */
 static int nr_cblocks_max = 0;
 
-static int record__aio_push(struct record *rec __maybe_unused, struct perf_mmap *map __maybe_unused,
+static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
                            off_t *off __maybe_unused)
 {
        return -1;
@@ -437,7 +440,7 @@ static int record__mmap_flush_parse(const struct option *opt,
        if (!opts->mmap_flush)
                opts->mmap_flush = MMAP_FLUSH_DEFAULT;
 
-       flush_max = perf_evlist__mmap_size(opts->mmap_pages);
+       flush_max = evlist__mmap_size(opts->mmap_pages);
        flush_max /= 4;
        if (opts->mmap_flush > flush_max)
                opts->mmap_flush = flush_max;
@@ -480,7 +483,7 @@ static int process_synthesized_event(struct perf_tool *tool,
        return record__write(rec, NULL, event, event->header.size);
 }
 
-static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
+static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
 {
        struct record *rec = to;
 
@@ -525,7 +528,7 @@ static void record__sig_exit(void)
 #ifdef HAVE_AUXTRACE_SUPPORT
 
 static int record__process_auxtrace(struct perf_tool *tool,
-                                   struct perf_mmap *map,
+                                   struct mmap *map,
                                    union perf_event *event, void *data1,
                                    size_t len1, void *data2, size_t len2)
 {
@@ -563,7 +566,7 @@ static int record__process_auxtrace(struct perf_tool *tool,
 }
 
 static int record__auxtrace_mmap_read(struct record *rec,
-                                     struct perf_mmap *map)
+                                     struct mmap *map)
 {
        int ret;
 
@@ -579,7 +582,7 @@ static int record__auxtrace_mmap_read(struct record *rec,
 }
 
 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
-                                              struct perf_mmap *map)
+                                              struct mmap *map)
 {
        int ret;
 
@@ -600,8 +603,8 @@ static int record__auxtrace_read_snapshot_all(struct record *rec)
        int i;
        int rc = 0;
 
-       for (i = 0; i < rec->evlist->nr_mmaps; i++) {
-               struct perf_mmap *map = &rec->evlist->mmap[i];
+       for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
+               struct mmap *map = &rec->evlist->mmap[i];
 
                if (!map->auxtrace_mmap.base)
                        continue;
@@ -666,7 +669,7 @@ static int record__auxtrace_init(struct record *rec)
 
 static inline
 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
-                              struct perf_mmap *map __maybe_unused)
+                              struct mmap *map __maybe_unused)
 {
        return 0;
 }
@@ -705,7 +708,7 @@ static int record__mmap_evlist(struct record *rec,
        if (opts->affinity != PERF_AFFINITY_SYS)
                cpu__setup_cpunode_map();
 
-       if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
+       if (evlist__mmap_ex(evlist, opts->mmap_pages,
                                 opts->auxtrace_mmap_pages,
                                 opts->auxtrace_snapshot_mode,
                                 opts->nr_cblocks, opts->affinity,
@@ -753,9 +756,9 @@ static int record__open(struct record *rec)
                if (perf_evlist__add_dummy(evlist))
                        return -ENOMEM;
 
-               pos = perf_evlist__first(evlist);
+               pos = evlist__first(evlist);
                pos->tracking = 0;
-               pos = perf_evlist__last(evlist);
+               pos = evlist__last(evlist);
                pos->tracking = 1;
                pos->core.attr.enable_on_exec = 1;
        }
@@ -786,6 +789,17 @@ try_again:
                pos->supported = true;
        }
 
+       if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
+               pr_warning(
+"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
+"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
+"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
+"file is not found in the buildid cache or in the vmlinux path.\n\n"
+"Samples in kernel modules won't be resolved at all.\n\n"
+"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
+"even with a suitable vmlinux or kallsyms file.\n\n");
+       }
+
        if (perf_evlist__apply_filters(evlist, &pos)) {
                pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
                        pos->filter, perf_evsel__name(pos), errno,
@@ -888,7 +902,7 @@ static struct perf_event_header finished_round_event = {
        .type = PERF_RECORD_FINISHED_ROUND,
 };
 
-static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
+static void record__adjust_affinity(struct record *rec, struct mmap *map)
 {
        if (rec->opts.affinity != PERF_AFFINITY_SYS &&
            !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
@@ -935,7 +949,7 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
        u64 bytes_written = rec->bytes_written;
        int i;
        int rc = 0;
-       struct perf_mmap *maps;
+       struct mmap *maps;
        int trace_fd = rec->data.file.fd;
        off_t off = 0;
 
@@ -952,20 +966,20 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
        if (record__aio_enabled(rec))
                off = record__aio_get_pos(trace_fd);
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
                u64 flush = 0;
-               struct perf_mmap *map = &maps[i];
+               struct mmap *map = &maps[i];
 
-               if (map->base) {
+               if (map->core.base) {
                        record__adjust_affinity(rec, map);
                        if (synch) {
-                               flush = map->flush;
-                               map->flush = 1;
+                               flush = map->core.flush;
+                               map->core.flush = 1;
                        }
                        if (!record__aio_enabled(rec)) {
                                if (perf_mmap__push(map, rec, record__pushfn) < 0) {
                                        if (synch)
-                                               map->flush = flush;
+                                               map->core.flush = flush;
                                        rc = -1;
                                        goto out;
                                }
@@ -973,13 +987,13 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
                                if (record__aio_push(rec, map, &off) < 0) {
                                        record__aio_set_pos(trace_fd, off);
                                        if (synch)
-                                               map->flush = flush;
+                                               map->core.flush = flush;
                                        rc = -1;
                                        goto out;
                                }
                        }
                        if (synch)
-                               map->flush = flush;
+                               map->core.flush = flush;
                }
 
                if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
@@ -1180,23 +1194,14 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
 static void snapshot_sig_handler(int sig);
 static void alarm_sig_handler(int sig);
 
-int __weak
-perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
-                           struct perf_tool *tool __maybe_unused,
-                           perf_event__handler_t process __maybe_unused,
-                           struct machine *machine __maybe_unused)
-{
-       return 0;
-}
-
 static const struct perf_event_mmap_page *
 perf_evlist__pick_pc(struct evlist *evlist)
 {
        if (evlist) {
-               if (evlist->mmap && evlist->mmap[0].base)
-                       return evlist->mmap[0].base;
-               if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
-                       return evlist->overwrite_mmap[0].base;
+               if (evlist->mmap && evlist->mmap[0].core.base)
+                       return evlist->mmap[0].core.base;
+               if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
+                       return evlist->overwrite_mmap[0].core.base;
        }
        return NULL;
 }
@@ -1362,9 +1367,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
        }
 
        session = perf_session__new(data, false, tool);
-       if (session == NULL) {
+       if (IS_ERR(session)) {
                pr_err("Perf session creation failed.\n");
-               return -1;
+               return PTR_ERR(session);
        }
 
        fd = perf_data__fd(data);
@@ -1407,7 +1412,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                err = -1;
                goto out_child;
        }
-       session->header.env.comp_mmap_len = session->evlist->mmap_len;
+       session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
 
        err = bpf__apply_obj_config();
        if (err) {
@@ -1610,7 +1615,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                if (hits == rec->samples) {
                        if (done || draining)
                                break;
-                       err = perf_evlist__poll(rec->evlist, -1);
+                       err = evlist__poll(rec->evlist, -1);
                        /*
                         * Propagate error, only if there's any. Ignore positive
                         * number of returned events and interrupt error.
@@ -1619,7 +1624,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                                err = 0;
                        waking++;
 
-                       if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
+                       if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
                                draining = true;
                }
 
@@ -1976,7 +1981,7 @@ out_free:
 
 static void switch_output_size_warn(struct record *rec)
 {
-       u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
+       u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
        struct switch_output *s = &rec->switch_output;
 
        wakeup_size /= 2;
@@ -2371,16 +2376,6 @@ int cmd_record(int argc, const char **argv)
 
        err = -ENOMEM;
 
-       if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
-               pr_warning(
-"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
-"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
-"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
-"file is not found in the buildid cache or in the vmlinux path.\n\n"
-"Samples in kernel modules won't be resolved at all.\n\n"
-"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
-"even with a suitable vmlinux or kallsyms file.\n\n");
-
        if (rec->no_buildid_cache || rec->no_buildid) {
                disable_buildid_cache();
        } else if (rec->switch_output.enabled) {
index b18fab9..aae0e57 100644 (file)
@@ -48,7 +48,7 @@
 #include "util/auxtrace.h"
 #include "util/units.h"
 #include "util/branch.h"
-#include "util/util.h"
+#include "util/util.h" // perf_tip()
 #include "ui/ui.h"
 #include "ui/progress.h"
 
@@ -1269,8 +1269,8 @@ int cmd_report(int argc, const char **argv)
 
 repeat:
        session = perf_session__new(&data, false, &report.tool);
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        ret = evswitch__init(&report.evswitch, session->evlist, stderr);
        if (ret)
index ec96d64..5cacc4f 100644 (file)
@@ -3,8 +3,10 @@
 #include "perf.h"
 #include "perf-sys.h"
 
+#include "util/cpumap.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
+#include "util/evsel_fprintf.h"
 #include "util/symbol.h"
 #include "util/thread.h"
 #include "util/header.h"
@@ -23,6 +25,7 @@
 #include "util/trace-event.h"
 
 #include "util/debug.h"
+#include "util/event.h"
 
 #include <linux/kernel.h>
 #include <linux/log2.h>
@@ -36,7 +39,9 @@
 #include <pthread.h>
 #include <math.h>
 #include <api/fs/fs.h>
+#include <perf/cpumap.h>
 #include <linux/time64.h>
+#include <linux/err.h>
 
 #include <linux/ctype.h>
 
@@ -1794,9 +1799,9 @@ static int perf_sched__read_events(struct perf_sched *sched)
        int rc = -1;
 
        session = perf_session__new(&data, false, &sched->tool);
-       if (session == NULL) {
-               pr_debug("No Memory for session\n");
-               return -1;
+       if (IS_ERR(session)) {
+               pr_debug("Error creating perf session");
+               return PTR_ERR(session);
        }
 
        symbol__init(&session->header.env);
@@ -2051,7 +2056,7 @@ static void timehist_print_sample(struct perf_sched *sched,
                            EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE |
                            EVSEL__PRINT_CALLCHAIN_ARROW |
                            EVSEL__PRINT_SKIP_IGNORED,
-                           &callchain_cursor, stdout);
+                           &callchain_cursor, symbol_conf.bt_stop_list,  stdout);
 
 out:
        printf("\n");
@@ -2986,8 +2991,8 @@ static int perf_sched__timehist(struct perf_sched *sched)
        symbol_conf.use_callchain = sched->show_callchain;
 
        session = perf_session__new(&data, false, &sched->tool);
-       if (session == NULL)
-               return -ENOMEM;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        evlist = session->evlist;
 
index e079b34..286fc70 100644 (file)
@@ -17,6 +17,7 @@
 #include "util/trace-event.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
+#include "util/evsel_fprintf.h"
 #include "util/evswitch.h"
 #include "util/sort.h"
 #include "util/data.h"
@@ -52,6 +53,7 @@
 #include <unistd.h>
 #include <subcmd/pager.h>
 #include <perf/evlist.h>
+#include <linux/err.h>
 #include "util/record.h"
 #include "util/util.h"
 #include "perf.h"
@@ -1324,7 +1326,8 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
                } else
                        printed += fprintf(fp, "\n");
 
-               printed += sample__fprintf_sym(sample, al, 0, print_opts, cursor, fp);
+               printed += sample__fprintf_sym(sample, al, 0, print_opts, cursor,
+                                              symbol_conf.bt_stop_list, fp);
        }
 
        /* print branch_to information */
@@ -1866,7 +1869,8 @@ static void process_event(struct perf_script *script,
                        cursor = &callchain_cursor;
 
                fputc(cursor ? '\n' : ' ', fp);
-               sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor, fp);
+               sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor,
+                                   symbol_conf.bt_stop_list, fp);
        }
 
        if (PRINT_FIELD(IREGS))
@@ -1915,7 +1919,7 @@ static void __process_stat(struct evsel *counter, u64 tstamp)
        int cpu, thread;
        static int header_printed;
 
-       if (counter->system_wide)
+       if (counter->core.system_wide)
                nthreads = 1;
 
        if (!header_printed) {
@@ -2042,7 +2046,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
                return err;
 
        evlist = *pevlist;
-       evsel = perf_evlist__last(*pevlist);
+       evsel = evlist__last(*pevlist);
 
        if (!evsel->priv) {
                if (scr->per_event_dump) {
@@ -3083,8 +3087,8 @@ int find_scripts(char **scripts_array, char **scripts_path_array, int num,
        int i = 0;
 
        session = perf_session__new(&data, false, NULL);
-       if (!session)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
 
@@ -3754,8 +3758,8 @@ int cmd_script(int argc, const char **argv)
        }
 
        session = perf_session__new(&data, false, &script.tool);
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        if (header || header_only) {
                script.tool.show_feat_hdr = SHOW_FEAT_HEADER;
index 7e17bf9..468fc49 100644 (file)
@@ -61,6 +61,7 @@
 #include "util/tool.h"
 #include "util/string2.h"
 #include "util/metricgroup.h"
+#include "util/synthetic-events.h"
 #include "util/target.h"
 #include "util/time-utils.h"
 #include "util/top.h"
@@ -82,6 +83,7 @@
 #include <unistd.h>
 #include <sys/time.h>
 #include <sys/resource.h>
+#include <linux/err.h>
 
 #include <linux/ctype.h>
 #include <perf/evlist.h>
@@ -233,7 +235,7 @@ static int write_stat_round_event(u64 tm, u64 type)
 #define WRITE_STAT_ROUND_EVENT(time, interval) \
        write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
 
-#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
+#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
 
 static int
 perf_evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
@@ -276,7 +278,7 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
        if (!counter->supported)
                return -ENOENT;
 
-       if (counter->system_wide)
+       if (counter->core.system_wide)
                nthreads = 1;
 
        for (thread = 0; thread < nthreads; thread++) {
@@ -540,8 +542,8 @@ try_again:
                if (err < 0)
                        return err;
 
-               err = perf_stat_synthesize_config(&stat_config, NULL, evsel_list,
-                                                 process_synthesized_event, is_pipe);
+               err = perf_event__synthesize_stat_events(&stat_config, NULL, evsel_list,
+                                                        process_synthesized_event, is_pipe);
                if (err < 0)
                        return err;
        }
@@ -822,18 +824,6 @@ static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
        return cpu_map__get_core(map, cpu, NULL);
 }
 
-static int cpu_map__get_max(struct perf_cpu_map *map)
-{
-       int i, max = -1;
-
-       for (i = 0; i < map->nr; i++) {
-               if (map->map[i] > max)
-                       max = map->map[i];
-       }
-
-       return max;
-}
-
 static int perf_stat__get_aggr(struct perf_stat_config *config,
                               aggr_get_id_t get_id, struct perf_cpu_map *map, int idx)
 {
@@ -928,7 +918,7 @@ static int perf_stat_init_aggr_mode(void)
         * taking the highest cpu number to be the size of
         * the aggregation translate cpumap.
         */
-       nr = cpu_map__get_max(evsel_list->core.cpus);
+       nr = perf_cpu_map__max(evsel_list->core.cpus);
        stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1);
        return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
 }
@@ -1447,9 +1437,9 @@ static int __cmd_record(int argc, const char **argv)
        }
 
        session = perf_session__new(data, false, NULL);
-       if (session == NULL) {
-               pr_err("Perf session creation failed.\n");
-               return -1;
+       if (IS_ERR(session)) {
+               pr_err("Perf session creation failed\n");
+               return PTR_ERR(session);
        }
 
        init_features(session);
@@ -1646,8 +1636,8 @@ static int __cmd_report(int argc, const char **argv)
        perf_stat.data.mode = PERF_DATA_MODE_READ;
 
        session = perf_session__new(&perf_stat.data, false, &perf_stat.tool);
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        perf_stat.session  = session;
        stat_config.output = stderr;
@@ -1681,7 +1671,7 @@ static void setup_system_wide(int forks)
                struct evsel *counter;
 
                evlist__for_each_entry(evsel_list, counter) {
-                       if (!counter->system_wide)
+                       if (!counter->core.system_wide)
                                return;
                }
 
@@ -1963,8 +1953,11 @@ int cmd_stat(int argc, const char **argv)
                        fprintf(output, "[ perf stat: executing run #%d ... ]\n",
                                run_idx + 1);
 
+               if (run_idx != 0)
+                       perf_evlist__reset_prev_raw_counts(evsel_list);
+
                status = run_perf_stat(argc, argv, run_idx);
-               if (forever && status != -1) {
+               if (forever && status != -1 && !interval) {
                        print_counters(NULL, argc, argv);
                        perf_stat__reset_stats();
                }
index e0e8226..9e84fae 100644 (file)
@@ -35,6 +35,7 @@
 #include "util/tool.h"
 #include "util/data.h"
 #include "util/debug.h"
+#include <linux/err.h>
 
 #ifdef LACKS_OPEN_MEMSTREAM_PROTOTYPE
 FILE *open_memstream(char **ptr, size_t *sizeloc);
@@ -1601,8 +1602,8 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name)
                                                         &tchart->tool);
        int ret = -EINVAL;
 
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        symbol__init(&session->header.env);
 
index 726e3f2..1f60124 100644 (file)
 #include "util/dso.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
+#include "util/evsel_config.h"
 #include "util/event.h"
 #include "util/machine.h"
 #include "util/map.h"
+#include "util/mmap.h"
 #include "util/session.h"
 #include "util/symbol.h"
+#include "util/synthetic-events.h"
 #include "util/top.h"
 #include "util/util.h"
 #include <linux/rbtree.h>
@@ -76,6 +79,7 @@
 #include <linux/stringify.h>
 #include <linux/time64.h>
 #include <linux/types.h>
+#include <linux/err.h>
 
 #include <linux/ctype.h>
 
@@ -528,7 +532,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
                                prompt_integer(&counter, "Enter details event counter");
 
                                if (counter >= top->evlist->core.nr_entries) {
-                                       top->sym_evsel = perf_evlist__first(top->evlist);
+                                       top->sym_evsel = evlist__first(top->evlist);
                                        fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel));
                                        sleep(1);
                                        break;
@@ -537,7 +541,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
                                        if (top->sym_evsel->idx == counter)
                                                break;
                        } else
-                               top->sym_evsel = perf_evlist__first(top->evlist);
+                               top->sym_evsel = evlist__first(top->evlist);
                        break;
                case 'f':
                        prompt_integer(&top->count_filter, "Enter display event count filter");
@@ -861,7 +865,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 {
        struct record_opts *opts = &top->record_opts;
        struct evlist *evlist = top->evlist;
-       struct perf_mmap *md;
+       struct mmap *md;
        union perf_event *event;
 
        md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
@@ -901,7 +905,7 @@ static void perf_top__mmap_read(struct perf_top *top)
        if (overwrite)
                perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
 
-       for (i = 0; i < top->evlist->nr_mmaps; i++)
+       for (i = 0; i < top->evlist->core.nr_mmaps; i++)
                perf_top__mmap_read_idx(top, i);
 
        if (overwrite) {
@@ -959,7 +963,7 @@ static int perf_top__overwrite_check(struct perf_top *top)
                /* has term for current event */
                if ((overwrite < 0) && (set >= 0)) {
                        /* if it's first event, set overwrite */
-                       if (evsel == perf_evlist__first(evlist))
+                       if (evsel == evlist__first(evlist))
                                overwrite = set;
                        else
                                return -1;
@@ -983,7 +987,7 @@ static int perf_top_overwrite_fallback(struct perf_top *top,
                return 0;
 
        /* only fall back when first event fails */
-       if (evsel != perf_evlist__first(evlist))
+       if (evsel != evlist__first(evlist))
                return 0;
 
        evlist__for_each_entry(evlist, counter)
@@ -1040,7 +1044,7 @@ try_again:
                }
        }
 
-       if (perf_evlist__mmap(evlist, opts->mmap_pages) < 0) {
+       if (evlist__mmap(evlist, opts->mmap_pages) < 0) {
                ui__error("Failed to mmap with %d (%s)\n",
                            errno, str_error_r(errno, msg, sizeof(msg)));
                goto out_err;
@@ -1304,7 +1308,7 @@ static int __cmd_top(struct perf_top *top)
        }
 
        /* Wait for a minimal set of events before starting the snapshot */
-       perf_evlist__poll(top->evlist, 100);
+       evlist__poll(top->evlist, 100);
 
        perf_top__mmap_read(top);
 
@@ -1314,7 +1318,7 @@ static int __cmd_top(struct perf_top *top)
                perf_top__mmap_read(top);
 
                if (opts->overwrite || (hits == top->samples))
-                       ret = perf_evlist__poll(top->evlist, 100);
+                       ret = evlist__poll(top->evlist, 100);
 
                if (resize) {
                        perf_top__resize(top);
@@ -1641,7 +1645,7 @@ int cmd_top(int argc, const char **argv)
                goto out_delete_evlist;
        }
 
-       top.sym_evsel = perf_evlist__first(top.evlist);
+       top.sym_evsel = evlist__first(top.evlist);
 
        if (!callchain_param.enabled) {
                symbol_conf.cumulate_callchain = false;
@@ -1671,8 +1675,8 @@ int cmd_top(int argc, const char **argv)
        }
 
        top.session = perf_session__new(NULL, false, NULL);
-       if (top.session == NULL) {
-               status = -1;
+       if (IS_ERR(top.session)) {
+               status = PTR_ERR(top.session);
                goto out_delete_evlist;
        }
 
index 0f633f0..bb5130d 100644 (file)
 #include "util/dso.h"
 #include "util/env.h"
 #include "util/event.h"
+#include "util/evsel.h"
+#include "util/evsel_fprintf.h"
+#include "util/synthetic-events.h"
 #include "util/evlist.h"
 #include "util/evswitch.h"
+#include "util/mmap.h"
 #include <subcmd/pager.h>
 #include <subcmd/exec-cmd.h>
 #include "util/machine.h"
@@ -2074,7 +2078,7 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam
                                        EVSEL__PRINT_DSO |
                                        EVSEL__PRINT_UNKNOWN_AS_ADDR;
 
-       return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
+       return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, symbol_conf.bt_stop_list, trace->output);
 }
 
 static const char *errno_to_name(struct evsel *evsel, int err)
@@ -3408,7 +3412,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
        if (trace->dump.map)
                bpf_map__fprintf(trace->dump.map, trace->output);
 
-       err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
+       err = evlist__mmap(evlist, trace->opts.mmap_pages);
        if (err < 0)
                goto out_error_mmap;
 
@@ -3425,7 +3429,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 
        trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
                                  evlist->core.threads->nr > 1 ||
-                                 perf_evlist__first(evlist)->core.attr.inherit;
+                                 evlist__first(evlist)->core.attr.inherit;
 
        /*
         * Now that we already used evsel->core.attr to ask the kernel to setup the
@@ -3441,9 +3445,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 again:
        before = trace->nr_events;
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
                union perf_event *event;
-               struct perf_mmap *md;
+               struct mmap *md;
 
                md = &evlist->mmap[i];
                if (perf_mmap__read_init(md) < 0)
@@ -3472,8 +3476,8 @@ again:
        if (trace->nr_events == before) {
                int timeout = done ? 100 : -1;
 
-               if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
-                       if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
+               if (!draining && evlist__poll(evlist, timeout) > 0) {
+                       if (evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
                                draining = true;
 
                        goto again;
@@ -3584,8 +3588,8 @@ static int trace__replay(struct trace *trace)
        trace->multiple_threads = true;
 
        session = perf_session__new(&data, false, &trace->tool);
-       if (session == NULL)
-               return -1;
+       if (IS_ERR(session))
+               return PTR_ERR(session);
 
        if (trace->opts.target.pid)
                symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
index eaeb8cb..1e148bb 100644 (file)
@@ -1,8 +1,17 @@
 jvmti-y += libjvmti.o
 jvmti-y += jvmti_agent.o
 
+# For strlcpy
+jvmti-y += libstring.o
+
 CFLAGS_jvmti         = -fPIC -DPIC -I$(JDIR)/include -I$(JDIR)/include/linux
 CFLAGS_REMOVE_jvmti  = -Wmissing-declarations
 CFLAGS_REMOVE_jvmti += -Wstrict-prototypes
 CFLAGS_REMOVE_jvmti += -Wextra
 CFLAGS_REMOVE_jvmti += -Wwrite-strings
+
+CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+
+$(OUTPUT)jvmti/libstring.o: ../lib/string.c FORCE
+       $(call rule_mkdir)
+       $(call if_changed_dep,cc_o_c)
index a67efb8..85ccb8c 100644 (file)
@@ -59,7 +59,13 @@ else
   CFLAGS := -g -Wall
 endif
 
-INCLUDES = -I$(srctree)/tools/perf/lib/include -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/ -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
+INCLUDES = \
+-I$(srctree)/tools/perf/lib/include \
+-I$(srctree)/tools/lib/ \
+-I$(srctree)/tools/include \
+-I$(srctree)/tools/arch/$(SRCARCH)/include/ \
+-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \
+-I$(srctree)/tools/include/uapi
 
 # Append required CFLAGS
 override CFLAGS += $(EXTRA_WARNINGS)
@@ -88,13 +94,34 @@ LIBPERF_PC := $(OUTPUT)libperf.pc
 
 LIBPERF_ALL := $(LIBPERF_A) $(OUTPUT)libperf.so*
 
+# Source directory of libapi; its libapi.a is a prerequisite of libperf.so.
+LIB_DIR := $(srctree)/tools/lib/api/
+
+# Pick the directory libapi.a is expected in: under $(OUTPUT) when an output
+# directory is set ($(OUTPUT)/../lib/api/ if $(subdir) is set as well),
+# otherwise next to the libapi sources.
+ifneq ($(OUTPUT),)
+ifneq ($(subdir),)
+  API_PATH=$(OUTPUT)/../lib/api/
+else
+  API_PATH=$(OUTPUT)
+endif
+else
+  API_PATH=$(LIB_DIR)
+endif
+
+LIBAPI = $(API_PATH)libapi.a
+
+# FORCE makes every build enter the libapi Makefile, which decides itself
+# whether libapi.a needs rebuilding.
+$(LIBAPI): FORCE
+       $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a
+
+# Run libapi's own clean target, discarding its output.
+$(LIBAPI)-clean:
+       $(call QUIET_CLEAN, libapi)
+       $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
+
 $(LIBPERF_IN): FORCE
        $(Q)$(MAKE) $(build)=libperf
 
 $(LIBPERF_A): $(LIBPERF_IN)
        $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN)
 
-$(LIBPERF_SO): $(LIBPERF_IN)
+$(LIBPERF_SO): $(LIBPERF_IN) $(LIBAPI)
        $(QUIET_LINK)$(CC) --shared -Wl,-soname,libperf.so \
                                     -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@
        @ln -sf $(@F) $(OUTPUT)libperf.so
@@ -106,12 +133,12 @@ libs: $(LIBPERF_A) $(LIBPERF_SO) $(LIBPERF_PC)
 all: fixdep
        $(Q)$(MAKE) libs
 
-clean:
+clean: $(LIBAPI)-clean
        $(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \
                 *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd LIBPERF-CFLAGS $(LIBPERF_PC)
        $(Q)$(MAKE) -C tests clean
 
-tests:
+tests: libs
        $(Q)$(MAKE) -C tests
        $(Q)$(MAKE) -C tests run
 
@@ -146,6 +173,7 @@ install_headers:
                $(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \
                $(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \
-               $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644);
+               $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \
+               $(call do_install,include/perf/event.h,$(prefix)/include/perf,644);
 
 install_pkgconfig: $(LIBPERF_PC)
        $(call QUIET_INSTALL, $(LIBPERF_PC)) \
index 29d5e33..d0b9ae4 100644 (file)
@@ -4,7 +4,9 @@
 
 #include <stdio.h>
 #include <stdarg.h>
+#include <unistd.h>
 #include <perf/core.h>
+#include <internal/lib.h>
 #include "internal.h"
 
 static int __base_pr(enum libperf_print_level level, const char *format,
@@ -15,11 +17,6 @@ static int __base_pr(enum libperf_print_level level, const char *format,
 
 static libperf_print_fn_t __libperf_pr = __base_pr;
 
-void libperf_set_print(libperf_print_fn_t fn)
-{
-       __libperf_pr = fn;
-}
-
 __printf(2, 3)
 void libperf_print(enum libperf_print_level level, const char *format, ...)
 {
@@ -32,3 +29,9 @@ void libperf_print(enum libperf_print_level level, const char *format, ...)
        __libperf_pr(level, format, args);
        va_end(args);
 }
+
+/*
+ * One-time library setup: install @fn as the callback libperf_print()
+ * forwards to, and cache the system page size in the global page_size.
+ */
+void libperf_init(libperf_print_fn_t fn)
+{
+       __libperf_pr = fn;
+       page_size = sysconf(_SC_PAGE_SIZE);
+}
index 1f0e6f3..2ca1faf 100644 (file)
@@ -260,3 +260,15 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)
 
        return -1;
 }
+
+/*
+ * Return the largest value stored in @map, or -1 when the map holds no
+ * entries. Every entry is visited; no ordering of the map is assumed.
+ */
+int perf_cpu_map__max(struct perf_cpu_map *map)
+{
+       int highest = -1;
+       int idx = 0;
+
+       while (idx < map->nr) {
+               if (map->map[idx] > highest)
+                       highest = map->map[idx];
+               idx++;
+       }
+
+       return highest;
+}
index f4dc9a2..d1496fe 100644 (file)
@@ -1,16 +1,30 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <perf/evlist.h>
 #include <perf/evsel.h>
+#include <linux/bitops.h>
 #include <linux/list.h>
+#include <linux/hash.h>
+#include <sys/ioctl.h>
 #include <internal/evlist.h>
 #include <internal/evsel.h>
+#include <internal/xyarray.h>
 #include <linux/zalloc.h>
 #include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <poll.h>
 #include <perf/cpumap.h>
 #include <perf/threadmap.h>
+#include <api/fd/array.h>
 
+/*
+ * Put @evlist into its empty state: every bucket of heads[] initialised,
+ * an empty entries list and a zero entry count.
+ */
 void perf_evlist__init(struct perf_evlist *evlist)
 {
+       int i;
+
+       /* heads[] is the hash table perf_evlist__id_hash() inserts into. */
+       for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
+               INIT_HLIST_HEAD(&evlist->heads[i]);
        INIT_LIST_HEAD(&evlist->entries);
        evlist->nr_entries = 0;
 }
@@ -157,3 +171,113 @@ void perf_evlist__disable(struct perf_evlist *evlist)
        perf_evlist__for_each_entry(evlist, evsel)
                perf_evsel__disable(evsel);
 }
+
+/* Report the read_format of the first evsel on @evlist. */
+u64 perf_evlist__read_format(struct perf_evlist *evlist)
+{
+       return perf_evlist__first(evlist)->attr.read_format;
+}
+
+/*
+ * Entry of evsel @e's sample_id xyarray at (@x, @y). @e is parenthesised so
+ * that any pointer-valued expression can be passed safely.
+ */
+#define SID(e, x, y) xyarray__entry((e)->sample_id, x, y)
+
+/*
+ * Fill in the sample-id entry of @evsel at (@cpu, @thread) with @id and
+ * link it into the @evlist bucket selected by hashing that id.
+ */
+static void perf_evlist__id_hash(struct perf_evlist *evlist,
+                                struct perf_evsel *evsel,
+                                int cpu, int thread, u64 id)
+{
+       struct perf_sample_id *slot = SID(evsel, cpu, thread);
+       int bucket;
+
+       slot->evsel = evsel;
+       slot->id = id;
+
+       bucket = hash_64(slot->id, PERF_EVLIST__HLIST_BITS);
+       hlist_add_head(&slot->node, &evlist->heads[bucket]);
+}
+
+/*
+ * Hash @id for the (@cpu, @thread) entry of @evsel, then append it to the
+ * evsel's id array. No bounds check is performed on that array here.
+ */
+void perf_evlist__id_add(struct perf_evlist *evlist,
+                        struct perf_evsel *evsel,
+                        int cpu, int thread, u64 id)
+{
+       u32 slot;
+
+       perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
+
+       slot = evsel->ids;
+       evsel->id[slot] = id;
+       evsel->ids = slot + 1;
+}
+
+/*
+ * Obtain the event id behind @fd and register it for @evsel at
+ * (@cpu, @thread).
+ *
+ * PERF_EVENT_IOC_ID is tried first. Only when that ioctl fails with ENOTTY
+ * is the id taken from a read() of the fd instead, which needs
+ * PERF_FORMAT_ID in the evsel's read_format and is refused when the evlist
+ * uses PERF_FORMAT_GROUP.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int perf_evlist__id_add_fd(struct perf_evlist *evlist,
+                          struct perf_evsel *evsel,
+                          int cpu, int thread, int fd)
+{
+       u64 id;
+
+       if (ioctl(fd, PERF_EVENT_IOC_ID, &id)) {
+               u64 read_data[4] = { 0, };
+               int id_idx = 1; /* slot 0 holds the counter value */
+               u64 read_format;
+
+               if (errno != ENOTTY)
+                       return -1;
+
+               /* Fallback for kernels that lack the ioctl. */
+
+               /* A group-format read cannot be decoded below, so give up. */
+               if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
+                       return -1;
+
+               read_format = evsel->attr.read_format;
+               if (!(read_format & PERF_FORMAT_ID))
+                       return -1;
+               if (read(fd, &read_data, sizeof(read_data)) == -1)
+                       return -1;
+
+               /* Each enabled time field shifts the id one slot further. */
+               if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+                       ++id_idx;
+               if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+                       ++id_idx;
+
+               id = read_data[id_idx];
+       }
+
+       perf_evlist__id_add(evlist, evsel, cpu, thread, id);
+       return 0;
+}
+
+/*
+ * Make sure @evlist's pollfd array has room for every evsel: one entry per
+ * cpu for system-wide evsels, one per cpu and thread for all others.
+ *
+ * Returns 0 on success, -ENOMEM when the array cannot be grown.
+ */
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
+{
+       int nr_cpus = perf_cpu_map__nr(evlist->cpus);
+       int nr_threads = perf_thread_map__nr(evlist->threads);
+       struct perf_evsel *pos;
+       int wanted = 0;
+
+       perf_evlist__for_each_entry(evlist, pos)
+               wanted += pos->system_wide ? nr_cpus : nr_cpus * nr_threads;
+
+       /* Nothing to do when the free entries already cover the demand. */
+       if (fdarray__available_entries(&evlist->pollfd) >= wanted)
+               return 0;
+
+       return fdarray__grow(&evlist->pollfd, wanted) < 0 ? -ENOMEM : 0;
+}
+
+/*
+ * Add @fd to @evlist's pollfd array, watching for @revent plus POLLERR and
+ * POLLHUP. On success @ptr is stored in the entry's private slot and the
+ * fd's status flags are set to O_NONBLOCK.
+ *
+ * Returns the value of fdarray__add(): the entry's position when >= 0,
+ * a negative value otherwise.
+ */
+int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
+                           void *ptr, short revent)
+{
+       short events = revent | POLLERR | POLLHUP;
+       int pos = fdarray__add(&evlist->pollfd, fd, events);
+
+       if (pos < 0)
+               return pos;
+
+       evlist->pollfd.priv[pos].ptr = ptr;
+       fcntl(fd, F_SETFL, O_NONBLOCK);
+       return pos;
+}
+
+/*
+ * Poll the descriptors registered in @evlist's pollfd array, handing
+ * @timeout to fdarray__poll() unchanged and returning its result.
+ */
+int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
+{
+       struct fdarray *fda = &evlist->pollfd;
+
+       return fdarray__poll(fda, timeout);
+}
index 24abc80..a8cb582 100644 (file)
@@ -230,3 +230,33 @@ struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel)
 {
        return &evsel->attr;
 }
+
+/*
+ * Allocate the per-(cpu, thread) sample-id table and the flat id array of
+ * @evsel. Nothing is allocated when @ncpus or @nthreads is zero.
+ *
+ * Returns 0 on success (or when there is nothing to allocate), -ENOMEM if
+ * either allocation fails; in that case sample_id is left NULL.
+ */
+int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+       if (!ncpus || !nthreads)
+               return 0;
+
+       /* A system-wide evsel is sized with a single thread column. */
+       if (evsel->system_wide)
+               nthreads = 1;
+
+       evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
+       if (!evsel->sample_id)
+               return -ENOMEM;
+
+       evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
+       if (evsel->id)
+               return 0;
+
+       /* Second allocation failed: undo the first one. */
+       xyarray__delete(evsel->sample_id);
+       evsel->sample_id = NULL;
+       return -ENOMEM;
+}
+
+/*
+ * Release what perf_evsel__alloc_id() set up: the id array and the
+ * sample-id table, and reset the id count to zero.
+ */
+void perf_evsel__free_id(struct perf_evsel *evsel)
+{
+       zfree(&evsel->id);
+       evsel->ids = 0;
+
+       xyarray__delete(evsel->sample_id);
+       evsel->sample_id = NULL;
+}
index 448891f..9f440ab 100644 (file)
@@ -3,6 +3,11 @@
 #define __LIBPERF_INTERNAL_EVLIST_H
 
 #include <linux/list.h>
+#include <api/fd/array.h>
+#include <internal/evsel.h>
+
+#define PERF_EVLIST__HLIST_BITS 8
+#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
 
 struct perf_cpu_map;
 struct perf_thread_map;
@@ -13,8 +18,16 @@ struct perf_evlist {
        bool                     has_user_cpus;
        struct perf_cpu_map     *cpus;
        struct perf_thread_map  *threads;
+       int                      nr_mmaps;
+       size_t                   mmap_len;
+       struct fdarray           pollfd;
+       struct hlist_head        heads[PERF_EVLIST__HLIST_SIZE];
 };
 
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
+int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
+                           void *ptr, short revent);
+
 /**
  * __perf_evlist__for_each_entry - iterate thru all the evsels
  * @list: list_head instance to iterate
@@ -47,4 +60,24 @@ struct perf_evlist {
 #define perf_evlist__for_each_entry_reverse(evlist, evsel) \
        __perf_evlist__for_each_entry_reverse(&(evlist)->entries, evsel)
 
+/* The evsel at the head of @evlist's entries list (no emptiness check). */
+static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist)
+{
+       struct list_head *head = evlist->entries.next;
+
+       return list_entry(head, struct perf_evsel, node);
+}
+
+/* The evsel at the tail of @evlist's entries list (no emptiness check). */
+static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist)
+{
+       struct list_head *tail = evlist->entries.prev;
+
+       return list_entry(tail, struct perf_evsel, node);
+}
+
+u64 perf_evlist__read_format(struct perf_evlist *evlist);
+
+void perf_evlist__id_add(struct perf_evlist *evlist,
+                        struct perf_evsel *evsel,
+                        int cpu, int thread, u64 id);
+
+int perf_evlist__id_add_fd(struct perf_evlist *evlist,
+                          struct perf_evsel *evsel,
+                          int cpu, int thread, int fd);
+
 #endif /* __LIBPERF_INTERNAL_EVLIST_H */
index 8b854d1..a69b829 100644 (file)
@@ -4,9 +4,35 @@
 
 #include <linux/types.h>
 #include <linux/perf_event.h>
+#include <stdbool.h>
+#include <sys/types.h>
 
 struct perf_cpu_map;
 struct perf_thread_map;
+struct xyarray;
+
+/*
+ * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are
+ * more than one entry in the evlist.
+ */
+struct perf_sample_id {
+       /* Links this entry into one bucket of the evlist's heads[] table. */
+       struct hlist_node        node;
+       /* Event id this entry was hashed under. */
+       u64                      id;
+       /* Evsel the id belongs to. */
+       struct perf_evsel       *evsel;
+       /*
+        * 'idx' will be used for AUX area sampling. A sample will have AUX area
+        * data that will be queued for decoding, where there are separate
+        * queues for each CPU (per-cpu tracing) or task (per-thread tracing).
+        * The sample ID can be used to lookup 'idx' which is effectively the
+        * queue number.
+        */
+       int                      idx;
+       int                      cpu;
+       pid_t                    tid;
+
+       /* Holds total ID period value for PERF_SAMPLE_READ processing. */
+       u64                      period;
+};
 
 struct perf_evsel {
        struct list_head         node;
@@ -15,9 +41,13 @@ struct perf_evsel {
        struct perf_cpu_map     *own_cpus;
        struct perf_thread_map  *threads;
        struct xyarray          *fd;
+       struct xyarray          *sample_id;
+       u64                     *id;
+       u32                      ids;
 
        /* parse modifier helper */
        int                      nr_members;
+       bool                     system_wide;
 };
 
 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
@@ -26,4 +56,7 @@ void perf_evsel__free_fd(struct perf_evsel *evsel);
 int perf_evsel__read_size(struct perf_evsel *evsel);
 int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter);
 
+int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
+void perf_evsel__free_id(struct perf_evsel *evsel);
+
 #endif /* __LIBPERF_INTERNAL_EVSEL_H */
index 0b56f12..5175d49 100644 (file)
@@ -2,7 +2,9 @@
 #ifndef __LIBPERF_INTERNAL_LIB_H
 #define __LIBPERF_INTERNAL_LIB_H
 
-#include <unistd.h>
+#include <sys/types.h>
+
+extern unsigned int page_size;
 
 ssize_t readn(int fd, void *buf, size_t n);
 ssize_t writen(int fd, const void *buf, size_t n);
diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h
new file mode 100644 (file)
index 0000000..ba1e519
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_INTERNAL_MMAP_H
+#define __LIBPERF_INTERNAL_MMAP_H
+
+#include <linux/compiler.h>
+#include <linux/refcount.h>
+#include <linux/types.h>
+#include <stdbool.h>
+
+/* perf sample has 16 bits size limit */
+#define PERF_SAMPLE_MAX_SIZE (1 << 16)
+
+/**
+ * struct perf_mmap - perf's ring buffer mmap details
+ *
+ * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
+ */
+struct perf_mmap {
+       void            *base;
+       int              mask;
+       int              fd;
+       int              cpu;
+       refcount_t       refcnt;
+       u64              prev;
+       u64              start;
+       u64              end;
+       bool             overwrite;
+       u64              flush;
+       /*
+        * PERF_SAMPLE_MAX_SIZE bytes, 8-byte aligned; presumably holds a copy
+        * of a single event (TODO confirm against the users of this field).
+        */
+       char             event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+};
+
+#endif /* __LIBPERF_INTERNAL_MMAP_H */
index c341a7b..cfd70e7 100644 (file)
@@ -17,6 +17,6 @@ enum libperf_print_level {
 typedef int (*libperf_print_fn_t)(enum libperf_print_level level,
                                  const char *, va_list ap);
 
-LIBPERF_API void libperf_set_print(libperf_print_fn_t fn);
+LIBPERF_API void libperf_init(libperf_print_fn_t fn);
 
 #endif /* __LIBPERF_CORE_H */
index 8aa995c..ac9aa49 100644 (file)
@@ -16,6 +16,7 @@ LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
 LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
 LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
 LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
+LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map);
 
 #define perf_cpu_map__for_each_cpu(cpu, idx, cpus)             \
        for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx);   \
index 38365f8..8a2ce07 100644 (file)
@@ -31,5 +31,6 @@ LIBPERF_API void perf_evlist__disable(struct perf_evlist *evlist);
 LIBPERF_API void perf_evlist__set_maps(struct perf_evlist *evlist,
                                       struct perf_cpu_map *cpus,
                                       struct perf_thread_map *threads);
+LIBPERF_API int perf_evlist__poll(struct perf_evlist *evlist, int timeout);
 
 #endif /* __LIBPERF_EVLIST_H */
index 2a81819..1865893 100644 (file)
@@ -5,6 +5,8 @@
 #include <linux/kernel.h>
 #include <internal/lib.h>
 
+unsigned int page_size;
+
 static ssize_t ion(bool is_read, int fd, void *buf, size_t n)
 {
        void *buf_start = buf;
index dc4d663..ab8dbde 100644 (file)
@@ -1,6 +1,6 @@
 LIBPERF_0.0.1 {
        global:
-               libperf_set_print;
+               libperf_init;
                perf_cpu_map__dummy_new;
                perf_cpu_map__get;
                perf_cpu_map__put;
@@ -9,6 +9,7 @@ LIBPERF_0.0.1 {
                perf_cpu_map__nr;
                perf_cpu_map__cpu;
                perf_cpu_map__empty;
+               perf_cpu_map__max;
                perf_thread_map__new_dummy;
                perf_thread_map__set_pid;
                perf_thread_map__comm;
@@ -38,6 +39,7 @@ LIBPERF_0.0.1 {
                perf_evlist__remove;
                perf_evlist__next;
                perf_evlist__set_maps;
+               perf_evlist__poll;
        local:
                *;
 };
index 76a43cf..aa34c20 100644 (file)
@@ -1,13 +1,23 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <stdarg.h>
+#include <stdio.h>
 #include <perf/cpumap.h>
 #include <internal/tests.h>
 
+/* libperf print callback for this test: everything goes to stderr, @level is ignored. */
+static int libperf_print(enum libperf_print_level level,
+                        const char *fmt, va_list ap)
+{
+       int printed = vfprintf(stderr, fmt, ap);
+
+       return printed;
+}
+
 int main(int argc, char **argv)
 {
        struct perf_cpu_map *cpus;
 
        __T_START;
 
+       libperf_init(libperf_print);
+
        cpus = perf_cpu_map__dummy_new();
        if (!cpus)
                return -1;
index 4e1407f..e6b2ab2 100644 (file)
@@ -1,4 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdarg.h>
 #include <linux/perf_event.h>
 #include <perf/cpumap.h>
 #include <perf/threadmap.h>
@@ -6,6 +8,12 @@
 #include <perf/evsel.h>
 #include <internal/tests.h>
 
+/* libperf print callback for this test: everything goes to stderr, @level is ignored. */
+static int libperf_print(enum libperf_print_level level,
+                        const char *fmt, va_list ap)
+{
+       int printed = vfprintf(stderr, fmt, ap);
+
+       return printed;
+}
+
 static int test_stat_cpu(void)
 {
        struct perf_cpu_map *cpus;
@@ -177,6 +185,8 @@ int main(int argc, char **argv)
 {
        __T_START;
 
+       libperf_init(libperf_print);
+
        test_stat_cpu();
        test_stat_thread();
        test_stat_thread_enable();
index 2c648fe..1b6c428 100644 (file)
@@ -1,10 +1,18 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <stdarg.h>
+#include <stdio.h>
 #include <linux/perf_event.h>
 #include <perf/cpumap.h>
 #include <perf/threadmap.h>
 #include <perf/evsel.h>
 #include <internal/tests.h>
 
+/* libperf print callback for this test: everything goes to stderr, @level is ignored. */
+static int libperf_print(enum libperf_print_level level,
+                        const char *fmt, va_list ap)
+{
+       int printed = vfprintf(stderr, fmt, ap);
+
+       return printed;
+}
+
 static int test_stat_cpu(void)
 {
        struct perf_cpu_map *cpus;
@@ -116,6 +124,8 @@ int main(int argc, char **argv)
 {
        __T_START;
 
+       libperf_init(libperf_print);
+
        test_stat_cpu();
        test_stat_thread();
        test_stat_thread_enable();
index 10a4f4c..8c5f472 100644 (file)
@@ -1,13 +1,23 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <stdarg.h>
+#include <stdio.h>
 #include <perf/threadmap.h>
 #include <internal/tests.h>
 
+/* libperf print callback for this test: everything goes to stderr, @level is ignored. */
+static int libperf_print(enum libperf_print_level level,
+                        const char *fmt, va_list ap)
+{
+       int printed = vfprintf(stderr, fmt, ap);
+
+       return printed;
+}
+
 int main(int argc, char **argv)
 {
        struct perf_thread_map *threads;
 
        __T_START;
 
+       libperf_init(libperf_print);
+
        threads = perf_thread_map__new_dummy();
        if (!threads)
                return -1;
index 1193b92..27f94b0 100644 (file)
@@ -12,6 +12,7 @@
 #include "util/build-id.h"
 #include "util/cache.h"
 #include "util/env.h"
+#include <internal/lib.h> // page_size
 #include <subcmd/exec-cmd.h>
 #include "util/config.h"
 #include <subcmd/run-command.h>
 #include "util/bpf-loader.h"
 #include "util/debug.h"
 #include "util/event.h"
-#include "util/util.h"
+#include "util/util.h" // usage()
 #include "ui/ui.h"
 #include "perf-sys.h"
 #include <api/fs/fs.h>
 #include <api/fs/tracing_path.h>
+#include <perf/core.h>
 #include <errno.h>
 #include <pthread.h>
 #include <signal.h>
@@ -428,6 +430,12 @@ void pthread__unblock_sigwinch(void)
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
 }
 
+/*
+ * Print callback handed to libperf_init(): route libperf messages through
+ * perf's own verbosity-aware printing.
+ *
+ * The arguments arrive as a va_list, so the va_list variant veprintf() must
+ * be used; passing @ap to the variadic eprintf() would make the format
+ * arguments be misread.
+ */
+static int libperf_print(enum libperf_print_level level,
+                        const char *fmt, va_list ap)
+{
+       return veprintf(level, verbose, fmt, ap);
+}
+
 int main(int argc, const char **argv)
 {
        int err;
@@ -438,8 +446,7 @@ int main(int argc, const char **argv)
        exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
        pager_init(PERF_PAGER_ENVIRONMENT);
 
-       /* The page_size is placed in util object. */
-       page_size = sysconf(_SC_PAGE_SIZE);
+       libperf_init(libperf_print);
 
        cmd = extract_argv0_path(argv[0]);
        if (!cmd)
index e62b09b..de7efa2 100644 (file)
@@ -30,9 +30,9 @@ the topic. Eg: "Floating-point.json".
 All the topic JSON files for a CPU model/family should be in a separate
 sub directory. Thus for the Silvermont X86 CPU:
 
-       $ ls tools/perf/pmu-events/arch/x86/Silvermont_core
-       Cache.json      Memory.json     Virtual-Memory.json
-       Frontend.json   Pipeline.json
+       $ ls tools/perf/pmu-events/arch/x86/silvermont
+       cache.json     memory.json    virtual-memory.json
+       frontend.json  pipeline.json
 
 The JSONs folder for a CPU model/family may be placed in the root arch
 folder, or may be placed in a vendor sub-folder under the arch folder
@@ -94,7 +94,7 @@ users to specify events by their name:
 
 where 'pm_1plus_ppc_cmpl' is a Power8 PMU event.
 
-However some errors in processing may cause the perf build to fail.
+However some errors in processing may cause the alias build to fail.
 
 Mapfile format
 ===============
@@ -119,7 +119,7 @@ where:
 
        Header line
                The header line is the first line in the file, which is
-               always _IGNORED_. It can empty.
+               always _IGNORED_. It can be empty.
 
        CPUID:
                CPUID is an arch-specific char string, that can be used
@@ -138,15 +138,15 @@ where:
                files, relative to the directory containing the mapfile.csv
 
        Type:
-               indicates whether the events or "core" or "uncore" events.
+               indicates whether the events are "core" or "uncore" events.
 
 
        Eg:
 
-       $ grep Silvermont tools/perf/pmu-events/arch/x86/mapfile.csv
-       GenuineIntel-6-37,V13,Silvermont_core,core
-       GenuineIntel-6-4D,V13,Silvermont_core,core
-       GenuineIntel-6-4C,V13,Silvermont_core,core
+       $ grep silvermont tools/perf/pmu-events/arch/x86/mapfile.csv
+       GenuineIntel-6-37,v13,silvermont,core
+       GenuineIntel-6-4D,v13,silvermont,core
+       GenuineIntel-6-4C,v13,silvermont,core
 
        i.e the three CPU models use the JSON files (i.e PMU events) listed
-       in the directory 'tools/perf/pmu-events/arch/x86/Silvermont_core'.
+       in the directory 'tools/perf/pmu-events/arch/x86/silvermont'.
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json
new file mode 100644 (file)
index 0000000..b5e5d05
--- /dev/null
@@ -0,0 +1,14 @@
+[
+    {
+        "PublicDescription": "Mispredicted or not predicted branch speculatively executed. This event counts any predictable branch instruction which is mispredicted either due to dynamic misprediction or because the MMU is off and the branches are statically predicted not taken.",
+        "EventCode": "0x10",
+        "EventName": "BR_MIS_PRED",
+        "BriefDescription": "Mispredicted or not predicted branch speculatively executed."
+    },
+    {
+        "PublicDescription": "Predictable branch speculatively executed. This event counts all predictable branches.",
+        "EventCode": "0x12",
+        "EventName": "BR_PRED",
+        "BriefDescription": "Predictable branch speculatively executed."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json
new file mode 100644 (file)
index 0000000..fce7309
--- /dev/null
@@ -0,0 +1,24 @@
+[
+    {
+        "EventCode": "0x11",
+        "EventName": "CPU_CYCLES",
+        "BriefDescription": "The number of core clock cycles."
+    },
+    {
+        "PublicDescription": "Bus access. This event counts for every beat of data transferred over the data channels between the core and the SCU. If both read and write data beats are transferred on a given cycle, this event is counted twice on that cycle. This event counts the sum of BUS_ACCESS_RD and BUS_ACCESS_WR.",
+        "EventCode": "0x19",
+        "EventName": "BUS_ACCESS",
+        "BriefDescription": "Bus access."
+    },
+    {
+        "EventCode": "0x1D",
+        "EventName": "BUS_CYCLES",
+        "BriefDescription": "Bus cycles. This event duplicates CPU_CYCLES."
+    },
+    {
+        "ArchStdEvent":  "BUS_ACCESS_RD"
+    },
+    {
+        "ArchStdEvent":  "BUS_ACCESS_WR"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json
new file mode 100644 (file)
index 0000000..2459408
--- /dev/null
@@ -0,0 +1,207 @@
+[
+    {
+        "PublicDescription": "L1 instruction cache refill. This event counts any instruction fetch which misses in the cache.",
+        "EventCode": "0x01",
+        "EventName": "L1I_CACHE_REFILL",
+        "BriefDescription": "L1 instruction cache refill"
+    },
+    {
+        "PublicDescription": "L1 instruction TLB refill. This event counts any refill of the instruction L1 TLB from the L2 TLB. This includes refills that result in a translation fault.",
+        "EventCode": "0x02",
+        "EventName": "L1I_TLB_REFILL",
+        "BriefDescription": "L1 instruction TLB refill"
+    },
+    {
+        "PublicDescription": "L1 data cache refill. This event counts any load or store operation or page table walk access which causes data to be read from outside the L1, including accesses which do not allocate into L1.",
+        "EventCode": "0x03",
+        "EventName": "L1D_CACHE_REFILL",
+        "BriefDescription": "L1 data cache refill"
+    },
+    {
+        "PublicDescription": "L1 data cache access. This event counts any load or store operation or page table walk access which looks up in the L1 data cache. In particular, any access which could count the L1D_CACHE_REFILL event causes this event to count.",
+        "EventCode": "0x04",
+        "EventName": "L1D_CACHE",
+        "BriefDescription": "L1 data cache access"
+    },
+    {
+        "PublicDescription": "L1 data TLB refill. This event counts any refill of the data L1 TLB from the L2 TLB. This includes refills that result in a translation fault.",
+        "EventCode": "0x05",
+        "EventName": "L1D_TLB_REFILL",
+        "BriefDescription": "L1 data TLB refill"
+    },
+    {
+        "PublicDescription": "Level 1 instruction cache access or Level 0 Macro-op cache access. This event counts any instruction fetch which accesses the L1 instruction cache or L0 Macro-op cache.",
+        "EventCode": "0x14",
+        "EventName": "L1I_CACHE",
+        "BriefDescription": "L1 instruction cache access"
+    },
+    {
+        "PublicDescription": "L1 data cache Write-Back. This event counts any write-back of data from the L1 data cache to L2 or L3. This counts both victim line evictions and snoops, including cache maintenance operations.",
+        "EventCode": "0x15",
+        "EventName": "L1D_CACHE_WB",
+        "BriefDescription": "L1 data cache Write-Back"
+    },
+    {
+        "PublicDescription": "L2 data cache access. This event counts any transaction from L1 which looks up in the L2 cache, and any write-back from the L1 to the L2. Snoops from outside the core and cache maintenance operations are not counted.",
+        "EventCode": "0x16",
+        "EventName": "L2D_CACHE",
+        "BriefDescription": "L2 data cache access"
+    },
+    {
+        "PublicDescription": "L2 data cache refill. This event counts any cacheable transaction from L1 which causes data to be read from outside the core. L2 refills caused by stashes into L2 should not be counted",
+        "EventCode": "0x17",
+        "EventName": "L2D_CACHE_REFILL",
+        "BriefDescription": "L2 data cache refill"
+    },
+    {
+        "PublicDescription": "L2 data cache write-back. This event counts any write-back of data from the L2 cache to outside the core. This includes snoops to the L2 which return data, regardless of whether they cause an invalidation. Invalidations from the L2 which do not write data outside of the core and snoops which return data from the L1 are not counted",
+        "EventCode": "0x18",
+        "EventName": "L2D_CACHE_WB",
+        "BriefDescription": "L2 data cache write-back"
+    },
+    {
+        "PublicDescription": "L2 data cache allocation without refill. This event counts any full cache line write into the L2 cache which does not cause a linefill, including write-backs from L1 to L2 and full-line writes which do not allocate into L1.",
+        "EventCode": "0x20",
+        "EventName": "L2D_CACHE_ALLOCATE",
+        "BriefDescription": "L2 data cache allocation without refill"
+    },
+    {
+        "PublicDescription": "Level 1 data TLB access. This event counts any load or store operation which accesses the data L1 TLB. If both a load and a store are executed on a cycle, this event counts twice. This event counts regardless of whether the MMU is enabled.",
+        "EventCode": "0x25",
+        "EventName": "L1D_TLB",
+        "BriefDescription": "Level 1 data TLB access."
+    },
+    {
+        "PublicDescription": "Level 1 instruction TLB access. This event counts any instruction fetch which accesses the instruction L1 TLB. This event counts regardless of whether the MMU is enabled.",
+        "EventCode": "0x26",
+        "EventName": "L1I_TLB",
+        "BriefDescription": "Level 1 instruction TLB access"
+    },
+    {
+        "PublicDescription": "This event counts any full cache line write into the L3 cache which does not cause a linefill, including write-backs from L2 to L3 and full-line writes which do not allocate into L2",
+        "EventCode": "0x29",
+        "EventName": "L3D_CACHE_ALLOCATE",
+        "BriefDescription": "Allocation without refill"
+    },
+    {
+        "PublicDescription": "Attributable Level 3 unified cache refill. This event counts for any cacheable read transaction returning data from the SCU for which the data source was outside the cluster. Transactions such as ReadUnique are counted here as 'read' transactions, even though they can be generated by store instructions.",
+        "EventCode": "0x2A",
+        "EventName": "L3D_CACHE_REFILL",
+        "BriefDescription": "Attributable Level 3 unified cache refill."
+    },
+    {
+        "PublicDescription": "Attributable Level 3 unified cache access. This event counts for any cacheable read transaction returning data from the SCU, or for any cacheable write to the SCU.",
+        "EventCode": "0x2B",
+        "EventName": "L3D_CACHE",
+        "BriefDescription": "Attributable Level 3 unified cache access."
+    },
+    {
+        "PublicDescription": "Attributable L2 data or unified TLB refill. This event counts on any refill of the L2 TLB, caused by either an instruction or data access. This event does not count if the MMU is disabled.",
+        "EventCode": "0x2D",
+        "EventName": "L2D_TLB_REFILL",
+        "BriefDescription": "Attributable L2 data or unified TLB refill"
+    },
+    {
+        "PublicDescription": "Attributable L2 data or unified TLB access. This event counts on any access to the L2 TLB (caused by a refill of any of the L1 TLBs). This event does not count if the MMU is disabled.",
+        "EventCode": "0x2F",
+        "EventName": "L2D_TLB",
+        "BriefDescription": "Attributable L2 data or unified TLB access"
+    },
+    {
+        "PublicDescription": "Access to data TLB that caused a page table walk. This event counts on any data access which causes L2D_TLB_REFILL to count.",
+        "EventCode": "0x34",
+        "EventName": "DTLB_WALK",
+        "BriefDescription": "Access to data TLB that caused a page table walk."
+    },
+    {
+        "PublicDescription": "Access to instruction TLB that caused a page table walk. This event counts on any instruction access which causes L2D_TLB_REFILL to count.",
+        "EventCode": "0x35",
+        "EventName": "ITLB_WALK",
+        "BriefDescription": "Access to instruction TLB that caused a page table walk."
+    },
+    {
+        "EventCode": "0x36",
+        "EventName": "LL_CACHE_RD",
+        "BriefDescription": "Last level cache access, read"
+    },
+    {
+        "EventCode": "0x37",
+        "EventName": "LL_CACHE_MISS_RD",
+        "BriefDescription": "Last level cache miss, read"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_INVAL"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_RD"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_INNER"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_OUTER"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_RD"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_WR"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WB_CLEAN"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WB_VICTIM"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WR"
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_RD"
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL_RD"
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL_WR"
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_WR"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_INVAL"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_RD"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_RD"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_WR"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB_CLEAN"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB_VICTIM"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WR"
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_RD"
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_REFILL_RD"
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_REFILL_WR"
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_WR"
+    },
+    {
+        "ArchStdEvent": "L3D_CACHE_RD"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json
new file mode 100644 (file)
index 0000000..98d29c8
--- /dev/null
@@ -0,0 +1,52 @@
+[
+    {
+        "EventCode": "0x09",
+        "EventName": "EXC_TAKEN",
+        "BriefDescription": "Exception taken."
+    },
+    {
+        "PublicDescription": "Local memory error. This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs",
+        "EventCode": "0x1A",
+        "EventName": "MEMORY_ERROR",
+        "BriefDescription": "Local memory error."
+    },
+    {
+        "ArchStdEvent": "EXC_DABORT"
+    },
+    {
+        "ArchStdEvent": "EXC_FIQ"
+    },
+    {
+        "ArchStdEvent": "EXC_HVC"
+    },
+    {
+        "ArchStdEvent": "EXC_IRQ"
+    },
+    {
+        "ArchStdEvent": "EXC_PABORT"
+    },
+    {
+        "ArchStdEvent": "EXC_SMC"
+    },
+    {
+        "ArchStdEvent": "EXC_SVC"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_DABORT"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_FIQ"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_IRQ"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_OTHER"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_PABORT"
+    },
+    {
+        "ArchStdEvent": "EXC_UNDEF"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json
new file mode 100644 (file)
index 0000000..c153ac7
--- /dev/null
@@ -0,0 +1,108 @@
+[
+    {
+        "PublicDescription": "Software increment. Instruction architecturally executed (condition code check pass).",
+        "EventCode": "0x00",
+        "EventName": "SW_INCR",
+        "BriefDescription": "Software increment."
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed. This event counts all retired instructions, including those that fail their condition check.",
+        "EventCode": "0x08",
+        "EventName": "INST_RETIRED",
+        "BriefDescription": "Instruction architecturally executed."
+    },
+    {
+        "EventCode": "0x0A",
+        "EventName": "EXC_RETURN",
+        "BriefDescription": "Instruction architecturally executed, condition code check pass, exception return."
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR. This event only counts writes to CONTEXTIDR in AArch32 state, and via the CONTEXTIDR_EL1 mnemonic in AArch64 state.",
+        "EventCode": "0x0B",
+        "EventName": "CID_WRITE_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR."
+    },
+    {
+        "EventCode": "0x1B",
+        "EventName": "INST_SPEC",
+        "BriefDescription": "Operation speculatively executed"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, condition code check pass, write to TTBR. This event only counts writes to TTBR0/TTBR1 in AArch32 state and TTBR0_EL1/TTBR1_EL1 in AArch64 state.",
+        "EventCode": "0x1C",
+        "EventName": "TTBR_WRITE_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, condition code check pass, write to TTBR"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, branch. This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches.",
+        "EventCode": "0x21",
+        "EventName": "BR_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, branch."
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, mispredicted branch. This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush.",
+        "EventCode": "0x22",
+        "EventName": "BR_MIS_PRED_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, mispredicted branch."
+    },
+    {
+        "ArchStdEvent": "ASE_SPEC"
+    },
+    {
+        "ArchStdEvent": "BR_IMMED_SPEC"
+    },
+    {
+        "ArchStdEvent": "BR_INDIRECT_SPEC"
+    },
+    {
+        "ArchStdEvent": "BR_RETURN_SPEC"
+    },
+    {
+        "ArchStdEvent": "CRYPTO_SPEC"
+    },
+    {
+        "ArchStdEvent": "DMB_SPEC"
+    },
+    {
+        "ArchStdEvent": "DP_SPEC"
+    },
+    {
+        "ArchStdEvent": "DSB_SPEC"
+    },
+    {
+        "ArchStdEvent": "ISB_SPEC"
+    },
+    {
+        "ArchStdEvent": "LDREX_SPEC"
+    },
+    {
+        "ArchStdEvent": "LDST_SPEC"
+    },
+    {
+        "ArchStdEvent": "LD_SPEC"
+    },
+    {
+        "ArchStdEvent": "PC_WRITE_SPEC"
+    },
+    {
+        "ArchStdEvent": "RC_LD_SPEC"
+    },
+    {
+        "ArchStdEvent": "RC_ST_SPEC"
+    },
+    {
+        "ArchStdEvent": "STREX_FAIL_SPEC"
+    },
+    {
+        "ArchStdEvent": "STREX_PASS_SPEC"
+    },
+    {
+        "ArchStdEvent": "STREX_SPEC"
+    },
+    {
+        "ArchStdEvent": "ST_SPEC"
+    },
+    {
+        "ArchStdEvent": "VFP_SPEC"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json
new file mode 100644 (file)
index 0000000..b866432
--- /dev/null
@@ -0,0 +1,23 @@
+[
+    {
+        "PublicDescription": "Data memory access. This event counts memory accesses due to load or store instructions. This event counts the sum of MEM_ACCESS_RD and MEM_ACCESS_WR.",
+        "EventCode": "0x13",
+        "EventName": "MEM_ACCESS",
+        "BriefDescription": "Data memory access"
+    },
+    {
+         "ArchStdEvent": "MEM_ACCESS_RD"
+    },
+    {
+         "ArchStdEvent": "MEM_ACCESS_WR"
+    },
+    {
+         "ArchStdEvent": "UNALIGNED_LD_SPEC"
+    },
+    {
+         "ArchStdEvent": "UNALIGNED_ST_SPEC"
+    },
+    {
+         "ArchStdEvent": "UNALIGNED_LDST_SPEC"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json
new file mode 100644 (file)
index 0000000..8bde029
--- /dev/null
@@ -0,0 +1,7 @@
+[
+    {
+        "EventCode": "0x31",
+        "EventName": "REMOTE_ACCESS",
+        "BriefDescription": "Access to another socket in a multi-socket system"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json
new file mode 100644 (file)
index 0000000..010a647
--- /dev/null
@@ -0,0 +1,14 @@
+[
+    {
+        "PublicDescription": "No operation issued because of the frontend. The counter counts on any cycle when there are no fetched instructions available to dispatch.",
+        "EventCode": "0x23",
+        "EventName": "STALL_FRONTEND",
+        "BriefDescription": "No operation issued because of the frontend."
+    },
+    {
+        "PublicDescription": "No operation issued because of the backend. The counter counts on any cycle fetched instructions are not dispatched due to resource constraints.",
+        "EventCode": "0x24",
+        "EventName": "STALL_BACKEND",
+        "BriefDescription": "No operation issued because of the backend."
+    }
+]
index 927fcdd..0d60914 100644 (file)
@@ -16,6 +16,8 @@
 0x00000000420f1000,v1,arm/cortex-a53,core
 0x00000000410fd070,v1,arm/cortex-a57-a72,core
 0x00000000410fd080,v1,arm/cortex-a57-a72,core
+0x00000000410fd0b0,v1,arm/cortex-a76-n1,core
+0x00000000410fd0c0,v1,arm/cortex-a76-n1,core
 0x00000000420f5160,v1,cavium/thunderx2,core
 0x00000000430f0af0,v1,cavium/thunderx2,core
 0x00000000480fd010,v1,hisilicon/hip08,core
index 9dc2f6b..b2a3df0 100644 (file)
     "PublicDescription": ""
   },
   {,
-    "EventCode": "0xa29084",
-    "EventName": "PM_L3_P0_GRP_PUMP",
-    "BriefDescription": "L3 pf sent with grp scope port 0",
-    "PublicDescription": ""
-  },
-  {,
-    "EventCode": "0x528084",
-    "EventName": "PM_L3_P0_LCO_DATA",
-    "BriefDescription": "lco sent with data port 0",
-    "PublicDescription": ""
-  },
-  {,
-    "EventCode": "0x518080",
-    "EventName": "PM_L3_P0_LCO_NO_DATA",
-    "BriefDescription": "dataless l3 lco sent port 0",
-    "PublicDescription": ""
-  },
-  {,
-    "EventCode": "0xa4908c",
-    "EventName": "PM_L3_P0_LCO_RTY",
-    "BriefDescription": "L3 LCO received retry port 0",
-    "PublicDescription": ""
-  },
-  {,
     "EventCode": "0x84908d",
     "EventName": "PM_L3_PF0_ALLOC",
     "BriefDescription": "lifetime, sample of PF machine 0 valid",
index fad4af9..6221a84 100644 (file)
     "BriefDescription": "Total cycles spent with one or more fill requests in flight from L2.",
     "PublicDescription": "Total cycles spent with one or more fill requests in flight from L2.",
     "UMask": "0x1"
+  },
+  {
+    "EventName": "l3_request_g1.caching_l3_cache_accesses",
+    "EventCode": "0x01",
+    "BriefDescription": "Caching: L3 cache accesses",
+    "UMask": "0x80",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "l3_lookup_state.all_l3_req_typs",
+    "EventCode": "0x04",
+    "BriefDescription": "All L3 Request Types",
+    "UMask": "0xff",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "l3_comb_clstr_state.other_l3_miss_typs",
+    "EventCode": "0x06",
+    "BriefDescription": "Other L3 Miss Request Types",
+    "UMask": "0xfe",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "l3_comb_clstr_state.request_miss",
+    "EventCode": "0x06",
+    "BriefDescription": "L3 cache misses",
+    "UMask": "0x01",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "xi_sys_fill_latency",
+    "EventCode": "0x90",
+    "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.",
+    "UMask": "0x00",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs",
+    "EventCode": "0x9a",
+    "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.",
+    "UMask": "0x3f",
+    "Unit": "L3PMC"
   }
 ]
index 7b285b0..1079544 100644 (file)
@@ -13,7 +13,7 @@
   {
     "EventName": "ex_ret_brn",
     "EventCode": "0xc2",
-    "BriefDescription": "[Retired Branch Instructions.",
+    "BriefDescription": "Retired Branch Instructions.",
     "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts."
   },
   {
index d413761..9e37287 100644 (file)
@@ -239,6 +239,7 @@ static struct map {
        { "hisi_sccl,ddrc", "hisi_sccl,ddrc" },
        { "hisi_sccl,hha", "hisi_sccl,hha" },
        { "hisi_sccl,l3c", "hisi_sccl,l3c" },
+       { "L3PMC", "amd_l3" },
        {}
 };
 
index a637a4a..338cd9f 100644 (file)
@@ -10,6 +10,7 @@
 #include "tests.h"
 #include "debug.h"
 #include "parse-events.h"
+#include "util/mmap.h"
 #include <errno.h>
 #include <linux/string.h>
 
@@ -32,8 +33,8 @@ static int count_samples(struct evlist *evlist, int *sample_count,
 {
        int i;
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
-               struct perf_mmap *map = &evlist->overwrite_mmap[i];
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
+               struct mmap *map = &evlist->overwrite_mmap[i];
                union perf_event *event;
 
                perf_mmap__read_init(map);
@@ -63,9 +64,9 @@ static int do_test(struct evlist *evlist, int mmap_pages,
        int err;
        char sbuf[STRERR_BUFSIZE];
 
-       err = perf_evlist__mmap(evlist, mmap_pages);
+       err = evlist__mmap(evlist, mmap_pages);
        if (err < 0) {
-               pr_debug("perf_evlist__mmap: %s\n",
+               pr_debug("evlist__mmap: %s\n",
                         str_error_r(errno, sbuf, sizeof(sbuf)));
                return TEST_FAIL;
        }
@@ -75,7 +76,7 @@ static int do_test(struct evlist *evlist, int mmap_pages,
        evlist__disable(evlist);
 
        err = count_samples(evlist, sample_count, comm_count);
-       perf_evlist__munmap(evlist);
+       evlist__munmap(evlist);
        return err;
 }
 
index db2aadf..96c1373 100644 (file)
@@ -2,8 +2,8 @@
 #include <linux/compiler.h>
 #include <linux/bitmap.h>
 #include <perf/cpumap.h>
+#include <internal/cpumap.h>
 #include "tests.h"
-#include "cpumap.h"
 #include "debug.h"
 
 #define NBITS 100
index fc102e4..1eb0bff 100644 (file)
@@ -19,6 +19,7 @@
 #include "llvm.h"
 #include "debug.h"
 #include "parse-events.h"
+#include "util/mmap.h"
 #define NR_ITERS       111
 #define PERF_TEST_BPF_PATH "/sys/fs/bpf/perf_test"
 
@@ -167,9 +168,9 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
                goto out_delete_evlist;
        }
 
-       err = perf_evlist__mmap(evlist, opts.mmap_pages);
+       err = evlist__mmap(evlist, opts.mmap_pages);
        if (err < 0) {
-               pr_debug("perf_evlist__mmap: %s\n",
+               pr_debug("evlist__mmap: %s\n",
                         str_error_r(errno, sbuf, sizeof(sbuf)));
                goto out_delete_evlist;
        }
@@ -178,9 +179,9 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
        (*func)();
        evlist__disable(evlist);
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
                union perf_event *event;
-               struct perf_mmap *md;
+               struct mmap *md;
 
                md = &evlist->mmap[i];
                if (perf_mmap__read_init(md) < 0)
index f45fe11..2577d3e 100644 (file)
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "tests.h"
-#include "debug.h"
-#include "util.h"
 #include "c++/clang-c.h"
 #include <linux/kernel.h>
 
index c1c29e0..f5764a3 100644 (file)
 #include "evlist.h"
 #include "evsel.h"
 #include "thread_map.h"
-#include "cpumap.h"
 #include "machine.h"
 #include "map.h"
 #include "symbol.h"
 #include "event.h"
 #include "record.h"
+#include "util/mmap.h"
+#include "util/synthetic-events.h"
 #include "thread.h"
 
 #include "tests.h"
@@ -419,10 +420,10 @@ static int process_events(struct machine *machine, struct evlist *evlist,
                          struct state *state)
 {
        union perf_event *event;
-       struct perf_mmap *md;
+       struct mmap *md;
        int i, ret;
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
                md = &evlist->mmap[i];
                if (perf_mmap__read_init(md) < 0)
                        continue;
@@ -651,7 +652,7 @@ static int do_test_code_reading(bool try_kcore)
 
                perf_evlist__config(evlist, &opts, NULL);
 
-               evsel = perf_evlist__first(evlist);
+               evsel = evlist__first(evlist);
 
                evsel->core.attr.comm = 1;
                evsel->core.attr.disabled = 1;
@@ -685,9 +686,9 @@ static int do_test_code_reading(bool try_kcore)
                break;
        }
 
-       ret = perf_evlist__mmap(evlist, UINT_MAX);
+       ret = evlist__mmap(evlist, UINT_MAX);
        if (ret < 0) {
-               pr_debug("perf_evlist__mmap failed\n");
+               pr_debug("evlist__mmap failed\n");
                goto out_put;
        }
 
index 39493de..8a0d236 100644 (file)
@@ -3,6 +3,7 @@
 #include <stdio.h>
 #include "cpumap.h"
 #include "event.h"
+#include "util/synthetic-events.h"
 #include <string.h>
 #include <linux/bitops.h>
 #include <perf/cpumap.h>
index a4874d4..627c1aa 100644 (file)
@@ -10,7 +10,6 @@
 #include <sys/resource.h>
 #include <api/fs/fs.h>
 #include "dso.h"
-#include "util.h"
 #include "machine.h"
 #include "symbol.h"
 #include "tests.h"
index 4125255..4f4ecbc 100644 (file)
@@ -15,6 +15,7 @@
 #include "symbol.h"
 #include "thread.h"
 #include "callchain.h"
+#include "util/synthetic-events.h"
 
 #if defined (__x86_64__) || defined (__i386__) || defined (__powerpc__)
 #include "arch-tests.h"
index d824a72..1ee8704 100644 (file)
@@ -9,7 +9,6 @@
 #include "tests.h"
 #include "evlist.h"
 #include "evsel.h"
-#include "util.h"
 #include "debug.h"
 #include "parse-events.h"
 #include "thread_map.h"
@@ -17,7 +16,7 @@
 
 static int attach__enable_on_exec(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__last(evlist);
+       struct evsel *evsel = evlist__last(evlist);
        struct target target = {
                .uid = UINT_MAX,
        };
@@ -59,7 +58,7 @@ static int detach__enable_on_exec(struct evlist *evlist)
 
 static int attach__current_disabled(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__last(evlist);
+       struct evsel *evsel = evlist__last(evlist);
        struct perf_thread_map *threads;
        int err;
 
@@ -85,7 +84,7 @@ static int attach__current_disabled(struct evlist *evlist)
 
 static int attach__current_enabled(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__last(evlist);
+       struct evsel *evsel = evlist__last(evlist);
        struct perf_thread_map *threads;
        int err;
 
@@ -105,14 +104,14 @@ static int attach__current_enabled(struct evlist *evlist)
 
 static int detach__disable(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__last(evlist);
+       struct evsel *evsel = evlist__last(evlist);
 
        return evsel__enable(evsel);
 }
 
 static int attach__cpu_disabled(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__last(evlist);
+       struct evsel *evsel = evlist__last(evlist);
        struct perf_cpu_map *cpus;
        int err;
 
@@ -141,7 +140,7 @@ static int attach__cpu_disabled(struct evlist *evlist)
 
 static int attach__cpu_enabled(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__last(evlist);
+       struct evsel *evsel = evlist__last(evlist);
        struct perf_cpu_map *cpus;
        int err;
 
@@ -181,7 +180,7 @@ static int test_times(int (attach)(struct evlist *),
                goto out_err;
        }
 
-       evsel = perf_evlist__last(evlist);
+       evsel = evlist__last(evlist);
        evsel->core.attr.read_format |=
                PERF_FORMAT_TOTAL_TIME_ENABLED |
                PERF_FORMAT_TOTAL_TIME_RUNNING;
index cac4290..c727379 100644 (file)
@@ -2,10 +2,12 @@
 #include <linux/compiler.h>
 #include <perf/cpumap.h>
 #include <string.h>
+#include "cpumap.h"
 #include "evlist.h"
 #include "evsel.h"
 #include "header.h"
 #include "machine.h"
+#include "util/synthetic-events.h"
 #include "tool.h"
 #include "tests.h"
 #include "debug.h"
@@ -90,12 +92,12 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu
        evlist = perf_evlist__new_default();
        TEST_ASSERT_VAL("failed to get evlist", evlist);
 
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
 
-       TEST_ASSERT_VAL("failed to allos ids",
-                       !perf_evsel__alloc_id(evsel, 1, 1));
+       TEST_ASSERT_VAL("failed to allocate ids",
+                       !perf_evsel__alloc_id(&evsel->core, 1, 1));
 
-       perf_evlist__id_add(evlist, evsel, 0, 0, 123);
+       perf_evlist__id_add(&evlist->core, &evsel->core, 0, 0, 123);
 
        evsel->unit = strdup("KRAVA");
 
index 5330f10..956205b 100644 (file)
@@ -34,7 +34,7 @@ static int perf_evsel__roundtrip_cache_name_test(void)
        }
 
        idx = 0;
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
 
        for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
                for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
index de110d8..6f34d08 100644 (file)
@@ -2,6 +2,7 @@
 #include <inttypes.h>
 #include "util/debug.h"
 #include "util/dso.h"
+#include "util/event.h" // struct perf_sample
 #include "util/map.h"
 #include "util/symbol.h"
 #include "util/sort.h"
@@ -10,6 +11,7 @@
 #include "util/thread.h"
 #include "tests/hists_common.h"
 #include <linux/kernel.h>
+#include <linux/perf_event.h>
 
 static struct {
        u32 pid;
index fa55b7b..6367c8f 100644 (file)
@@ -721,7 +721,7 @@ int test__hists_cumulate(struct test *test __maybe_unused, int subtest __maybe_u
        if (verbose > 1)
                machine__fprintf(machine, stderr);
 
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
 
        for (i = 0; i < ARRAY_SIZE(testcases); i++) {
                err = testcases[i](evsel, machine);
index 8be4d0b..a024d3f 100644 (file)
@@ -8,6 +8,7 @@
 #include "machine.h"
 #include "parse-events.h"
 #include "hists_common.h"
+#include "util/mmap.h"
 #include <errno.h>
 #include <linux/kernel.h>
 
@@ -310,8 +311,8 @@ int test__hists_link(struct test *test __maybe_unused, int subtest __maybe_unuse
                        print_hists_in(hists);
        }
 
-       first = perf_evlist__first(evlist);
-       evsel = perf_evlist__last(evlist);
+       first = evlist__first(evlist);
+       evsel = evlist__last(evlist);
 
        first_hists = evsel__hists(first);
        hists = evsel__hists(evsel);
index 3f6dfa2..38f804f 100644 (file)
@@ -608,7 +608,7 @@ int test__hists_output(struct test *test __maybe_unused, int subtest __maybe_unu
        if (verbose > 1)
                machine__fprintf(machine, stderr);
 
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
 
        for (i = 0; i < ARRAY_SIZE(testcases); i++) {
                err = testcases[i](evsel, machine);
index 9f0762d..92c7d59 100644 (file)
@@ -12,8 +12,8 @@
 #include "evsel.h"
 #include "record.h"
 #include "thread_map.h"
-#include "cpumap.h"
 #include "tests.h"
+#include "util/mmap.h"
 
 #define CHECK__(x) {                           \
        while ((x) < 0) {                       \
 static int find_comm(struct evlist *evlist, const char *comm)
 {
        union perf_event *event;
-       struct perf_mmap *md;
+       struct mmap *md;
        int i, found;
 
        found = 0;
-       for (i = 0; i < evlist->nr_mmaps; i++) {
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
                md = &evlist->mmap[i];
                if (perf_mmap__read_init(md) < 0)
                        continue;
@@ -93,7 +93,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un
 
        perf_evlist__config(evlist, &opts, NULL);
 
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
 
        evsel->core.attr.comm = 1;
        evsel->core.attr.disabled = 1;
@@ -105,7 +105,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un
                goto out_err;
        }
 
-       CHECK__(perf_evlist__mmap(evlist, UINT_MAX));
+       CHECK__(evlist__mmap(evlist, UINT_MAX));
 
        /*
         * First, test that a 'comm' event can be found when the event is
@@ -132,7 +132,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un
 
        evlist__enable(evlist);
 
-       evsel = perf_evlist__last(evlist);
+       evsel = evlist__last(evlist);
 
        CHECK__(evsel__disable(evsel));
 
@@ -143,7 +143,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un
 
        found = find_comm(evlist, comm);
        if (found != 1) {
-               pr_debug("Seconf time, failed to find tracking event.\n");
+               pr_debug("Second time, failed to find tracking event.\n");
                goto out_err;
        }
 
index 022e4c9..ae6cda8 100644 (file)
@@ -7,7 +7,6 @@
 #include "llvm.h"
 #include "tests.h"
 #include "debug.h"
-#include "util.h"
 
 #ifdef HAVE_LIBBPF_SUPPORT
 static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
index 70c4847..c850d16 100644 (file)
@@ -100,7 +100,7 @@ make_install_info   := install-info
 make_install_pdf    := install-pdf
 make_install_prefix       := install prefix=/tmp/krava
 make_install_prefix_slash := install prefix=/tmp/krava/
-make_static         := LDFLAGS=-static
+make_static         := LDFLAGS=-static NO_PERF_READ_VDSO32=1 NO_PERF_READ_VDSOX32=1 NO_JVMTI=1
 
 # all the NO_* variable combined
 make_minimal        := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
@@ -327,6 +327,10 @@ make_kernelsrc_tools:
        (make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \
        test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
+make_libperf:
+       @echo "- make -C lib";
+       make -C lib clean >$@ 2>&1; make -C lib >>$@ 2>&1 && rm $@
+
 FEATURES_DUMP_FILE := $(FULL_O)/BUILD_TEST_FEATURE_DUMP
 FEATURES_DUMP_FILE_STATIC := $(FULL_O)/BUILD_TEST_FEATURE_DUMP_STATIC
 
@@ -365,5 +369,5 @@ $(foreach t,$(run),$(if $(findstring make_static,$(t)),\
                        $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE))))
 endif
 
-.PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools
+.PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools make_libperf
 endif # ifndef MK
index 7672ade..a258bd5 100644 (file)
@@ -4,7 +4,7 @@
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
 #include <perf/cpumap.h>
-#include "cpumap.h"
+#include <internal/cpumap.h>
 #include "debug.h"
 #include "env.h"
 #include "mem2node.h"
index 85e1d73..3a22dce 100644 (file)
@@ -10,8 +10,8 @@
 #include "evlist.h"
 #include "evsel.h"
 #include "thread_map.h"
-#include "cpumap.h"
 #include "tests.h"
+#include "util/mmap.h"
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
@@ -43,7 +43,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
                     expected_nr_events[nsyscalls], i, j;
        struct evsel *evsels[nsyscalls], *evsel;
        char sbuf[STRERR_BUFSIZE];
-       struct perf_mmap *md;
+       struct mmap *md;
 
        threads = thread_map__new(-1, getpid(), UINT_MAX);
        if (threads == NULL) {
@@ -53,7 +53,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
 
        cpus = perf_cpu_map__new(NULL);
        if (cpus == NULL) {
-               pr_debug("cpu_map__new\n");
+               pr_debug("perf_cpu_map__new\n");
                goto out_free_threads;
        }
 
@@ -100,7 +100,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
                expected_nr_events[i] = 1 + rand() % 127;
        }
 
-       if (perf_evlist__mmap(evlist, 128) < 0) {
+       if (evlist__mmap(evlist, 128) < 0) {
                pr_debug("failed to mmap events: %d (%s)\n", errno,
                         str_error_r(errno, sbuf, sizeof(sbuf)));
                goto out_delete_evlist;
index 360d70d..8d9d4cb 100644 (file)
@@ -8,13 +8,15 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include "debug.h"
+#include "event.h"
 #include "tests.h"
 #include "machine.h"
 #include "thread_map.h"
 #include "map.h"
 #include "symbol.h"
+#include "util/synthetic-events.h"
 #include "thread.h"
-#include "util.h"
+#include <internal/lib.h> // page_size
 
 #define THREADS 4
 
index 9171f77..93c1765 100644 (file)
@@ -14,7 +14,8 @@
 #include "evsel.h"
 #include "tests.h"
 #include "thread_map.h"
-#include "cpumap.h"
+#include <perf/cpumap.h>
+#include <internal/cpumap.h>
 #include "debug.h"
 #include "stat.h"
 #include "util/counts.h"
@@ -37,7 +38,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int
 
        cpus = perf_cpu_map__new(NULL);
        if (cpus == NULL) {
-               pr_debug("cpu_map__new\n");
+               pr_debug("perf_cpu_map__new\n");
                goto out_thread_map_delete;
        }
 
index b71167b..2b5c468 100644 (file)
@@ -11,6 +11,7 @@
 #include "record.h"
 #include "tests.h"
 #include "debug.h"
+#include "util/mmap.h"
 #include <errno.h>
 
 #ifndef O_DIRECTORY
@@ -69,9 +70,9 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
                goto out_delete_evlist;
        }
 
-       err = perf_evlist__mmap(evlist, UINT_MAX);
+       err = evlist__mmap(evlist, UINT_MAX);
        if (err < 0) {
-               pr_debug("perf_evlist__mmap: %s\n",
+               pr_debug("evlist__mmap: %s\n",
                         str_error_r(errno, sbuf, sizeof(sbuf)));
                goto out_delete_evlist;
        }
@@ -86,9 +87,9 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
        while (1) {
                int before = nr_events;
 
-               for (i = 0; i < evlist->nr_mmaps; i++) {
+               for (i = 0; i < evlist->core.nr_mmaps; i++) {
                        union perf_event *event;
-                       struct perf_mmap *md;
+                       struct mmap *md;
 
                        md = &evlist->mmap[i];
                        if (perf_mmap__read_init(md) < 0)
@@ -126,7 +127,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
                }
 
                if (nr_events == before)
-                       perf_evlist__poll(evlist, 10);
+                       evlist__poll(evlist, 10);
 
                if (++nr_polls > 5) {
                        pr_debug("%s: no events!\n", __func__);
index 02ba696..25e0ed2 100644 (file)
@@ -6,7 +6,6 @@
 #include "tests.h"
 #include "debug.h"
 #include "pmu.h"
-#include "util.h"
 #include <dirent.h>
 #include <errno.h>
 #include <sys/types.h>
@@ -47,7 +46,7 @@ static bool kvm_s390_create_vm_valid(void)
 
 static int test__checkevent_tracepoint(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups);
@@ -78,7 +77,7 @@ static int test__checkevent_tracepoint_multi(struct evlist *evlist)
 
 static int test__checkevent_raw(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
@@ -88,7 +87,7 @@ static int test__checkevent_raw(struct evlist *evlist)
 
 static int test__checkevent_numeric(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type);
@@ -98,7 +97,7 @@ static int test__checkevent_numeric(struct evlist *evlist)
 
 static int test__checkevent_symbolic_name(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
@@ -109,7 +108,7 @@ static int test__checkevent_symbolic_name(struct evlist *evlist)
 
 static int test__checkevent_symbolic_name_config(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
@@ -130,7 +129,7 @@ static int test__checkevent_symbolic_name_config(struct evlist *evlist)
 
 static int test__checkevent_symbolic_alias(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
@@ -141,7 +140,7 @@ static int test__checkevent_symbolic_alias(struct evlist *evlist)
 
 static int test__checkevent_genhw(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type);
@@ -151,7 +150,7 @@ static int test__checkevent_genhw(struct evlist *evlist)
 
 static int test__checkevent_breakpoint(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
@@ -165,7 +164,7 @@ static int test__checkevent_breakpoint(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_x(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
@@ -178,7 +177,7 @@ static int test__checkevent_breakpoint_x(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_r(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type",
@@ -193,7 +192,7 @@ static int test__checkevent_breakpoint_r(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_w(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type",
@@ -208,7 +207,7 @@ static int test__checkevent_breakpoint_w(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_rw(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type",
@@ -223,7 +222,7 @@ static int test__checkevent_breakpoint_rw(struct evlist *evlist)
 
 static int test__checkevent_tracepoint_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
@@ -254,7 +253,7 @@ test__checkevent_tracepoint_multi_modifier(struct evlist *evlist)
 
 static int test__checkevent_raw_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
@@ -266,7 +265,7 @@ static int test__checkevent_raw_modifier(struct evlist *evlist)
 
 static int test__checkevent_numeric_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
@@ -278,7 +277,7 @@ static int test__checkevent_numeric_modifier(struct evlist *evlist)
 
 static int test__checkevent_symbolic_name_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
@@ -290,7 +289,7 @@ static int test__checkevent_symbolic_name_modifier(struct evlist *evlist)
 
 static int test__checkevent_exclude_host_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
        TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
@@ -300,7 +299,7 @@ static int test__checkevent_exclude_host_modifier(struct evlist *evlist)
 
 static int test__checkevent_exclude_guest_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
        TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
@@ -310,7 +309,7 @@ static int test__checkevent_exclude_guest_modifier(struct evlist *evlist)
 
 static int test__checkevent_symbolic_alias_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
@@ -322,7 +321,7 @@ static int test__checkevent_symbolic_alias_modifier(struct evlist *evlist)
 
 static int test__checkevent_genhw_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
@@ -334,7 +333,7 @@ static int test__checkevent_genhw_modifier(struct evlist *evlist)
 
 static int test__checkevent_exclude_idle_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude idle", evsel->core.attr.exclude_idle);
        TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
@@ -349,7 +348,7 @@ static int test__checkevent_exclude_idle_modifier(struct evlist *evlist)
 
 static int test__checkevent_exclude_idle_modifier_1(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude idle", evsel->core.attr.exclude_idle);
        TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
@@ -364,7 +363,7 @@ static int test__checkevent_exclude_idle_modifier_1(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
 
        TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
@@ -379,7 +378,7 @@ static int test__checkevent_breakpoint_modifier(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_x_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
@@ -393,7 +392,7 @@ static int test__checkevent_breakpoint_x_modifier(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_r_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
@@ -407,7 +406,7 @@ static int test__checkevent_breakpoint_r_modifier(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_w_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
@@ -421,7 +420,7 @@ static int test__checkevent_breakpoint_w_modifier(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_rw_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
@@ -436,7 +435,7 @@ static int test__checkevent_breakpoint_rw_modifier(struct evlist *evlist)
 static int test__checkevent_pmu(struct evlist *evlist)
 {
 
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
@@ -454,7 +453,7 @@ static int test__checkevent_pmu(struct evlist *evlist)
 
 static int test__checkevent_list(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries);
 
@@ -493,7 +492,7 @@ static int test__checkevent_list(struct evlist *evlist)
 
 static int test__checkevent_pmu_name(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        /* cpu/config=1,name=krava/u */
        TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
@@ -514,7 +513,7 @@ static int test__checkevent_pmu_name(struct evlist *evlist)
 
 static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        /* cpu/config=1,call-graph=fp,time,period=100000/ */
        TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
@@ -547,7 +546,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist)
 
 static int test__checkevent_pmu_events(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
@@ -565,7 +564,7 @@ static int test__checkevent_pmu_events(struct evlist *evlist)
 
 static int test__checkevent_pmu_events_mix(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        /* pmu-event:u */
        TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
@@ -643,7 +642,7 @@ static int test__group1(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
        /* instructions:k */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
@@ -685,7 +684,7 @@ static int test__group2(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
        /* faults + :ku modifier */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_SW_PAGE_FAULTS == evsel->core.attr.config);
@@ -740,7 +739,7 @@ static int test__group3(struct evlist *evlist __maybe_unused)
        TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
 
        /* group1 syscalls:sys_enter_openat:H */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong sample_type",
                PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type);
@@ -832,7 +831,7 @@ static int test__group4(struct evlist *evlist __maybe_unused)
        TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
        /* cycles:u + p */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -876,7 +875,7 @@ static int test__group5(struct evlist *evlist __maybe_unused)
        TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
 
        /* cycles + G */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -962,7 +961,7 @@ static int test__group_gh1(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
        /* cycles + :H group modifier */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -1002,7 +1001,7 @@ static int test__group_gh2(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
        /* cycles + :G group modifier */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -1042,7 +1041,7 @@ static int test__group_gh3(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
        /* cycles:G + :u group modifier */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -1082,7 +1081,7 @@ static int test__group_gh4(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
        /* cycles:G + :uG group modifier */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -1121,7 +1120,7 @@ static int test__leader_sample1(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries);
 
        /* cycles - sampling group leader */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -1174,7 +1173,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused)
        TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
 
        /* instructions - sampling group leader */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
@@ -1208,7 +1207,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused)
 
 static int test__checkevent_pinned_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
@@ -1226,7 +1225,7 @@ static int test__pinned_group(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries);
 
        /* cycles - group leader */
-       evsel = leader = perf_evlist__first(evlist);
+       evsel = leader = evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -1252,7 +1251,7 @@ static int test__pinned_group(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_len(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
@@ -1267,7 +1266,7 @@ static int test__checkevent_breakpoint_len(struct evlist *evlist)
 
 static int test__checkevent_breakpoint_len_w(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
@@ -1283,7 +1282,7 @@ static int test__checkevent_breakpoint_len_w(struct evlist *evlist)
 static int
 test__checkevent_breakpoint_len_rw_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
        TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
@@ -1295,7 +1294,7 @@ test__checkevent_breakpoint_len_rw_modifier(struct evlist *evlist)
 
 static int test__checkevent_precise_max_modifier(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
@@ -1306,7 +1305,7 @@ static int test__checkevent_precise_max_modifier(struct evlist *evlist)
 
 static int test__checkevent_config_symbol(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "insn") == 0);
        return 0;
@@ -1314,7 +1313,7 @@ static int test__checkevent_config_symbol(struct evlist *evlist)
 
 static int test__checkevent_config_raw(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "rawpmu") == 0);
        return 0;
@@ -1322,7 +1321,7 @@ static int test__checkevent_config_raw(struct evlist *evlist)
 
 static int test__checkevent_config_num(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "numpmu") == 0);
        return 0;
@@ -1330,7 +1329,7 @@ static int test__checkevent_config_num(struct evlist *evlist)
 
 static int test__checkevent_config_cache(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "cachepmu") == 0);
        return 0;
@@ -1343,7 +1342,7 @@ static bool test__intel_pt_valid(void)
 
 static int test__intel_pt(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "intel_pt//u") == 0);
        return 0;
@@ -1351,7 +1350,7 @@ static int test__intel_pt(struct evlist *evlist)
 
 static int test__checkevent_complex_name(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong complex name parsing", strcmp(evsel->name, "COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks") == 0);
        return 0;
@@ -1359,7 +1358,7 @@ static int test__checkevent_complex_name(struct evlist *evlist)
 
 static int test__sym_event_slash(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE);
        TEST_ASSERT_VAL("wrong config", evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES);
@@ -1369,7 +1368,7 @@ static int test__sym_event_slash(struct evlist *evlist)
 
 static int test__sym_event_dc(struct evlist *evlist)
 {
-       struct evsel *evsel = perf_evlist__first(evlist);
+       struct evsel *evsel = evlist__first(evlist);
 
        TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE);
        TEST_ASSERT_VAL("wrong config", evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES);
index 8284752..adf3c9c 100644 (file)
@@ -1,4 +1,3 @@
-// SPDX-License-Identifier: GPL-2.0
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <stddef.h>
@@ -8,7 +7,6 @@
 #include "event.h"
 #include "evlist.h"
 #include "header.h"
-#include "util.h"
 #include "debug.h"
 
 static int process_event(struct evlist **pevlist, union perf_event *event)
index a693bcf..dbc2719 100644 (file)
@@ -4,7 +4,6 @@
 
 #include "tests.h"
 #include "debug.h"
-#include "util.h"
 #include "perf-hooks.h"
 
 static void sigsegv_handler(int sig __maybe_unused)
index e1b4229..437426b 100644 (file)
@@ -11,6 +11,7 @@
 #include "debug.h"
 #include "record.h"
 #include "tests.h"
+#include "util/mmap.h"
 
 static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
 {
@@ -103,7 +104,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
        /*
         * Config the evsels, setting attr->comm on the first one, etc.
         */
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
        perf_evsel__set_sample_bit(evsel, CPU);
        perf_evsel__set_sample_bit(evsel, TID);
        perf_evsel__set_sample_bit(evsel, TIME);
@@ -143,9 +144,9 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
         * fds in the same CPU to be injected in the same mmap ring buffer
         * (using ioctl(PERF_EVENT_IOC_SET_OUTPUT)).
         */
-       err = perf_evlist__mmap(evlist, opts.mmap_pages);
+       err = evlist__mmap(evlist, opts.mmap_pages);
        if (err < 0) {
-               pr_debug("perf_evlist__mmap: %s\n",
+               pr_debug("evlist__mmap: %s\n",
                         str_error_r(errno, sbuf, sizeof(sbuf)));
                goto out_delete_evlist;
        }
@@ -164,9 +165,9 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
        while (1) {
                int before = total_events;
 
-               for (i = 0; i < evlist->nr_mmaps; i++) {
+               for (i = 0; i < evlist->core.nr_mmaps; i++) {
                        union perf_event *event;
-                       struct perf_mmap *md;
+                       struct mmap *md;
 
                        md = &evlist->mmap[i];
                        if (perf_mmap__read_init(md) < 0)
@@ -286,7 +287,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
                 * perf_event_attr.wakeup_events, just PERF_EVENT_SAMPLE does.
                 */
                if (total_events == before && false)
-                       perf_evlist__poll(evlist, -1);
+                       evlist__poll(evlist, -1);
 
                sleep(1);
                if (++wakeups > 5) {
index 14a7889..74379ff 100644 (file)
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "parse-events.h"
 #include "pmu.h"
-#include "util.h"
 #include "tests.h"
 #include <errno.h>
 #include <stdio.h>
index 5fcc068..3a02426 100644 (file)
@@ -9,10 +9,10 @@
 
 #include "map_symbol.h"
 #include "branch.h"
-#include "util.h"
 #include "event.h"
 #include "evsel.h"
 #include "debug.h"
+#include "util/synthetic-events.h"
 
 #include "tests.h"
 
index cf1bd57..60f0e9e 100644 (file)
@@ -3,6 +3,7 @@
 #include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <sys/epoll.h>
 #include <util/symbol.h>
 #include <linux/filter.h>
index cc10b41..c191150 100644 (file)
@@ -5,6 +5,7 @@
 #include "stat.h"
 #include "counts.h"
 #include "debug.h"
+#include "util/synthetic-events.h"
 
 static bool has_term(struct perf_record_stat_config *config,
                     u64 tag, u64 val)
index 97694a0..84519df 100644 (file)
@@ -12,6 +12,7 @@
 #include "util/evsel.h"
 #include "util/evlist.h"
 #include "util/cpumap.h"
+#include "util/mmap.h"
 #include "util/thread_map.h"
 #include <perf/evlist.h>
 
@@ -42,7 +43,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
        };
        struct perf_cpu_map *cpus;
        struct perf_thread_map *threads;
-       struct perf_mmap *md;
+       struct mmap *md;
 
        attr.sample_freq = 500;
 
@@ -82,7 +83,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
                goto out_delete_evlist;
        }
 
-       err = perf_evlist__mmap(evlist, 128);
+       err = evlist__mmap(evlist, 128);
        if (err < 0) {
                pr_debug("failed to mmap event: %d (%s)\n", errno,
                         str_error_r(errno, sbuf, sizeof(sbuf)));
index 1a60fa1..ffa592e 100644 (file)
@@ -14,9 +14,9 @@
 #include "evlist.h"
 #include "evsel.h"
 #include "thread_map.h"
-#include "cpumap.h"
 #include "record.h"
 #include "tests.h"
+#include "util/mmap.h"
 
 static int spin_sleep(void)
 {
@@ -144,7 +144,7 @@ static int process_sample_event(struct evlist *evlist,
                        return err;
                /*
                 * Check for no missing sched_switch events i.e. that the
-                * evsel->system_wide flag has worked.
+                * evsel->core.system_wide flag has worked.
                 */
                if (switch_tracking->tids[cpu] != -1 &&
                    switch_tracking->tids[cpu] != prev_tid) {
@@ -264,10 +264,10 @@ static int process_events(struct evlist *evlist,
        unsigned pos, cnt = 0;
        LIST_HEAD(events);
        struct event_node *events_array, *node;
-       struct perf_mmap *md;
+       struct mmap *md;
        int i, ret;
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
                md = &evlist->mmap[i];
                if (perf_mmap__read_init(md) < 0)
                        continue;
@@ -316,7 +316,7 @@ out_free_nodes:
  *
  * This function implements a test that checks that sched_switch events and
  * tracking events can be recorded for a workload (current process) using the
- * evsel->system_wide and evsel->tracking flags (respectively) with other events
+ * evsel->core.system_wide and evsel->tracking flags (respectively) with other events
  * sometimes enabled or disabled.
  */
 int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_unused)
@@ -367,7 +367,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
                goto out_err;
        }
 
-       cpu_clocks_evsel = perf_evlist__last(evlist);
+       cpu_clocks_evsel = evlist__last(evlist);
 
        /* Second event */
        err = parse_events(evlist, "cycles:u", NULL);
@@ -376,7 +376,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
                goto out_err;
        }
 
-       cycles_evsel = perf_evlist__last(evlist);
+       cycles_evsel = evlist__last(evlist);
 
        /* Third event */
        if (!perf_evlist__can_select_event(evlist, sched_switch)) {
@@ -391,22 +391,22 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
                goto out_err;
        }
 
-       switch_evsel = perf_evlist__last(evlist);
+       switch_evsel = evlist__last(evlist);
 
        perf_evsel__set_sample_bit(switch_evsel, CPU);
        perf_evsel__set_sample_bit(switch_evsel, TIME);
 
-       switch_evsel->system_wide = true;
+       switch_evsel->core.system_wide = true;
        switch_evsel->no_aux_samples = true;
        switch_evsel->immediate = true;
 
        /* Test moving an event to the front */
-       if (cycles_evsel == perf_evlist__first(evlist)) {
+       if (cycles_evsel == evlist__first(evlist)) {
                pr_debug("cycles event already at front");
                goto out_err;
        }
        perf_evlist__to_front(evlist, cycles_evsel);
-       if (cycles_evsel != perf_evlist__first(evlist)) {
+       if (cycles_evsel != evlist__first(evlist)) {
                pr_debug("Failed to move cycles event to front");
                goto out_err;
        }
@@ -421,7 +421,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
                goto out_err;
        }
 
-       tracking_evsel = perf_evlist__last(evlist);
+       tracking_evsel = evlist__last(evlist);
 
        perf_evlist__set_tracking_event(evlist, tracking_evsel);
 
@@ -434,7 +434,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
        perf_evlist__config(evlist, &opts, NULL);
 
        /* Check moved event is still at the front */
-       if (cycles_evsel != perf_evlist__first(evlist)) {
+       if (cycles_evsel != evlist__first(evlist)) {
                pr_debug("Front event no longer at front");
                goto out_err;
        }
@@ -461,9 +461,9 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
                goto out;
        }
 
-       err = perf_evlist__mmap(evlist, UINT_MAX);
+       err = evlist__mmap(evlist, UINT_MAX);
        if (err) {
-               pr_debug("perf_evlist__mmap failed!\n");
+               pr_debug("evlist__mmap failed!\n");
                goto out_err;
        }
 
index f610e8c..bce3a4c 100644 (file)
@@ -4,12 +4,13 @@
 #include "evsel.h"
 #include "target.h"
 #include "thread_map.h"
-#include "cpumap.h"
 #include "tests.h"
+#include "util/mmap.h"
 
 #include <errno.h>
 #include <signal.h>
 #include <linux/string.h>
+#include <perf/cpumap.h>
 #include <perf/evlist.h>
 
 static int exited;
@@ -51,7 +52,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
        char sbuf[STRERR_BUFSIZE];
        struct perf_cpu_map *cpus;
        struct perf_thread_map *threads;
-       struct perf_mmap *md;
+       struct mmap *md;
 
        signal(SIGCHLD, sig_handler);
 
@@ -87,7 +88,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
                goto out_delete_evlist;
        }
 
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
        evsel->core.attr.task = 1;
 #ifdef __s390x__
        evsel->core.attr.sample_freq = 1000000;
@@ -106,7 +107,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
                goto out_delete_evlist;
        }
 
-       if (perf_evlist__mmap(evlist, 128) < 0) {
+       if (evlist__mmap(evlist, 128) < 0) {
                pr_debug("failed to mmap events: %d (%s)\n", errno,
                         str_error_r(errno, sbuf, sizeof(sbuf)));
                goto out_delete_evlist;
@@ -129,7 +130,7 @@ retry:
 
 out_init:
        if (!exited || !nr_exit) {
-               perf_evlist__poll(evlist, -1);
+               evlist__poll(evlist, -1);
                goto retry;
        }
 
index 39168c5..28f51c4 100644 (file)
@@ -8,6 +8,7 @@
 #include "thread_map.h"
 #include "debug.h"
 #include "event.h"
+#include "util/synthetic-events.h"
 #include <linux/zalloc.h>
 #include <perf/event.h>
 
index a4f9f51..4a80049 100644 (file)
@@ -3,11 +3,12 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <perf/cpumap.h>
+#include "cpumap.h"
 #include "tests.h"
-#include "util.h"
 #include "session.h"
 #include "evlist.h"
 #include "debug.h"
+#include <linux/err.h>
 
 #define TEMPL "/tmp/perf-test-XXXXXX"
 #define DATA_SIZE      10
@@ -39,7 +40,7 @@ static int session_write_header(char *path)
        };
 
        session = perf_session__new(&data, false, NULL);
-       TEST_ASSERT_VAL("can't get session", session);
+       TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
 
        session->evlist = perf_evlist__new_default();
        TEST_ASSERT_VAL("can't get evlist", session->evlist);
@@ -70,7 +71,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
        int i;
 
        session = perf_session__new(&data, false, NULL);
-       TEST_ASSERT_VAL("can't get session", session);
+       TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
 
        /* On platforms with large numbers of CPUs process_cpu_topology()
         * might issue an error while reading the perf.data file section
index 01f434c..aa296ff 100644 (file)
@@ -7,7 +7,7 @@
 #include "dso.h"
 #include "map.h"
 #include "symbol.h"
-#include "util.h"
+#include <internal/lib.h> // page_size
 #include "tests.h"
 #include "debug.h"
 #include "machine.h"
index f93d40b..781afe4 100644 (file)
@@ -1,5 +1,4 @@
 // SPDX-License-Identifier: GPL-2.0
-#include "../util/util.h"
 #include "../util/string2.h"
 #include "../util/config.h"
 #include "libslang.h"
index ac74ed2..82207db 100644 (file)
@@ -2,7 +2,6 @@
 #include "../browser.h"
 #include "../helpline.h"
 #include "../ui.h"
-#include "../util.h"
 #include "../../util/annotate.h"
 #include "../../util/debug.h"
 #include "../../util/dso.h"
index 0f59a70..57e6e43 100644 (file)
@@ -1,5 +1,4 @@
 // SPDX-License-Identifier: GPL-2.0
-#include "util/debug.h"
 #include "ui/browser.h"
 #include "ui/keysyms.h"
 #include "ui/ui.h"
index 589168c..7a7187e 100644 (file)
@@ -3319,13 +3319,13 @@ browse_hists:
                        switch (key) {
                        case K_TAB:
                                if (pos->core.node.next == &evlist->core.entries)
-                                       pos = perf_evlist__first(evlist);
+                                       pos = evlist__first(evlist);
                                else
                                        pos = perf_evsel__next(pos);
                                goto browse_hists;
                        case K_UNTAB:
                                if (pos->core.node.prev == &evlist->core.entries)
-                                       pos = perf_evlist__last(evlist);
+                                       pos = evlist__last(evlist);
                                else
                                        pos = perf_evsel__prev(pos);
                                goto browse_hists;
@@ -3417,7 +3417,7 @@ int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help,
 
 single_entry:
        if (nr_entries == 1) {
-               struct evsel *first = perf_evlist__first(evlist);
+               struct evsel *first = evlist__first(evlist);
 
                return perf_evsel__hists_browse(first, nr_entries, help,
                                                false, hbt, min_pcnt,
index 893b065..3d49b91 100644 (file)
@@ -5,7 +5,6 @@
 #include <stdlib.h>
 #include <string.h>
 #include <linux/bitops.h>
-#include "../../util/util.h"
 #include "../../util/debug.h"
 #include "../../util/map.h"
 #include "../../util/dso.h"
index f16a38f..76d356a 100644 (file)
@@ -7,7 +7,7 @@
 #include "config.h"
 #include "time-utils.h"
 #include "../util.h"
-#include "../../util/util.h"
+#include "../../util/util.h" // perf_exe()
 #include "../../perf.h"
 #include <stdlib.h>
 #include <string.h>
index 586a21a..fc733a6 100644 (file)
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "../../builtin.h"
 #include "../../perf.h"
-#include "../../util/util.h"
+#include "../../util/util.h" // perf_exe()
+#include "../util.h"
 #include "../../util/hist.h"
 #include "../../util/debug.h"
 #include "../../util/symbol.h"
index e166da9..e40a006 100644 (file)
@@ -6,7 +6,6 @@
 #include "gtk.h"
 #include "../ui.h"
 #include "../helpline.h"
-#include "../../util/debug.h"
 
 static void gtk_helpline_pop(void)
 {
index 6c2efc1..ed1a97b 100644 (file)
@@ -8,6 +8,7 @@
 #include "../string2.h"
 #include "gtk.h"
 #include <signal.h>
+#include <stdlib.h>
 #include <linux/string.h>
 
 #define MAX_COLUMNS                    32
index b6ad885..eea6fcd 100644 (file)
@@ -3,7 +3,6 @@
 
 #include "gtk.h"
 #include "../progress.h"
-#include "util.h"
 
 static GtkWidget *dialog;
 static GtkWidget *progress;
index 1a2616b..f5eee4d 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "gtk.h"
-#include "../../util/debug.h"
+#include <linux/compiler.h>
+#include "../util.h"
 
 extern struct perf_error_ops perf_gtk_eops;
 
index c2c5589..c47f5c3 100644 (file)
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "../util.h"
-#include "../../util/debug.h"
 #include "gtk.h"
 
 #include <stdlib.h>
index 54bcd08..911182b 100644 (file)
@@ -3,10 +3,8 @@
 #include <stdlib.h>
 #include <string.h>
 
-#include "../util/debug.h"
 #include "helpline.h"
 #include "ui.h"
-#include "../util/util.h"
 
 char ui_helpline__current[512];
 
index 3e533de..f736755 100644 (file)
@@ -8,7 +8,6 @@
 #include "../util/callchain.h"
 #include "../util/debug.h"
 #include "../util/hist.h"
-#include "../util/util.h"
 #include "../util/sort.h"
 #include "../util/evsel.h"
 #include "../util/evlist.h"
index c7a86b4..700335c 100644 (file)
@@ -1,11 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <pthread.h>
 #include <dlfcn.h>
+#include <unistd.h>
 
 #include <subcmd/pager.h>
 #include "../util/debug.h"
 #include "../util/hist.h"
-#include "../util/util.h"
 #include "ui.h"
 
 pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
index 832ca6c..5365606 100644 (file)
@@ -5,6 +5,7 @@
 
 #include "../../util/callchain.h"
 #include "../../util/debug.h"
+#include "../../util/event.h"
 #include "../../util/hist.h"
 #include "../../util/map.h"
 #include "../../util/map_groups.h"
index 5f188f6..298d6af 100644 (file)
@@ -6,7 +6,6 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 
-#include "../../util/debug.h"
 #include "../helpline.h"
 #include "../ui.h"
 #include "../libslang.h"
index 56651a4..e9bfe85 100644 (file)
@@ -2,13 +2,13 @@
 #include <signal.h>
 #include <stdbool.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <linux/kernel.h>
 #ifdef HAVE_BACKTRACE_SUPPORT
 #include <execinfo.h>
 #endif
 
 #include "../../util/debug.h"
-#include "../../util/util.h"
 #include "../../perf.h"
 #include "../browser.h"
 #include "../helpline.h"
index 087d9ab..b98dd0e 100644 (file)
@@ -5,7 +5,6 @@
 #include <stdlib.h>
 #include <sys/ttydefaults.h>
 
-#include "../../util/debug.h"
 #include "../browser.h"
 #include "../keysyms.h"
 #include "../helpline.h"
index 0b4d8e0..8dcfca1 100644 (file)
@@ -3,6 +3,7 @@ perf-y += block-range.o
 perf-y += build-id.o
 perf-y += cacheline.o
 perf-y += config.o
+perf-y += copyfile.o
 perf-y += ctype.o
 perf-y += db-export.o
 perf-y += env.o
@@ -10,6 +11,7 @@ perf-y += event.o
 perf-y += evlist.o
 perf-y += evsel.o
 perf-y += evsel_fprintf.o
+perf-y += perf_event_attr_fprintf.o
 perf-y += evswitch.o
 perf-y += find_bit.o
 perf-y += get_current_dir_name.o
@@ -86,6 +88,7 @@ perf-y += stat-display.o
 perf-y += record.o
 perf-y += srcline.o
 perf-y += srccode.o
+perf-y += synthetic-events.o
 perf-y += data.o
 perf-y += tsc.o
 perf-y += cloexec.o
index 1748f52..e830ead 100644 (file)
@@ -14,7 +14,7 @@
 #include <bpf/btf.h>
 #include <bpf/libbpf.h>
 #include <linux/btf.h>
-#include "util.h"
+#include "util.h" // hex_width()
 #include "ui/ui.h"
 #include "sort.h"
 #include "build-id.h"
@@ -34,6 +34,7 @@
 #include "bpf-event.h"
 #include "block-range.h"
 #include "string2.h"
+#include "util/event.h"
 #include "arch/common.h"
 #include <regex.h>
 #include <pthread.h>
index 8a7340f..53be12b 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/log2.h>
 #include <linux/zalloc.h>
 
-#include "cpumap.h"
 #include "color.h"
 #include "evsel.h"
 #include "machine.h"
index 6f25224..8470dfe 100644 (file)
@@ -31,8 +31,8 @@
 #include "map.h"
 #include "pmu.h"
 #include "evsel.h"
-#include "cpumap.h"
 #include "symbol.h"
+#include "util/synthetic-events.h"
 #include "thread_map.h"
 #include "asm/bug.h"
 #include "auxtrace.h"
 #include "intel-bts.h"
 #include "arm-spe.h"
 #include "s390-cpumsf.h"
-#include "util.h"
+#include "util/mmap.h"
 
 #include <linux/ctype.h>
+#include <linux/kernel.h>
 #include "symbol/kallsyms.h"
+#include <internal/lib.h>
 
 static bool auxtrace__dont_decode(struct perf_session *session)
 {
@@ -1226,7 +1228,7 @@ int perf_event__process_auxtrace_error(struct perf_session *session,
        return 0;
 }
 
-static int __auxtrace_mmap__read(struct perf_mmap *map,
+static int __auxtrace_mmap__read(struct mmap *map,
                                 struct auxtrace_record *itr,
                                 struct perf_tool *tool, process_auxtrace_t fn,
                                 bool snapshot, size_t snapshot_size)
@@ -1337,13 +1339,13 @@ static int __auxtrace_mmap__read(struct perf_mmap *map,
        return 1;
 }
 
-int auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr,
+int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr,
                        struct perf_tool *tool, process_auxtrace_t fn)
 {
        return __auxtrace_mmap__read(map, itr, tool, fn, false, 0);
 }
 
-int auxtrace_mmap__read_snapshot(struct perf_mmap *map,
+int auxtrace_mmap__read_snapshot(struct mmap *map,
                                 struct auxtrace_record *itr,
                                 struct perf_tool *tool, process_auxtrace_t fn,
                                 size_t snapshot_size)
index 37e70dc..f201f36 100644 (file)
 #include <errno.h>
 #include <stdbool.h>
 #include <stddef.h>
+#include <stdio.h> // FILE
 #include <linux/list.h>
 #include <linux/perf_event.h>
 #include <linux/types.h>
 #include <asm/bitsperlong.h>
 #include <asm/barrier.h>
 
-#include "event.h"
-
 union perf_event;
 struct perf_session;
 struct evlist;
 struct perf_tool;
-struct perf_mmap;
+struct mmap;
+struct perf_sample;
 struct option;
 struct record_opts;
+struct perf_record_auxtrace_error;
 struct perf_record_auxtrace_info;
 struct events_stats;
 
@@ -444,14 +445,14 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
                                   bool per_cpu);
 
 typedef int (*process_auxtrace_t)(struct perf_tool *tool,
-                                 struct perf_mmap *map,
+                                 struct mmap *map,
                                  union perf_event *event, void *data1,
                                  size_t len1, void *data2, size_t len2);
 
-int auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr,
+int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr,
                        struct perf_tool *tool, process_auxtrace_t fn);
 
-int auxtrace_mmap__read_snapshot(struct perf_mmap *map,
+int auxtrace_mmap__read_snapshot(struct mmap *map,
                                 struct auxtrace_record *itr,
                                 struct perf_tool *tool, process_auxtrace_t fn,
                                 size_t snapshot_size);
@@ -524,10 +525,6 @@ void auxtrace_synth_error(struct perf_record_auxtrace_error *auxtrace_error, int
                          int code, int cpu, pid_t pid, pid_t tid, u64 ip,
                          const char *msg, u64 timestamp);
 
-int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
-                                        struct perf_tool *tool,
-                                        struct perf_session *session,
-                                        perf_event__handler_t process);
 int perf_event__process_auxtrace_info(struct perf_session *session,
                                      union perf_event *event);
 s64 perf_event__process_auxtrace(struct perf_session *session,
@@ -604,15 +601,6 @@ void auxtrace_record__free(struct auxtrace_record *itr __maybe_unused)
 {
 }
 
-static inline int
-perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused,
-                                    struct perf_tool *tool __maybe_unused,
-                                    struct perf_session *session __maybe_unused,
-                                    perf_event__handler_t process __maybe_unused)
-{
-       return -EINVAL;
-}
-
 static inline
 int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused,
                             struct evlist *evlist __maybe_unused,
index 7a3d4b1..f7ed5d1 100644 (file)
@@ -16,6 +16,7 @@
 #include "map.h"
 #include "evlist.h"
 #include "record.h"
+#include "util/synthetic-events.h"
 
 #define ptr_to_u64(ptr)    ((__u64)(unsigned long)(ptr))
 
index a01c2fd..81fdc88 100644 (file)
@@ -6,9 +6,9 @@
 #include <linux/rbtree.h>
 #include <pthread.h>
 #include <api/fd/array.h>
-#include "event.h"
 #include <stdio.h>
 
+struct bpf_prog_info;
 struct machine;
 union perf_event;
 struct perf_env;
@@ -33,11 +33,6 @@ struct btf_node {
 #ifdef HAVE_LIBBPF_SUPPORT
 int machine__process_bpf(struct machine *machine, union perf_event *event,
                         struct perf_sample *sample);
-
-int perf_event__synthesize_bpf_events(struct perf_session *session,
-                                     perf_event__handler_t process,
-                                     struct machine *machine,
-                                     struct record_opts *opts);
 int bpf_event__add_sb_event(struct evlist **evlist,
                                 struct perf_env *env);
 void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
@@ -51,14 +46,6 @@ static inline int machine__process_bpf(struct machine *machine __maybe_unused,
        return 0;
 }
 
-static inline int perf_event__synthesize_bpf_events(struct perf_session *session __maybe_unused,
-                                                   perf_event__handler_t process __maybe_unused,
-                                                   struct machine *machine __maybe_unused,
-                                                   struct record_opts *opts __maybe_unused)
-{
-       return 0;
-}
-
 static inline int bpf_event__add_sb_event(struct evlist **evlist __maybe_unused,
                                          struct perf_env *env __maybe_unused)
 {
index 37283e8..10c187b 100644 (file)
@@ -1568,7 +1568,7 @@ struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name)
                        return ERR_PTR(-err);
                }
 
-               evsel = perf_evlist__last(evlist);
+               evsel = evlist__last(evlist);
        }
 
        bpf__for_each_map_named(map, obj, tmp, name) {
index 9d1e090..2285b1e 100644 (file)
@@ -1,5 +1,3 @@
-#include "util/util.h"
-#include "util/debug.h"
 #include "util/map_symbol.h"
 #include "util/branch.h"
 #include <linux/kernel.h>
index 06f66da..88e00d2 100644 (file)
@@ -1,8 +1,15 @@
 #ifndef _PERF_BRANCH_H
 #define _PERF_BRANCH_H 1
-
+/*
+ * The linux/stddef.h isn't needed here, but is needed for __always_inline used
+ * in files included from uapi/linux/perf_event.h such as
+ * /usr/include/linux/swab.h and /usr/include/linux/byteorder/little_endian.h,
+ * detected in at least musl libc, used in Alpine Linux. -acme
+ */
 #include <stdio.h>
 #include <stdint.h>
+#include <linux/compiler.h>
+#include <linux/stddef.h>
 #include <linux/perf_event.h>
 #include <linux/types.h>
 
index e5fb777..c076fc7 100644 (file)
@@ -7,12 +7,13 @@
  * Copyright (C) 2009, 2010 Red Hat Inc.
  * Copyright (C) 2009, 2010 Arnaldo Carvalho de Melo <acme@redhat.com>
  */
-#include "util.h"
+#include "util.h" // lsdir(), mkdir_p(), rm_rf()
 #include <dirent.h>
 #include <errno.h>
 #include <stdio.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include "util/copyfile.h"
 #include "dso.h"
 #include "build-id.h"
 #include "event.h"
index c14646c..9a9b56e 100644 (file)
@@ -23,6 +23,7 @@
 
 #include "debug.h"
 #include "dso.h"
+#include "event.h"
 #include "hist.h"
 #include "sort.h"
 #include "machine.h"
index b042cee..83398e5 100644 (file)
@@ -4,12 +4,15 @@
 
 #include <linux/list.h>
 #include <linux/rbtree.h>
-#include "event.h"
 #include "map_symbol.h"
 #include "branch.h"
 
+struct addr_location;
 struct evsel;
+struct ip_callchain;
 struct map;
+struct perf_sample;
+struct thread;
 
 #define HELP_PAD "\t\t\t\t"
 
index 4e904fc..a12872f 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <errno.h>
 #include <sched.h>
-#include "util.h"
+#include "util.h" // for sched_getcpu()
 #include "../perf-sys.h"
 #include "cloexec.h"
 #include "event.h"
diff --git a/tools/perf/util/copyfile.c b/tools/perf/util/copyfile.c
new file mode 100644 (file)
index 0000000..3fa0db1
--- /dev/null
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/copyfile.h"
+#include "util/namespaces.h"
+#include <internal/lib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * Line-by-line fallback copy, used by copyfile_mode_ns() when stat() reports
+ * a zero st_size for the source (e.g. files under /proc).
+ *
+ * @from is opened between nsinfo__mountns_enter() and nsinfo__mountns_exit()
+ * for @nsi; @to is opened after the exit call.
+ *
+ * Returns 0 on success, -1 if either file cannot be opened, if reading @from
+ * fails, or if writing @to fails (including the final flush in fclose()).
+ */
+static int slow_copyfile(const char *from, const char *to, struct nsinfo *nsi)
+{
+       int err = -1;
+       char *line = NULL;
+       size_t n = 0;
+       FILE *from_fp, *to_fp;
+       struct nscookie nsc;
+
+       nsinfo__mountns_enter(nsi, &nsc);
+       from_fp = fopen(from, "r");
+       nsinfo__mountns_exit(&nsc);
+       if (from_fp == NULL)
+               goto out;
+
+       to_fp = fopen(to, "w");
+       if (to_fp == NULL)
+               goto out_fclose_from;
+
+       while (getline(&line, &n, from_fp) > 0)
+               if (fputs(line, to_fp) == EOF)
+                       goto out_fclose_to;
+       /* getline() returns -1 for both EOF and read errors; tell them apart */
+       if (ferror(from_fp))
+               goto out_fclose_to;
+       err = 0;
+out_fclose_to:
+       /*
+        * stdio buffers the output, so a write error may only surface when
+        * fclose() flushes; do not report success in that case.
+        */
+       if (fclose(to_fp) == EOF)
+               err = -1;
+       free(line);
+out_fclose_from:
+       fclose(from_fp);
+out:
+       return err;
+}
+
+/*
+ * Copy @size bytes from offset @off_in of @ifd to offset @off_out of @ofd.
+ *
+ * The source range is mmap()ed read-only, starting at the page boundary at
+ * or below @off_in, and written out with pwrite().  Writes interrupted by
+ * EINTR are retried and short writes are continued.
+ *
+ * Returns 0 when all @size bytes were written, -1 if the mapping fails or
+ * pwrite() fails or returns 0 before everything was written.
+ */
+int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
+{
+       void *ptr;
+       loff_t pgoff;
+       size_t map_len;
+
+       /*
+        * Build the mask in loff_t width.  NOTE(review): if page_size is
+        * declared as a 32-bit unsigned type, ~(page_size - 1) zero-extends
+        * and pgoff would lose the upper 32 bits of off_in - confirm against
+        * the declaration of page_size.
+        */
+       pgoff = off_in & ~((loff_t)page_size - 1);
+       off_in -= pgoff;
+
+       /* Remember the mapping length: off_in and size both change below. */
+       map_len = off_in + size;
+
+       ptr = mmap(NULL, map_len, PROT_READ, MAP_PRIVATE, ifd, pgoff);
+       if (ptr == MAP_FAILED)
+               return -1;
+
+       while (size) {
+               ssize_t ret = pwrite(ofd, ptr + off_in, size, off_out);
+               if (ret < 0 && errno == EINTR)
+                       continue;
+               if (ret <= 0)
+                       break;
+
+               size -= ret;
+               off_in += ret;
+               off_out += ret;
+       }
+       munmap(ptr, map_len);
+
+       /* Anything left over means pwrite() gave up early. */
+       return size ? -1 : 0;
+}
+
+/*
+ * Copy @from to @to, giving the new file the permission bits @mode.
+ *
+ * The data is first written to a temporary file created with mkstemp() in
+ * the same directory as @to (its name is @to's basename prefixed with '.'
+ * and suffixed with a random part).  On success the temporary file is
+ * link()ed to @to; it is unlinked in all cases.  @to must contain a '/',
+ * otherwise the function fails.
+ *
+ * stat() and open() of @from are bracketed by nsinfo__mountns_enter() and
+ * nsinfo__mountns_exit() for @nsi - presumably so that @from is resolved in
+ * that mount namespace; verify against util/namespaces.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int copyfile_mode_ns(const char *from, const char *to, mode_t mode,
+                           struct nsinfo *nsi)
+{
+       int fromfd, tofd;
+       struct stat st;
+       int err;
+       char *tmp = NULL, *ptr = NULL;
+       struct nscookie nsc;
+
+       nsinfo__mountns_enter(nsi, &nsc);
+       err = stat(from, &st);
+       nsinfo__mountns_exit(&nsc);
+       if (err)
+               goto out;
+       /* From here on every early exit reports failure unless err is reset. */
+       err = -1;
+
+       /* extra 'x' at the end is to reserve space for '.' */
+       if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) {
+               tmp = NULL;
+               goto out;
+       }
+       ptr = strrchr(tmp, '/');
+       if (!ptr)
+               goto out;
+       /*
+        * Shift "/name.XXXXXX" one byte to the right, overwriting the spare
+        * trailing 'x', then turn the copied '/' into '.', which yields
+        * "dir/.name.XXXXXX" as the mkstemp() template.
+        */
+       ptr = memmove(ptr + 1, ptr, strlen(ptr) - 1);
+       *ptr = '.';
+
+       tofd = mkstemp(tmp);
+       if (tofd < 0)
+               goto out;
+
+       if (fchmod(tofd, mode))
+               goto out_close_to;
+
+       if (st.st_size == 0) { /* /proc? do it slowly... */
+               err = slow_copyfile(from, tmp, nsi);
+               goto out_close_to;
+       }
+
+       nsinfo__mountns_enter(nsi, &nsc);
+       fromfd = open(from, O_RDONLY);
+       nsinfo__mountns_exit(&nsc);
+       if (fromfd < 0)
+               goto out_close_to;
+
+       err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size);
+
+       close(fromfd);
+out_close_to:
+       close(tofd);
+       /* Only give the copy its final name if all the data made it out. */
+       if (!err)
+               err = link(tmp, to);
+       /* The temporary name is removed whether or not the link succeeded. */
+       unlink(tmp);
+out:
+       free(tmp);
+       return err;
+}
+
+/*
+ * Copy @from to @to with permission bits 0755, passing @nsi through to
+ * copyfile_mode_ns().  Returns that function's result.
+ */
+int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi)
+{
+       const mode_t default_mode = 0755;
+
+       return copyfile_mode_ns(from, to, default_mode, nsi);
+}
+
+/*
+ * Copy @from to @to with permission bits @mode and no nsinfo (NULL is
+ * passed to copyfile_mode_ns()).  Returns that function's result.
+ */
+int copyfile_mode(const char *from, const char *to, mode_t mode)
+{
+       struct nsinfo *no_nsi = NULL;
+
+       return copyfile_mode_ns(from, to, mode, no_nsi);
+}
+
+/*
+ * Copy @from to @to with permission bits 0755 via copyfile_mode().
+ * Returns that function's result.
+ */
+int copyfile(const char *from, const char *to)
+{
+       const mode_t default_mode = 0755;
+
+       return copyfile_mode(from, to, default_mode);
+}
diff --git a/tools/perf/util/copyfile.h b/tools/perf/util/copyfile.h
new file mode 100644 (file)
index 0000000..e85d2f2
--- /dev/null
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef PERF_COPYFILE_H_
+#define PERF_COPYFILE_H_
+
+#include <linux/types.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+/* Only used through a pointer here, so a forward declaration is enough. */
+struct nsinfo;
+
+/* Copy 'from' to 'to'; the new file gets mode 0755. */
+int copyfile(const char *from, const char *to);
+/* Like copyfile(), with the caller choosing the permission bits. */
+int copyfile_mode(const char *from, const char *to, mode_t mode);
+/* Like copyfile(), with 'nsi' passed on for accessing 'from'. */
+int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi);
+/*
+ * NOTE(review): definition not visible here; presumably copies 'size' bytes
+ * from 'ifd' at 'off_in' to 'ofd' at 'off_out' - confirm in copyfile.c.
+ */
+int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size);
+
+#endif // PERF_COPYFILE_H_
index 37d7c49..cd92a99 100644 (file)
@@ -17,7 +17,6 @@
 #include "cs-etm.h"
 #include "cs-etm-decoder.h"
 #include "intlist.h"
-#include "util.h"
 
 /* use raw logging */
 #ifdef CS_DEBUG_RAW
index 707afdb..4ba0f87 100644 (file)
@@ -35,7 +35,7 @@
 #include "thread.h"
 #include "thread-stack.h"
 #include <tools/libc_compat.h>
-#include "util.h"
+#include "util/synthetic-events.h"
 
 #define MAX_TIMESTAMP (~0ULL)
 
@@ -1298,7 +1298,7 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
        attr.read_format = evsel->core.attr.read_format;
 
        /* create new id val to be a fixed offset from evsel id */
-       id = evsel->id[0] + 1000000000;
+       id = evsel->core.id[0] + 1000000000;
 
        if (!id)
                id = 1;
index 0c26844..dbc772b 100644 (file)
@@ -30,6 +30,7 @@
 #include "machine.h"
 #include "config.h"
 #include <linux/ctype.h>
+#include <linux/err.h>
 
 #define pr_N(n, fmt, ...) \
        eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__)
@@ -1619,8 +1620,10 @@ int bt_convert__perf2ctf(const char *input, const char *path,
        err = -1;
        /* perf.data session */
        session = perf_session__new(&data, 0, &c.tool);
-       if (!session)
+       if (IS_ERR(session)) {
+               err = PTR_ERR(session);
                goto free_writer;
+       }
 
        if (c.queue_size) {
                ordered_events__set_alloc_size(&session->ordered_events,
index e75c3a2..88fba2b 100644 (file)
 #include <dirent.h>
 
 #include "data.h"
-#include "util.h"
+#include "util.h" // rm_rf_perf_data()
 #include "debug.h"
 #include "header.h"
+#include <internal/lib.h>
 
 static void close_dir(struct perf_data_file *files, int nr)
 {
index a1b59bd..e55114f 100644 (file)
@@ -17,7 +17,6 @@
 #include "event.h"
 #include "debug.h"
 #include "print_binary.h"
-#include "util.h"
 #include "target.h"
 #include "ui/helpline.h"
 #include "ui/ui.h"
index b2deee9..d25ae1c 100644 (file)
@@ -3,9 +3,9 @@
 #ifndef __PERF_DEBUG_H
 #define __PERF_DEBUG_H
 
+#include <stdarg.h>
 #include <stdbool.h>
 #include <linux/compiler.h>
-#include "../ui/util.h"
 
 extern int verbose;
 extern bool quiet, dump_trace;
index 763328c..6fb7f34 100644 (file)
@@ -3,7 +3,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include "debug.h"
 #include "symbol.h"
 
 #include "demangle-java.h"
index 423afbb..a659fc6 100644 (file)
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <string.h>
-#include "util.h"
 #include "debug.h"
 
 #include "demangle-rust.h"
index db55edd..1b49ece 100644 (file)
@@ -5,7 +5,6 @@
  * Written by: Masami Hiramatsu <mhiramat@kernel.org>
  */
 
-#include <util.h>
 #include <debug.h>
 #include <dwarf-regs.h>
 #include <elf.h>
index d8e083d..db40906 100644 (file)
@@ -4,9 +4,10 @@
 
 #include <linux/types.h>
 #include <linux/rbtree.h>
-#include "cpumap.h"
 #include "rwsem.h"
 
+struct perf_cpu_map;
+
 struct cpu_topology_map {
        int     socket_id;
        int     die_id;
index f4afbb8..fc1e5a9 100644 (file)
@@ -1,16 +1,16 @@
-#include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <inttypes.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
+#include <perf/cpumap.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
-#include <api/fs/fs.h>
 #include <linux/perf_event.h>
 #include <linux/zalloc.h>
+#include "cpumap.h"
 #include "dso.h"
 #include "event.h"
 #include "debug.h"
@@ -24,6 +24,7 @@
 #include "time-utils.h"
 #include <linux/ctype.h>
 #include "map.h"
+#include "util/namespaces.h"
 #include "symbol.h"
 #include "symbol/kallsyms.h"
 #include "asm/bug.h"
@@ -33,8 +34,6 @@
 #include "tool.h"
 #include "../perf.h"
 
-#define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500
-
 static const char *perf_event__names[] = {
        [0]                                     = "TOTAL",
        [PERF_RECORD_MMAP]                      = "MMAP",
@@ -75,18 +74,6 @@ static const char *perf_event__names[] = {
        [PERF_RECORD_COMPRESSED]                = "COMPRESSED",
 };
 
-static const char *perf_ns__names[] = {
-       [NET_NS_INDEX]          = "net",
-       [UTS_NS_INDEX]          = "uts",
-       [IPC_NS_INDEX]          = "ipc",
-       [PID_NS_INDEX]          = "pid",
-       [USER_NS_INDEX]         = "user",
-       [MNT_NS_INDEX]          = "mnt",
-       [CGROUP_NS_INDEX]       = "cgroup",
-};
-
-unsigned int proc_map_timeout = DEFAULT_PROC_MAP_PARSE_TIMEOUT;
-
 const char *perf_event__name(unsigned int id)
 {
        if (id >= ARRAY_SIZE(perf_event__names))
@@ -96,775 +83,6 @@ const char *perf_event__name(unsigned int id)
        return perf_event__names[id];
 }
 
-static const char *perf_ns__name(unsigned int id)
-{
-       if (id >= ARRAY_SIZE(perf_ns__names))
-               return "UNKNOWN";
-       return perf_ns__names[id];
-}
-
-int perf_tool__process_synth_event(struct perf_tool *tool,
-                                  union perf_event *event,
-                                  struct machine *machine,
-                                  perf_event__handler_t process)
-{
-       struct perf_sample synth_sample = {
-       .pid       = -1,
-       .tid       = -1,
-       .time      = -1,
-       .stream_id = -1,
-       .cpu       = -1,
-       .period    = 1,
-       .cpumode   = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK,
-       };
-
-       return process(tool, event, &synth_sample, machine);
-};
-
-/*
- * Assumes that the first 4095 bytes of /proc/pid/stat contains
- * the comm, tgid and ppid.
- */
-static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
-                                   pid_t *tgid, pid_t *ppid)
-{
-       char filename[PATH_MAX];
-       char bf[4096];
-       int fd;
-       size_t size = 0;
-       ssize_t n;
-       char *name, *tgids, *ppids;
-
-       *tgid = -1;
-       *ppid = -1;
-
-       snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
-
-       fd = open(filename, O_RDONLY);
-       if (fd < 0) {
-               pr_debug("couldn't open %s\n", filename);
-               return -1;
-       }
-
-       n = read(fd, bf, sizeof(bf) - 1);
-       close(fd);
-       if (n <= 0) {
-               pr_warning("Couldn't get COMM, tigd and ppid for pid %d\n",
-                          pid);
-               return -1;
-       }
-       bf[n] = '\0';
-
-       name = strstr(bf, "Name:");
-       tgids = strstr(bf, "Tgid:");
-       ppids = strstr(bf, "PPid:");
-
-       if (name) {
-               char *nl;
-
-               name = skip_spaces(name + 5);  /* strlen("Name:") */
-               nl = strchr(name, '\n');
-               if (nl)
-                       *nl = '\0';
-
-               size = strlen(name);
-               if (size >= len)
-                       size = len - 1;
-               memcpy(comm, name, size);
-               comm[size] = '\0';
-       } else {
-               pr_debug("Name: string not found for pid %d\n", pid);
-       }
-
-       if (tgids) {
-               tgids += 5;  /* strlen("Tgid:") */
-               *tgid = atoi(tgids);
-       } else {
-               pr_debug("Tgid: string not found for pid %d\n", pid);
-       }
-
-       if (ppids) {
-               ppids += 5;  /* strlen("PPid:") */
-               *ppid = atoi(ppids);
-       } else {
-               pr_debug("PPid: string not found for pid %d\n", pid);
-       }
-
-       return 0;
-}
-
-static int perf_event__prepare_comm(union perf_event *event, pid_t pid,
-                                   struct machine *machine,
-                                   pid_t *tgid, pid_t *ppid)
-{
-       size_t size;
-
-       *ppid = -1;
-
-       memset(&event->comm, 0, sizeof(event->comm));
-
-       if (machine__is_host(machine)) {
-               if (perf_event__get_comm_ids(pid, event->comm.comm,
-                                            sizeof(event->comm.comm),
-                                            tgid, ppid) != 0) {
-                       return -1;
-               }
-       } else {
-               *tgid = machine->pid;
-       }
-
-       if (*tgid < 0)
-               return -1;
-
-       event->comm.pid = *tgid;
-       event->comm.header.type = PERF_RECORD_COMM;
-
-       size = strlen(event->comm.comm) + 1;
-       size = PERF_ALIGN(size, sizeof(u64));
-       memset(event->comm.comm + size, 0, machine->id_hdr_size);
-       event->comm.header.size = (sizeof(event->comm) -
-                               (sizeof(event->comm.comm) - size) +
-                               machine->id_hdr_size);
-       event->comm.tid = pid;
-
-       return 0;
-}
-
-pid_t perf_event__synthesize_comm(struct perf_tool *tool,
-                                        union perf_event *event, pid_t pid,
-                                        perf_event__handler_t process,
-                                        struct machine *machine)
-{
-       pid_t tgid, ppid;
-
-       if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0)
-               return -1;
-
-       if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
-               return -1;
-
-       return tgid;
-}
-
-static void perf_event__get_ns_link_info(pid_t pid, const char *ns,
-                                        struct perf_ns_link_info *ns_link_info)
-{
-       struct stat64 st;
-       char proc_ns[128];
-
-       sprintf(proc_ns, "/proc/%u/ns/%s", pid, ns);
-       if (stat64(proc_ns, &st) == 0) {
-               ns_link_info->dev = st.st_dev;
-               ns_link_info->ino = st.st_ino;
-       }
-}
-
-int perf_event__synthesize_namespaces(struct perf_tool *tool,
-                                     union perf_event *event,
-                                     pid_t pid, pid_t tgid,
-                                     perf_event__handler_t process,
-                                     struct machine *machine)
-{
-       u32 idx;
-       struct perf_ns_link_info *ns_link_info;
-
-       if (!tool || !tool->namespace_events)
-               return 0;
-
-       memset(&event->namespaces, 0, (sizeof(event->namespaces) +
-              (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
-              machine->id_hdr_size));
-
-       event->namespaces.pid = tgid;
-       event->namespaces.tid = pid;
-
-       event->namespaces.nr_namespaces = NR_NAMESPACES;
-
-       ns_link_info = event->namespaces.link_info;
-
-       for (idx = 0; idx < event->namespaces.nr_namespaces; idx++)
-               perf_event__get_ns_link_info(pid, perf_ns__name(idx),
-                                            &ns_link_info[idx]);
-
-       event->namespaces.header.type = PERF_RECORD_NAMESPACES;
-
-       event->namespaces.header.size = (sizeof(event->namespaces) +
-                       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
-                       machine->id_hdr_size);
-
-       if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
-               return -1;
-
-       return 0;
-}
-
-static int perf_event__synthesize_fork(struct perf_tool *tool,
-                                      union perf_event *event,
-                                      pid_t pid, pid_t tgid, pid_t ppid,
-                                      perf_event__handler_t process,
-                                      struct machine *machine)
-{
-       memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size);
-
-       /*
-        * for main thread set parent to ppid from status file. For other
-        * threads set parent pid to main thread. ie., assume main thread
-        * spawns all threads in a process
-       */
-       if (tgid == pid) {
-               event->fork.ppid = ppid;
-               event->fork.ptid = ppid;
-       } else {
-               event->fork.ppid = tgid;
-               event->fork.ptid = tgid;
-       }
-       event->fork.pid  = tgid;
-       event->fork.tid  = pid;
-       event->fork.header.type = PERF_RECORD_FORK;
-       event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC;
-
-       event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
-
-       if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
-               return -1;
-
-       return 0;
-}
-
-int perf_event__synthesize_mmap_events(struct perf_tool *tool,
-                                      union perf_event *event,
-                                      pid_t pid, pid_t tgid,
-                                      perf_event__handler_t process,
-                                      struct machine *machine,
-                                      bool mmap_data)
-{
-       char filename[PATH_MAX];
-       FILE *fp;
-       unsigned long long t;
-       bool truncation = false;
-       unsigned long long timeout = proc_map_timeout * 1000000ULL;
-       int rc = 0;
-       const char *hugetlbfs_mnt = hugetlbfs__mountpoint();
-       int hugetlbfs_mnt_len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0;
-
-       if (machine__is_default_guest(machine))
-               return 0;
-
-       snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps",
-                machine->root_dir, pid, pid);
-
-       fp = fopen(filename, "r");
-       if (fp == NULL) {
-               /*
-                * We raced with a task exiting - just return:
-                */
-               pr_debug("couldn't open %s\n", filename);
-               return -1;
-       }
-
-       event->header.type = PERF_RECORD_MMAP2;
-       t = rdclock();
-
-       while (1) {
-               char bf[BUFSIZ];
-               char prot[5];
-               char execname[PATH_MAX];
-               char anonstr[] = "//anon";
-               unsigned int ino;
-               size_t size;
-               ssize_t n;
-
-               if (fgets(bf, sizeof(bf), fp) == NULL)
-                       break;
-
-               if ((rdclock() - t) > timeout) {
-                       pr_warning("Reading %s time out. "
-                                  "You may want to increase "
-                                  "the time limit by --proc-map-timeout\n",
-                                  filename);
-                       truncation = true;
-                       goto out;
-               }
-
-               /* ensure null termination since stack will be reused. */
-               strcpy(execname, "");
-
-               /* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-               n = sscanf(bf, "%"PRI_lx64"-%"PRI_lx64" %s %"PRI_lx64" %x:%x %u %[^\n]\n",
-                      &event->mmap2.start, &event->mmap2.len, prot,
-                      &event->mmap2.pgoff, &event->mmap2.maj,
-                      &event->mmap2.min,
-                      &ino, execname);
-
-               /*
-                * Anon maps don't have the execname.
-                */
-               if (n < 7)
-                       continue;
-
-               event->mmap2.ino = (u64)ino;
-
-               /*
-                * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
-                */
-               if (machine__is_host(machine))
-                       event->header.misc = PERF_RECORD_MISC_USER;
-               else
-                       event->header.misc = PERF_RECORD_MISC_GUEST_USER;
-
-               /* map protection and flags bits */
-               event->mmap2.prot = 0;
-               event->mmap2.flags = 0;
-               if (prot[0] == 'r')
-                       event->mmap2.prot |= PROT_READ;
-               if (prot[1] == 'w')
-                       event->mmap2.prot |= PROT_WRITE;
-               if (prot[2] == 'x')
-                       event->mmap2.prot |= PROT_EXEC;
-
-               if (prot[3] == 's')
-                       event->mmap2.flags |= MAP_SHARED;
-               else
-                       event->mmap2.flags |= MAP_PRIVATE;
-
-               if (prot[2] != 'x') {
-                       if (!mmap_data || prot[0] != 'r')
-                               continue;
-
-                       event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
-               }
-
-out:
-               if (truncation)
-                       event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
-
-               if (!strcmp(execname, ""))
-                       strcpy(execname, anonstr);
-
-               if (hugetlbfs_mnt_len &&
-                   !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) {
-                       strcpy(execname, anonstr);
-                       event->mmap2.flags |= MAP_HUGETLB;
-               }
-
-               size = strlen(execname) + 1;
-               memcpy(event->mmap2.filename, execname, size);
-               size = PERF_ALIGN(size, sizeof(u64));
-               event->mmap2.len -= event->mmap.start;
-               event->mmap2.header.size = (sizeof(event->mmap2) -
-                                       (sizeof(event->mmap2.filename) - size));
-               memset(event->mmap2.filename + size, 0, machine->id_hdr_size);
-               event->mmap2.header.size += machine->id_hdr_size;
-               event->mmap2.pid = tgid;
-               event->mmap2.tid = pid;
-
-               if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
-                       rc = -1;
-                       break;
-               }
-
-               if (truncation)
-                       break;
-       }
-
-       fclose(fp);
-       return rc;
-}
-
-int perf_event__synthesize_modules(struct perf_tool *tool,
-                                  perf_event__handler_t process,
-                                  struct machine *machine)
-{
-       int rc = 0;
-       struct map *pos;
-       struct maps *maps = machine__kernel_maps(machine);
-       union perf_event *event = zalloc((sizeof(event->mmap) +
-                                         machine->id_hdr_size));
-       if (event == NULL) {
-               pr_debug("Not enough memory synthesizing mmap event "
-                        "for kernel modules\n");
-               return -1;
-       }
-
-       event->header.type = PERF_RECORD_MMAP;
-
-       /*
-        * kernel uses 0 for user space maps, see kernel/perf_event.c
-        * __perf_event_mmap
-        */
-       if (machine__is_host(machine))
-               event->header.misc = PERF_RECORD_MISC_KERNEL;
-       else
-               event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
-
-       for (pos = maps__first(maps); pos; pos = map__next(pos)) {
-               size_t size;
-
-               if (!__map__is_kmodule(pos))
-                       continue;
-
-               size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
-               event->mmap.header.type = PERF_RECORD_MMAP;
-               event->mmap.header.size = (sizeof(event->mmap) -
-                                       (sizeof(event->mmap.filename) - size));
-               memset(event->mmap.filename + size, 0, machine->id_hdr_size);
-               event->mmap.header.size += machine->id_hdr_size;
-               event->mmap.start = pos->start;
-               event->mmap.len   = pos->end - pos->start;
-               event->mmap.pid   = machine->pid;
-
-               memcpy(event->mmap.filename, pos->dso->long_name,
-                      pos->dso->long_name_len + 1);
-               if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
-                       rc = -1;
-                       break;
-               }
-       }
-
-       free(event);
-       return rc;
-}
-
-static int __event__synthesize_thread(union perf_event *comm_event,
-                                     union perf_event *mmap_event,
-                                     union perf_event *fork_event,
-                                     union perf_event *namespaces_event,
-                                     pid_t pid, int full,
-                                     perf_event__handler_t process,
-                                     struct perf_tool *tool,
-                                     struct machine *machine,
-                                     bool mmap_data)
-{
-       char filename[PATH_MAX];
-       DIR *tasks;
-       struct dirent *dirent;
-       pid_t tgid, ppid;
-       int rc = 0;
-
-       /* special case: only send one comm event using passed in pid */
-       if (!full) {
-               tgid = perf_event__synthesize_comm(tool, comm_event, pid,
-                                                  process, machine);
-
-               if (tgid == -1)
-                       return -1;
-
-               if (perf_event__synthesize_namespaces(tool, namespaces_event, pid,
-                                                     tgid, process, machine) < 0)
-                       return -1;
-
-               /*
-                * send mmap only for thread group leader
-                * see thread__init_map_groups
-                */
-               if (pid == tgid &&
-                   perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
-                                                      process, machine, mmap_data))
-                       return -1;
-
-               return 0;
-       }
-
-       if (machine__is_default_guest(machine))
-               return 0;
-
-       snprintf(filename, sizeof(filename), "%s/proc/%d/task",
-                machine->root_dir, pid);
-
-       tasks = opendir(filename);
-       if (tasks == NULL) {
-               pr_debug("couldn't open %s\n", filename);
-               return 0;
-       }
-
-       while ((dirent = readdir(tasks)) != NULL) {
-               char *end;
-               pid_t _pid;
-
-               _pid = strtol(dirent->d_name, &end, 10);
-               if (*end)
-                       continue;
-
-               rc = -1;
-               if (perf_event__prepare_comm(comm_event, _pid, machine,
-                                            &tgid, &ppid) != 0)
-                       break;
-
-               if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
-                                               ppid, process, machine) < 0)
-                       break;
-
-               if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid,
-                                                     tgid, process, machine) < 0)
-                       break;
-
-               /*
-                * Send the prepared comm event
-                */
-               if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0)
-                       break;
-
-               rc = 0;
-               if (_pid == pid) {
-                       /* process the parent's maps too */
-                       rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
-                                               process, machine, mmap_data);
-                       if (rc)
-                               break;
-               }
-       }
-
-       closedir(tasks);
-       return rc;
-}
-
-int perf_event__synthesize_thread_map(struct perf_tool *tool,
-                                     struct perf_thread_map *threads,
-                                     perf_event__handler_t process,
-                                     struct machine *machine,
-                                     bool mmap_data)
-{
-       union perf_event *comm_event, *mmap_event, *fork_event;
-       union perf_event *namespaces_event;
-       int err = -1, thread, j;
-
-       comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
-       if (comm_event == NULL)
-               goto out;
-
-       mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size);
-       if (mmap_event == NULL)
-               goto out_free_comm;
-
-       fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
-       if (fork_event == NULL)
-               goto out_free_mmap;
-
-       namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
-                                 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
-                                 machine->id_hdr_size);
-       if (namespaces_event == NULL)
-               goto out_free_fork;
-
-       err = 0;
-       for (thread = 0; thread < threads->nr; ++thread) {
-               if (__event__synthesize_thread(comm_event, mmap_event,
-                                              fork_event, namespaces_event,
-                                              perf_thread_map__pid(threads, thread), 0,
-                                              process, tool, machine,
-                                              mmap_data)) {
-                       err = -1;
-                       break;
-               }
-
-               /*
-                * comm.pid is set to thread group id by
-                * perf_event__synthesize_comm
-                */
-               if ((int) comm_event->comm.pid != perf_thread_map__pid(threads, thread)) {
-                       bool need_leader = true;
-
-                       /* is thread group leader in thread_map? */
-                       for (j = 0; j < threads->nr; ++j) {
-                               if ((int) comm_event->comm.pid == perf_thread_map__pid(threads, j)) {
-                                       need_leader = false;
-                                       break;
-                               }
-                       }
-
-                       /* if not, generate events for it */
-                       if (need_leader &&
-                           __event__synthesize_thread(comm_event, mmap_event,
-                                                      fork_event, namespaces_event,
-                                                      comm_event->comm.pid, 0,
-                                                      process, tool, machine,
-                                                      mmap_data)) {
-                               err = -1;
-                               break;
-                       }
-               }
-       }
-       free(namespaces_event);
-out_free_fork:
-       free(fork_event);
-out_free_mmap:
-       free(mmap_event);
-out_free_comm:
-       free(comm_event);
-out:
-       return err;
-}
-
-static int __perf_event__synthesize_threads(struct perf_tool *tool,
-                                           perf_event__handler_t process,
-                                           struct machine *machine,
-                                           bool mmap_data,
-                                           struct dirent **dirent,
-                                           int start,
-                                           int num)
-{
-       union perf_event *comm_event, *mmap_event, *fork_event;
-       union perf_event *namespaces_event;
-       int err = -1;
-       char *end;
-       pid_t pid;
-       int i;
-
-       comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
-       if (comm_event == NULL)
-               goto out;
-
-       mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size);
-       if (mmap_event == NULL)
-               goto out_free_comm;
-
-       fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
-       if (fork_event == NULL)
-               goto out_free_mmap;
-
-       namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
-                                 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
-                                 machine->id_hdr_size);
-       if (namespaces_event == NULL)
-               goto out_free_fork;
-
-       for (i = start; i < start + num; i++) {
-               if (!isdigit(dirent[i]->d_name[0]))
-                       continue;
-
-               pid = (pid_t)strtol(dirent[i]->d_name, &end, 10);
-               /* only interested in proper numerical dirents */
-               if (*end)
-                       continue;
-               /*
-                * We may race with exiting thread, so don't stop just because
-                * one thread couldn't be synthesized.
-                */
-               __event__synthesize_thread(comm_event, mmap_event, fork_event,
-                                          namespaces_event, pid, 1, process,
-                                          tool, machine, mmap_data);
-       }
-       err = 0;
-
-       free(namespaces_event);
-out_free_fork:
-       free(fork_event);
-out_free_mmap:
-       free(mmap_event);
-out_free_comm:
-       free(comm_event);
-out:
-       return err;
-}
-
-struct synthesize_threads_arg {
-       struct perf_tool *tool;
-       perf_event__handler_t process;
-       struct machine *machine;
-       bool mmap_data;
-       struct dirent **dirent;
-       int num;
-       int start;
-};
-
-static void *synthesize_threads_worker(void *arg)
-{
-       struct synthesize_threads_arg *args = arg;
-
-       __perf_event__synthesize_threads(args->tool, args->process,
-                                        args->machine, args->mmap_data,
-                                        args->dirent,
-                                        args->start, args->num);
-       return NULL;
-}
-
-int perf_event__synthesize_threads(struct perf_tool *tool,
-                                  perf_event__handler_t process,
-                                  struct machine *machine,
-                                  bool mmap_data,
-                                  unsigned int nr_threads_synthesize)
-{
-       struct synthesize_threads_arg *args = NULL;
-       pthread_t *synthesize_threads = NULL;
-       char proc_path[PATH_MAX];
-       struct dirent **dirent;
-       int num_per_thread;
-       int m, n, i, j;
-       int thread_nr;
-       int base = 0;
-       int err = -1;
-
-
-       if (machine__is_default_guest(machine))
-               return 0;
-
-       snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
-       n = scandir(proc_path, &dirent, 0, alphasort);
-       if (n < 0)
-               return err;
-
-       if (nr_threads_synthesize == UINT_MAX)
-               thread_nr = sysconf(_SC_NPROCESSORS_ONLN);
-       else
-               thread_nr = nr_threads_synthesize;
-
-       if (thread_nr <= 1) {
-               err = __perf_event__synthesize_threads(tool, process,
-                                                      machine, mmap_data,
-                                                      dirent, base, n);
-               goto free_dirent;
-       }
-       if (thread_nr > n)
-               thread_nr = n;
-
-       synthesize_threads = calloc(sizeof(pthread_t), thread_nr);
-       if (synthesize_threads == NULL)
-               goto free_dirent;
-
-       args = calloc(sizeof(*args), thread_nr);
-       if (args == NULL)
-               goto free_threads;
-
-       num_per_thread = n / thread_nr;
-       m = n % thread_nr;
-       for (i = 0; i < thread_nr; i++) {
-               args[i].tool = tool;
-               args[i].process = process;
-               args[i].machine = machine;
-               args[i].mmap_data = mmap_data;
-               args[i].dirent = dirent;
-       }
-       for (i = 0; i < m; i++) {
-               args[i].num = num_per_thread + 1;
-               args[i].start = i * args[i].num;
-       }
-       if (i != 0)
-               base = args[i-1].start + args[i-1].num;
-       for (j = i; j < thread_nr; j++) {
-               args[j].num = num_per_thread;
-               args[j].start = base + (j - i) * args[i].num;
-       }
-
-       for (i = 0; i < thread_nr; i++) {
-               if (pthread_create(&synthesize_threads[i], NULL,
-                                  synthesize_threads_worker, &args[i]))
-                       goto out_join;
-       }
-       err = 0;
-out_join:
-       for (i = 0; i < thread_nr; i++)
-               pthread_join(synthesize_threads[i], NULL);
-       free(args);
-free_threads:
-       free(synthesize_threads);
-free_dirent:
-       for (i = 0; i < n; i++)
-               zfree(&dirent[i]);
-       free(dirent);
-
-       return err;
-}
-
 struct process_symbol_args {
        const char *name;
        u64        start;
@@ -899,327 +117,6 @@ int kallsyms__get_function_start(const char *kallsyms_filename,
        return 0;
 }
 
-int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused,
-                                             perf_event__handler_t process __maybe_unused,
-                                             struct machine *machine __maybe_unused)
-{
-       return 0;
-}
-
-static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
-                                               perf_event__handler_t process,
-                                               struct machine *machine)
-{
-       size_t size;
-       struct map *map = machine__kernel_map(machine);
-       struct kmap *kmap;
-       int err;
-       union perf_event *event;
-
-       if (map == NULL)
-               return -1;
-
-       kmap = map__kmap(map);
-       if (!kmap->ref_reloc_sym)
-               return -1;
-
-       /*
-        * We should get this from /sys/kernel/sections/.text, but till that is
-        * available use this, and after it is use this as a fallback for older
-        * kernels.
-        */
-       event = zalloc((sizeof(event->mmap) + machine->id_hdr_size));
-       if (event == NULL) {
-               pr_debug("Not enough memory synthesizing mmap event "
-                        "for kernel modules\n");
-               return -1;
-       }
-
-       if (machine__is_host(machine)) {
-               /*
-                * kernel uses PERF_RECORD_MISC_USER for user space maps,
-                * see kernel/perf_event.c __perf_event_mmap
-                */
-               event->header.misc = PERF_RECORD_MISC_KERNEL;
-       } else {
-               event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
-       }
-
-       size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
-                       "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
-       size = PERF_ALIGN(size, sizeof(u64));
-       event->mmap.header.type = PERF_RECORD_MMAP;
-       event->mmap.header.size = (sizeof(event->mmap) -
-                       (sizeof(event->mmap.filename) - size) + machine->id_hdr_size);
-       event->mmap.pgoff = kmap->ref_reloc_sym->addr;
-       event->mmap.start = map->start;
-       event->mmap.len   = map->end - event->mmap.start;
-       event->mmap.pid   = machine->pid;
-
-       err = perf_tool__process_synth_event(tool, event, machine, process);
-       free(event);
-
-       return err;
-}
-
-int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
-                                      perf_event__handler_t process,
-                                      struct machine *machine)
-{
-       int err;
-
-       err = __perf_event__synthesize_kernel_mmap(tool, process, machine);
-       if (err < 0)
-               return err;
-
-       return perf_event__synthesize_extra_kmaps(tool, process, machine);
-}
-
-int perf_event__synthesize_thread_map2(struct perf_tool *tool,
-                                     struct perf_thread_map *threads,
-                                     perf_event__handler_t process,
-                                     struct machine *machine)
-{
-       union perf_event *event;
-       int i, err, size;
-
-       size  = sizeof(event->thread_map);
-       size += threads->nr * sizeof(event->thread_map.entries[0]);
-
-       event = zalloc(size);
-       if (!event)
-               return -ENOMEM;
-
-       event->header.type = PERF_RECORD_THREAD_MAP;
-       event->header.size = size;
-       event->thread_map.nr = threads->nr;
-
-       for (i = 0; i < threads->nr; i++) {
-               struct perf_record_thread_map_entry *entry = &event->thread_map.entries[i];
-               char *comm = perf_thread_map__comm(threads, i);
-
-               if (!comm)
-                       comm = (char *) "";
-
-               entry->pid = perf_thread_map__pid(threads, i);
-               strncpy((char *) &entry->comm, comm, sizeof(entry->comm));
-       }
-
-       err = process(tool, event, NULL, machine);
-
-       free(event);
-       return err;
-}
-
-static void synthesize_cpus(struct cpu_map_entries *cpus,
-                           struct perf_cpu_map *map)
-{
-       int i;
-
-       cpus->nr = map->nr;
-
-       for (i = 0; i < map->nr; i++)
-               cpus->cpu[i] = map->map[i];
-}
-
-static void synthesize_mask(struct perf_record_record_cpu_map *mask,
-                           struct perf_cpu_map *map, int max)
-{
-       int i;
-
-       mask->nr = BITS_TO_LONGS(max);
-       mask->long_size = sizeof(long);
-
-       for (i = 0; i < map->nr; i++)
-               set_bit(map->map[i], mask->mask);
-}
-
-static size_t cpus_size(struct perf_cpu_map *map)
-{
-       return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16);
-}
-
-static size_t mask_size(struct perf_cpu_map *map, int *max)
-{
-       int i;
-
-       *max = 0;
-
-       for (i = 0; i < map->nr; i++) {
-               /* bit possition of the cpu is + 1 */
-               int bit = map->map[i] + 1;
-
-               if (bit > *max)
-                       *max = bit;
-       }
-
-       return sizeof(struct perf_record_record_cpu_map) + BITS_TO_LONGS(*max) * sizeof(long);
-}
-
-void *cpu_map_data__alloc(struct perf_cpu_map *map, size_t *size, u16 *type, int *max)
-{
-       size_t size_cpus, size_mask;
-       bool is_dummy = perf_cpu_map__empty(map);
-
-       /*
-        * Both array and mask data have variable size based
-        * on the number of cpus and their actual values.
-        * The size of the 'struct perf_record_cpu_map_data' is:
-        *
-        *   array = size of 'struct cpu_map_entries' +
-        *           number of cpus * sizeof(u64)
-        *
-        *   mask  = size of 'struct perf_record_record_cpu_map' +
-        *           maximum cpu bit converted to size of longs
-        *
-        * and finaly + the size of 'struct perf_record_cpu_map_data'.
-        */
-       size_cpus = cpus_size(map);
-       size_mask = mask_size(map, max);
-
-       if (is_dummy || (size_cpus < size_mask)) {
-               *size += size_cpus;
-               *type  = PERF_CPU_MAP__CPUS;
-       } else {
-               *size += size_mask;
-               *type  = PERF_CPU_MAP__MASK;
-       }
-
-       *size += sizeof(struct perf_record_cpu_map_data);
-       *size = PERF_ALIGN(*size, sizeof(u64));
-       return zalloc(*size);
-}
-
-void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, struct perf_cpu_map *map,
-                             u16 type, int max)
-{
-       data->type = type;
-
-       switch (type) {
-       case PERF_CPU_MAP__CPUS:
-               synthesize_cpus((struct cpu_map_entries *) data->data, map);
-               break;
-       case PERF_CPU_MAP__MASK:
-               synthesize_mask((struct perf_record_record_cpu_map *)data->data, map, max);
-       default:
-               break;
-       };
-}
-
-static struct perf_record_cpu_map *cpu_map_event__new(struct perf_cpu_map *map)
-{
-       size_t size = sizeof(struct perf_record_cpu_map);
-       struct perf_record_cpu_map *event;
-       int max;
-       u16 type;
-
-       event = cpu_map_data__alloc(map, &size, &type, &max);
-       if (!event)
-               return NULL;
-
-       event->header.type = PERF_RECORD_CPU_MAP;
-       event->header.size = size;
-       event->data.type   = type;
-
-       cpu_map_data__synthesize(&event->data, map, type, max);
-       return event;
-}
-
-int perf_event__synthesize_cpu_map(struct perf_tool *tool,
-                                  struct perf_cpu_map *map,
-                                  perf_event__handler_t process,
-                                  struct machine *machine)
-{
-       struct perf_record_cpu_map *event;
-       int err;
-
-       event = cpu_map_event__new(map);
-       if (!event)
-               return -ENOMEM;
-
-       err = process(tool, (union perf_event *) event, NULL, machine);
-
-       free(event);
-       return err;
-}
-
-int perf_event__synthesize_stat_config(struct perf_tool *tool,
-                                      struct perf_stat_config *config,
-                                      perf_event__handler_t process,
-                                      struct machine *machine)
-{
-       struct perf_record_stat_config *event;
-       int size, i = 0, err;
-
-       size  = sizeof(*event);
-       size += (PERF_STAT_CONFIG_TERM__MAX * sizeof(event->data[0]));
-
-       event = zalloc(size);
-       if (!event)
-               return -ENOMEM;
-
-       event->header.type = PERF_RECORD_STAT_CONFIG;
-       event->header.size = size;
-       event->nr          = PERF_STAT_CONFIG_TERM__MAX;
-
-#define ADD(__term, __val)                                     \
-       event->data[i].tag = PERF_STAT_CONFIG_TERM__##__term;   \
-       event->data[i].val = __val;                             \
-       i++;
-
-       ADD(AGGR_MODE,  config->aggr_mode)
-       ADD(INTERVAL,   config->interval)
-       ADD(SCALE,      config->scale)
-
-       WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX,
-                 "stat config terms unbalanced\n");
-#undef ADD
-
-       err = process(tool, (union perf_event *) event, NULL, machine);
-
-       free(event);
-       return err;
-}
-
-int perf_event__synthesize_stat(struct perf_tool *tool,
-                               u32 cpu, u32 thread, u64 id,
-                               struct perf_counts_values *count,
-                               perf_event__handler_t process,
-                               struct machine *machine)
-{
-       struct perf_record_stat event;
-
-       event.header.type = PERF_RECORD_STAT;
-       event.header.size = sizeof(event);
-       event.header.misc = 0;
-
-       event.id        = id;
-       event.cpu       = cpu;
-       event.thread    = thread;
-       event.val       = count->val;
-       event.ena       = count->ena;
-       event.run       = count->run;
-
-       return process(tool, (union perf_event *) &event, NULL, machine);
-}
-
-int perf_event__synthesize_stat_round(struct perf_tool *tool,
-                                     u64 evtime, u64 type,
-                                     perf_event__handler_t process,
-                                     struct machine *machine)
-{
-       struct perf_record_stat_round event;
-
-       event.header.type = PERF_RECORD_STAT_ROUND;
-       event.header.size = sizeof(event);
-       event.header.misc = 0;
-
-       event.time = evtime;
-       event.type = type;
-
-       return process(tool, (union perf_event *) &event, NULL, machine);
-}
-
 void perf_event__read_stat_config(struct perf_stat_config *config,
                                  struct perf_record_stat_config *event)
 {
index 47ad81d..a0a0c91 100644 (file)
@@ -279,54 +279,13 @@ enum {
 
 void perf_event__print_totals(void);
 
-struct perf_tool;
-struct perf_thread_map;
 struct perf_cpu_map;
+struct perf_record_stat_config;
 struct perf_stat_config;
-struct perf_counts_values;
-
-typedef int (*perf_event__handler_t)(struct perf_tool *tool,
-                                    union perf_event *event,
-                                    struct perf_sample *sample,
-                                    struct machine *machine);
+struct perf_tool;
 
-int perf_event__synthesize_thread_map(struct perf_tool *tool,
-                                     struct perf_thread_map *threads,
-                                     perf_event__handler_t process,
-                                     struct machine *machine, bool mmap_data);
-int perf_event__synthesize_thread_map2(struct perf_tool *tool,
-                                     struct perf_thread_map *threads,
-                                     perf_event__handler_t process,
-                                     struct machine *machine);
-int perf_event__synthesize_cpu_map(struct perf_tool *tool,
-                                  struct perf_cpu_map *cpus,
-                                  perf_event__handler_t process,
-                                  struct machine *machine);
-int perf_event__synthesize_threads(struct perf_tool *tool,
-                                  perf_event__handler_t process,
-                                  struct machine *machine, bool mmap_data,
-                                  unsigned int nr_threads_synthesize);
-int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
-                                      perf_event__handler_t process,
-                                      struct machine *machine);
-int perf_event__synthesize_stat_config(struct perf_tool *tool,
-                                      struct perf_stat_config *config,
-                                      perf_event__handler_t process,
-                                      struct machine *machine);
 void perf_event__read_stat_config(struct perf_stat_config *config,
                                  struct perf_record_stat_config *event);
-int perf_event__synthesize_stat(struct perf_tool *tool,
-                               u32 cpu, u32 thread, u64 id,
-                               struct perf_counts_values *count,
-                               perf_event__handler_t process,
-                               struct machine *machine);
-int perf_event__synthesize_stat_round(struct perf_tool *tool,
-                                     u64 time, u64 type,
-                                     perf_event__handler_t process,
-                                     struct machine *machine);
-int perf_event__synthesize_modules(struct perf_tool *tool,
-                                  perf_event__handler_t process,
-                                  struct machine *machine);
 
 int perf_event__process_comm(struct perf_tool *tool,
                             union perf_event *event,
@@ -380,10 +339,6 @@ int perf_event__process_bpf(struct perf_tool *tool,
                            union perf_event *event,
                            struct perf_sample *sample,
                            struct machine *machine);
-int perf_tool__process_synth_event(struct perf_tool *tool,
-                                  union perf_event *event,
-                                  struct machine *machine,
-                                  perf_event__handler_t process);
 int perf_event__process(struct perf_tool *tool,
                        union perf_event *event,
                        struct perf_sample *sample,
@@ -405,34 +360,6 @@ void thread__resolve(struct thread *thread, struct addr_location *al,
 
 const char *perf_event__name(unsigned int id);
 
-size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
-                                    u64 read_format);
-int perf_event__synthesize_sample(union perf_event *event, u64 type,
-                                 u64 read_format,
-                                 const struct perf_sample *sample);
-
-pid_t perf_event__synthesize_comm(struct perf_tool *tool,
-                                 union perf_event *event, pid_t pid,
-                                 perf_event__handler_t process,
-                                 struct machine *machine);
-
-int perf_event__synthesize_namespaces(struct perf_tool *tool,
-                                     union perf_event *event,
-                                     pid_t pid, pid_t tgid,
-                                     perf_event__handler_t process,
-                                     struct machine *machine);
-
-int perf_event__synthesize_mmap_events(struct perf_tool *tool,
-                                      union perf_event *event,
-                                      pid_t pid, pid_t tgid,
-                                      perf_event__handler_t process,
-                                      struct machine *machine,
-                                      bool mmap_data);
-
-int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
-                                      perf_event__handler_t process,
-                                      struct machine *machine);
-
 size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
index 095924a..d277a98 100644 (file)
 #include <inttypes.h>
 #include <poll.h>
 #include "cpumap.h"
+#include "util/mmap.h"
 #include "thread_map.h"
 #include "target.h"
 #include "evlist.h"
 #include "evsel.h"
 #include "debug.h"
 #include "units.h"
-#include "util.h"
+#include <internal/lib.h> // page_size
 #include "../perf.h"
 #include "asm/bug.h"
 #include "bpf-event.h"
@@ -49,18 +50,14 @@ int sigqueue(pid_t pid, int sig, const union sigval value);
 #endif
 
 #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
-#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
+#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
 
 void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
                  struct perf_thread_map *threads)
 {
-       int i;
-
-       for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
-               INIT_HLIST_HEAD(&evlist->heads[i]);
        perf_evlist__init(&evlist->core);
        perf_evlist__set_maps(&evlist->core, cpus, threads);
-       fdarray__init(&evlist->pollfd, 64);
+       fdarray__init(&evlist->core.pollfd, 64);
        evlist->workload.pid = -1;
        evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
 }
@@ -108,7 +105,7 @@ struct evlist *perf_evlist__new_dummy(void)
  */
 void perf_evlist__set_id_pos(struct evlist *evlist)
 {
-       struct evsel *first = perf_evlist__first(evlist);
+       struct evsel *first = evlist__first(evlist);
 
        evlist->id_pos = first->id_pos;
        evlist->is_pos = first->is_pos;
@@ -124,7 +121,7 @@ static void perf_evlist__update_id_pos(struct evlist *evlist)
        perf_evlist__set_id_pos(evlist);
 }
 
-static void perf_evlist__purge(struct evlist *evlist)
+static void evlist__purge(struct evlist *evlist)
 {
        struct evsel *pos, *n;
 
@@ -137,11 +134,11 @@ static void perf_evlist__purge(struct evlist *evlist)
        evlist->core.nr_entries = 0;
 }
 
-void perf_evlist__exit(struct evlist *evlist)
+void evlist__exit(struct evlist *evlist)
 {
        zfree(&evlist->mmap);
        zfree(&evlist->overwrite_mmap);
-       fdarray__exit(&evlist->pollfd);
+       fdarray__exit(&evlist->core.pollfd);
 }
 
 void evlist__delete(struct evlist *evlist)
@@ -149,14 +146,14 @@ void evlist__delete(struct evlist *evlist)
        if (evlist == NULL)
                return;
 
-       perf_evlist__munmap(evlist);
+       evlist__munmap(evlist);
        evlist__close(evlist);
        perf_cpu_map__put(evlist->core.cpus);
        perf_thread_map__put(evlist->core.threads);
        evlist->core.cpus = NULL;
        evlist->core.threads = NULL;
-       perf_evlist__purge(evlist);
-       perf_evlist__exit(evlist);
+       evlist__purge(evlist);
+       evlist__exit(evlist);
        free(evlist);
 }
 
@@ -318,7 +315,7 @@ int perf_evlist__add_newtp(struct evlist *evlist,
 static int perf_evlist__nr_threads(struct evlist *evlist,
                                   struct evsel *evsel)
 {
-       if (evsel->system_wide)
+       if (evsel->core.system_wide)
                return 1;
        else
                return perf_thread_map__nr(evlist->core.threads);
@@ -401,128 +398,29 @@ int perf_evlist__enable_event_idx(struct evlist *evlist,
                return perf_evlist__enable_event_thread(evlist, evsel, idx);
 }
 
-int perf_evlist__alloc_pollfd(struct evlist *evlist)
+int evlist__add_pollfd(struct evlist *evlist, int fd)
 {
-       int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);
-       int nr_threads = perf_thread_map__nr(evlist->core.threads);
-       int nfds = 0;
-       struct evsel *evsel;
-
-       evlist__for_each_entry(evlist, evsel) {
-               if (evsel->system_wide)
-                       nfds += nr_cpus;
-               else
-                       nfds += nr_cpus * nr_threads;
-       }
-
-       if (fdarray__available_entries(&evlist->pollfd) < nfds &&
-           fdarray__grow(&evlist->pollfd, nfds) < 0)
-               return -ENOMEM;
-
-       return 0;
-}
-
-static int __perf_evlist__add_pollfd(struct evlist *evlist, int fd,
-                                    struct perf_mmap *map, short revent)
-{
-       int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
-       /*
-        * Save the idx so that when we filter out fds POLLHUP'ed we can
-        * close the associated evlist->mmap[] entry.
-        */
-       if (pos >= 0) {
-               evlist->pollfd.priv[pos].ptr = map;
-
-               fcntl(fd, F_SETFL, O_NONBLOCK);
-       }
-
-       return pos;
-}
-
-int perf_evlist__add_pollfd(struct evlist *evlist, int fd)
-{
-       return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN);
+       return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN);
 }
 
 static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
                                         void *arg __maybe_unused)
 {
-       struct perf_mmap *map = fda->priv[fd].ptr;
+       struct mmap *map = fda->priv[fd].ptr;
 
        if (map)
                perf_mmap__put(map);
 }
 
-int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
+int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
 {
-       return fdarray__filter(&evlist->pollfd, revents_and_mask,
+       return fdarray__filter(&evlist->core.pollfd, revents_and_mask,
                               perf_evlist__munmap_filtered, NULL);
 }
 
-int perf_evlist__poll(struct evlist *evlist, int timeout)
+int evlist__poll(struct evlist *evlist, int timeout)
 {
-       return fdarray__poll(&evlist->pollfd, timeout);
-}
-
-static void perf_evlist__id_hash(struct evlist *evlist,
-                                struct evsel *evsel,
-                                int cpu, int thread, u64 id)
-{
-       int hash;
-       struct perf_sample_id *sid = SID(evsel, cpu, thread);
-
-       sid->id = id;
-       sid->evsel = evsel;
-       hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
-       hlist_add_head(&sid->node, &evlist->heads[hash]);
-}
-
-void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel,
-                        int cpu, int thread, u64 id)
-{
-       perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
-       evsel->id[evsel->ids++] = id;
-}
-
-int perf_evlist__id_add_fd(struct evlist *evlist,
-                          struct evsel *evsel,
-                          int cpu, int thread, int fd)
-{
-       u64 read_data[4] = { 0, };
-       int id_idx = 1; /* The first entry is the counter value */
-       u64 id;
-       int ret;
-
-       ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
-       if (!ret)
-               goto add;
-
-       if (errno != ENOTTY)
-               return -1;
-
-       /* Legacy way to get event id.. All hail to old kernels! */
-
-       /*
-        * This way does not work with group format read, so bail
-        * out in that case.
-        */
-       if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
-               return -1;
-
-       if (!(evsel->core.attr.read_format & PERF_FORMAT_ID) ||
-           read(fd, &read_data, sizeof(read_data)) == -1)
-               return -1;
-
-       if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-               ++id_idx;
-       if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-               ++id_idx;
-
-       id = read_data[id_idx];
-
- add:
-       perf_evlist__id_add(evlist, evsel, cpu, thread, id);
-       return 0;
+       return perf_evlist__poll(&evlist->core, timeout);
 }
 
 static void perf_evlist__set_sid_idx(struct evlist *evlist,
@@ -535,7 +433,7 @@ static void perf_evlist__set_sid_idx(struct evlist *evlist,
                sid->cpu = evlist->core.cpus->map[cpu];
        else
                sid->cpu = -1;
-       if (!evsel->system_wide && evlist->core.threads && thread >= 0)
+       if (!evsel->core.system_wide && evlist->core.threads && thread >= 0)
                sid->tid = perf_thread_map__pid(evlist->core.threads, thread);
        else
                sid->tid = -1;
@@ -548,7 +446,7 @@ struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
        int hash;
 
        hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
-       head = &evlist->heads[hash];
+       head = &evlist->core.heads[hash];
 
        hlist_for_each_entry(sid, head, node)
                if (sid->id == id)
@@ -562,14 +460,14 @@ struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
        struct perf_sample_id *sid;
 
        if (evlist->core.nr_entries == 1 || !id)
-               return perf_evlist__first(evlist);
+               return evlist__first(evlist);
 
        sid = perf_evlist__id2sid(evlist, id);
        if (sid)
-               return sid->evsel;
+               return container_of(sid->evsel, struct evsel, core);
 
        if (!perf_evlist__sample_id_all(evlist))
-               return perf_evlist__first(evlist);
+               return evlist__first(evlist);
 
        return NULL;
 }
@@ -584,7 +482,7 @@ struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
 
        sid = perf_evlist__id2sid(evlist, id);
        if (sid)
-               return sid->evsel;
+               return container_of(sid->evsel, struct evsel, core);
 
        return NULL;
 }
@@ -613,7 +511,7 @@ static int perf_evlist__event2id(struct evlist *evlist,
 struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
                                            union perf_event *event)
 {
-       struct evsel *first = perf_evlist__first(evlist);
+       struct evsel *first = evlist__first(evlist);
        struct hlist_head *head;
        struct perf_sample_id *sid;
        int hash;
@@ -634,11 +532,11 @@ struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
                return first;
 
        hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
-       head = &evlist->heads[hash];
+       head = &evlist->core.heads[hash];
 
        hlist_for_each_entry(sid, head, node) {
                if (sid->id == id)
-                       return sid->evsel;
+                       return container_of(sid->evsel, struct evsel, core);
        }
        return NULL;
 }
@@ -650,8 +548,8 @@ static int perf_evlist__set_paused(struct evlist *evlist, bool value)
        if (!evlist->overwrite_mmap)
                return 0;
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
-               int fd = evlist->overwrite_mmap[i].fd;
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
+               int fd = evlist->overwrite_mmap[i].core.fd;
                int err;
 
                if (fd < 0)
@@ -673,42 +571,42 @@ static int perf_evlist__resume(struct evlist *evlist)
        return perf_evlist__set_paused(evlist, false);
 }
 
-static void perf_evlist__munmap_nofree(struct evlist *evlist)
+static void evlist__munmap_nofree(struct evlist *evlist)
 {
        int i;
 
        if (evlist->mmap)
-               for (i = 0; i < evlist->nr_mmaps; i++)
+               for (i = 0; i < evlist->core.nr_mmaps; i++)
                        perf_mmap__munmap(&evlist->mmap[i]);
 
        if (evlist->overwrite_mmap)
-               for (i = 0; i < evlist->nr_mmaps; i++)
+               for (i = 0; i < evlist->core.nr_mmaps; i++)
                        perf_mmap__munmap(&evlist->overwrite_mmap[i]);
 }
 
-void perf_evlist__munmap(struct evlist *evlist)
+void evlist__munmap(struct evlist *evlist)
 {
-       perf_evlist__munmap_nofree(evlist);
+       evlist__munmap_nofree(evlist);
        zfree(&evlist->mmap);
        zfree(&evlist->overwrite_mmap);
 }
 
-static struct perf_mmap *perf_evlist__alloc_mmap(struct evlist *evlist,
-                                                bool overwrite)
+static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
+                                      bool overwrite)
 {
        int i;
-       struct perf_mmap *map;
+       struct mmap *map;
 
-       evlist->nr_mmaps = perf_cpu_map__nr(evlist->core.cpus);
+       evlist->core.nr_mmaps = perf_cpu_map__nr(evlist->core.cpus);
        if (perf_cpu_map__empty(evlist->core.cpus))
-               evlist->nr_mmaps = perf_thread_map__nr(evlist->core.threads);
-       map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
+               evlist->core.nr_mmaps = perf_thread_map__nr(evlist->core.threads);
+       map = zalloc(evlist->core.nr_mmaps * sizeof(struct mmap));
        if (!map)
                return NULL;
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
-               map[i].fd = -1;
-               map[i].overwrite = overwrite;
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
+               map[i].core.fd = -1;
+               map[i].core.overwrite = overwrite;
                /*
                 * When the perf_mmap() call is made we grab one refcount, plus
                 * one extra to let perf_mmap__consume() get the last
@@ -718,7 +616,7 @@ static struct perf_mmap *perf_evlist__alloc_mmap(struct evlist *evlist,
                 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
                 * thus does perf_mmap__get() on it.
                 */
-               refcount_set(&map[i].refcnt, 0);
+               refcount_set(&map[i].core.refcnt, 0);
        }
        return map;
 }
@@ -732,7 +630,7 @@ perf_evlist__should_poll(struct evlist *evlist __maybe_unused,
        return true;
 }
 
-static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
+static int evlist__mmap_per_evsel(struct evlist *evlist, int idx,
                                       struct mmap_params *mp, int cpu_idx,
                                       int thread, int *_output, int *_output_overwrite)
 {
@@ -741,7 +639,7 @@ static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
        int evlist_cpu = cpu_map__cpu(evlist->core.cpus, cpu_idx);
 
        evlist__for_each_entry(evlist, evsel) {
-               struct perf_mmap *maps = evlist->mmap;
+               struct mmap *maps = evlist->mmap;
                int *output = _output;
                int fd;
                int cpu;
@@ -752,7 +650,7 @@ static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
                        maps = evlist->overwrite_mmap;
 
                        if (!maps) {
-                               maps = perf_evlist__alloc_mmap(evlist, true);
+                               maps = evlist__alloc_mmap(evlist, true);
                                if (!maps)
                                        return -1;
                                evlist->overwrite_mmap = maps;
@@ -762,7 +660,7 @@ static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
                        mp->prot &= ~PROT_WRITE;
                }
 
-               if (evsel->system_wide && thread)
+               if (evsel->core.system_wide && thread)
                        continue;
 
                cpu = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu);
@@ -792,14 +690,14 @@ static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
                 * other events, so it should not need to be polled anyway.
                 * Therefore don't add it for polling.
                 */
-               if (!evsel->system_wide &&
-                   __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
+               if (!evsel->core.system_wide &&
+                    perf_evlist__add_pollfd(&evlist->core, fd, &maps[idx], revent) < 0) {
                        perf_mmap__put(&maps[idx]);
                        return -1;
                }
 
                if (evsel->core.attr.read_format & PERF_FORMAT_ID) {
-                       if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
+                       if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, cpu, thread,
                                                   fd) < 0)
                                return -1;
                        perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
@@ -810,7 +708,7 @@ static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
        return 0;
 }
 
-static int perf_evlist__mmap_per_cpu(struct evlist *evlist,
+static int evlist__mmap_per_cpu(struct evlist *evlist,
                                     struct mmap_params *mp)
 {
        int cpu, thread;
@@ -826,7 +724,7 @@ static int perf_evlist__mmap_per_cpu(struct evlist *evlist,
                                              true);
 
                for (thread = 0; thread < nr_threads; thread++) {
-                       if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
+                       if (evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
                                                        thread, &output, &output_overwrite))
                                goto out_unmap;
                }
@@ -835,11 +733,11 @@ static int perf_evlist__mmap_per_cpu(struct evlist *evlist,
        return 0;
 
 out_unmap:
-       perf_evlist__munmap_nofree(evlist);
+       evlist__munmap_nofree(evlist);
        return -1;
 }
 
-static int perf_evlist__mmap_per_thread(struct evlist *evlist,
+static int evlist__mmap_per_thread(struct evlist *evlist,
                                        struct mmap_params *mp)
 {
        int thread;
@@ -853,7 +751,7 @@ static int perf_evlist__mmap_per_thread(struct evlist *evlist,
                auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
                                              false);
 
-               if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
+               if (evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
                                                &output, &output_overwrite))
                        goto out_unmap;
        }
@@ -861,7 +759,7 @@ static int perf_evlist__mmap_per_thread(struct evlist *evlist,
        return 0;
 
 out_unmap:
-       perf_evlist__munmap_nofree(evlist);
+       evlist__munmap_nofree(evlist);
        return -1;
 }
 
@@ -888,7 +786,7 @@ unsigned long perf_event_mlock_kb_in_pages(void)
        return pages;
 }
 
-size_t perf_evlist__mmap_size(unsigned long pages)
+size_t evlist__mmap_size(unsigned long pages)
 {
        if (pages == UINT_MAX)
                pages = perf_event_mlock_kb_in_pages();
@@ -971,7 +869,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
 }
 
 /**
- * perf_evlist__mmap_ex - Create mmaps to receive events.
+ * evlist__mmap_ex - Create mmaps to receive events.
  * @evlist: list of events
  * @pages: map length in pages
  * @overwrite: overwrite older events?
@@ -979,7 +877,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
  * @auxtrace_overwrite - overwrite older auxtrace data?
  *
  * If @overwrite is %false the user needs to signal event consumption using
- * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
+ * perf_mmap__write_tail().  Using evlist__mmap_read() does this
  * automatically.
  *
  * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
@@ -987,7 +885,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
  *
  * Return: %0 on success, negative error code otherwise.
  */
-int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
+int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
                         unsigned int auxtrace_pages,
                         bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
                         int comp_level)
@@ -1004,36 +902,36 @@ int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
                                  .comp_level = comp_level };
 
        if (!evlist->mmap)
-               evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
+               evlist->mmap = evlist__alloc_mmap(evlist, false);
        if (!evlist->mmap)
                return -ENOMEM;
 
-       if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
+       if (evlist->core.pollfd.entries == NULL && perf_evlist__alloc_pollfd(&evlist->core) < 0)
                return -ENOMEM;
 
-       evlist->mmap_len = perf_evlist__mmap_size(pages);
-       pr_debug("mmap size %zuB\n", evlist->mmap_len);
-       mp.mask = evlist->mmap_len - page_size - 1;
+       evlist->core.mmap_len = evlist__mmap_size(pages);
+       pr_debug("mmap size %zuB\n", evlist->core.mmap_len);
+       mp.mask = evlist->core.mmap_len - page_size - 1;
 
-       auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
+       auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len,
                                   auxtrace_pages, auxtrace_overwrite);
 
        evlist__for_each_entry(evlist, evsel) {
                if ((evsel->core.attr.read_format & PERF_FORMAT_ID) &&
-                   evsel->sample_id == NULL &&
-                   perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
+                   evsel->core.sample_id == NULL &&
+                   perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr) < 0)
                        return -ENOMEM;
        }
 
        if (perf_cpu_map__empty(cpus))
-               return perf_evlist__mmap_per_thread(evlist, &mp);
+               return evlist__mmap_per_thread(evlist, &mp);
 
-       return perf_evlist__mmap_per_cpu(evlist, &mp);
+       return evlist__mmap_per_cpu(evlist, &mp);
 }
 
-int perf_evlist__mmap(struct evlist *evlist, unsigned int pages)
+int evlist__mmap(struct evlist *evlist, unsigned int pages)
 {
-       return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
+       return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
 }
 
 int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
@@ -1225,7 +1123,7 @@ u64 perf_evlist__combined_branch_type(struct evlist *evlist)
 
 bool perf_evlist__valid_read_format(struct evlist *evlist)
 {
-       struct evsel *first = perf_evlist__first(evlist), *pos = first;
+       struct evsel *first = evlist__first(evlist), *pos = first;
        u64 read_format = first->core.attr.read_format;
        u64 sample_type = first->core.attr.sample_type;
 
@@ -1243,15 +1141,9 @@ bool perf_evlist__valid_read_format(struct evlist *evlist)
        return true;
 }
 
-u64 perf_evlist__read_format(struct evlist *evlist)
-{
-       struct evsel *first = perf_evlist__first(evlist);
-       return first->core.attr.read_format;
-}
-
 u16 perf_evlist__id_hdr_size(struct evlist *evlist)
 {
-       struct evsel *first = perf_evlist__first(evlist);
+       struct evsel *first = evlist__first(evlist);
        struct perf_sample *data;
        u64 sample_type;
        u16 size = 0;
@@ -1284,7 +1176,7 @@ out:
 
 bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
 {
-       struct evsel *first = perf_evlist__first(evlist), *pos = first;
+       struct evsel *first = evlist__first(evlist), *pos = first;
 
        evlist__for_each_entry_continue(evlist, pos) {
                if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
@@ -1296,7 +1188,7 @@ bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
 
 bool perf_evlist__sample_id_all(struct evlist *evlist)
 {
-       struct evsel *first = perf_evlist__first(evlist);
+       struct evsel *first = evlist__first(evlist);
        return first->core.attr.sample_id_all;
 }
 
@@ -1529,19 +1421,6 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
        return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
 }
 
-size_t perf_evlist__fprintf(struct evlist *evlist, FILE *fp)
-{
-       struct evsel *evsel;
-       size_t printed = 0;
-
-       evlist__for_each_entry(evlist, evsel) {
-               printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
-                                  perf_evsel__name(evsel));
-       }
-
-       return printed + fprintf(fp, "\n");
-}
-
 int perf_evlist__strerror_open(struct evlist *evlist,
                               int err, char *buf, size_t size)
 {
@@ -1571,7 +1450,7 @@ int perf_evlist__strerror_open(struct evlist *evlist,
                                    "Hint:\tThe current value is %d.", value);
                break;
        case EINVAL: {
-               struct evsel *first = perf_evlist__first(evlist);
+               struct evsel *first = evlist__first(evlist);
                int max_freq;
 
                if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
@@ -1599,7 +1478,7 @@ out_default:
 int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
 {
        char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
-       int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;
+       int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0;
 
        switch (err) {
        case EPERM:
@@ -1633,7 +1512,7 @@ void perf_evlist__to_front(struct evlist *evlist,
        struct evsel *evsel, *n;
        LIST_HEAD(move);
 
-       if (move_evsel == perf_evlist__first(evlist))
+       if (move_evsel == evlist__first(evlist))
                return;
 
        evlist__for_each_entry_safe(evlist, n, evsel) {
@@ -1754,7 +1633,7 @@ bool perf_evlist__exclude_kernel(struct evlist *evlist)
 void perf_evlist__force_leader(struct evlist *evlist)
 {
        if (!evlist->nr_groups) {
-               struct evsel *leader = perf_evlist__first(evlist);
+               struct evsel *leader = evlist__first(evlist);
 
                perf_evlist__set_leader(evlist);
                leader->forced_leader = true;
@@ -1780,7 +1659,7 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
                        is_open = false;
                if (c2->leader == leader) {
                        if (is_open)
-                               evsel__close(c2);
+                               perf_evsel__close(&c2->core);
                        c2->leader = c2;
                        c2->core.nr_members = 0;
                }
@@ -1844,10 +1723,10 @@ static void *perf_evlist__poll_thread(void *arg)
                        draining = true;
 
                if (!draining)
-                       perf_evlist__poll(evlist, 1000);
+                       evlist__poll(evlist, 1000);
 
-               for (i = 0; i < evlist->nr_mmaps; i++) {
-                       struct perf_mmap *map = &evlist->mmap[i];
+               for (i = 0; i < evlist->core.nr_mmaps; i++) {
+                       struct mmap *map = &evlist->mmap[i];
                        union perf_event *event;
 
                        if (perf_mmap__read_init(map))
@@ -1889,7 +1768,7 @@ int perf_evlist__start_sb_thread(struct evlist *evlist,
                        goto out_delete_evlist;
        }
 
-       if (perf_evlist__mmap(evlist, UINT_MAX))
+       if (evlist__mmap(evlist, UINT_MAX))
                goto out_delete_evlist;
 
        evlist__for_each_entry(evlist, counter) {
index a55f0f2..7cfe755 100644 (file)
@@ -7,11 +7,11 @@
 #include <linux/refcount.h>
 #include <linux/list.h>
 #include <api/fd/array.h>
-#include <stdio.h>
 #include <internal/evlist.h>
+#include <internal/evsel.h>
 #include "events_stats.h"
 #include "evsel.h"
-#include "mmap.h"
+#include <pthread.h>
 #include <signal.h>
 #include <unistd.h>
 
@@ -20,16 +20,38 @@ struct thread_map;
 struct perf_cpu_map;
 struct record_opts;
 
-#define PERF_EVLIST__HLIST_BITS 8
-#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
+/*
+ * State machine of bkw_mmap_state:
+ *
+ *                     .________________(forbid)_____________.
+ *                     |                                     V
+ * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY
+ *                     ^  ^              |   ^               |
+ *                     |  |__(forbid)____/   |___(forbid)___/|
+ *                     |                                     |
+ *                      \_________________(3)_______________/
+ *
+ * NOTREADY     : Backward ring buffers are not ready
+ * RUNNING      : Backward ring buffers are recording
+ * DATA_PENDING : We are required to collect data from backward ring buffers
+ * EMPTY        : We have collected data from backward ring buffers.
+ *
+ * (0): Setup backward ring buffer
+ * (1): Pause ring buffers for reading
+ * (2): Read from ring buffers
+ * (3): Resume ring buffers for recording
+ */
+enum bkw_mmap_state {
+       BKW_MMAP_NOTREADY,
+       BKW_MMAP_RUNNING,
+       BKW_MMAP_DATA_PENDING,
+       BKW_MMAP_EMPTY,
+};
 
 struct evlist {
        struct perf_evlist core;
-       struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
        int              nr_groups;
-       int              nr_mmaps;
        bool             enabled;
-       size_t           mmap_len;
        int              id_pos;
        int              is_pos;
        u64              combined_sample_type;
@@ -38,9 +60,8 @@ struct evlist {
                int     cork_fd;
                pid_t   pid;
        } workload;
-       struct fdarray   pollfd;
-       struct perf_mmap *mmap;
-       struct perf_mmap *overwrite_mmap;
+       struct mmap *mmap;
+       struct mmap *overwrite_mmap;
        struct evsel *selected;
        struct events_stats stats;
        struct perf_env *env;
@@ -65,7 +86,7 @@ struct evlist *perf_evlist__new_default(void);
 struct evlist *perf_evlist__new_dummy(void);
 void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
                  struct perf_thread_map *threads);
-void perf_evlist__exit(struct evlist *evlist);
+void evlist__exit(struct evlist *evlist);
 void evlist__delete(struct evlist *evlist);
 
 void evlist__add(struct evlist *evlist, struct evsel *entry);
@@ -119,17 +140,10 @@ struct evsel *
 perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
                                     const char *name);
 
-void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel,
-                        int cpu, int thread, u64 id);
-int perf_evlist__id_add_fd(struct evlist *evlist,
-                          struct evsel *evsel,
-                          int cpu, int thread, int fd);
-
-int perf_evlist__add_pollfd(struct evlist *evlist, int fd);
-int perf_evlist__alloc_pollfd(struct evlist *evlist);
-int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask);
+int evlist__add_pollfd(struct evlist *evlist, int fd);
+int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask);
 
-int perf_evlist__poll(struct evlist *evlist, int timeout);
+int evlist__poll(struct evlist *evlist, int timeout);
 
 struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id);
 struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
@@ -139,7 +153,7 @@ struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id);
 
 void perf_evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state);
 
-void perf_evlist__mmap_consume(struct evlist *evlist, int idx);
+void evlist__mmap_consume(struct evlist *evlist, int idx);
 
 int evlist__open(struct evlist *evlist);
 void evlist__close(struct evlist *evlist);
@@ -170,14 +184,14 @@ int perf_evlist__parse_mmap_pages(const struct option *opt,
 
 unsigned long perf_event_mlock_kb_in_pages(void);
 
-int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
+int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
                         unsigned int auxtrace_pages,
                         bool auxtrace_overwrite, int nr_cblocks,
                         int affinity, int flush, int comp_level);
-int perf_evlist__mmap(struct evlist *evlist, unsigned int pages);
-void perf_evlist__munmap(struct evlist *evlist);
+int evlist__mmap(struct evlist *evlist, unsigned int pages);
+void evlist__munmap(struct evlist *evlist);
 
-size_t perf_evlist__mmap_size(unsigned long pages);
+size_t evlist__mmap_size(unsigned long pages);
 
 void evlist__disable(struct evlist *evlist);
 void evlist__enable(struct evlist *evlist);
@@ -195,7 +209,6 @@ int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
 void __perf_evlist__set_leader(struct list_head *list);
 void perf_evlist__set_leader(struct evlist *evlist);
 
-u64 perf_evlist__read_format(struct evlist *evlist);
 u64 __perf_evlist__combined_sample_type(struct evlist *evlist);
 u64 perf_evlist__combined_sample_type(struct evlist *evlist);
 u64 perf_evlist__combined_branch_type(struct evlist *evlist);
@@ -221,17 +234,19 @@ static inline bool perf_evlist__empty(struct evlist *evlist)
        return list_empty(&evlist->core.entries);
 }
 
-static inline struct evsel *perf_evlist__first(struct evlist *evlist)
+static inline struct evsel *evlist__first(struct evlist *evlist)
 {
-       return list_entry(evlist->core.entries.next, struct evsel, core.node);
+       struct perf_evsel *evsel = perf_evlist__first(&evlist->core);
+
+       return container_of(evsel, struct evsel, core);
 }
 
-static inline struct evsel *perf_evlist__last(struct evlist *evlist)
+static inline struct evsel *evlist__last(struct evlist *evlist)
 {
-       return list_entry(evlist->core.entries.prev, struct evsel, core.node);
-}
+       struct perf_evsel *evsel = perf_evlist__last(&evlist->core);
 
-size_t perf_evlist__fprintf(struct evlist *evlist, FILE *fp);
+       return container_of(evsel, struct evsel, core);
+}
 
 int perf_evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size);
 int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size);
index 8582538..5591af8 100644 (file)
 #include "counts.h"
 #include "event.h"
 #include "evsel.h"
+#include "util/evsel_config.h"
+#include "util/evsel_fprintf.h"
 #include "evlist.h"
-#include "cpumap.h"
+#include <perf/cpumap.h>
 #include "thread_map.h"
 #include "target.h"
 #include "perf_regs.h"
@@ -45,6 +47,7 @@
 #include "../perf-sys.h"
 #include "util/parse-branch-options.h"
 #include <internal/xyarray.h>
+#include <internal/lib.h>
 
 #include <linux/ctype.h>
 
@@ -1226,36 +1229,6 @@ int evsel__disable(struct evsel *evsel)
        return err;
 }
 
-int perf_evsel__alloc_id(struct evsel *evsel, int ncpus, int nthreads)
-{
-       if (ncpus == 0 || nthreads == 0)
-               return 0;
-
-       if (evsel->system_wide)
-               nthreads = 1;
-
-       evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
-       if (evsel->sample_id == NULL)
-               return -ENOMEM;
-
-       evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
-       if (evsel->id == NULL) {
-               xyarray__delete(evsel->sample_id);
-               evsel->sample_id = NULL;
-               return -ENOMEM;
-       }
-
-       return 0;
-}
-
-static void perf_evsel__free_id(struct evsel *evsel)
-{
-       xyarray__delete(evsel->sample_id);
-       evsel->sample_id = NULL;
-       zfree(&evsel->id);
-       evsel->ids = 0;
-}
-
 static void perf_evsel__free_config_terms(struct evsel *evsel)
 {
        struct perf_evsel_config_term *term, *h;
@@ -1272,7 +1245,7 @@ void perf_evsel__exit(struct evsel *evsel)
        assert(evsel->evlist == NULL);
        perf_evsel__free_counts(evsel);
        perf_evsel__free_fd(&evsel->core);
-       perf_evsel__free_id(evsel);
+       perf_evsel__free_id(&evsel->core);
        perf_evsel__free_config_terms(evsel);
        cgroup__put(evsel->cgrp);
        perf_cpu_map__put(evsel->core.cpus);
@@ -1472,152 +1445,6 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread)
        return fd;
 }
 
-struct bit_names {
-       int bit;
-       const char *name;
-};
-
-static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits)
-{
-       bool first_bit = true;
-       int i = 0;
-
-       do {
-               if (value & bits[i].bit) {
-                       buf += scnprintf(buf, size, "%s%s", first_bit ? "" : "|", bits[i].name);
-                       first_bit = false;
-               }
-       } while (bits[++i].name != NULL);
-}
-
-static void __p_sample_type(char *buf, size_t size, u64 value)
-{
-#define bit_name(n) { PERF_SAMPLE_##n, #n }
-       struct bit_names bits[] = {
-               bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR),
-               bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
-               bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
-               bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
-               bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
-               bit_name(WEIGHT), bit_name(PHYS_ADDR),
-               { .name = NULL, }
-       };
-#undef bit_name
-       __p_bits(buf, size, value, bits);
-}
-
-static void __p_branch_sample_type(char *buf, size_t size, u64 value)
-{
-#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n }
-       struct bit_names bits[] = {
-               bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY),
-               bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL),
-               bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
-               bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
-               bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
-               { .name = NULL, }
-       };
-#undef bit_name
-       __p_bits(buf, size, value, bits);
-}
-
-static void __p_read_format(char *buf, size_t size, u64 value)
-{
-#define bit_name(n) { PERF_FORMAT_##n, #n }
-       struct bit_names bits[] = {
-               bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
-               bit_name(ID), bit_name(GROUP),
-               { .name = NULL, }
-       };
-#undef bit_name
-       __p_bits(buf, size, value, bits);
-}
-
-#define BUF_SIZE               1024
-
-#define p_hex(val)             snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val))
-#define p_unsigned(val)                snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
-#define p_signed(val)          snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
-#define p_sample_type(val)     __p_sample_type(buf, BUF_SIZE, val)
-#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
-#define p_read_format(val)     __p_read_format(buf, BUF_SIZE, val)
-
-#define PRINT_ATTRn(_n, _f, _p)                                \
-do {                                                   \
-       if (attr->_f) {                                 \
-               _p(attr->_f);                           \
-               ret += attr__fprintf(fp, _n, buf, priv);\
-       }                                               \
-} while (0)
-
-#define PRINT_ATTRf(_f, _p)    PRINT_ATTRn(#_f, _f, _p)
-
-int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
-                            attr__fprintf_f attr__fprintf, void *priv)
-{
-       char buf[BUF_SIZE];
-       int ret = 0;
-
-       PRINT_ATTRf(type, p_unsigned);
-       PRINT_ATTRf(size, p_unsigned);
-       PRINT_ATTRf(config, p_hex);
-       PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned);
-       PRINT_ATTRf(sample_type, p_sample_type);
-       PRINT_ATTRf(read_format, p_read_format);
-
-       PRINT_ATTRf(disabled, p_unsigned);
-       PRINT_ATTRf(inherit, p_unsigned);
-       PRINT_ATTRf(pinned, p_unsigned);
-       PRINT_ATTRf(exclusive, p_unsigned);
-       PRINT_ATTRf(exclude_user, p_unsigned);
-       PRINT_ATTRf(exclude_kernel, p_unsigned);
-       PRINT_ATTRf(exclude_hv, p_unsigned);
-       PRINT_ATTRf(exclude_idle, p_unsigned);
-       PRINT_ATTRf(mmap, p_unsigned);
-       PRINT_ATTRf(comm, p_unsigned);
-       PRINT_ATTRf(freq, p_unsigned);
-       PRINT_ATTRf(inherit_stat, p_unsigned);
-       PRINT_ATTRf(enable_on_exec, p_unsigned);
-       PRINT_ATTRf(task, p_unsigned);
-       PRINT_ATTRf(watermark, p_unsigned);
-       PRINT_ATTRf(precise_ip, p_unsigned);
-       PRINT_ATTRf(mmap_data, p_unsigned);
-       PRINT_ATTRf(sample_id_all, p_unsigned);
-       PRINT_ATTRf(exclude_host, p_unsigned);
-       PRINT_ATTRf(exclude_guest, p_unsigned);
-       PRINT_ATTRf(exclude_callchain_kernel, p_unsigned);
-       PRINT_ATTRf(exclude_callchain_user, p_unsigned);
-       PRINT_ATTRf(mmap2, p_unsigned);
-       PRINT_ATTRf(comm_exec, p_unsigned);
-       PRINT_ATTRf(use_clockid, p_unsigned);
-       PRINT_ATTRf(context_switch, p_unsigned);
-       PRINT_ATTRf(write_backward, p_unsigned);
-       PRINT_ATTRf(namespaces, p_unsigned);
-       PRINT_ATTRf(ksymbol, p_unsigned);
-       PRINT_ATTRf(bpf_event, p_unsigned);
-       PRINT_ATTRf(aux_output, p_unsigned);
-
-       PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
-       PRINT_ATTRf(bp_type, p_unsigned);
-       PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex);
-       PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex);
-       PRINT_ATTRf(branch_sample_type, p_branch_sample_type);
-       PRINT_ATTRf(sample_regs_user, p_hex);
-       PRINT_ATTRf(sample_stack_user, p_unsigned);
-       PRINT_ATTRf(clockid, p_signed);
-       PRINT_ATTRf(sample_regs_intr, p_hex);
-       PRINT_ATTRf(aux_watermark, p_unsigned);
-       PRINT_ATTRf(sample_max_stack, p_unsigned);
-
-       return ret;
-}
-
-static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
-                               void *priv __maybe_unused)
-{
-       return fprintf(fp, "  %-32s %s\n", name, val);
-}
-
 static void perf_evsel__remove_fd(struct evsel *pos,
                                  int nr_cpus, int nr_threads,
                                  int thread_idx)
@@ -1662,7 +1489,7 @@ static bool ignore_missing_thread(struct evsel *evsel,
                return false;
 
        /* The system wide setup does not work with threads. */
-       if (evsel->system_wide)
+       if (evsel->core.system_wide)
                return false;
 
        /* The -ESRCH is perf event syscall errno for pid's not found. */
@@ -1688,6 +1515,12 @@ static bool ignore_missing_thread(struct evsel *evsel,
        return true;
 }
 
+static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
+                               void *priv __maybe_unused)
+{
+       return fprintf(fp, "  %-32s %s\n", name, val);
+}
+
 static void display_attr(struct perf_event_attr *attr)
 {
        if (verbose >= 2) {
@@ -1771,7 +1604,7 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
                threads = empty_thread_map;
        }
 
-       if (evsel->system_wide)
+       if (evsel->core.system_wide)
                nthreads = 1;
        else
                nthreads = threads->nr;
@@ -1818,7 +1651,7 @@ retry_sample_id:
                for (thread = 0; thread < nthreads; thread++) {
                        int fd, group_fd;
 
-                       if (!evsel->cgrp && !evsel->system_wide)
+                       if (!evsel->cgrp && !evsel->core.system_wide)
                                pid = perf_thread_map__pid(threads, thread);
 
                        group_fd = get_group_fd(evsel, cpu, thread);
@@ -1991,7 +1824,7 @@ out_close:
 void evsel__close(struct evsel *evsel)
 {
        perf_evsel__close(&evsel->core);
-       perf_evsel__free_id(evsel);
+       perf_evsel__free_id(&evsel->core);
 }
 
 int perf_evsel__open_per_cpu(struct evsel *evsel,
@@ -2419,283 +2252,6 @@ int perf_evsel__parse_sample_timestamp(struct evsel *evsel,
        return 0;
 }
 
-size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
-                                    u64 read_format)
-{
-       size_t sz, result = sizeof(struct perf_record_sample);
-
-       if (type & PERF_SAMPLE_IDENTIFIER)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_IP)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_TID)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_TIME)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_ADDR)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_ID)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_STREAM_ID)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_CPU)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_PERIOD)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_READ) {
-               result += sizeof(u64);
-               if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-                       result += sizeof(u64);
-               if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-                       result += sizeof(u64);
-               /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
-               if (read_format & PERF_FORMAT_GROUP) {
-                       sz = sample->read.group.nr *
-                            sizeof(struct sample_read_value);
-                       result += sz;
-               } else {
-                       result += sizeof(u64);
-               }
-       }
-
-       if (type & PERF_SAMPLE_CALLCHAIN) {
-               sz = (sample->callchain->nr + 1) * sizeof(u64);
-               result += sz;
-       }
-
-       if (type & PERF_SAMPLE_RAW) {
-               result += sizeof(u32);
-               result += sample->raw_size;
-       }
-
-       if (type & PERF_SAMPLE_BRANCH_STACK) {
-               sz = sample->branch_stack->nr * sizeof(struct branch_entry);
-               sz += sizeof(u64);
-               result += sz;
-       }
-
-       if (type & PERF_SAMPLE_REGS_USER) {
-               if (sample->user_regs.abi) {
-                       result += sizeof(u64);
-                       sz = hweight64(sample->user_regs.mask) * sizeof(u64);
-                       result += sz;
-               } else {
-                       result += sizeof(u64);
-               }
-       }
-
-       if (type & PERF_SAMPLE_STACK_USER) {
-               sz = sample->user_stack.size;
-               result += sizeof(u64);
-               if (sz) {
-                       result += sz;
-                       result += sizeof(u64);
-               }
-       }
-
-       if (type & PERF_SAMPLE_WEIGHT)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_DATA_SRC)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_TRANSACTION)
-               result += sizeof(u64);
-
-       if (type & PERF_SAMPLE_REGS_INTR) {
-               if (sample->intr_regs.abi) {
-                       result += sizeof(u64);
-                       sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
-                       result += sz;
-               } else {
-                       result += sizeof(u64);
-               }
-       }
-
-       if (type & PERF_SAMPLE_PHYS_ADDR)
-               result += sizeof(u64);
-
-       return result;
-}
-
-int perf_event__synthesize_sample(union perf_event *event, u64 type,
-                                 u64 read_format,
-                                 const struct perf_sample *sample)
-{
-       __u64 *array;
-       size_t sz;
-       /*
-        * used for cross-endian analysis. See git commit 65014ab3
-        * for why this goofiness is needed.
-        */
-       union u64_swap u;
-
-       array = event->sample.array;
-
-       if (type & PERF_SAMPLE_IDENTIFIER) {
-               *array = sample->id;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_IP) {
-               *array = sample->ip;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_TID) {
-               u.val32[0] = sample->pid;
-               u.val32[1] = sample->tid;
-               *array = u.val64;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_TIME) {
-               *array = sample->time;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_ADDR) {
-               *array = sample->addr;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_ID) {
-               *array = sample->id;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_STREAM_ID) {
-               *array = sample->stream_id;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_CPU) {
-               u.val32[0] = sample->cpu;
-               u.val32[1] = 0;
-               *array = u.val64;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_PERIOD) {
-               *array = sample->period;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_READ) {
-               if (read_format & PERF_FORMAT_GROUP)
-                       *array = sample->read.group.nr;
-               else
-                       *array = sample->read.one.value;
-               array++;
-
-               if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-                       *array = sample->read.time_enabled;
-                       array++;
-               }
-
-               if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-                       *array = sample->read.time_running;
-                       array++;
-               }
-
-               /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
-               if (read_format & PERF_FORMAT_GROUP) {
-                       sz = sample->read.group.nr *
-                            sizeof(struct sample_read_value);
-                       memcpy(array, sample->read.group.values, sz);
-                       array = (void *)array + sz;
-               } else {
-                       *array = sample->read.one.id;
-                       array++;
-               }
-       }
-
-       if (type & PERF_SAMPLE_CALLCHAIN) {
-               sz = (sample->callchain->nr + 1) * sizeof(u64);
-               memcpy(array, sample->callchain, sz);
-               array = (void *)array + sz;
-       }
-
-       if (type & PERF_SAMPLE_RAW) {
-               u.val32[0] = sample->raw_size;
-               *array = u.val64;
-               array = (void *)array + sizeof(u32);
-
-               memcpy(array, sample->raw_data, sample->raw_size);
-               array = (void *)array + sample->raw_size;
-       }
-
-       if (type & PERF_SAMPLE_BRANCH_STACK) {
-               sz = sample->branch_stack->nr * sizeof(struct branch_entry);
-               sz += sizeof(u64);
-               memcpy(array, sample->branch_stack, sz);
-               array = (void *)array + sz;
-       }
-
-       if (type & PERF_SAMPLE_REGS_USER) {
-               if (sample->user_regs.abi) {
-                       *array++ = sample->user_regs.abi;
-                       sz = hweight64(sample->user_regs.mask) * sizeof(u64);
-                       memcpy(array, sample->user_regs.regs, sz);
-                       array = (void *)array + sz;
-               } else {
-                       *array++ = 0;
-               }
-       }
-
-       if (type & PERF_SAMPLE_STACK_USER) {
-               sz = sample->user_stack.size;
-               *array++ = sz;
-               if (sz) {
-                       memcpy(array, sample->user_stack.data, sz);
-                       array = (void *)array + sz;
-                       *array++ = sz;
-               }
-       }
-
-       if (type & PERF_SAMPLE_WEIGHT) {
-               *array = sample->weight;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_DATA_SRC) {
-               *array = sample->data_src;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_TRANSACTION) {
-               *array = sample->transaction;
-               array++;
-       }
-
-       if (type & PERF_SAMPLE_REGS_INTR) {
-               if (sample->intr_regs.abi) {
-                       *array++ = sample->intr_regs.abi;
-                       sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
-                       memcpy(array, sample->intr_regs.regs, sz);
-                       array = (void *)array + sz;
-               } else {
-                       *array++ = 0;
-               }
-       }
-
-       if (type & PERF_SAMPLE_PHYS_ADDR) {
-               *array = sample->phys_addr;
-               array++;
-       }
-
-       return 0;
-}
-
 struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name)
 {
        return tep_find_field(evsel->tp_format, name);
@@ -2811,9 +2367,11 @@ bool perf_evsel__fallback(struct evsel *evsel, int err,
                if (evsel->name)
                        free(evsel->name);
                evsel->name = new_name;
-               scnprintf(msg, msgsize,
-"kernel.perf_event_paranoid=%d, trying to fall back to excluding kernel samples", paranoid);
+               scnprintf(msg, msgsize, "kernel.perf_event_paranoid=%d, trying "
+                         "to fall back to excluding kernel and hypervisor "
+                         "samples", paranoid);
                evsel->core.attr.exclude_kernel = 1;
+               evsel->core.attr.exclude_hv     = 1; /* matches "and hypervisor" in the message above */
 
                return true;
        }
@@ -2966,7 +2524,7 @@ static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
                     thread++) {
                        int fd = FD(evsel, cpu, thread);
 
-                       if (perf_evlist__id_add_fd(evlist, evsel,
+                       if (perf_evlist__id_add_fd(&evlist->core, &evsel->core,
                                                   cpu, thread, fd) < 0)
                                return -1;
                }
@@ -2980,7 +2538,7 @@ int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
        struct perf_cpu_map *cpus = evsel->core.cpus;
        struct perf_thread_map *threads = evsel->core.threads;
 
-       if (perf_evsel__alloc_id(evsel, cpus->nr, threads->nr))
+       if (perf_evsel__alloc_id(&evsel->core, cpus->nr, threads->nr))
                return -ENOMEM;
 
        return store_evsel_ids(evsel, evlist);
index 68321d1..ddc5ee6 100644 (file)
@@ -4,7 +4,6 @@
 
 #include <linux/list.h>
 #include <stdbool.h>
-#include <stdio.h>
 #include <sys/types.h>
 #include <linux/perf_event.h>
 #include <linux/types.h>
 #include "symbol_conf.h"
 #include <internal/cpumap.h>
 
-struct addr_location;
-struct evsel;
-union perf_event;
-
-/*
- * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are
- * more than one entry in the evlist.
- */
-struct perf_sample_id {
-       struct hlist_node       node;
-       u64                     id;
-       struct evsel            *evsel;
-       /*
-       * 'idx' will be used for AUX area sampling. A sample will have AUX area
-       * data that will be queued for decoding, where there are separate
-       * queues for each CPU (per-cpu tracing) or task (per-thread tracing).
-       * The sample ID can be used to lookup 'idx' which is effectively the
-       * queue number.
-       */
-       int                     idx;
-       int                     cpu;
-       pid_t                   tid;
-
-       /* Holds total ID period value for PERF_SAMPLE_READ processing. */
-       u64                     period;
-};
-
+struct bpf_object;
 struct cgroup;
-
-/*
- * The 'struct perf_evsel_config_term' is used to pass event
- * specific configuration data to perf_evsel__config routine.
- * It is allocated within event parsing and attached to
- * perf_evsel::config_terms list head.
-*/
-enum term_type {
-       PERF_EVSEL__CONFIG_TERM_PERIOD,
-       PERF_EVSEL__CONFIG_TERM_FREQ,
-       PERF_EVSEL__CONFIG_TERM_TIME,
-       PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
-       PERF_EVSEL__CONFIG_TERM_STACK_USER,
-       PERF_EVSEL__CONFIG_TERM_INHERIT,
-       PERF_EVSEL__CONFIG_TERM_MAX_STACK,
-       PERF_EVSEL__CONFIG_TERM_MAX_EVENTS,
-       PERF_EVSEL__CONFIG_TERM_OVERWRITE,
-       PERF_EVSEL__CONFIG_TERM_DRV_CFG,
-       PERF_EVSEL__CONFIG_TERM_BRANCH,
-       PERF_EVSEL__CONFIG_TERM_PERCORE,
-       PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT,
-};
-
-struct perf_evsel_config_term {
-       struct list_head        list;
-       enum term_type  type;
-       union {
-               u64     period;
-               u64     freq;
-               bool    time;
-               char    *callgraph;
-               char    *drv_cfg;
-               u64     stack_user;
-               int     max_stack;
-               bool    inherit;
-               bool    overwrite;
-               char    *branch;
-               unsigned long max_events;
-               bool    percore;
-               bool    aux_output;
-       } val;
-       bool weak;
-};
-
+struct perf_counts;
 struct perf_stat_evsel;
+union perf_event;
 
 typedef int (perf_evsel__sb_cb_t)(union perf_event *event, void *data);
 
@@ -94,10 +25,6 @@ enum perf_tool_event {
        PERF_TOOL_DURATION_TIME = 1,
 };
 
-struct bpf_object;
-struct perf_counts;
-struct xyarray;
-
 /** struct evsel - event selector
  *
  * @evlist - evlist this evsel is in, if it is in one.
@@ -117,12 +44,9 @@ struct evsel {
        struct perf_evsel       core;
        struct evlist   *evlist;
        char                    *filter;
-       struct xyarray          *sample_id;
-       u64                     *id;
        struct perf_counts      *counts;
        struct perf_counts      *prev_raw_counts;
        int                     idx;
-       u32                     ids;
        unsigned long           max_events;
        unsigned long           nr_events_printed;
        char                    *name;
@@ -146,7 +70,6 @@ struct evsel {
        bool                    disabled;
        bool                    no_aux_samples;
        bool                    immediate;
-       bool                    system_wide;
        bool                    tracking;
        bool                    per_pkg;
        bool                    precise_max;
@@ -179,11 +102,6 @@ struct evsel {
        } side_band;
 };
 
-union u64_swap {
-       u64 val64;
-       u32 val32[2];
-};
-
 struct perf_missing_features {
        bool sample_id_all;
        bool exclude_guest;
@@ -282,8 +200,6 @@ const char *perf_evsel__name(struct evsel *evsel);
 const char *perf_evsel__group_name(struct evsel *evsel);
 int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
 
-int perf_evsel__alloc_id(struct evsel *evsel, int ncpus, int nthreads);
-
 void __perf_evsel__set_sample_bit(struct evsel *evsel,
                                  enum perf_event_sample_format bit);
 void __perf_evsel__reset_sample_bit(struct evsel *evsel,
@@ -439,37 +355,6 @@ static inline bool perf_evsel__is_clock(struct evsel *evsel)
               perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
 }
 
-struct perf_attr_details {
-       bool freq;
-       bool verbose;
-       bool event_group;
-       bool force;
-       bool trace_fields;
-};
-
-int perf_evsel__fprintf(struct evsel *evsel,
-                       struct perf_attr_details *details, FILE *fp);
-
-#define EVSEL__PRINT_IP                        (1<<0)
-#define EVSEL__PRINT_SYM               (1<<1)
-#define EVSEL__PRINT_DSO               (1<<2)
-#define EVSEL__PRINT_SYMOFFSET         (1<<3)
-#define EVSEL__PRINT_ONELINE           (1<<4)
-#define EVSEL__PRINT_SRCLINE           (1<<5)
-#define EVSEL__PRINT_UNKNOWN_AS_ADDR   (1<<6)
-#define EVSEL__PRINT_CALLCHAIN_ARROW   (1<<7)
-#define EVSEL__PRINT_SKIP_IGNORED      (1<<8)
-
-struct callchain_cursor;
-
-int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
-                             unsigned int print_opts,
-                             struct callchain_cursor *cursor, FILE *fp);
-
-int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
-                       int left_alignment, unsigned int print_opts,
-                       struct callchain_cursor *cursor, FILE *fp);
-
 bool perf_evsel__fallback(struct evsel *evsel, int err,
                          char *msg, size_t msgsize);
 int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
@@ -502,11 +387,6 @@ static inline bool evsel__has_callchain(const struct evsel *evsel)
        return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0;
 }
 
-typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *);
-
-int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
-                            attr__fprintf_f attr__fprintf, void *priv);
-
 struct perf_env *perf_evsel__env(struct evsel *evsel);
 
 int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h
new file mode 100644 (file)
index 0000000..8a76480
--- /dev/null
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_EVSEL_CONFIG_H
+#define __PERF_EVSEL_CONFIG_H 1
+
+#include <linux/types.h>
+#include <stdbool.h>
+
+/*
+ * The 'struct perf_evsel_config_term' is used to pass event
+ * specific configuration data to perf_evsel__config routine.
+ * It is allocated within event parsing and attached to
+ * perf_evsel::config_terms list head.
+ */
+enum evsel_term_type {
+       PERF_EVSEL__CONFIG_TERM_PERIOD,
+       PERF_EVSEL__CONFIG_TERM_FREQ,
+       PERF_EVSEL__CONFIG_TERM_TIME,
+       PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
+       PERF_EVSEL__CONFIG_TERM_STACK_USER,
+       PERF_EVSEL__CONFIG_TERM_INHERIT,
+       PERF_EVSEL__CONFIG_TERM_MAX_STACK,
+       PERF_EVSEL__CONFIG_TERM_MAX_EVENTS,
+       PERF_EVSEL__CONFIG_TERM_OVERWRITE,
+       PERF_EVSEL__CONFIG_TERM_DRV_CFG,
+       PERF_EVSEL__CONFIG_TERM_BRANCH,
+       PERF_EVSEL__CONFIG_TERM_PERCORE,
+       PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT,
+};
+
+struct perf_evsel_config_term {
+       struct list_head      list;
+       enum evsel_term_type  type; /* NOTE(review): presumably selects the valid 'val' member - confirm */
+       union {
+               u64           period;
+               u64           freq;
+               bool          time;
+               char          *callgraph;
+               char          *drv_cfg;
+               u64           stack_user;
+               int           max_stack;
+               bool          inherit;
+               bool          overwrite;
+               char          *branch;
+               unsigned long max_events;
+               bool          percore;
+               bool          aux_output;
+       } val; /* member names mirror the enum evsel_term_type constants */
+       bool weak;
+};
+#endif // __PERF_EVSEL_CONFIG_H
index 496fec0..028df7a 100644 (file)
@@ -4,6 +4,8 @@
 #include <stdbool.h>
 #include <traceevent/event-parse.h>
 #include "evsel.h"
+#include "util/evsel_fprintf.h"
+#include "util/event.h"
 #include "callchain.h"
 #include "map.h"
 #include "strlist.h"
@@ -101,7 +103,7 @@ out:
 
 int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
                              unsigned int print_opts, struct callchain_cursor *cursor,
-                             FILE *fp)
+                             struct strlist *bt_stop_list, FILE *fp)
 {
        int printed = 0;
        struct callchain_cursor_node *node;
@@ -174,10 +176,8 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
                                printed += fprintf(fp, "\n");
 
                        /* Add srccode here too? */
-                       if (symbol_conf.bt_stop_list &&
-                           node->sym &&
-                           strlist__has_entry(symbol_conf.bt_stop_list,
-                                              node->sym->name)) {
+                       if (bt_stop_list && node->sym &&
+                           strlist__has_entry(bt_stop_list, node->sym->name)) {
                                break;
                        }
 
@@ -192,7 +192,7 @@ next:
 
 int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
                        int left_alignment, unsigned int print_opts,
-                       struct callchain_cursor *cursor, FILE *fp)
+                       struct callchain_cursor *cursor, struct strlist *bt_stop_list, FILE *fp)
 {
        int printed = 0;
        int print_ip = print_opts & EVSEL__PRINT_IP;
@@ -203,8 +203,8 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
        int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
 
        if (cursor != NULL) {
-               printed += sample__fprintf_callchain(sample, left_alignment,
-                                                    print_opts, cursor, fp);
+               printed += sample__fprintf_callchain(sample, left_alignment, print_opts,
+                                                    cursor, bt_stop_list, fp);
        } else {
                printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
 
diff --git a/tools/perf/util/evsel_fprintf.h b/tools/perf/util/evsel_fprintf.h
new file mode 100644 (file)
index 0000000..47e6c84
--- /dev/null
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_EVSEL_FPRINTF_H
+#define __PERF_EVSEL_FPRINTF_H 1
+
+#include <stdio.h>
+#include <stdbool.h>
+
+struct evsel;
+
+struct perf_attr_details {
+       bool freq;
+       bool verbose;
+       bool event_group;
+       bool force;
+       bool trace_fields;
+};
+
+int perf_evsel__fprintf(struct evsel *evsel,
+                       struct perf_attr_details *details, FILE *fp);
+
+#define EVSEL__PRINT_IP                        (1<<0) /* EVSEL__PRINT_*: bit flags tested against 'print_opts' */
+#define EVSEL__PRINT_SYM               (1<<1)
+#define EVSEL__PRINT_DSO               (1<<2)
+#define EVSEL__PRINT_SYMOFFSET         (1<<3)
+#define EVSEL__PRINT_ONELINE           (1<<4)
+#define EVSEL__PRINT_SRCLINE           (1<<5)
+#define EVSEL__PRINT_UNKNOWN_AS_ADDR   (1<<6)
+#define EVSEL__PRINT_CALLCHAIN_ARROW   (1<<7)
+#define EVSEL__PRINT_SKIP_IGNORED      (1<<8)
+
+struct addr_location;
+struct perf_event_attr;
+struct perf_sample;
+struct callchain_cursor;
+struct strlist;
+
+int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
+                             unsigned int print_opts, struct callchain_cursor *cursor,
+                             struct strlist *bt_stop_list, FILE *fp); /* bt_stop_list may be NULL */
+
+int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
+                       int left_alignment, unsigned int print_opts,
+                       struct callchain_cursor *cursor,
+                       struct strlist *bt_stop_list, FILE *fp); /* bt_stop_list is passed on to sample__fprintf_callchain() */
+
+typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *);
+
+int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
+                            attr__fprintf_f attr__fprintf, void *priv);
+#endif // __PERF_EVSEL_FPRINTF_H
index b72440b..d413755 100644 (file)
@@ -35,6 +35,9 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
 #elif defined(__sparc__)
 #define GEN_ELF_ARCH   EM_SPARC
 #define GEN_ELF_CLASS  ELFCLASS32
+#elif defined(__s390x__)
+#define GEN_ELF_ARCH   EM_S390
+#define GEN_ELF_CLASS  ELFCLASS64
 #else
 #error "unsupported architecture"
 #endif
index b0c34dd..86d9396 100644 (file)
@@ -25,6 +25,7 @@
 #include "dso.h"
 #include "evlist.h"
 #include "evsel.h"
+#include "util/evsel_fprintf.h"
 #include "header.h"
 #include "memswap.h"
 #include "trace-event.h"
 #include "tool.h"
 #include "time-utils.h"
 #include "units.h"
-#include "util.h"
+#include "util/util.h" // perf_exe()
 #include "cputopo.h"
 #include "bpf-event.h"
 
 #include <linux/ctype.h>
+#include <internal/lib.h>
 
 /*
  * magic2 = "PERFILE2"
@@ -70,15 +72,6 @@ struct perf_file_attr {
        struct perf_file_section        ids;
 };
 
-struct feat_fd {
-       struct perf_header      *ph;
-       int                     fd;
-       void                    *buf;   /* Either buf != NULL or fd >= 0 */
-       ssize_t                 offset;
-       size_t                  size;
-       struct evsel    *events;
-};
-
 void perf_header__set_feat(struct perf_header *header, int feat)
 {
        set_bit(feat, header->adds_features);
@@ -524,7 +517,7 @@ static int write_event_desc(struct feat_fd *ff,
                 * copy into an nri to be independent of the
                 * type of ids,
                 */
-               nri = evsel->ids;
+               nri = evsel->core.ids;
                ret = do_write(ff, &nri, sizeof(nri));
                if (ret < 0)
                        return ret;
@@ -538,7 +531,7 @@ static int write_event_desc(struct feat_fd *ff,
                /*
                 * write unique ids for this event
                 */
-               ret = do_write(ff, evsel->id, evsel->ids * sizeof(u64));
+               ret = do_write(ff, evsel->core.id, evsel->core.ids * sizeof(u64));
                if (ret < 0)
                        return ret;
        }
@@ -1081,7 +1074,7 @@ static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 lev
 
        scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path);
        if (sysfs__read_str(file, &cache->map, &len)) {
-               zfree(&cache->map);
+               zfree(&cache->size);
                zfree(&cache->type);
                return -1;
        }
@@ -1598,7 +1591,7 @@ static void free_event_desc(struct evsel *events)
 
        for (evsel = events; evsel->core.attr.size; evsel++) {
                zfree(&evsel->name);
-               zfree(&evsel->id);
+               zfree(&evsel->core.id);
        }
 
        free(events);
@@ -1664,8 +1657,8 @@ static struct evsel *read_event_desc(struct feat_fd *ff)
                id = calloc(nr, sizeof(*id));
                if (!id)
                        goto error;
-               evsel->ids = nr;
-               evsel->id = id;
+               evsel->core.ids = nr;
+               evsel->core.id = id;
 
                for (j = 0 ; j < nr; j++) {
                        if (do_read_u64(ff, id))
@@ -1707,9 +1700,9 @@ static void print_event_desc(struct feat_fd *ff, FILE *fp)
        for (evsel = events; evsel->core.attr.size; evsel++) {
                fprintf(fp, "# event : name = %s, ", evsel->name);
 
-               if (evsel->ids) {
+               if (evsel->core.ids) {
                        fprintf(fp, ", id = {");
-                       for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) {
+                       for (j = 0, id = evsel->core.id; j < evsel->core.ids; j++, id++) {
                                if (j)
                                        fputc(',', fp);
                                fprintf(fp, " %"PRIu64, *id);
@@ -2823,15 +2816,6 @@ static int process_compressed(struct feat_fd *ff,
        return 0;
 }
 
-struct feature_ops {
-       int (*write)(struct feat_fd *ff, struct evlist *evlist);
-       void (*print)(struct feat_fd *ff, FILE *fp);
-       int (*process)(struct feat_fd *ff, void *data);
-       const char *name;
-       bool full_only;
-       bool synthesize;
-};
-
 #define FEAT_OPR(n, func, __full_only) \
        [HEADER_##n] = {                                        \
                .name       = __stringify(n),                   \
@@ -2858,8 +2842,10 @@ struct feature_ops {
 #define process_branch_stack   NULL
 #define process_stat           NULL
 
+// Only used in util/synthetic-events.c
+const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE];
 
-static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
+const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
        FEAT_OPN(TRACING_DATA,  tracing_data,   false),
        FEAT_OPN(BUILD_ID,      build_id,       false),
        FEAT_OPR(HOSTNAME,      hostname,       false),
@@ -3083,7 +3069,7 @@ int perf_session__write_header(struct perf_session *session,
 
        evlist__for_each_entry(session->evlist, evsel) {
                evsel->id_offset = lseek(fd, 0, SEEK_CUR);
-               err = do_write(&ff, evsel->id, evsel->ids * sizeof(u64));
+               err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
                if (err < 0) {
                        pr_debug("failed to write perf header\n");
                        return err;
@@ -3097,7 +3083,7 @@ int perf_session__write_header(struct perf_session *session,
                        .attr = evsel->core.attr,
                        .ids  = {
                                .offset = evsel->id_offset,
-                               .size   = evsel->ids * sizeof(u64),
+                               .size   = evsel->core.ids * sizeof(u64),
                        }
                };
                err = do_write(&ff, &f_attr, sizeof(f_attr));
@@ -3624,7 +3610,7 @@ int perf_session__read_header(struct perf_session *session)
                 * for allocating the perf_sample_id table we fake 1 cpu and
                 * hattr->ids threads.
                 */
-               if (perf_evsel__alloc_id(evsel, 1, nr_ids))
+               if (perf_evsel__alloc_id(&evsel->core, 1, nr_ids))
                        goto out_delete_evlist;
 
                lseek(fd, f_attr.ids.offset, SEEK_SET);
@@ -3633,7 +3619,7 @@ int perf_session__read_header(struct perf_session *session)
                        if (perf_header__getbuffer64(header, fd, &f_id, sizeof(f_id)))
                                goto out_errno;
 
-                       perf_evlist__id_add(session->evlist, evsel, 0, j, f_id);
+                       perf_evlist__id_add(&session->evlist->core, &evsel->core, 0, j, f_id);
                }
 
                lseek(fd, tmp, SEEK_SET);
@@ -3656,105 +3642,6 @@ out_delete_evlist:
        return -ENOMEM;
 }
 
-int perf_event__synthesize_attr(struct perf_tool *tool,
-                               struct perf_event_attr *attr, u32 ids, u64 *id,
-                               perf_event__handler_t process)
-{
-       union perf_event *ev;
-       size_t size;
-       int err;
-
-       size = sizeof(struct perf_event_attr);
-       size = PERF_ALIGN(size, sizeof(u64));
-       size += sizeof(struct perf_event_header);
-       size += ids * sizeof(u64);
-
-       ev = zalloc(size);
-
-       if (ev == NULL)
-               return -ENOMEM;
-
-       ev->attr.attr = *attr;
-       memcpy(ev->attr.id, id, ids * sizeof(u64));
-
-       ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
-       ev->attr.header.size = (u16)size;
-
-       if (ev->attr.header.size == size)
-               err = process(tool, ev, NULL, NULL);
-       else
-               err = -E2BIG;
-
-       free(ev);
-
-       return err;
-}
-
-int perf_event__synthesize_features(struct perf_tool *tool,
-                                   struct perf_session *session,
-                                   struct evlist *evlist,
-                                   perf_event__handler_t process)
-{
-       struct perf_header *header = &session->header;
-       struct feat_fd ff;
-       struct perf_record_header_feature *fe;
-       size_t sz, sz_hdr;
-       int feat, ret;
-
-       sz_hdr = sizeof(fe->header);
-       sz = sizeof(union perf_event);
-       /* get a nice alignment */
-       sz = PERF_ALIGN(sz, page_size);
-
-       memset(&ff, 0, sizeof(ff));
-
-       ff.buf = malloc(sz);
-       if (!ff.buf)
-               return -ENOMEM;
-
-       ff.size = sz - sz_hdr;
-       ff.ph = &session->header;
-
-       for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
-               if (!feat_ops[feat].synthesize) {
-                       pr_debug("No record header feature for header :%d\n", feat);
-                       continue;
-               }
-
-               ff.offset = sizeof(*fe);
-
-               ret = feat_ops[feat].write(&ff, evlist);
-               if (ret || ff.offset <= (ssize_t)sizeof(*fe)) {
-                       pr_debug("Error writing feature\n");
-                       continue;
-               }
-               /* ff.buf may have changed due to realloc in do_write() */
-               fe = ff.buf;
-               memset(fe, 0, sizeof(*fe));
-
-               fe->feat_id = feat;
-               fe->header.type = PERF_RECORD_HEADER_FEATURE;
-               fe->header.size = ff.offset;
-
-               ret = process(tool, ff.buf, NULL, NULL);
-               if (ret) {
-                       free(ff.buf);
-                       return ret;
-               }
-       }
-
-       /* Send HEADER_LAST_FEATURE mark. */
-       fe = ff.buf;
-       fe->feat_id     = HEADER_LAST_FEATURE;
-       fe->header.type = PERF_RECORD_HEADER_FEATURE;
-       fe->header.size = sizeof(*fe);
-
-       ret = process(tool, ff.buf, NULL, NULL);
-
-       free(ff.buf);
-       return ret;
-}
-
 int perf_event__process_feature(struct perf_session *session,
                                union perf_event *event)
 {
@@ -3797,113 +3684,6 @@ int perf_event__process_feature(struct perf_session *session,
        return 0;
 }
 
-static struct perf_record_event_update *
-event_update_event__new(size_t size, u64 type, u64 id)
-{
-       struct perf_record_event_update *ev;
-
-       size += sizeof(*ev);
-       size  = PERF_ALIGN(size, sizeof(u64));
-
-       ev = zalloc(size);
-       if (ev) {
-               ev->header.type = PERF_RECORD_EVENT_UPDATE;
-               ev->header.size = (u16)size;
-               ev->type = type;
-               ev->id = id;
-       }
-       return ev;
-}
-
-int
-perf_event__synthesize_event_update_unit(struct perf_tool *tool,
-                                        struct evsel *evsel,
-                                        perf_event__handler_t process)
-{
-       struct perf_record_event_update *ev;
-       size_t size = strlen(evsel->unit);
-       int err;
-
-       ev = event_update_event__new(size + 1, PERF_EVENT_UPDATE__UNIT, evsel->id[0]);
-       if (ev == NULL)
-               return -ENOMEM;
-
-       strlcpy(ev->data, evsel->unit, size + 1);
-       err = process(tool, (union perf_event *)ev, NULL, NULL);
-       free(ev);
-       return err;
-}
-
-int
-perf_event__synthesize_event_update_scale(struct perf_tool *tool,
-                                         struct evsel *evsel,
-                                         perf_event__handler_t process)
-{
-       struct perf_record_event_update *ev;
-       struct perf_record_event_update_scale *ev_data;
-       int err;
-
-       ev = event_update_event__new(sizeof(*ev_data), PERF_EVENT_UPDATE__SCALE, evsel->id[0]);
-       if (ev == NULL)
-               return -ENOMEM;
-
-       ev_data = (struct perf_record_event_update_scale *)ev->data;
-       ev_data->scale = evsel->scale;
-       err = process(tool, (union perf_event*) ev, NULL, NULL);
-       free(ev);
-       return err;
-}
-
-int
-perf_event__synthesize_event_update_name(struct perf_tool *tool,
-                                        struct evsel *evsel,
-                                        perf_event__handler_t process)
-{
-       struct perf_record_event_update *ev;
-       size_t len = strlen(evsel->name);
-       int err;
-
-       ev = event_update_event__new(len + 1, PERF_EVENT_UPDATE__NAME, evsel->id[0]);
-       if (ev == NULL)
-               return -ENOMEM;
-
-       strlcpy(ev->data, evsel->name, len + 1);
-       err = process(tool, (union perf_event*) ev, NULL, NULL);
-       free(ev);
-       return err;
-}
-
-int
-perf_event__synthesize_event_update_cpus(struct perf_tool *tool,
-                                       struct evsel *evsel,
-                                       perf_event__handler_t process)
-{
-       size_t size = sizeof(struct perf_record_event_update);
-       struct perf_record_event_update *ev;
-       int max, err;
-       u16 type;
-
-       if (!evsel->core.own_cpus)
-               return 0;
-
-       ev = cpu_map_data__alloc(evsel->core.own_cpus, &size, &type, &max);
-       if (!ev)
-               return -ENOMEM;
-
-       ev->header.type = PERF_RECORD_EVENT_UPDATE;
-       ev->header.size = (u16)size;
-       ev->type = PERF_EVENT_UPDATE__CPUS;
-       ev->id   = evsel->id[0];
-
-       cpu_map_data__synthesize((struct perf_record_cpu_map_data *)ev->data,
-                                evsel->core.own_cpus,
-                                type, max);
-
-       err = process(tool, (union perf_event*) ev, NULL, NULL);
-       free(ev);
-       return err;
-}
-
 size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
 {
        struct perf_record_event_update *ev = &event->event_update;
@@ -3943,93 +3723,6 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
        return ret;
 }
 
-int perf_event__synthesize_attrs(struct perf_tool *tool,
-                                struct evlist *evlist,
-                                perf_event__handler_t process)
-{
-       struct evsel *evsel;
-       int err = 0;
-
-       evlist__for_each_entry(evlist, evsel) {
-               err = perf_event__synthesize_attr(tool, &evsel->core.attr, evsel->ids,
-                                                 evsel->id, process);
-               if (err) {
-                       pr_debug("failed to create perf header attribute\n");
-                       return err;
-               }
-       }
-
-       return err;
-}
-
-static bool has_unit(struct evsel *counter)
-{
-       return counter->unit && *counter->unit;
-}
-
-static bool has_scale(struct evsel *counter)
-{
-       return counter->scale != 1;
-}
-
-int perf_event__synthesize_extra_attr(struct perf_tool *tool,
-                                     struct evlist *evsel_list,
-                                     perf_event__handler_t process,
-                                     bool is_pipe)
-{
-       struct evsel *counter;
-       int err;
-
-       /*
-        * Synthesize other events stuff not carried within
-        * attr event - unit, scale, name
-        */
-       evlist__for_each_entry(evsel_list, counter) {
-               if (!counter->supported)
-                       continue;
-
-               /*
-                * Synthesize unit and scale only if it's defined.
-                */
-               if (has_unit(counter)) {
-                       err = perf_event__synthesize_event_update_unit(tool, counter, process);
-                       if (err < 0) {
-                               pr_err("Couldn't synthesize evsel unit.\n");
-                               return err;
-                       }
-               }
-
-               if (has_scale(counter)) {
-                       err = perf_event__synthesize_event_update_scale(tool, counter, process);
-                       if (err < 0) {
-                               pr_err("Couldn't synthesize evsel counter.\n");
-                               return err;
-                       }
-               }
-
-               if (counter->core.own_cpus) {
-                       err = perf_event__synthesize_event_update_cpus(tool, counter, process);
-                       if (err < 0) {
-                               pr_err("Couldn't synthesize evsel cpus.\n");
-                               return err;
-                       }
-               }
-
-               /*
-                * Name is needed only for pipe output,
-                * perf.data carries event names.
-                */
-               if (is_pipe) {
-                       err = perf_event__synthesize_event_update_name(tool, counter, process);
-                       if (err < 0) {
-                               pr_err("Couldn't synthesize evsel name.\n");
-                               return err;
-                       }
-               }
-       }
-       return 0;
-}
-
 int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
                             union perf_event *event,
                             struct evlist **pevlist)
@@ -4058,11 +3751,11 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
         * for allocating the perf_sample_id table we fake 1 cpu and
         * hattr->ids threads.
         */
-       if (perf_evsel__alloc_id(evsel, 1, n_ids))
+       if (perf_evsel__alloc_id(&evsel->core, 1, n_ids))
                return -ENOMEM;
 
        for (i = 0; i < n_ids; i++) {
-               perf_evlist__id_add(evlist, evsel, 0, i, event->attr.id[i]);
+               perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, event->attr.id[i]);
        }
 
        return 0;
@@ -4114,55 +3807,6 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
        return 0;
 }
 
-int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd,
-                                       struct evlist *evlist,
-                                       perf_event__handler_t process)
-{
-       union perf_event ev;
-       struct tracing_data *tdata;
-       ssize_t size = 0, aligned_size = 0, padding;
-       struct feat_fd ff;
-       int err __maybe_unused = 0;
-
-       /*
-        * We are going to store the size of the data followed
-        * by the data contents. Since the fd descriptor is a pipe,
-        * we cannot seek back to store the size of the data once
-        * we know it. Instead we:
-        *
-        * - write the tracing data to the temp file
-        * - get/write the data size to pipe
-        * - write the tracing data from the temp file
-        *   to the pipe
-        */
-       tdata = tracing_data_get(&evlist->core.entries, fd, true);
-       if (!tdata)
-               return -1;
-
-       memset(&ev, 0, sizeof(ev));
-
-       ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
-       size = tdata->size;
-       aligned_size = PERF_ALIGN(size, sizeof(u64));
-       padding = aligned_size - size;
-       ev.tracing_data.header.size = sizeof(ev.tracing_data);
-       ev.tracing_data.size = aligned_size;
-
-       process(tool, &ev, NULL, NULL);
-
-       /*
-        * The put function will copy all the tracing data
-        * stored in temp file to the pipe.
-        */
-       tracing_data_put(tdata);
-
-       ff = (struct feat_fd){ .fd = fd };
-       if (write_padded(&ff, NULL, 0, padding))
-               return -1;
-
-       return aligned_size;
-}
-
 int perf_event__process_tracing_data(struct perf_session *session,
                                     union perf_event *event)
 {
@@ -4202,34 +3846,6 @@ int perf_event__process_tracing_data(struct perf_session *session,
        return size_read + padding;
 }
 
-int perf_event__synthesize_build_id(struct perf_tool *tool,
-                                   struct dso *pos, u16 misc,
-                                   perf_event__handler_t process,
-                                   struct machine *machine)
-{
-       union perf_event ev;
-       size_t len;
-       int err = 0;
-
-       if (!pos->hit)
-               return err;
-
-       memset(&ev, 0, sizeof(ev));
-
-       len = pos->long_name_len + 1;
-       len = PERF_ALIGN(len, NAME_ALIGN);
-       memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id));
-       ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
-       ev.build_id.header.misc = misc;
-       ev.build_id.pid = machine->pid;
-       ev.build_id.header.size = sizeof(ev.build_id) + len;
-       memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
-
-       err = process(tool, &ev, NULL, machine);
-
-       return err;
-}
-
 int perf_event__process_build_id(struct perf_session *session,
                                 union perf_event *event)
 {
index 3e48ae3..ca53a92 100644 (file)
@@ -5,10 +5,10 @@
 #include <linux/stddef.h>
 #include <linux/perf_event.h>
 #include <sys/types.h>
+#include <stdio.h> // FILE
 #include <stdbool.h>
 #include <linux/bitmap.h>
 #include <linux/types.h>
-#include "event.h"
 #include "env.h"
 #include "pmu.h"
 
@@ -92,8 +92,28 @@ struct perf_header {
        struct perf_env         env;
 };
 
+struct feat_fd {
+       struct perf_header *ph;
+       int                fd;
+       void               *buf;        /* Either buf != NULL or fd >= 0 */
+       ssize_t            offset;
+       size_t             size;
+       struct evsel       *events;
+};
+
+struct perf_header_feature_ops {
+       int        (*write)(struct feat_fd *ff, struct evlist *evlist);
+       void       (*print)(struct feat_fd *ff, FILE *fp);
+       int        (*process)(struct feat_fd *ff, void *data);
+       const char *name;
+       bool       full_only;
+       bool       synthesize;
+};
+
 struct evlist;
 struct perf_session;
+struct perf_tool;
+union perf_event;
 
 int perf_session__read_header(struct perf_session *session);
 int perf_session__write_header(struct perf_session *session,
@@ -115,54 +135,16 @@ int perf_header__process_sections(struct perf_header *header, int fd,
 
 int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full);
 
-int perf_event__synthesize_features(struct perf_tool *tool,
-                                   struct perf_session *session,
-                                   struct evlist *evlist,
-                                   perf_event__handler_t process);
-
-int perf_event__synthesize_extra_attr(struct perf_tool *tool,
-                                     struct evlist *evsel_list,
-                                     perf_event__handler_t process,
-                                     bool is_pipe);
-
 int perf_event__process_feature(struct perf_session *session,
                                union perf_event *event);
-
-int perf_event__synthesize_attr(struct perf_tool *tool,
-                               struct perf_event_attr *attr, u32 ids, u64 *id,
-                               perf_event__handler_t process);
-int perf_event__synthesize_attrs(struct perf_tool *tool,
-                                struct evlist *evlist,
-                                perf_event__handler_t process);
-int perf_event__synthesize_event_update_unit(struct perf_tool *tool,
-                                            struct evsel *evsel,
-                                            perf_event__handler_t process);
-int perf_event__synthesize_event_update_scale(struct perf_tool *tool,
-                                             struct evsel *evsel,
-                                             perf_event__handler_t process);
-int perf_event__synthesize_event_update_name(struct perf_tool *tool,
-                                            struct evsel *evsel,
-                                            perf_event__handler_t process);
-int perf_event__synthesize_event_update_cpus(struct perf_tool *tool,
-                                            struct evsel *evsel,
-                                            perf_event__handler_t process);
 int perf_event__process_attr(struct perf_tool *tool, union perf_event *event,
                             struct evlist **pevlist);
 int perf_event__process_event_update(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct evlist **pevlist);
 size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
-
-int perf_event__synthesize_tracing_data(struct perf_tool *tool,
-                                       int fd, struct evlist *evlist,
-                                       perf_event__handler_t process);
 int perf_event__process_tracing_data(struct perf_session *session,
                                     union perf_event *event);
-
-int perf_event__synthesize_build_id(struct perf_tool *tool,
-                                   struct dso *pos, u16 misc,
-                                   perf_event__handler_t process,
-                                   struct machine *machine);
 int perf_event__process_build_id(struct perf_session *session,
                                 union perf_event *event);
 bool is_perf_magic(u64 magic);
index 34803e3..6a186b6 100644 (file)
@@ -15,6 +15,7 @@ struct addr_location;
 struct map_symbol;
 struct mem_info;
 struct branch_info;
+struct branch_stack;
 struct block_info;
 struct symbol;
 struct ui_progress;
index aacffa2..34cb380 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/log2.h>
 #include <linux/zalloc.h>
 
-#include "cpumap.h"
 #include "color.h"
 #include "evsel.h"
 #include "evlist.h"
@@ -29,6 +28,7 @@
 #include "auxtrace.h"
 #include "intel-pt-decoder/intel-pt-insn-decoder.h"
 #include "intel-bts.h"
+#include "util/synthetic-events.h"
 
 #define MAX_TIMESTAMP (~0ULL)
 
@@ -768,7 +768,7 @@ static int intel_bts_synth_events(struct intel_bts *bts,
        int err;
 
        evlist__for_each_entry(evlist, evsel) {
-               if (evsel->core.attr.type == bts->pmu_type && evsel->ids) {
+               if (evsel->core.attr.type == bts->pmu_type && evsel->core.ids) {
                        found = true;
                        break;
                }
@@ -795,7 +795,7 @@ static int intel_bts_synth_events(struct intel_bts *bts,
        attr.sample_id_all = evsel->core.attr.sample_id_all;
        attr.read_format = evsel->core.attr.read_format;
 
-       id = evsel->id[0] + 1000000000;
+       id = evsel->core.id[0] + 1000000000;
        if (!id)
                id = 1;
 
index 9b56fb7..a1c9eb6 100644 (file)
@@ -33,6 +33,7 @@
 #include "tsc.h"
 #include "intel-pt.h"
 #include "config.h"
+#include "util/synthetic-events.h"
 #include "time-utils.h"
 
 #include "../arch/x86/include/uapi/asm/perf_regs.h"
@@ -1704,7 +1705,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
        struct intel_pt *pt = ptq->pt;
        struct evsel *evsel = pt->pebs_evsel;
        u64 sample_type = evsel->core.attr.sample_type;
-       u64 id = evsel->id[0];
+       u64 id = evsel->core.id[0];
        u8 cpumode;
 
        if (intel_pt_skip_event(pt))
@@ -2719,7 +2720,7 @@ static void intel_pt_set_event_name(struct evlist *evlist, u64 id,
        struct evsel *evsel;
 
        evlist__for_each_entry(evlist, evsel) {
-               if (evsel->id && evsel->id[0] == id) {
+               if (evsel->core.id && evsel->core.id[0] == id) {
                        if (evsel->name)
                                zfree(&evsel->name);
                        evsel->name = strdup(name);
@@ -2734,7 +2735,7 @@ static struct evsel *intel_pt_evsel(struct intel_pt *pt,
        struct evsel *evsel;
 
        evlist__for_each_entry(evlist, evsel) {
-               if (evsel->core.attr.type == pt->pmu_type && evsel->ids)
+               if (evsel->core.attr.type == pt->pmu_type && evsel->core.ids)
                        return evsel;
        }
 
@@ -2775,7 +2776,7 @@ static int intel_pt_synth_events(struct intel_pt *pt,
        attr.sample_id_all = evsel->core.attr.sample_id_all;
        attr.read_format = evsel->core.attr.read_format;
 
-       id = evsel->id[0] + 1000000000;
+       id = evsel->core.id[0] + 1000000000;
        if (!id)
                id = 1;
 
@@ -2902,7 +2903,7 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt)
                return;
 
        evlist__for_each_entry(pt->session->evlist, evsel) {
-               if (evsel->core.attr.aux_output && evsel->id) {
+               if (evsel->core.attr.aux_output && evsel->core.id) {
                        pt->sample_pebs = true;
                        pt->pebs_evsel = evsel;
                        return;
index b80f29b..1bdf4c6 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/stringify.h>
 
 #include "build-id.h"
-#include "util.h"
 #include "event.h"
 #include "debug.h"
 #include "evlist.h"
@@ -27,7 +26,6 @@
 #include "jit.h"
 #include "jitdump.h"
 #include "genelf.h"
-#include "../builtin.h"
 
 #include <linux/ctype.h>
 #include <linux/zalloc.h>
@@ -779,7 +777,7 @@ jit_process(struct perf_session *session,
         * track sample_type to compute id_all layout
         * perf sets the same sample type to all events as of now
         */
-       first = perf_evlist__first(session->evlist);
+       first = evlist__first(session->evlist);
        jd.sample_type = first->core.attr.sample_type;
 
        *nbytes = 0;
index 4691363..6f0fa05 100644 (file)
@@ -2,6 +2,8 @@
 #ifndef __PERF_KVM_STAT_H
 #define __PERF_KVM_STAT_H
 
+#ifdef HAVE_KVM_STAT_SUPPORT
+
 #include "tool.h"
 #include "stat.h"
 #include "record.h"
@@ -144,5 +146,7 @@ extern const int decode_str_len;
 extern const char *kvm_exit_reason;
 extern const char *kvm_entry_trace;
 extern const char *kvm_exit_trace;
+#endif /* HAVE_KVM_STAT_SUPPORT */
 
+extern int kvm_add_default_arch_event(int *argc, const char **argv);
 #endif /* __PERF_KVM_STAT_H */
index 66756e6..6b4e5a0 100644 (file)
@@ -22,7 +22,6 @@
 #define LIBUNWIND__ARCH_REG_SP PERF_REG_ARM64_SP
 
 #include "unwind.h"
-#include "debug.h"
 #include "libunwind-aarch64.h"
 #include <../../../../arch/arm64/include/uapi/asm/perf_regs.h>
 #include "../../arch/arm64/util/unwind-libunwind.c"
index c5e5681..21c216c 100644 (file)
@@ -22,7 +22,6 @@
 #define LIBUNWIND__ARCH_REG_SP PERF_REG_X86_SP
 
 #include "unwind.h"
-#include "debug.h"
 #include "libunwind-x86.h"
 #include <../../../../arch/x86/include/uapi/asm/perf_regs.h>
 
index 55fb4b3..8d04e3d 100644 (file)
@@ -8,6 +8,7 @@
 #include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <linux/err.h>
 #include <linux/string.h>
 #include <linux/zalloc.h>
index 3974470..39062df 100644 (file)
@@ -7,10 +7,10 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include "compress.h"
-#include "util.h"
 #include "debug.h"
 #include <string.h>
 #include <unistd.h>
+#include <internal/lib.h>
 
 #define BUFSIZE 8192
 
index b4749d3..70a9f87 100644 (file)
@@ -32,6 +32,7 @@
 #include "linux/hash.h"
 #include "asm/bug.h"
 #include "bpf-event.h"
+#include <internal/lib.h> // page_size
 
 #include <linux/ctype.h>
 #include <symbol/kallsyms.h>
@@ -2609,21 +2610,6 @@ int machines__for_each_thread(struct machines *machines,
        return rc;
 }
 
-int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
-                                 struct target *target, struct perf_thread_map *threads,
-                                 perf_event__handler_t process, bool data_mmap,
-                                 unsigned int nr_threads_synthesize)
-{
-       if (target__has_task(target))
-               return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap);
-       else if (target__has_cpu(target))
-               return perf_event__synthesize_threads(tool, process,
-                                                     machine, data_mmap,
-                                                     nr_threads_synthesize);
-       /* command specified */
-       return 0;
-}
-
 pid_t machine__get_current_tid(struct machine *machine, int cpu)
 {
        int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS);
index ffd391a..18e13c0 100644 (file)
@@ -6,7 +6,6 @@
 #include <linux/rbtree.h>
 #include "map_groups.h"
 #include "dsos.h"
-#include "event.h"
 #include "rwsem.h"
 
 struct addr_location;
@@ -252,20 +251,6 @@ int machines__for_each_thread(struct machines *machines,
                              int (*fn)(struct thread *thread, void *p),
                              void *priv);
 
-int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
-                                 struct target *target, struct perf_thread_map *threads,
-                                 perf_event__handler_t process, bool data_mmap,
-                                 unsigned int nr_threads_synthesize);
-static inline
-int machine__synthesize_threads(struct machine *machine, struct target *target,
-                               struct perf_thread_map *threads, bool data_mmap,
-                               unsigned int nr_threads_synthesize)
-{
-       return __machine__synthesize_threads(machine, NULL, target, threads,
-                                            perf_event__process, data_mmap,
-                                            nr_threads_synthesize);
-}
-
 pid_t machine__get_current_tid(struct machine *machine, int cpu);
 int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
                             pid_t tid);
index 1e29ff9..2c38e8c 100644 (file)
@@ -2,6 +2,13 @@
 #ifndef PERF_MEMSWAP_H_
 #define PERF_MEMSWAP_H_
 
+#include <linux/types.h>
+
+union u64_swap {
+       u64 val64;
+       u32 val32[2];
+};
+
 void mem_bswap_64(void *src, int byte_size);
 void mem_bswap_32(void *src, int byte_size);
 
index 33c5b54..a35dc57 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/zalloc.h>
 #include <stdlib.h>
 #include <string.h>
+#include <unistd.h> // sysconf()
 #ifdef HAVE_LIBNUMA_SUPPORT
 #include <numaif.h>
 #endif
 #include "event.h"
 #include "mmap.h"
 #include "../perf.h"
-#include "util.h" /* page_size */
+#include <internal/lib.h> /* page_size */
 
-size_t perf_mmap__mmap_len(struct perf_mmap *map)
+size_t perf_mmap__mmap_len(struct mmap *map)
 {
-       return map->mask + 1 + page_size;
+       return map->core.mask + 1 + page_size;
 }
 
 /* When check_messup is true, 'end' must points to a good entry */
-static union perf_event *perf_mmap__read(struct perf_mmap *map,
+static union perf_event *perf_mmap__read(struct mmap *map,
                                         u64 *startp, u64 end)
 {
-       unsigned char *data = map->base + page_size;
+       unsigned char *data = map->core.base + page_size;
        union perf_event *event = NULL;
        int diff = end - *startp;
 
        if (diff >= (int)sizeof(event->header)) {
                size_t size;
 
-               event = (union perf_event *)&data[*startp & map->mask];
+               event = (union perf_event *)&data[*startp & map->core.mask];
                size = event->header.size;
 
                if (size < sizeof(event->header) || diff < (int)size)
@@ -48,20 +49,20 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
                 * Event straddles the mmap boundary -- header should always
                 * be inside due to u64 alignment of output.
                 */
-               if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
+               if ((*startp & map->core.mask) + size != ((*startp + size) & map->core.mask)) {
                        unsigned int offset = *startp;
                        unsigned int len = min(sizeof(*event), size), cpy;
-                       void *dst = map->event_copy;
+                       void *dst = map->core.event_copy;
 
                        do {
-                               cpy = min(map->mask + 1 - (offset & map->mask), len);
-                               memcpy(dst, &data[offset & map->mask], cpy);
+                               cpy = min(map->core.mask + 1 - (offset & map->core.mask), len);
+                               memcpy(dst, &data[offset & map->core.mask], cpy);
                                offset += cpy;
                                dst += cpy;
                                len -= cpy;
                        } while (len);
 
-                       event = (union perf_event *)map->event_copy;
+                       event = (union perf_event *)map->core.event_copy;
                }
 
                *startp += size;
@@ -82,55 +83,55 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
  * }
  * perf_mmap__read_done()
  */
-union perf_event *perf_mmap__read_event(struct perf_mmap *map)
+union perf_event *perf_mmap__read_event(struct mmap *map)
 {
        union perf_event *event;
 
        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
         */
-       if (!refcount_read(&map->refcnt))
+       if (!refcount_read(&map->core.refcnt))
                return NULL;
 
        /* non-overwirte doesn't pause the ringbuffer */
-       if (!map->overwrite)
-               map->end = perf_mmap__read_head(map);
+       if (!map->core.overwrite)
+               map->core.end = perf_mmap__read_head(map);
 
-       event = perf_mmap__read(map, &map->start, map->end);
+       event = perf_mmap__read(map, &map->core.start, map->core.end);
 
-       if (!map->overwrite)
-               map->prev = map->start;
+       if (!map->core.overwrite)
+               map->core.prev = map->core.start;
 
        return event;
 }
 
-static bool perf_mmap__empty(struct perf_mmap *map)
+static bool perf_mmap__empty(struct mmap *map)
 {
-       return perf_mmap__read_head(map) == map->prev && !map->auxtrace_mmap.base;
+       return perf_mmap__read_head(map) == map->core.prev && !map->auxtrace_mmap.base;
 }
 
-void perf_mmap__get(struct perf_mmap *map)
+void perf_mmap__get(struct mmap *map)
 {
-       refcount_inc(&map->refcnt);
+       refcount_inc(&map->core.refcnt);
 }
 
-void perf_mmap__put(struct perf_mmap *map)
+void perf_mmap__put(struct mmap *map)
 {
-       BUG_ON(map->base && refcount_read(&map->refcnt) == 0);
+       BUG_ON(map->core.base && refcount_read(&map->core.refcnt) == 0);
 
-       if (refcount_dec_and_test(&map->refcnt))
+       if (refcount_dec_and_test(&map->core.refcnt))
                perf_mmap__munmap(map);
 }
 
-void perf_mmap__consume(struct perf_mmap *map)
+void perf_mmap__consume(struct mmap *map)
 {
-       if (!map->overwrite) {
-               u64 old = map->prev;
+       if (!map->core.overwrite) {
+               u64 old = map->core.prev;
 
                perf_mmap__write_tail(map, old);
        }
 
-       if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
+       if (refcount_read(&map->core.refcnt) == 1 && perf_mmap__empty(map))
                perf_mmap__put(map);
 }
 
@@ -161,13 +162,13 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb
 }
 
 #ifdef HAVE_AIO_SUPPORT
-static int perf_mmap__aio_enabled(struct perf_mmap *map)
+static int perf_mmap__aio_enabled(struct mmap *map)
 {
        return map->aio.nr_cblocks > 0;
 }
 
 #ifdef HAVE_LIBNUMA_SUPPORT
-static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
+static int perf_mmap__aio_alloc(struct mmap *map, int idx)
 {
        map->aio.data[idx] = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE,
                                  MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
@@ -179,7 +180,7 @@ static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
        return 0;
 }
 
-static void perf_mmap__aio_free(struct perf_mmap *map, int idx)
+static void perf_mmap__aio_free(struct mmap *map, int idx)
 {
        if (map->aio.data[idx]) {
                munmap(map->aio.data[idx], perf_mmap__mmap_len(map));
@@ -187,7 +188,7 @@ static void perf_mmap__aio_free(struct perf_mmap *map, int idx)
        }
 }
 
-static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affinity)
+static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity)
 {
        void *data;
        size_t mmap_len;
@@ -207,7 +208,7 @@ static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affi
        return 0;
 }
 #else /* !HAVE_LIBNUMA_SUPPORT */
-static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
+static int perf_mmap__aio_alloc(struct mmap *map, int idx)
 {
        map->aio.data[idx] = malloc(perf_mmap__mmap_len(map));
        if (map->aio.data[idx] == NULL)
@@ -216,19 +217,19 @@ static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
        return 0;
 }
 
-static void perf_mmap__aio_free(struct perf_mmap *map, int idx)
+static void perf_mmap__aio_free(struct mmap *map, int idx)
 {
        zfree(&(map->aio.data[idx]));
 }
 
-static int perf_mmap__aio_bind(struct perf_mmap *map __maybe_unused, int idx __maybe_unused,
+static int perf_mmap__aio_bind(struct mmap *map __maybe_unused, int idx __maybe_unused,
                int cpu __maybe_unused, int affinity __maybe_unused)
 {
        return 0;
 }
 #endif
 
-static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp)
+static int perf_mmap__aio_mmap(struct mmap *map, struct mmap_params *mp)
 {
        int delta_max, i, prio, ret;
 
@@ -256,7 +257,7 @@ static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp)
                                pr_debug2("failed to allocate data buffer area, error %m");
                                return -1;
                        }
-                       ret = perf_mmap__aio_bind(map, i, map->cpu, mp->affinity);
+                       ret = perf_mmap__aio_bind(map, i, map->core.cpu, mp->affinity);
                        if (ret == -1)
                                return -1;
                        /*
@@ -282,7 +283,7 @@ static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp)
        return 0;
 }
 
-static void perf_mmap__aio_munmap(struct perf_mmap *map)
+static void perf_mmap__aio_munmap(struct mmap *map)
 {
        int i;
 
@@ -294,34 +295,34 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map)
        zfree(&map->aio.aiocb);
 }
 #else /* !HAVE_AIO_SUPPORT */
-static int perf_mmap__aio_enabled(struct perf_mmap *map __maybe_unused)
+static int perf_mmap__aio_enabled(struct mmap *map __maybe_unused)
 {
        return 0;
 }
 
-static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused,
+static int perf_mmap__aio_mmap(struct mmap *map __maybe_unused,
                               struct mmap_params *mp __maybe_unused)
 {
        return 0;
 }
 
-static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused)
+static void perf_mmap__aio_munmap(struct mmap *map __maybe_unused)
 {
 }
 #endif
 
-void perf_mmap__munmap(struct perf_mmap *map)
+void perf_mmap__munmap(struct mmap *map)
 {
        perf_mmap__aio_munmap(map);
        if (map->data != NULL) {
                munmap(map->data, perf_mmap__mmap_len(map));
                map->data = NULL;
        }
-       if (map->base != NULL) {
-               munmap(map->base, perf_mmap__mmap_len(map));
-               map->base = NULL;
-               map->fd = -1;
-               refcount_set(&map->refcnt, 0);
+       if (map->core.base != NULL) {
+               munmap(map->core.base, perf_mmap__mmap_len(map));
+               map->core.base = NULL;
+               map->core.fd = -1;
+               refcount_set(&map->core.refcnt, 0);
        }
        auxtrace_mmap__munmap(&map->auxtrace_mmap);
 }
@@ -343,16 +344,16 @@ static void build_node_mask(int node, cpu_set_t *mask)
        }
 }
 
-static void perf_mmap__setup_affinity_mask(struct perf_mmap *map, struct mmap_params *mp)
+static void perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp)
 {
        CPU_ZERO(&map->affinity_mask);
        if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1)
-               build_node_mask(cpu__get_node(map->cpu), &map->affinity_mask);
+               build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask);
        else if (mp->affinity == PERF_AFFINITY_CPU)
-               CPU_SET(map->cpu, &map->affinity_mask);
+               CPU_SET(map->core.cpu, &map->affinity_mask);
 }
 
-int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu)
+int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu)
 {
        /*
         * The last one will be done at perf_mmap__consume(), so that we
@@ -367,23 +368,23 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c
         * evlist layer can't just drop it when filtering events in
         * perf_evlist__filter_pollfd().
         */
-       refcount_set(&map->refcnt, 2);
-       map->prev = 0;
-       map->mask = mp->mask;
-       map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
+       refcount_set(&map->core.refcnt, 2);
+       map->core.prev = 0;
+       map->core.mask = mp->mask;
+       map->core.base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
                         MAP_SHARED, fd, 0);
-       if (map->base == MAP_FAILED) {
+       if (map->core.base == MAP_FAILED) {
                pr_debug2("failed to mmap perf event ring buffer, error %d\n",
                          errno);
-               map->base = NULL;
+               map->core.base = NULL;
                return -1;
        }
-       map->fd = fd;
-       map->cpu = cpu;
+       map->core.fd = fd;
+       map->core.cpu = cpu;
 
        perf_mmap__setup_affinity_mask(map, mp);
 
-       map->flush = mp->flush;
+       map->core.flush = mp->flush;
 
        map->comp_level = mp->comp_level;
 
@@ -399,7 +400,7 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c
        }
 
        if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
-                               &mp->auxtrace_mp, map->base, fd))
+                               &mp->auxtrace_mp, map->core.base, fd))
                return -1;
 
        return perf_mmap__aio_mmap(map, mp);
@@ -440,25 +441,25 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
 /*
  * Report the start and end of the available data in ringbuffer
  */
-static int __perf_mmap__read_init(struct perf_mmap *md)
+static int __perf_mmap__read_init(struct mmap *md)
 {
        u64 head = perf_mmap__read_head(md);
-       u64 old = md->prev;
-       unsigned char *data = md->base + page_size;
+       u64 old = md->core.prev;
+       unsigned char *data = md->core.base + page_size;
        unsigned long size;
 
-       md->start = md->overwrite ? head : old;
-       md->end = md->overwrite ? old : head;
+       md->core.start = md->core.overwrite ? head : old;
+       md->core.end = md->core.overwrite ? old : head;
 
-       if ((md->end - md->start) < md->flush)
+       if ((md->core.end - md->core.start) < md->core.flush)
                return -EAGAIN;
 
-       size = md->end - md->start;
-       if (size > (unsigned long)(md->mask) + 1) {
-               if (!md->overwrite) {
+       size = md->core.end - md->core.start;
+       if (size > (unsigned long)(md->core.mask) + 1) {
+               if (!md->core.overwrite) {
                        WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
 
-                       md->prev = head;
+                       md->core.prev = head;
                        perf_mmap__consume(md);
                        return -EAGAIN;
                }
@@ -467,29 +468,29 @@ static int __perf_mmap__read_init(struct perf_mmap *md)
                 * Backward ring buffer is full. We still have a chance to read
                 * most of data from it.
                 */
-               if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
+               if (overwrite_rb_find_range(data, md->core.mask, &md->core.start, &md->core.end))
                        return -EINVAL;
        }
 
        return 0;
 }
 
-int perf_mmap__read_init(struct perf_mmap *map)
+int perf_mmap__read_init(struct mmap *map)
 {
        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
         */
-       if (!refcount_read(&map->refcnt))
+       if (!refcount_read(&map->core.refcnt))
                return -ENOENT;
 
        return __perf_mmap__read_init(map);
 }
 
-int perf_mmap__push(struct perf_mmap *md, void *to,
-                   int push(struct perf_mmap *map, void *to, void *buf, size_t size))
+int perf_mmap__push(struct mmap *md, void *to,
+                   int push(struct mmap *map, void *to, void *buf, size_t size))
 {
        u64 head = perf_mmap__read_head(md);
-       unsigned char *data = md->base + page_size;
+       unsigned char *data = md->core.base + page_size;
        unsigned long size;
        void *buf;
        int rc = 0;
@@ -498,12 +499,12 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
        if (rc < 0)
                return (rc == -EAGAIN) ? 1 : -1;
 
-       size = md->end - md->start;
+       size = md->core.end - md->core.start;
 
-       if ((md->start & md->mask) + size != (md->end & md->mask)) {
-               buf = &data[md->start & md->mask];
-               size = md->mask + 1 - (md->start & md->mask);
-               md->start += size;
+       if ((md->core.start & md->core.mask) + size != (md->core.end & md->core.mask)) {
+               buf = &data[md->core.start & md->core.mask];
+               size = md->core.mask + 1 - (md->core.start & md->core.mask);
+               md->core.start += size;
 
                if (push(md, to, buf, size) < 0) {
                        rc = -1;
@@ -511,16 +512,16 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
                }
        }
 
-       buf = &data[md->start & md->mask];
-       size = md->end - md->start;
-       md->start += size;
+       buf = &data[md->core.start & md->core.mask];
+       size = md->core.end - md->core.start;
+       md->core.start += size;
 
        if (push(md, to, buf, size) < 0) {
                rc = -1;
                goto out;
        }
 
-       md->prev = head;
+       md->core.prev = head;
        perf_mmap__consume(md);
 out:
        return rc;
@@ -529,16 +530,16 @@ out:
 /*
  * Mandatory for overwrite mode
  * The direction of overwrite mode is backward.
- * The last perf_mmap__read() will set tail to map->prev.
- * Need to correct the map->prev to head which is the end of next read.
+ * The last perf_mmap__read() will set tail to map->core.prev.
+ * Need to correct the map->core.prev to head which is the end of next read.
  */
-void perf_mmap__read_done(struct perf_mmap *map)
+void perf_mmap__read_done(struct mmap *map)
 {
        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
         */
-       if (!refcount_read(&map->refcnt))
+       if (!refcount_read(&map->core.refcnt))
                return;
 
-       map->prev = perf_mmap__read_head(map);
+       map->core.prev = perf_mmap__read_head(map);
 }
index 3857a49..e567c1c 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef __PERF_MMAP_H
 #define __PERF_MMAP_H 1
 
+#include <internal/mmap.h>
 #include <linux/compiler.h>
 #include <linux/refcount.h>
 #include <linux/types.h>
 
 struct aiocb;
 /**
- * struct perf_mmap - perf's ring buffer mmap details
+ * struct mmap - perf's ring buffer mmap details
  *
  * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
  */
-struct perf_mmap {
-       void             *base;
-       int              mask;
-       int              fd;
-       int              cpu;
-       refcount_t       refcnt;
-       u64              prev;
-       u64              start;
-       u64              end;
-       bool             overwrite;
+struct mmap {
+       struct perf_mmap        core;
        struct auxtrace_mmap auxtrace_mmap;
-       char             event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
 #ifdef HAVE_AIO_SUPPORT
        struct {
                void             **data;
@@ -40,71 +32,42 @@ struct perf_mmap {
        } aio;
 #endif
        cpu_set_t       affinity_mask;
-       u64             flush;
        void            *data;
        int             comp_level;
 };
 
-/*
- * State machine of bkw_mmap_state:
- *
- *                     .________________(forbid)_____________.
- *                     |                                     V
- * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY
- *                     ^  ^              |   ^               |
- *                     |  |__(forbid)____/   |___(forbid)___/|
- *                     |                                     |
- *                      \_________________(3)_______________/
- *
- * NOTREADY     : Backward ring buffers are not ready
- * RUNNING      : Backward ring buffers are recording
- * DATA_PENDING : We are required to collect data from backward ring buffers
- * EMPTY        : We have collected data from backward ring buffers.
- *
- * (0): Setup backward ring buffer
- * (1): Pause ring buffers for reading
- * (2): Read from ring buffers
- * (3): Resume ring buffers for recording
- */
-enum bkw_mmap_state {
-       BKW_MMAP_NOTREADY,
-       BKW_MMAP_RUNNING,
-       BKW_MMAP_DATA_PENDING,
-       BKW_MMAP_EMPTY,
-};
-
 struct mmap_params {
        int prot, mask, nr_cblocks, affinity, flush, comp_level;
        struct auxtrace_mmap_params auxtrace_mp;
 };
 
-int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu);
-void perf_mmap__munmap(struct perf_mmap *map);
+int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu);
+void perf_mmap__munmap(struct mmap *map);
 
-void perf_mmap__get(struct perf_mmap *map);
-void perf_mmap__put(struct perf_mmap *map);
+void perf_mmap__get(struct mmap *map);
+void perf_mmap__put(struct mmap *map);
 
-void perf_mmap__consume(struct perf_mmap *map);
+void perf_mmap__consume(struct mmap *map);
 
-static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
+static inline u64 perf_mmap__read_head(struct mmap *mm)
 {
-       return ring_buffer_read_head(mm->base);
+       return ring_buffer_read_head(mm->core.base);
 }
 
-static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
+static inline void perf_mmap__write_tail(struct mmap *md, u64 tail)
 {
-       ring_buffer_write_tail(md->base, tail);
+       ring_buffer_write_tail(md->core.base, tail);
 }
 
-union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
+union perf_event *perf_mmap__read_forward(struct mmap *map);
 
-union perf_event *perf_mmap__read_event(struct perf_mmap *map);
+union perf_event *perf_mmap__read_event(struct mmap *map);
 
-int perf_mmap__push(struct perf_mmap *md, void *to,
-                   int push(struct perf_mmap *map, void *to, void *buf, size_t size));
+int perf_mmap__push(struct mmap *md, void *to,
+                   int push(struct mmap *map, void *to, void *buf, size_t size));
 
-size_t perf_mmap__mmap_len(struct perf_mmap *map);
+size_t perf_mmap__mmap_len(struct mmap *map);
 
-int perf_mmap__read_init(struct perf_mmap *md);
-void perf_mmap__read_done(struct perf_mmap *map);
+int perf_mmap__read_init(struct mmap *md);
+void perf_mmap__read_done(struct mmap *map);
 #endif /*__PERF_MMAP_H */
index 99be15d..285d6f3 100644 (file)
 #include <string.h>
 #include <unistd.h>
 #include <asm/bug.h>
+#include <linux/kernel.h>
 #include <linux/zalloc.h>
 
+/* Printable namespace names, indexed by the *_NS_INDEX constants. */
+static const char *perf_ns__names[] = {
+       [NET_NS_INDEX]          = "net",
+       [UTS_NS_INDEX]          = "uts",
+       [IPC_NS_INDEX]          = "ipc",
+       [PID_NS_INDEX]          = "pid",
+       [USER_NS_INDEX]         = "user",
+       [MNT_NS_INDEX]          = "mnt",
+       [CGROUP_NS_INDEX]       = "cgroup",
+};
+
+/*
+ * perf_ns__name - look up the printable name of a namespace index
+ * @id: index into perf_ns__names[]
+ *
+ * Returns the table entry for @id, or "UNKNOWN" when @id lies past the
+ * end of the table.
+ */
+const char *perf_ns__name(unsigned int id)
+{
+       return id < ARRAY_SIZE(perf_ns__names) ? perf_ns__names[id] : "UNKNOWN";
+}
+
 struct namespaces *namespaces__new(struct perf_record_namespaces *event)
 {
        struct namespaces *namespaces;
index 40edef5..4b33f68 100644 (file)
@@ -66,4 +66,6 @@ static inline void __nsinfo__zput(struct nsinfo **nsip)
 
 #define nsinfo__zput(nsi) __nsinfo__zput(&nsi)
 
+const char *perf_ns__name(unsigned int id);
+
 #endif  /* __PERF_NAMESPACES_H */
index 5ec21d2..b5e2ade 100644 (file)
 #include "parse-events-flex.h"
 #include "pmu.h"
 #include "thread_map.h"
-#include "cpumap.h"
 #include "probe-file.h"
 #include "asm/bug.h"
 #include "util/parse-branch-options.h"
 #include "metricgroup.h"
+#include "util/evsel_config.h"
+#include "util/event.h"
 
 #define MAX_NAME_LEN 100
 
@@ -335,7 +336,7 @@ __add_event(struct list_head *list, int *idx,
        (*idx)++;
        evsel->core.cpus   = perf_cpu_map__get(cpus);
        evsel->core.own_cpus = perf_cpu_map__get(cpus);
-       evsel->system_wide = pmu ? pmu->is_uncore : false;
+       evsel->core.system_wide = pmu ? pmu->is_uncore : false;
        evsel->auto_merge_stats = auto_merge_stats;
 
        if (name)
@@ -1936,7 +1937,7 @@ int parse_events(struct evlist *evlist, const char *str,
 
                perf_evlist__splice_list_tail(evlist, &parse_state.list);
                evlist->nr_groups += parse_state.nr_groups;
-               last = perf_evlist__last(evlist);
+               last = evlist__last(evlist);
                last->cmdline_group_boundary = true;
 
                return 0;
@@ -2050,7 +2051,7 @@ foreach_evsel_in_last_glob(struct evlist *evlist,
         * So no need to WARN here, let *func do this.
         */
        if (evlist->core.nr_entries > 0)
-               last = perf_evlist__last(evlist);
+               last = evlist__last(evlist);
 
        do {
                err = (*func)(last, arg);
index f1c36ed..48126ae 100644 (file)
@@ -9,13 +9,11 @@
 #define YYDEBUG 1
 
 #include <fnmatch.h>
+#include <stdio.h>
 #include <linux/compiler.h>
-#include <linux/list.h>
 #include <linux/types.h>
-#include "util.h"
 #include "pmu.h"
 #include "evsel.h"
-#include "debug.h"
 #include "parse-events.h"
 #include "parse-events-bison.h"
 
index e635c59..7a0ab35 100644 (file)
@@ -12,7 +12,6 @@
 #include <setjmp.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
-#include "util/util.h"
 #include "util/debug.h"
 #include "util/perf-hooks.h"
 
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
new file mode 100644 (file)
index 0000000..d4ad3f0
--- /dev/null
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/perf_event.h>
+#include "util/evsel_fprintf.h"
+
+/*
+ * One entry of a flag-name table: @bit is the mask to test and @name is
+ * the label printed for it.  A table ends with an entry whose @name is
+ * NULL.  @bit is as wide as the values it is tested against so that a
+ * mask using bit 31 or above cannot be sign-extended or truncated.
+ */
+struct bit_names {
+       u64 bit;
+       const char *name;
+};
+
+/*
+ * __p_bits - format the known flags set in @value as "NAME1|NAME2|..."
+ * @buf:   destination buffer
+ * @size:  total size of @buf in bytes
+ * @value: flag word to decode
+ * @bits:  table of known flags, terminated by a NULL @name
+ *
+ * @buf is always left NUL-terminated: it holds the empty string when no
+ * bit of @value is listed in @bits, and output that does not fit in
+ * @size bytes is truncated.  Nothing is written when @size is 0.
+ */
+static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits)
+{
+       bool first_bit = true;
+       size_t len = 0;
+       int i;
+
+       if (size == 0)
+               return;
+
+       /* Callers reuse one buffer; never leave the previous text behind. */
+       buf[0] = '\0';
+
+       /* Test the terminator before using an entry, including entry 0. */
+       for (i = 0; bits[i].name != NULL; i++) {
+               if (!(value & bits[i].bit))
+                       continue;
+
+               /*
+                * Bound every append by the space that is really left:
+                * scnprintf() returns the number of characters stored, so
+                * len stays below size and size - len is at least 1.
+                */
+               len += scnprintf(buf + len, size - len, "%s%s",
+                                first_bit ? "" : "|", bits[i].name);
+               first_bit = false;
+       }
+}
+
+/*
+ * Format @value, a mask of PERF_SAMPLE_* bits, into @buf as the matching
+ * names (without the PERF_SAMPLE_ prefix) joined by '|'.
+ */
+static void __p_sample_type(char *buf, size_t size, u64 value)
+{
+#define SAMPLE_TYPE_BIT(n) { PERF_SAMPLE_##n, #n }
+       struct bit_names sample_type_names[] = {
+               SAMPLE_TYPE_BIT(IP),
+               SAMPLE_TYPE_BIT(TID),
+               SAMPLE_TYPE_BIT(TIME),
+               SAMPLE_TYPE_BIT(ADDR),
+               SAMPLE_TYPE_BIT(READ),
+               SAMPLE_TYPE_BIT(CALLCHAIN),
+               SAMPLE_TYPE_BIT(ID),
+               SAMPLE_TYPE_BIT(CPU),
+               SAMPLE_TYPE_BIT(PERIOD),
+               SAMPLE_TYPE_BIT(STREAM_ID),
+               SAMPLE_TYPE_BIT(RAW),
+               SAMPLE_TYPE_BIT(BRANCH_STACK),
+               SAMPLE_TYPE_BIT(REGS_USER),
+               SAMPLE_TYPE_BIT(STACK_USER),
+               SAMPLE_TYPE_BIT(IDENTIFIER),
+               SAMPLE_TYPE_BIT(REGS_INTR),
+               SAMPLE_TYPE_BIT(DATA_SRC),
+               SAMPLE_TYPE_BIT(WEIGHT),
+               SAMPLE_TYPE_BIT(PHYS_ADDR),
+               { .name = NULL, }       /* table terminator */
+       };
+#undef SAMPLE_TYPE_BIT
+
+       __p_bits(buf, size, value, sample_type_names);
+}
+
+/*
+ * Format @value, a mask of PERF_SAMPLE_BRANCH_* bits, into @buf as the
+ * matching names (without the PERF_SAMPLE_BRANCH_ prefix) joined by '|'.
+ */
+static void __p_branch_sample_type(char *buf, size_t size, u64 value)
+{
+#define BRANCH_BIT(n) { PERF_SAMPLE_BRANCH_##n, #n }
+       struct bit_names branch_names[] = {
+               BRANCH_BIT(USER),
+               BRANCH_BIT(KERNEL),
+               BRANCH_BIT(HV),
+               BRANCH_BIT(ANY),
+               BRANCH_BIT(ANY_CALL),
+               BRANCH_BIT(ANY_RETURN),
+               BRANCH_BIT(IND_CALL),
+               BRANCH_BIT(ABORT_TX),
+               BRANCH_BIT(IN_TX),
+               BRANCH_BIT(NO_TX),
+               BRANCH_BIT(COND),
+               BRANCH_BIT(CALL_STACK),
+               BRANCH_BIT(IND_JUMP),
+               BRANCH_BIT(CALL),
+               BRANCH_BIT(NO_FLAGS),
+               BRANCH_BIT(NO_CYCLES),
+               { .name = NULL, }       /* table terminator */
+       };
+#undef BRANCH_BIT
+
+       __p_bits(buf, size, value, branch_names);
+}
+
+/*
+ * Format @value, a mask of PERF_FORMAT_* bits, into @buf as the matching
+ * names (without the PERF_FORMAT_ prefix) joined by '|'.
+ */
+static void __p_read_format(char *buf, size_t size, u64 value)
+{
+#define READ_FORMAT_BIT(n) { PERF_FORMAT_##n, #n }
+       struct bit_names read_format_names[] = {
+               READ_FORMAT_BIT(TOTAL_TIME_ENABLED),
+               READ_FORMAT_BIT(TOTAL_TIME_RUNNING),
+               READ_FORMAT_BIT(ID),
+               READ_FORMAT_BIT(GROUP),
+               { .name = NULL, }       /* table terminator */
+       };
+#undef READ_FORMAT_BIT
+
+       __p_bits(buf, size, value, read_format_names);
+}
+
+/* Size in bytes of the scratch buffer that the p_*() formatters fill. */
+#define BUF_SIZE               1024
+
+/*
+ * Value formatters.  Each one writes a textual form of @val into a
+ * variable named buf that must exist in the scope where the macro is
+ * expanded, using BUF_SIZE as the bound.
+ */
+#define p_hex(val)             snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val))
+#define p_unsigned(val)                snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
+#define p_signed(val)          snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
+#define p_sample_type(val)     __p_sample_type(buf, BUF_SIZE, val)
+#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
+#define p_read_format(val)     __p_read_format(buf, BUF_SIZE, val)
+
+/*
+ * PRINT_ATTRn - emit one attribute field under the label _n
+ *
+ * If attr->_f is non-zero, format it with the formatter _p and hand the
+ * label and the formatted text to attr__fprintf(), adding its return
+ * value to ret.  Fields that are zero are skipped.  The expansion relies
+ * on attr, fp, priv, buf, ret and attr__fprintf being in scope.
+ */
+#define PRINT_ATTRn(_n, _f, _p)                                \
+do {                                                   \
+       if (attr->_f) {                                 \
+               _p(attr->_f);                           \
+               ret += attr__fprintf(fp, _n, buf, priv);\
+       }                                               \
+} while (0)
+
+/* PRINT_ATTRf - like PRINT_ATTRn, using the field name itself as label. */
+#define PRINT_ATTRf(_f, _p)    PRINT_ATTRn(#_f, _f, _p)
+
+/*
+ * perf_event_attr__fprintf - report the non-zero fields of a perf_event_attr
+ * @fp:            passed through to @attr__fprintf
+ * @attr:          attribute structure to report
+ * @attr__fprintf: callback invoked once per reported field with the
+ *                 field's label and its formatted value
+ * @priv:          passed through to @attr__fprintf
+ *
+ * Fields are visited in the fixed order below and those whose value is
+ * zero are skipped.  Returns the sum of the callback's return values.
+ */
+int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
+                            attr__fprintf_f attr__fprintf, void *priv)
+{
+       /* Scratch buffer filled by the p_*() formatters for each field. */
+       char buf[BUF_SIZE];
+       int ret = 0;
+
+       PRINT_ATTRf(type, p_unsigned);
+       PRINT_ATTRf(size, p_unsigned);
+       PRINT_ATTRf(config, p_hex);
+       /*
+        * NOTE(review): a label naming two fields presumably marks members
+        * that share storage in struct perf_event_attr - confirm against
+        * the perf_event UAPI header.
+        */
+       PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned);
+       PRINT_ATTRf(sample_type, p_sample_type);
+       PRINT_ATTRf(read_format, p_read_format);
+
+       PRINT_ATTRf(disabled, p_unsigned);
+       PRINT_ATTRf(inherit, p_unsigned);
+       PRINT_ATTRf(pinned, p_unsigned);
+       PRINT_ATTRf(exclusive, p_unsigned);
+       PRINT_ATTRf(exclude_user, p_unsigned);
+       PRINT_ATTRf(exclude_kernel, p_unsigned);
+       PRINT_ATTRf(exclude_hv, p_unsigned);
+       PRINT_ATTRf(exclude_idle, p_unsigned);
+       PRINT_ATTRf(mmap, p_unsigned);
+       PRINT_ATTRf(comm, p_unsigned);
+       PRINT_ATTRf(freq, p_unsigned);
+       PRINT_ATTRf(inherit_stat, p_unsigned);
+       PRINT_ATTRf(enable_on_exec, p_unsigned);
+       PRINT_ATTRf(task, p_unsigned);
+       PRINT_ATTRf(watermark, p_unsigned);
+       PRINT_ATTRf(precise_ip, p_unsigned);
+       PRINT_ATTRf(mmap_data, p_unsigned);
+       PRINT_ATTRf(sample_id_all, p_unsigned);
+       PRINT_ATTRf(exclude_host, p_unsigned);
+       PRINT_ATTRf(exclude_guest, p_unsigned);
+       PRINT_ATTRf(exclude_callchain_kernel, p_unsigned);
+       PRINT_ATTRf(exclude_callchain_user, p_unsigned);
+       PRINT_ATTRf(mmap2, p_unsigned);
+       PRINT_ATTRf(comm_exec, p_unsigned);
+       PRINT_ATTRf(use_clockid, p_unsigned);
+       PRINT_ATTRf(context_switch, p_unsigned);
+       PRINT_ATTRf(write_backward, p_unsigned);
+       PRINT_ATTRf(namespaces, p_unsigned);
+       PRINT_ATTRf(ksymbol, p_unsigned);
+       PRINT_ATTRf(bpf_event, p_unsigned);
+       PRINT_ATTRf(aux_output, p_unsigned);
+
+       PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
+       PRINT_ATTRf(bp_type, p_unsigned);
+       PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex);
+       PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex);
+       PRINT_ATTRf(branch_sample_type, p_branch_sample_type);
+       PRINT_ATTRf(sample_regs_user, p_hex);
+       PRINT_ATTRf(sample_stack_user, p_unsigned);
+       PRINT_ATTRf(clockid, p_signed);
+       PRINT_ATTRf(sample_regs_intr, p_hex);
+       PRINT_ATTRf(aux_watermark, p_unsigned);
+       PRINT_ATTRf(sample_max_stack, p_unsigned);
+
+       return ret;
+}
index fb597fa..5608da8 100644 (file)
@@ -20,7 +20,6 @@
 #include "debug.h"
 #include "pmu.h"
 #include "parse-events.h"
-#include "cpumap.h"
 #include "header.h"
 #include "pmu-events/pmu-events.h"
 #include "string2.h"
index b8e0967..91cab5f 100644 (file)
@@ -2331,6 +2331,7 @@ void clear_probe_trace_event(struct probe_trace_event *tev)
                }
        }
        zfree(&tev->args);
+       tev->nargs = 0;
 }
 
 struct kprobe_blacklist_node {
index d13db55..b659466 100644 (file)
@@ -16,6 +16,7 @@
 #include "strlist.h"
 #include "strfilter.h"
 #include "debug.h"
+#include "build-id.h"
 #include "dso.h"
 #include "color.h"
 #include "symbol.h"
index 505905f..cd9f95e 100644 (file)
@@ -1245,6 +1245,17 @@ static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf,
        return n;
 }
 
+/*
+ * Report whether the address held in tf->pf.addr is already the address
+ * of one of the tf->ntevs trace events stored in tf->tevs[].
+ */
+static bool trace_event_finder_overlap(struct trace_event_finder *tf)
+{
+       int idx = 0;
+
+       while (idx < tf->ntevs) {
+               if (tf->tevs[idx].point.address == tf->pf.addr)
+                       return true;
+               idx++;
+       }
+
+       return false;
+}
+
 /* Add a found probe point into trace event list */
 static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
 {
@@ -1255,6 +1266,14 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
        struct perf_probe_arg *args = NULL;
        int ret, i;
 
+       /*
+        * For some reason (e.g. different columns assigned to the same
+        * address), this callback can be called with an address that has
+        * already been handled. Ignore such repeated calls up front.
+        */
+       if (trace_event_finder_overlap(tf))
+               return 0;
+
        /* Check number of tevs */
        if (tf->ntevs == tf->max_tevs) {
                pr_warning("Too many( > %d) probe point found.\n",
index c6dd478..9af1838 100644 (file)
@@ -10,6 +10,7 @@ util/python.c
 util/cap.c
 util/evlist.c
 util/evsel.c
+util/perf_event_attr_fprintf.c
 util/cpumap.c
 util/memswap.c
 util/mmap.c
index 07ca453..53f3105 100644 (file)
@@ -6,17 +6,15 @@
 #include <linux/err.h>
 #include <perf/cpumap.h>
 #include <traceevent/event-parse.h>
-#include "debug.h"
 #include "evlist.h"
 #include "callchain.h"
 #include "evsel.h"
 #include "event.h"
-#include "cpumap.h"
 #include "print_binary.h"
 #include "thread_map.h"
 #include "trace-event.h"
 #include "mmap.h"
-#include "util.h"
+#include <internal/lib.h>
 #include "../perf-sys.h"
 
 #if PY_MAJOR_VERSION < 3
@@ -61,6 +59,8 @@ int parse_callchain_record(const char *arg __maybe_unused,
  */
 int verbose;
 
+int eprintf(int level, int var, const char *fmt, ...);
+
 int eprintf(int level, int var, const char *fmt, ...)
 {
        va_list args;
@@ -884,7 +884,7 @@ static int pyrf_evlist__init(struct pyrf_evlist *pevlist,
 
 static void pyrf_evlist__delete(struct pyrf_evlist *pevlist)
 {
-       perf_evlist__exit(&pevlist->evlist);
+       evlist__exit(&pevlist->evlist);
        Py_TYPE(pevlist)->tp_free((PyObject*)pevlist);
 }
 
@@ -899,7 +899,7 @@ static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
                                         &pages, &overwrite))
                return NULL;
 
-       if (perf_evlist__mmap(evlist, pages) < 0) {
+       if (evlist__mmap(evlist, pages) < 0) {
                PyErr_SetFromErrno(PyExc_OSError);
                return NULL;
        }
@@ -918,7 +918,7 @@ static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist,
        if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout))
                return NULL;
 
-       n = perf_evlist__poll(evlist, timeout);
+       n = evlist__poll(evlist, timeout);
        if (n < 0) {
                PyErr_SetFromErrno(PyExc_OSError);
                return NULL;
@@ -935,17 +935,17 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
         PyObject *list = PyList_New(0);
        int i;
 
-       for (i = 0; i < evlist->pollfd.nr; ++i) {
+       for (i = 0; i < evlist->core.pollfd.nr; ++i) {
                PyObject *file;
 #if PY_MAJOR_VERSION < 3
-               FILE *fp = fdopen(evlist->pollfd.entries[i].fd, "r");
+               FILE *fp = fdopen(evlist->core.pollfd.entries[i].fd, "r");
 
                if (fp == NULL)
                        goto free_list;
 
                file = PyFile_FromFile(fp, "perf", "r", NULL);
 #else
-               file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1,
+               file = PyFile_FromFd(evlist->core.pollfd.entries[i].fd, "perf", "r", -1,
                                     NULL, NULL, NULL, 0);
 #endif
                if (file == NULL)
@@ -984,14 +984,14 @@ static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist,
        return Py_BuildValue("i", evlist->core.nr_entries);
 }
 
-static struct perf_mmap *get_md(struct evlist *evlist, int cpu)
+static struct mmap *get_md(struct evlist *evlist, int cpu)
 {
        int i;
 
-       for (i = 0; i < evlist->nr_mmaps; i++) {
-               struct perf_mmap *md = &evlist->mmap[i];
+       for (i = 0; i < evlist->core.nr_mmaps; i++) {
+               struct mmap *md = &evlist->mmap[i];
 
-               if (md->cpu == cpu)
+               if (md->core.cpu == cpu)
                        return md;
        }
 
@@ -1005,7 +1005,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
        union perf_event *event;
        int sample_id_all = 1, cpu;
        static char *kwlist[] = { "cpu", "sample_id_all", NULL };
-       struct perf_mmap *md;
+       struct mmap *md;
        int err;
 
        if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist,
index 286fe81..8579505 100644 (file)
@@ -2,7 +2,6 @@
 #include "debug.h"
 #include "evlist.h"
 #include "evsel.h"
-#include "cpumap.h"
 #include "parse-events.h"
 #include <errno.h>
 #include <limits.h>
@@ -10,7 +9,6 @@
 #include <api/fs/fs.h>
 #include <subcmd/parse-options.h>
 #include <perf/cpumap.h>
-#include "util.h"
 #include "cloexec.h"
 #include "record.h"
 #include "../perf-sys.h"
@@ -32,7 +30,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
        if (parse_events(evlist, str, NULL))
                goto out_delete;
 
-       evsel = perf_evlist__first(evlist);
+       evsel = evlist__first(evlist);
 
        while (1) {
                fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
@@ -173,7 +171,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
                use_sample_identifier = perf_can_sample_identifier();
                sample_id = true;
        } else if (evlist->core.nr_entries > 1) {
-               struct evsel *first = perf_evlist__first(evlist);
+               struct evsel *first = evlist__first(evlist);
 
                evlist__for_each_entry(evlist, evsel) {
                        if (evsel->core.attr.sample_type == first->core.attr.sample_type)
@@ -278,7 +276,7 @@ bool perf_evlist__can_select_event(struct evlist *evlist, const char *str)
        if (err)
                goto out_delete;
 
-       evsel = perf_evlist__last(temp_evlist);
+       evsel = evlist__last(temp_evlist);
 
        if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) {
                struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
index 5e52e7b..f3d29d8 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "util.h"
 #include "rwsem.h"
 
index 24a9990..6785cd8 100644 (file)
 #include <sys/stat.h>
 #include <sys/types.h>
 
-#include "cpumap.h"
 #include "color.h"
 #include "evsel.h"
 #include "evlist.h"
index 4d9593e..05b43ab 100644 (file)
@@ -22,7 +22,6 @@
 #include <asm/byteorder.h>
 
 #include "debug.h"
-#include "util.h"
 #include "session.h"
 #include "evlist.h"
 #include "color.h"
index 666a56e..5d341ef 100644 (file)
@@ -37,7 +37,6 @@
 #include "../dso.h"
 #include "../callchain.h"
 #include "../evsel.h"
-#include "../util.h"
 #include "../event.h"
 #include "../thread.h"
 #include "../comm.h"
@@ -49,7 +48,6 @@
 #include "map.h"
 #include "symbol.h"
 #include "thread_map.h"
-#include "cpumap.h"
 #include "print_binary.h"
 #include "stat.h"
 #include "mem-events.h"
index e9e4a04..061bb4d 100644 (file)
@@ -22,7 +22,6 @@
 #include "symbol.h"
 #include "session.h"
 #include "tool.h"
-#include "cpumap.h"
 #include "perf_regs.h"
 #include "asm/bug.h"
 #include "auxtrace.h"
 #include "thread-stack.h"
 #include "sample-raw.h"
 #include "stat.h"
-#include "util.h"
 #include "ui/progress.h"
 #include "../perf.h"
 #include "arch/common.h"
+#include <internal/lib.h>
+#include <linux/err.h>
 
 #ifdef HAVE_ZSTD_SUPPORT
 static int perf_session__process_compressed_event(struct perf_session *session,
@@ -187,6 +187,7 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
 struct perf_session *perf_session__new(struct perf_data *data,
                                       bool repipe, struct perf_tool *tool)
 {
+       int ret = -ENOMEM;
        struct perf_session *session = zalloc(sizeof(*session));
 
        if (!session)
@@ -201,13 +202,15 @@ struct perf_session *perf_session__new(struct perf_data *data,
 
        perf_env__init(&session->header.env);
        if (data) {
-               if (perf_data__open(data))
+               ret = perf_data__open(data);
+               if (ret < 0)
                        goto out_delete;
 
                session->data = data;
 
                if (perf_data__is_read(data)) {
-                       if (perf_session__open(session) < 0)
+                       ret = perf_session__open(session);
+                       if (ret < 0)
                                goto out_delete;
 
                        /*
@@ -222,8 +225,11 @@ struct perf_session *perf_session__new(struct perf_data *data,
                        perf_evlist__init_trace_event_sample_raw(session->evlist);
 
                        /* Open the directory data. */
-                       if (data->is_dir && perf_data__open_dir(data))
+                       if (data->is_dir) {
+                               ret = perf_data__open_dir(data);
+                       if (ret)
                                goto out_delete;
+                       }
                }
        } else  {
                session->machines.host.env = &perf_env;
@@ -256,7 +262,7 @@ struct perf_session *perf_session__new(struct perf_data *data,
  out_delete:
        perf_session__delete(session);
  out:
-       return NULL;
+       return ERR_PTR(ret);
 }
 
 static void perf_session__delete_threads(struct perf_session *session)
@@ -1317,6 +1323,7 @@ static int deliver_sample_value(struct evlist *evlist,
                                struct machine *machine)
 {
        struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id);
+       struct evsel *evsel;
 
        if (sid) {
                sample->id     = v->id;
@@ -1336,7 +1343,8 @@ static int deliver_sample_value(struct evlist *evlist,
        if (!sample->period)
                return 0;
 
-       return tool->sample(tool, event, sample, sid->evsel, machine);
+       evsel = container_of(sid->evsel, struct evsel, core);
+       return tool->sample(tool, event, sample, evsel, machine);
 }
 
 static int deliver_sample_group(struct evlist *evlist,
@@ -2412,73 +2420,3 @@ int perf_event__process_id_index(struct perf_session *session,
        }
        return 0;
 }
-
-int perf_event__synthesize_id_index(struct perf_tool *tool,
-                                   perf_event__handler_t process,
-                                   struct evlist *evlist,
-                                   struct machine *machine)
-{
-       union perf_event *ev;
-       struct evsel *evsel;
-       size_t nr = 0, i = 0, sz, max_nr, n;
-       int err;
-
-       pr_debug2("Synthesizing id index\n");
-
-       max_nr = (UINT16_MAX - sizeof(struct perf_record_id_index)) /
-                sizeof(struct id_index_entry);
-
-       evlist__for_each_entry(evlist, evsel)
-               nr += evsel->ids;
-
-       n = nr > max_nr ? max_nr : nr;
-       sz = sizeof(struct perf_record_id_index) + n * sizeof(struct id_index_entry);
-       ev = zalloc(sz);
-       if (!ev)
-               return -ENOMEM;
-
-       ev->id_index.header.type = PERF_RECORD_ID_INDEX;
-       ev->id_index.header.size = sz;
-       ev->id_index.nr = n;
-
-       evlist__for_each_entry(evlist, evsel) {
-               u32 j;
-
-               for (j = 0; j < evsel->ids; j++) {
-                       struct id_index_entry *e;
-                       struct perf_sample_id *sid;
-
-                       if (i >= n) {
-                               err = process(tool, ev, NULL, machine);
-                               if (err)
-                                       goto out_err;
-                               nr -= n;
-                               i = 0;
-                       }
-
-                       e = &ev->id_index.entries[i++];
-
-                       e->id = evsel->id[j];
-
-                       sid = perf_evlist__id2sid(evlist, e->id);
-                       if (!sid) {
-                               free(ev);
-                               return -ENOENT;
-                       }
-
-                       e->idx = sid->idx;
-                       e->cpu = sid->cpu;
-                       e->tid = sid->tid;
-               }
-       }
-
-       sz = sizeof(struct perf_record_id_index) + nr * sizeof(struct id_index_entry);
-       ev->id_index.header.size = sz;
-       ev->id_index.nr = nr;
-
-       err = process(tool, ev, NULL, machine);
-out_err:
-       free(ev);
-
-       return err;
-}
index b7aa076..b4c9428 100644 (file)
@@ -138,9 +138,4 @@ int perf_session__deliver_synth_event(struct perf_session *session,
 int perf_event__process_id_index(struct perf_session *session,
                                 union perf_event *event);
 
-int perf_event__synthesize_id_index(struct perf_tool *tool,
-                                   perf_event__handler_t process,
-                                   struct evlist *evlist,
-                                   struct machine *machine);
-
 #endif /* __PERF_SESSION_H */
index a2308eb..43d1d41 100644 (file)
@@ -2329,7 +2329,7 @@ static struct evsel *find_evsel(struct evlist *evlist, char *event_name)
                if (nr > evlist->core.nr_entries)
                        return NULL;
 
-               evsel = perf_evlist__first(evlist);
+               evsel = evlist__first(evlist);
                while (--nr > 0)
                        evsel = perf_evsel__next(evsel);
 
index adfcf1f..d84ed8b 100644 (file)
@@ -15,7 +15,7 @@
 #include <string.h>
 #include "srccode.h"
 #include "debug.h"
-#include "util.h"
+#include <internal/lib.h> // page_size
 
 #define MAXSRCCACHE (32*1024*1024)
 #define MAXSRCFILES     64
index 70c87fd..2c41d47 100644 (file)
@@ -738,6 +738,8 @@ static void generic_metric(struct perf_stat_config *config,
        char *n, *pn;
 
        expr__ctx_init(&pctx);
+       /* Must be first id entry */
+       expr__add_id(&pctx, name, avg);
        for (i = 0; metric_events[i]; i++) {
                struct saved_value *v;
                struct stats *stats;
@@ -776,8 +778,6 @@ static void generic_metric(struct perf_stat_config *config,
                        expr__add_id(&pctx, n, avg_stats(stats)*scale);
        }
 
-       expr__add_id(&pctx, name, avg);
-
        if (!metric_events[i]) {
                const char *p = metric_expr;
 
index 8f1ea27..ebdd130 100644 (file)
@@ -4,6 +4,7 @@
 #include <math.h>
 #include <string.h>
 #include "counts.h"
+#include "cpumap.h"
 #include "debug.h"
 #include "header.h"
 #include "stat.h"
@@ -161,6 +162,15 @@ static void perf_evsel__free_prev_raw_counts(struct evsel *evsel)
        evsel->prev_raw_counts = NULL;
 }
 
+/*
+ * Zero the aggregated val/ena/run values kept in @evsel's previous raw
+ * counts. Does nothing when no previous raw counts are attached.
+ */
+static void perf_evsel__reset_prev_raw_counts(struct evsel *evsel)
+{
+       if (!evsel->prev_raw_counts)
+               return;
+
+       evsel->prev_raw_counts->aggr.run = 0;
+       evsel->prev_raw_counts->aggr.ena = 0;
+       evsel->prev_raw_counts->aggr.val = 0;
+}
+
 static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
 {
        int ncpus = perf_evsel__nr_cpus(evsel);
@@ -211,6 +221,14 @@ void perf_evlist__reset_stats(struct evlist *evlist)
        }
 }
 
+/* Reset the previous raw counts of every evsel on @evlist. */
+void perf_evlist__reset_prev_raw_counts(struct evlist *evlist)
+{
+       struct evsel *pos;
+
+       evlist__for_each_entry(evlist, pos) {
+               perf_evsel__reset_prev_raw_counts(pos);
+       }
+}
+
 static void zero_per_pkg(struct evsel *counter)
 {
        if (counter->per_pkg_mask)
@@ -318,7 +336,7 @@ static int process_counter_maps(struct perf_stat_config *config,
        int ncpus = perf_evsel__nr_cpus(counter);
        int cpu, thread;
 
-       if (counter->system_wide)
+       if (counter->core.system_wide)
                nthreads = 1;
 
        for (thread = 0; thread < nthreads; thread++) {
@@ -493,45 +511,3 @@ int create_perf_stat_counter(struct evsel *evsel,
 
        return perf_evsel__open_per_thread(evsel, evsel->core.threads);
 }
-
-int perf_stat_synthesize_config(struct perf_stat_config *config,
-                               struct perf_tool *tool,
-                               struct evlist *evlist,
-                               perf_event__handler_t process,
-                               bool attrs)
-{
-       int err;
-
-       if (attrs) {
-               err = perf_event__synthesize_attrs(tool, evlist, process);
-               if (err < 0) {
-                       pr_err("Couldn't synthesize attrs.\n");
-                       return err;
-               }
-       }
-
-       err = perf_event__synthesize_extra_attr(tool, evlist, process,
-                                               attrs);
-
-       err = perf_event__synthesize_thread_map2(tool, evlist->core.threads,
-                                                process, NULL);
-       if (err < 0) {
-               pr_err("Couldn't synthesize thread map.\n");
-               return err;
-       }
-
-       err = perf_event__synthesize_cpu_map(tool, evlist->core.cpus,
-                                            process, NULL);
-       if (err < 0) {
-               pr_err("Couldn't synthesize thread map.\n");
-               return err;
-       }
-
-       err = perf_event__synthesize_stat_config(tool, config, process, NULL);
-       if (err < 0) {
-               pr_err("Couldn't synthesize config.\n");
-               return err;
-       }
-
-       return 0;
-}
index 14fe3e5..edbeb2f 100644 (file)
@@ -7,8 +7,9 @@
 #include <sys/types.h>
 #include <sys/resource.h>
 #include "rblist.h"
-#include "event.h"
 
+struct perf_cpu_map;
+struct perf_stat_config;
 struct timespec;
 
 struct stats {
@@ -192,6 +193,7 @@ void perf_stat__collect_metric_expr(struct evlist *);
 int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw);
 void perf_evlist__free_stats(struct evlist *evlist);
 void perf_evlist__reset_stats(struct evlist *evlist);
+void perf_evlist__reset_prev_raw_counts(struct evlist *evlist);
 
 int perf_stat_process_counter(struct perf_stat_config *config,
                              struct evsel *counter);
@@ -210,11 +212,6 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
 int create_perf_stat_counter(struct evsel *evsel,
                             struct perf_stat_config *config,
                             struct target *target);
-int perf_stat_synthesize_config(struct perf_stat_config *config,
-                               struct perf_tool *tool,
-                               struct evlist *evlist,
-                               perf_event__handler_t process,
-                               bool attrs);
 void
 perf_evlist__print_counters(struct evlist *evlist,
                            struct perf_stat_config *config,
index 582f4a6..96f941e 100644 (file)
 #include <linux/string.h>
 #include <linux/time64.h>
 #include <linux/zalloc.h>
+#include <internal/cpumap.h>
 #include <perf/cpumap.h>
 
 #include "env.h"
 #include "svghelper.h"
-#include "cpumap.h"
 
 static u64 first_time, last_time;
 static u64 turbo_frequency, max_freq;
index 9428639..66f4be1 100644 (file)
@@ -7,6 +7,7 @@
 #include <unistd.h>
 #include <inttypes.h>
 
+#include "dso.h"
 #include "map.h"
 #include "map_groups.h"
 #include "symbol.h"
 #include "machine.h"
 #include "vdso.h"
 #include "debug.h"
-#include "util.h"
+#include "util/copyfile.h"
 #include <linux/ctype.h>
+#include <linux/kernel.h>
 #include <linux/zalloc.h>
 #include <symbol/kallsyms.h>
+#include <internal/lib.h>
 
 #ifndef EM_AARCH64
 #define EM_AARCH64     183  /* ARM 64 bit */
index 7e2813e..d6e99af 100644 (file)
@@ -1,8 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
 #include "dso.h"
 #include "symbol.h"
 #include "symsrc.h"
-#include "util.h"
 
 #include <errno.h>
 #include <unistd.h>
@@ -13,6 +11,7 @@
 #include <byteswap.h>
 #include <sys/stat.h>
 #include <linux/zalloc.h>
+#include <internal/lib.h>
 
 static bool check_need_swap(int file_endian)
 {
index 765c75d..a8f80e4 100644 (file)
@@ -19,7 +19,7 @@
 #include "build-id.h"
 #include "cap.h"
 #include "dso.h"
-#include "util.h"
+#include "util.h" // lsdir()
 #include "debug.h"
 #include "event.h"
 #include "machine.h"
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
new file mode 100644 (file)
index 0000000..807cbca
--- /dev/null
@@ -0,0 +1,1884 @@
+// SPDX-License-Identifier: GPL-2.0-only 
+
+#include "util/debug.h"
+#include "util/dso.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/map.h"
+#include "util/map_symbol.h"
+#include "util/branch.h"
+#include "util/memswap.h"
+#include "util/namespaces.h"
+#include "util/session.h"
+#include "util/stat.h"
+#include "util/symbol.h"
+#include "util/synthetic-events.h"
+#include "util/target.h"
+#include "util/time-utils.h"
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <linux/perf_event.h>
+#include <asm/bug.h>
+#include <perf/evsel.h>
+#include <internal/cpumap.h>
+#include <perf/cpumap.h>
+#include <internal/lib.h> // page_size
+#include <internal/threadmap.h>
+#include <perf/threadmap.h>
+#include <symbol/kallsyms.h>
+#include <dirent.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
+#include <api/fs/fs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/*
+ * Default value of proc_map_timeout. The value is multiplied by 1000000
+ * and compared against rdclock() deltas while a maps file is parsed, so it
+ * is presumably expressed in milliseconds (assuming rdclock() returns
+ * nanoseconds -- TODO confirm).
+ */
+#define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500
+
+/*
+ * Time limit for parsing one maps file; the warning printed when it
+ * expires points the user at --proc-map-timeout.
+ */
+unsigned int proc_map_timeout = DEFAULT_PROC_MAP_PARSE_TIMEOUT;
+
+/*
+ * Hand a synthesized @event to @process together with a placeholder sample.
+ *
+ * The sample's pid, tid, time, stream_id and cpu are all set to -1, its
+ * period to 1, and its cpumode is taken from the CPU-mode bits of the
+ * event header.
+ *
+ * Returns whatever @process returns.
+ */
+int perf_tool__process_synth_event(struct perf_tool *tool,
+                                  union perf_event *event,
+                                  struct machine *machine,
+                                  perf_event__handler_t process)
+{
+       struct perf_sample synth_sample = {
+               .pid       = -1,
+               .tid       = -1,
+               .time      = -1,
+               .stream_id = -1,
+               .cpu       = -1,
+               .period    = 1,
+               .cpumode   = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK,
+       };
+
+       return process(tool, event, &synth_sample, machine);
+}
+
+/*
+ * Read the comm, tgid and ppid of @pid from /proc/<pid>/status.
+ *
+ * Assumes that the first 4095 bytes of /proc/<pid>/status contain the
+ * "Name:", "Tgid:" and "PPid:" lines.
+ *
+ * @comm receives the task name, truncated to @len - 1 bytes and always
+ * NUL-terminated when a "Name:" line is found; it is left untouched
+ * otherwise. *@tgid and *@ppid are set to -1 up front and stay -1 when the
+ * corresponding line is missing.
+ *
+ * Returns -1 if the file cannot be opened or read, 0 otherwise (even if
+ * some of the fields were not found).
+ */
+static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
+                                   pid_t *tgid, pid_t *ppid)
+{
+       char filename[PATH_MAX];
+       char bf[4096];
+       int fd;
+       size_t size = 0;
+       ssize_t n;
+       char *name, *tgids, *ppids;
+
+       *tgid = -1;
+       *ppid = -1;
+
+       snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
+
+       fd = open(filename, O_RDONLY);
+       if (fd < 0) {
+               pr_debug("couldn't open %s\n", filename);
+               return -1;
+       }
+
+       /* leave room for the terminating NUL added below */
+       n = read(fd, bf, sizeof(bf) - 1);
+       close(fd);
+       if (n <= 0) {
+               pr_warning("Couldn't get COMM, tgid and ppid for pid %d\n",
+                          pid);
+               return -1;
+       }
+       bf[n] = '\0';
+
+       /*
+        * Locate all three fields before the name is cut at its newline,
+        * since that cut also terminates the buffer for later searches.
+        */
+       name = strstr(bf, "Name:");
+       tgids = strstr(bf, "Tgid:");
+       ppids = strstr(bf, "PPid:");
+
+       if (name) {
+               char *nl;
+
+               name = skip_spaces(name + 5);  /* strlen("Name:") */
+               nl = strchr(name, '\n');
+               if (nl)
+                       *nl = '\0';
+
+               size = strlen(name);
+               if (size >= len)
+                       size = len - 1;
+               memcpy(comm, name, size);
+               comm[size] = '\0';
+       } else {
+               pr_debug("Name: string not found for pid %d\n", pid);
+       }
+
+       if (tgids) {
+               tgids += 5;  /* strlen("Tgid:") */
+               *tgid = atoi(tgids);
+       } else {
+               pr_debug("Tgid: string not found for pid %d\n", pid);
+       }
+
+       if (ppids) {
+               ppids += 5;  /* strlen("PPid:") */
+               *ppid = atoi(ppids);
+       } else {
+               pr_debug("PPid: string not found for pid %d\n", pid);
+       }
+
+       return 0;
+}
+
+/*
+ * Fill @event with a PERF_RECORD_COMM record for thread @pid without
+ * delivering it.
+ *
+ * On the host the comm, *@tgid and *@ppid are read via
+ * perf_event__get_comm_ids(); for any other machine the comm stays empty
+ * and *@tgid is the machine's pid. *@ppid is -1 unless the host lookup
+ * finds it.
+ *
+ * Returns 0 on success, -1 if the lookup fails or the tgid is negative.
+ */
+static int perf_event__prepare_comm(union perf_event *event, pid_t pid,
+                                   struct machine *machine,
+                                   pid_t *tgid, pid_t *ppid)
+{
+       size_t comm_len;
+
+       *ppid = -1;
+
+       memset(&event->comm, 0, sizeof(event->comm));
+
+       if (!machine__is_host(machine))
+               *tgid = machine->pid;
+       else if (perf_event__get_comm_ids(pid, event->comm.comm,
+                                         sizeof(event->comm.comm),
+                                         tgid, ppid) != 0)
+               return -1;
+
+       if (*tgid < 0)
+               return -1;
+
+       event->comm.header.type = PERF_RECORD_COMM;
+       event->comm.pid = *tgid;
+       event->comm.tid = pid;
+
+       /* comm string plus its NUL, rounded up to a u64 boundary */
+       comm_len = strlen(event->comm.comm) + 1;
+       comm_len = PERF_ALIGN(comm_len, sizeof(u64));
+
+       /* clear the id header area that follows the aligned comm */
+       memset(event->comm.comm + comm_len, 0, machine->id_hdr_size);
+
+       /* record size = struct minus the unused tail of comm[] plus id header */
+       event->comm.header.size = (sizeof(event->comm) -
+                               (sizeof(event->comm.comm) - comm_len) +
+                               machine->id_hdr_size);
+
+       return 0;
+}
+
+/*
+ * Build a COMM event for @pid in @event and deliver it through @process.
+ *
+ * Returns the thread group id on success, -1 if the event could not be
+ * prepared or @process reported an error.
+ */
+pid_t perf_event__synthesize_comm(struct perf_tool *tool,
+                                        union perf_event *event, pid_t pid,
+                                        perf_event__handler_t process,
+                                        struct machine *machine)
+{
+       pid_t tgid, ppid;
+       int err;
+
+       err = perf_event__prepare_comm(event, pid, machine, &tgid, &ppid);
+       if (err == 0)
+               err = perf_tool__process_synth_event(tool, event, machine, process);
+
+       return err != 0 ? -1 : tgid;
+}
+
+/*
+ * Record the device and inode of /proc/<pid>/ns/<ns> in @ns_link_info.
+ *
+ * @ns_link_info is left untouched when the namespace link cannot be
+ * stat'ed (callers in this file zero the whole event beforehand).
+ */
+static void perf_event__get_ns_link_info(pid_t pid, const char *ns,
+                                        struct perf_ns_link_info *ns_link_info)
+{
+       struct stat64 st;
+       char proc_ns[128];
+
+       /* bounded write; pid_t is signed, so format it like the rest of the file */
+       snprintf(proc_ns, sizeof(proc_ns), "/proc/%d/ns/%s", pid, ns);
+       if (stat64(proc_ns, &st) == 0) {
+               ns_link_info->dev = st.st_dev;
+               ns_link_info->ino = st.st_ino;
+       }
+}
+
+/*
+ * Synthesize a PERF_RECORD_NAMESPACES event for thread @pid of group @tgid
+ * and deliver it through @process.
+ *
+ * Nothing is emitted (and 0 is returned) when @tool is NULL or did not ask
+ * for namespace events. Otherwise returns 0 on success and -1 when
+ * @process reports an error.
+ */
+int perf_event__synthesize_namespaces(struct perf_tool *tool,
+                                     union perf_event *event,
+                                     pid_t pid, pid_t tgid,
+                                     perf_event__handler_t process,
+                                     struct machine *machine)
+{
+       struct perf_ns_link_info *links;
+       size_t ev_size;
+       u32 i;
+
+       if (!tool || !tool->namespace_events)
+               return 0;
+
+       /* fixed part + one link_info slot per namespace + id header */
+       ev_size = (sizeof(event->namespaces) +
+                  (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+                  machine->id_hdr_size);
+
+       memset(&event->namespaces, 0, ev_size);
+
+       event->namespaces.header.type = PERF_RECORD_NAMESPACES;
+       event->namespaces.header.size = ev_size;
+
+       event->namespaces.pid = tgid;
+       event->namespaces.tid = pid;
+       event->namespaces.nr_namespaces = NR_NAMESPACES;
+
+       links = event->namespaces.link_info;
+       for (i = 0; i < event->namespaces.nr_namespaces; i++)
+               perf_event__get_ns_link_info(pid, perf_ns__name(i), &links[i]);
+
+       return perf_tool__process_synth_event(tool, event, machine, process) != 0 ? -1 : 0;
+}
+
+/*
+ * Synthesize a PERF_RECORD_FORK event for thread @pid of group @tgid and
+ * deliver it through @process.
+ *
+ * Returns 0 on success, -1 when @process reports an error.
+ */
+static int perf_event__synthesize_fork(struct perf_tool *tool,
+                                      union perf_event *event,
+                                      pid_t pid, pid_t tgid, pid_t ppid,
+                                      perf_event__handler_t process,
+                                      struct machine *machine)
+{
+       /*
+        * The main thread's parent is the ppid taken from the status file.
+        * Every other thread is assumed to have been spawned by the main
+        * thread of its process, so its parent is the tgid.
+        */
+       pid_t parent = (tgid == pid) ? ppid : tgid;
+
+       memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size);
+
+       event->fork.header.type = PERF_RECORD_FORK;
+       event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC;
+       event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
+
+       event->fork.pid  = tgid;
+       event->fork.tid  = pid;
+       event->fork.ppid = parent;
+       event->fork.ptid = parent;
+
+       return perf_tool__process_synth_event(tool, event, machine, process) != 0 ? -1 : 0;
+}
+
+/*
+ * Synthesize one PERF_RECORD_MMAP2 event per mapping listed in
+ * <root_dir>/proc/<pid>/task/<pid>/maps and deliver each through @process.
+ *
+ * Non-executable mappings are skipped unless @mmap_data is set and the
+ * mapping is readable, in which case they are flagged as data mappings.
+ * Mappings without a name, and mappings that live under the hugetlbfs
+ * mount point, are reported as "//anon".
+ *
+ * If parsing takes longer than proc_map_timeout, one final event flagged
+ * with PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT is emitted and parsing
+ * stops.
+ *
+ * Returns 0 on success (and immediately for the default guest machine),
+ * -1 if the maps file cannot be opened or @process reports an error.
+ */
+int perf_event__synthesize_mmap_events(struct perf_tool *tool,
+                                      union perf_event *event,
+                                      pid_t pid, pid_t tgid,
+                                      perf_event__handler_t process,
+                                      struct machine *machine,
+                                      bool mmap_data)
+{
+       char filename[PATH_MAX];
+       FILE *fp;
+       unsigned long long t;
+       bool truncation = false;
+       unsigned long long timeout = proc_map_timeout * 1000000ULL;
+       int rc = 0;
+       const char *hugetlbfs_mnt = hugetlbfs__mountpoint();
+       int hugetlbfs_mnt_len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0;
+
+       if (machine__is_default_guest(machine))
+               return 0;
+
+       snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps",
+                machine->root_dir, pid, pid);
+
+       fp = fopen(filename, "r");
+       if (fp == NULL) {
+               /*
+                * We raced with a task exiting - just return:
+                */
+               pr_debug("couldn't open %s\n", filename);
+               return -1;
+       }
+
+       event->header.type = PERF_RECORD_MMAP2;
+       t = rdclock();
+
+       while (1) {
+               char bf[BUFSIZ];
+               char prot[5];
+               char execname[PATH_MAX];
+               char anonstr[] = "//anon";
+               unsigned int ino;
+               size_t size;
+               ssize_t n;
+
+               if (fgets(bf, sizeof(bf), fp) == NULL)
+                       break;
+
+               /*
+                * Ensure null termination since stack will be reused. This
+                * must happen before the timeout check: that path jumps
+                * straight to "out", where execname is read.
+                */
+               strcpy(execname, "");
+
+               if ((rdclock() - t) > timeout) {
+                       pr_warning("Reading %s time out. "
+                                  "You may want to increase "
+                                  "the time limit by --proc-map-timeout\n",
+                                  filename);
+                       truncation = true;
+                       goto out;
+               }
+
+               /*
+                * 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat
+                *
+                * The permission field is limited to 4 characters so it can
+                * never overflow prot[].
+                */
+               n = sscanf(bf, "%"PRI_lx64"-%"PRI_lx64" %4s %"PRI_lx64" %x:%x %u %[^\n]\n",
+                      &event->mmap2.start, &event->mmap2.len, prot,
+                      &event->mmap2.pgoff, &event->mmap2.maj,
+                      &event->mmap2.min,
+                      &ino, execname);
+
+               /*
+                * Anon maps don't have the execname.
+                */
+               if (n < 7)
+                       continue;
+
+               event->mmap2.ino = (u64)ino;
+
+               /*
+                * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
+                */
+               if (machine__is_host(machine))
+                       event->header.misc = PERF_RECORD_MISC_USER;
+               else
+                       event->header.misc = PERF_RECORD_MISC_GUEST_USER;
+
+               /* map protection and flags bits */
+               event->mmap2.prot = 0;
+               event->mmap2.flags = 0;
+               if (prot[0] == 'r')
+                       event->mmap2.prot |= PROT_READ;
+               if (prot[1] == 'w')
+                       event->mmap2.prot |= PROT_WRITE;
+               if (prot[2] == 'x')
+                       event->mmap2.prot |= PROT_EXEC;
+
+               if (prot[3] == 's')
+                       event->mmap2.flags |= MAP_SHARED;
+               else
+                       event->mmap2.flags |= MAP_PRIVATE;
+
+               if (prot[2] != 'x') {
+                       if (!mmap_data || prot[0] != 'r')
+                               continue;
+
+                       event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
+               }
+
+out:
+               /*
+                * NOTE(review): on the timeout path start/len/pgoff/misc
+                * still hold whatever an earlier iteration (or the caller)
+                * left in @event -- confirm that consumers only rely on the
+                * timeout flag of this final record.
+                */
+               if (truncation)
+                       event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
+
+               if (!strcmp(execname, ""))
+                       strcpy(execname, anonstr);
+
+               if (hugetlbfs_mnt_len &&
+                   !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) {
+                       strcpy(execname, anonstr);
+                       event->mmap2.flags |= MAP_HUGETLB;
+               }
+
+               size = strlen(execname) + 1;
+               memcpy(event->mmap2.filename, execname, size);
+               size = PERF_ALIGN(size, sizeof(u64));
+               /* sscanf stored the end address in len; turn it into a length */
+               event->mmap2.len -= event->mmap2.start;
+               event->mmap2.header.size = (sizeof(event->mmap2) -
+                                       (sizeof(event->mmap2.filename) - size));
+               memset(event->mmap2.filename + size, 0, machine->id_hdr_size);
+               event->mmap2.header.size += machine->id_hdr_size;
+               event->mmap2.pid = tgid;
+               event->mmap2.tid = pid;
+
+               if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
+                       rc = -1;
+                       break;
+               }
+
+               if (truncation)
+                       break;
+       }
+
+       fclose(fp);
+       return rc;
+}
+
+/*
+ * Synthesize one PERF_RECORD_MMAP event for every kernel-module map among
+ * @machine's kernel maps and deliver each through @process.
+ *
+ * Returns 0 on success, -1 if the scratch event cannot be allocated or
+ * @process reports an error (synthesis stops at the first such error).
+ */
+int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process,
+                                  struct machine *machine)
+{
+       struct maps *kmaps = machine__kernel_maps(machine);
+       union perf_event *event;
+       struct map *pos;
+       int rc = 0;
+
+       event = zalloc((sizeof(event->mmap) + machine->id_hdr_size));
+       if (event == NULL) {
+               pr_debug("Not enough memory synthesizing mmap event "
+                        "for kernel modules\n");
+               return -1;
+       }
+
+       /*
+        * kernel uses 0 for user space maps, see kernel/perf_event.c
+        * __perf_event_mmap
+        */
+       event->header.misc = machine__is_host(machine) ?
+                               PERF_RECORD_MISC_KERNEL :
+                               PERF_RECORD_MISC_GUEST_KERNEL;
+
+       pos = maps__first(kmaps);
+       while (pos) {
+               if (__map__is_kmodule(pos)) {
+                       /* module name plus NUL, rounded up to a u64 boundary */
+                       size_t name_sz = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
+
+                       event->mmap.header.type = PERF_RECORD_MMAP;
+                       event->mmap.header.size = (sizeof(event->mmap) -
+                                               (sizeof(event->mmap.filename) - name_sz));
+                       event->mmap.header.size += machine->id_hdr_size;
+
+                       event->mmap.pid   = machine->pid;
+                       event->mmap.start = pos->start;
+                       event->mmap.len   = pos->end - pos->start;
+
+                       /* clear the id header area, then store the name in front of it */
+                       memset(event->mmap.filename + name_sz, 0, machine->id_hdr_size);
+                       memcpy(event->mmap.filename, pos->dso->long_name,
+                              pos->dso->long_name_len + 1);
+
+                       if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
+                               rc = -1;
+                               break;
+                       }
+               }
+
+               pos = map__next(pos);
+       }
+
+       free(event);
+       return rc;
+}
+
+/*
+ * Synthesize the events that describe @pid, using the four caller-supplied
+ * event buffers as scratch space.
+ *
+ * With @full == 0 only @pid itself is described: a COMM event, a
+ * NAMESPACES event and, when @pid is its own thread group leader, its mmap
+ * events. Any failure returns -1.
+ *
+ * With @full != 0 every numeric entry of <root_dir>/proc/<pid>/task is
+ * visited and FORK, NAMESPACES and COMM events are delivered for it, in
+ * that order; mmap events are synthesized only for the entry equal to
+ * @pid. The walk stops at the first failure and returns non-zero. A task
+ * directory that cannot be opened, or the default guest machine, yields 0.
+ */
+static int __event__synthesize_thread(union perf_event *comm_event,
+                                     union perf_event *mmap_event,
+                                     union perf_event *fork_event,
+                                     union perf_event *namespaces_event,
+                                     pid_t pid, int full, perf_event__handler_t process,
+                                     struct perf_tool *tool, struct machine *machine, bool mmap_data)
+{
+       char filename[PATH_MAX];
+       DIR *tasks;
+       struct dirent *dirent;
+       pid_t tgid, ppid;
+       int rc = 0;
+
+       /* special case: only send one comm event using passed in pid */
+       if (!full) {
+               tgid = perf_event__synthesize_comm(tool, comm_event, pid,
+                                                  process, machine);
+
+               if (tgid == -1)
+                       return -1;
+
+               if (perf_event__synthesize_namespaces(tool, namespaces_event, pid,
+                                                     tgid, process, machine) < 0)
+                       return -1;
+
+               /*
+                * send mmap only for thread group leader
+                * see thread__init_map_groups
+                */
+               if (pid == tgid &&
+                   perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
+                                                      process, machine, mmap_data))
+                       return -1;
+
+               return 0;
+       }
+
+       if (machine__is_default_guest(machine))
+               return 0;
+
+       snprintf(filename, sizeof(filename), "%s/proc/%d/task",
+                machine->root_dir, pid);
+
+       tasks = opendir(filename);
+       if (tasks == NULL) {
+               pr_debug("couldn't open %s\n", filename);
+               return 0;
+       }
+
+       while ((dirent = readdir(tasks)) != NULL) {
+               char *end;
+               pid_t _pid;
+
+               /* skip entries whose name is not entirely a decimal number */
+               _pid = strtol(dirent->d_name, &end, 10);
+               if (*end)
+                       continue;
+
+               /* assume failure until every event for this task was delivered */
+               rc = -1;
+               if (perf_event__prepare_comm(comm_event, _pid, machine,
+                                            &tgid, &ppid) != 0)
+                       break;
+
+               if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
+                                               ppid, process, machine) < 0)
+                       break;
+
+               if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid,
+                                                     tgid, process, machine) < 0)
+                       break;
+
+               /*
+                * Send the prepared comm event
+                */
+               if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0)
+                       break;
+
+               rc = 0;
+               if (_pid == pid) {
+                       /* process the parent's maps too */
+                       rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
+                                               process, machine, mmap_data);
+                       if (rc)
+                               break;
+               }
+       }
+
+       closedir(tasks);
+       return rc;
+}
+
+/*
+ * Synthesize COMM, NAMESPACES and (for group leaders) mmap events for every
+ * thread in @threads.
+ *
+ * When a thread's group leader is not itself part of @threads, the same
+ * events are synthesized for the leader as well.
+ *
+ * Returns 0 on success, -1 if a scratch event cannot be allocated or
+ * synthesis fails for any thread (processing stops at the first failure).
+ */
+int perf_event__synthesize_thread_map(struct perf_tool *tool,
+                                     struct perf_thread_map *threads,
+                                     perf_event__handler_t process,
+                                     struct machine *machine,
+                                     bool mmap_data)
+{
+       union perf_event *comm_event, *mmap_event, *fork_event;
+       union perf_event *namespaces_event;
+       int err = -1, thread, j;
+
+       /* scratch events, each sized for its record plus the id header */
+       comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
+       if (comm_event == NULL)
+               goto out;
+
+       mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size);
+       if (mmap_event == NULL)
+               goto out_free_comm;
+
+       fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
+       if (fork_event == NULL)
+               goto out_free_mmap;
+
+       namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
+                                 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+                                 machine->id_hdr_size);
+       if (namespaces_event == NULL)
+               goto out_free_fork;
+
+       err = 0;
+       for (thread = 0; thread < threads->nr; ++thread) {
+               if (__event__synthesize_thread(comm_event, mmap_event,
+                                              fork_event, namespaces_event,
+                                              perf_thread_map__pid(threads, thread), 0,
+                                              process, tool, machine,
+                                              mmap_data)) {
+                       err = -1;
+                       break;
+               }
+
+               /*
+                * comm.pid is set to thread group id by
+                * perf_event__synthesize_comm
+                */
+               if ((int) comm_event->comm.pid != perf_thread_map__pid(threads, thread)) {
+                       bool need_leader = true;
+
+                       /* is thread group leader in thread_map? */
+                       for (j = 0; j < threads->nr; ++j) {
+                               if ((int) comm_event->comm.pid == perf_thread_map__pid(threads, j)) {
+                                       need_leader = false;
+                                       break;
+                               }
+                       }
+
+                       /* if not, generate events for it */
+                       if (need_leader &&
+                           __event__synthesize_thread(comm_event, mmap_event,
+                                                      fork_event, namespaces_event,
+                                                      comm_event->comm.pid, 0,
+                                                      process, tool, machine,
+                                                      mmap_data)) {
+                               err = -1;
+                               break;
+                       }
+               }
+       }
+       /* unwind in reverse allocation order; the labels serve the failure paths */
+       free(namespaces_event);
+out_free_fork:
+       free(fork_event);
+out_free_mmap:
+       free(mmap_event);
+out_free_comm:
+       free(comm_event);
+out:
+       return err;
+}
+
+/*
+ * Synthesize events, in "full" mode, for the processes named by
+ * @dirent[@start] .. @dirent[@start + @num - 1].
+ *
+ * Entries whose name is not entirely numeric are skipped. Failures for
+ * individual pids are ignored.
+ *
+ * Returns 0 once the slice has been walked, -1 only if one of the scratch
+ * events cannot be allocated.
+ */
+static int __perf_event__synthesize_threads(struct perf_tool *tool,
+                                           perf_event__handler_t process,
+                                           struct machine *machine,
+                                           bool mmap_data,
+                                           struct dirent **dirent,
+                                           int start,
+                                           int num)
+{
+       union perf_event *comm_event, *mmap_event, *fork_event;
+       union perf_event *namespaces_event;
+       int err = -1;
+       char *end;
+       pid_t pid;
+       int i;
+
+       /* scratch events, each sized for its record plus the id header */
+       comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
+       if (comm_event == NULL)
+               goto out;
+
+       mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size);
+       if (mmap_event == NULL)
+               goto out_free_comm;
+
+       fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
+       if (fork_event == NULL)
+               goto out_free_mmap;
+
+       namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
+                                 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+                                 machine->id_hdr_size);
+       if (namespaces_event == NULL)
+               goto out_free_fork;
+
+       for (i = start; i < start + num; i++) {
+               if (!isdigit(dirent[i]->d_name[0]))
+                       continue;
+
+               pid = (pid_t)strtol(dirent[i]->d_name, &end, 10);
+               /* only interested in proper numerical dirents */
+               if (*end)
+                       continue;
+               /*
+                * We may race with exiting thread, so don't stop just because
+                * one thread couldn't be synthesized.
+                */
+               __event__synthesize_thread(comm_event, mmap_event, fork_event,
+                                          namespaces_event, pid, 1, process,
+                                          tool, machine, mmap_data);
+       }
+       err = 0;
+
+       /* unwind in reverse allocation order; the labels serve the failure paths */
+       free(namespaces_event);
+out_free_fork:
+       free(fork_event);
+out_free_mmap:
+       free(mmap_event);
+out_free_comm:
+       free(comm_event);
+out:
+       return err;
+}
+
+/*
+ * Argument bundle handed to synthesize_threads_worker(): the parameters of
+ * __perf_event__synthesize_threads() packed into one struct so they can be
+ * passed through the single void pointer of a pthread start routine.
+ */
+struct synthesize_threads_arg {
+       struct perf_tool *tool;
+       perf_event__handler_t process;
+       struct machine *machine;
+       bool mmap_data;
+       struct dirent **dirent;         /* directory listing shared by all workers */
+       int num;                        /* number of entries in this worker's slice */
+       int start;                      /* index of the slice's first entry */
+};
+
+/*
+ * pthread start routine: unpack the struct synthesize_threads_arg passed in
+ * @arg and run __perf_event__synthesize_threads() on that slice.  The
+ * result of the synthesis is dropped; the thread always returns NULL.
+ */
+static void *synthesize_threads_worker(void *arg)
+{
+       struct synthesize_threads_arg *slice = arg;
+
+       (void)__perf_event__synthesize_threads(slice->tool, slice->process,
+                                              slice->machine, slice->mmap_data,
+                                              slice->dirent,
+                                              slice->start, slice->num);
+       return NULL;
+}
+
+/*
+ * Synthesize events for every task listed in "<machine->root_dir>/proc".
+ *
+ * @nr_threads_synthesize: number of worker threads to use; UINT_MAX selects
+ *                         sysconf(_SC_NPROCESSORS_ONLN).
+ *
+ * The directory listing is never split across more workers than it has
+ * entries.  With at most one worker the listing is walked in the calling
+ * thread and the result of that walk is returned.  Otherwise the listing is
+ * cut into thread_nr contiguous slices (the first n % thread_nr slices are
+ * one entry longer) and each slice is handled by its own pthread.
+ *
+ * Returns 0 for the default guest machine and when all workers were
+ * started; -1 when the directory cannot be scanned, memory cannot be
+ * allocated or a worker cannot be created.  What the workers themselves
+ * return is not collected.
+ */
+int perf_event__synthesize_threads(struct perf_tool *tool,
+                                  perf_event__handler_t process,
+                                  struct machine *machine,
+                                  bool mmap_data,
+                                  unsigned int nr_threads_synthesize)
+{
+       struct synthesize_threads_arg *args = NULL;
+       pthread_t *synthesize_threads = NULL;
+       char proc_path[PATH_MAX];
+       struct dirent **dirent;
+       int num_per_thread;
+       int nr_created;
+       int m, n, i;
+       int thread_nr;
+       int base = 0;
+       int err = -1;
+
+
+       if (machine__is_default_guest(machine))
+               return 0;
+
+       snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
+       n = scandir(proc_path, &dirent, 0, alphasort);
+       if (n < 0)
+               return err;
+
+       if (nr_threads_synthesize == UINT_MAX)
+               thread_nr = sysconf(_SC_NPROCESSORS_ONLN);
+       else
+               thread_nr = nr_threads_synthesize;
+
+       /*
+        * Clamp before deciding on the serial path: an empty listing would
+        * otherwise leave thread_nr == 0 and divide by zero below.
+        */
+       if (thread_nr > n)
+               thread_nr = n;
+
+       if (thread_nr <= 1) {
+               err = __perf_event__synthesize_threads(tool, process,
+                                                      machine, mmap_data,
+                                                      dirent, base, n);
+               goto free_dirent;
+       }
+
+       synthesize_threads = calloc(thread_nr, sizeof(pthread_t));
+       if (synthesize_threads == NULL)
+               goto free_dirent;
+
+       args = calloc(thread_nr, sizeof(*args));
+       if (args == NULL)
+               goto free_threads;
+
+       num_per_thread = n / thread_nr;
+       m = n % thread_nr;
+       for (i = 0; i < thread_nr; i++) {
+               args[i].tool = tool;
+               args[i].process = process;
+               args[i].machine = machine;
+               args[i].mmap_data = mmap_data;
+               args[i].dirent = dirent;
+               /* the first m slices absorb the remainder, one entry each */
+               args[i].num = num_per_thread + (i < m ? 1 : 0);
+               args[i].start = base;
+               base += args[i].num;
+       }
+
+       for (nr_created = 0; nr_created < thread_nr; nr_created++) {
+               if (pthread_create(&synthesize_threads[nr_created], NULL,
+                                  synthesize_threads_worker, &args[nr_created]))
+                       break;
+       }
+       if (nr_created == thread_nr)
+               err = 0;
+
+       /*
+        * Join only the threads that were really started: the remaining
+        * pthread_t slots are just zeroed memory, not valid thread IDs.
+        */
+       for (i = 0; i < nr_created; i++)
+               pthread_join(synthesize_threads[i], NULL);
+       free(args);
+free_threads:
+       free(synthesize_threads);
+free_dirent:
+       for (i = 0; i < n; i++)
+               zfree(&dirent[i]);
+       free(dirent);
+
+       return err;
+}
+
+/*
+ * Weak default that synthesizes nothing and reports success.  Called by
+ * perf_event__synthesize_kernel_mmap() after the main kernel mmap event;
+ * being __weak, another definition may replace it at link time.
+ */
+int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused,
+                                             perf_event__handler_t process __maybe_unused,
+                                             struct machine *machine __maybe_unused)
+{
+       return 0;
+}
+
+/*
+ * Build a PERF_RECORD_MMAP event describing the kernel map of @machine and
+ * pass it to @process through perf_tool__process_synth_event().
+ *
+ * The file name is machine->mmap_name followed by the name of the kernel
+ * map's reference relocation symbol; pgoff carries that symbol's address.
+ *
+ * Returns the handler's result, or -1 when the machine has no kernel map,
+ * the map has no kmap/reference relocation symbol, the name cannot be
+ * formatted, or memory cannot be allocated.
+ */
+static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+                                               perf_event__handler_t process,
+                                               struct machine *machine)
+{
+       struct map *map = machine__kernel_map(machine);
+       union perf_event *event;
+       struct kmap *kmap;
+       size_t size;
+       int len, err;
+
+       if (map == NULL)
+               return -1;
+
+       /* NOTE(review): map__kmap() is assumed to be able to return NULL - confirm */
+       kmap = map__kmap(map);
+       if (!kmap || !kmap->ref_reloc_sym)
+               return -1;
+
+       /*
+        * We should get this from /sys/kernel/sections/.text, but till that is
+        * available use this, and after it is use this as a fallback for older
+        * kernels.
+        */
+       event = zalloc((sizeof(event->mmap) + machine->id_hdr_size));
+       if (event == NULL) {
+               pr_debug("Not enough memory synthesizing mmap event "
+                        "for kernel modules\n");
+               return -1;
+       }
+
+       if (machine__is_host(machine)) {
+               /*
+                * kernel uses PERF_RECORD_MISC_USER for user space maps,
+                * see kernel/perf_event.c __perf_event_mmap
+                */
+               event->header.misc = PERF_RECORD_MISC_KERNEL;
+       } else {
+               event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+       }
+
+       len = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
+                      "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name);
+       if (len < 0) {
+               free(event);
+               return -1;
+       }
+
+       /*
+        * snprintf() reports the length the full string would have had.  If
+        * the name was truncated, cap the size at the buffer so that
+        * header.size never describes more bytes than were allocated.
+        */
+       size = PERF_ALIGN((size_t)len + 1, sizeof(u64));
+       if (size > sizeof(event->mmap.filename))
+               size = sizeof(event->mmap.filename);
+
+       event->mmap.header.type = PERF_RECORD_MMAP;
+       event->mmap.header.size = (sizeof(event->mmap) -
+                       (sizeof(event->mmap.filename) - size) + machine->id_hdr_size);
+       event->mmap.pgoff = kmap->ref_reloc_sym->addr;
+       event->mmap.start = map->start;
+       event->mmap.len   = map->end - event->mmap.start;
+       event->mmap.pid   = machine->pid;
+
+       err = perf_tool__process_synth_event(tool, event, machine, process);
+       free(event);
+
+       return err;
+}
+
+/*
+ * Synthesize the kernel mmap event and, if that did not fail, whatever
+ * perf_event__synthesize_extra_kmaps() adds on top.
+ *
+ * Returns the negative result of the first step when it fails, otherwise
+ * the result of the extra-kmaps step.
+ */
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+                                      perf_event__handler_t process,
+                                      struct machine *machine)
+{
+       int ret = __perf_event__synthesize_kernel_mmap(tool, process, machine);
+
+       if (ret >= 0)
+               ret = perf_event__synthesize_extra_kmaps(tool, process, machine);
+
+       return ret;
+}
+
+/*
+ * Emit one PERF_RECORD_THREAD_MAP event holding a (pid, comm) entry for
+ * each thread in @threads and pass it to @process.
+ *
+ * A thread without a comm gets an empty string.  Each comm is copied so
+ * that it is always NUL-terminated inside the fixed-size entry; longer
+ * names are truncated.
+ *
+ * Returns the handler's result, or -ENOMEM if the event cannot be
+ * allocated.
+ */
+int perf_event__synthesize_thread_map2(struct perf_tool *tool,
+                                     struct perf_thread_map *threads,
+                                     perf_event__handler_t process,
+                                     struct machine *machine)
+{
+       union perf_event *event;
+       int i, err, size;
+
+       size  = sizeof(event->thread_map);
+       size += threads->nr * sizeof(event->thread_map.entries[0]);
+
+       event = zalloc(size);
+       if (!event)
+               return -ENOMEM;
+
+       event->header.type = PERF_RECORD_THREAD_MAP;
+       event->header.size = size;
+       event->thread_map.nr = threads->nr;
+
+       for (i = 0; i < threads->nr; i++) {
+               struct perf_record_thread_map_entry *entry = &event->thread_map.entries[i];
+               const char *comm = perf_thread_map__comm(threads, i);
+
+               if (!comm)
+                       comm = "";
+
+               entry->pid = perf_thread_map__pid(threads, i);
+               /*
+                * The event was zeroed by zalloc(); copying at most
+                * sizeof - 1 bytes keeps the final byte as terminator.
+                */
+               strncpy((char *) &entry->comm, comm, sizeof(entry->comm) - 1);
+       }
+
+       err = process(tool, event, NULL, machine);
+
+       free(event);
+       return err;
+}
+
+/* Fill @cpus with the entry count and the cpu numbers of @map, in order. */
+static void synthesize_cpus(struct cpu_map_entries *cpus,
+                           struct perf_cpu_map *map)
+{
+       int idx = 0;
+
+       cpus->nr = map->nr;
+
+       while (idx < map->nr) {
+               cpus->cpu[idx] = map->map[idx];
+               idx++;
+       }
+}
+
+/*
+ * Fill @mask as a bitmap of the cpus in @map: one bit is set per cpu
+ * number, @max bits are accounted for in mask->nr (in longs) and the size
+ * of a long is recorded in mask->long_size.
+ */
+static void synthesize_mask(struct perf_record_record_cpu_map *mask,
+                           struct perf_cpu_map *map, int max)
+{
+       int idx;
+
+       mask->long_size = sizeof(long);
+       mask->nr = BITS_TO_LONGS(max);
+
+       for (idx = 0; idx < map->nr; idx++) {
+               set_bit(map->map[idx], mask->mask);
+       }
+}
+
+/* Bytes needed for the array form: the header struct plus one u16 per cpu. */
+static size_t cpus_size(struct perf_cpu_map *map)
+{
+       size_t entries = map->nr * sizeof(u16);
+
+       return sizeof(struct cpu_map_entries) + entries;
+}
+
+/*
+ * Bytes needed for the bitmap form.  Stores in *max the number of bits the
+ * bitmap must hold, i.e. the highest cpu number in @map plus one (0 if no
+ * entry yields a positive bit count).
+ */
+static size_t mask_size(struct perf_cpu_map *map, int *max)
+{
+       int top = 0;
+       int idx;
+
+       for (idx = 0; idx < map->nr; idx++) {
+               /* bit position of the cpu is + 1 */
+               if (map->map[idx] + 1 > top)
+                       top = map->map[idx] + 1;
+       }
+
+       *max = top;
+
+       return sizeof(struct perf_record_record_cpu_map) + BITS_TO_LONGS(top) * sizeof(long);
+}
+
+/*
+ * Allocate a zeroed buffer for an event that ends in cpu map data.
+ *
+ * @size: in: size of the event part that precedes the cpu map data;
+ *        out: total, u64-aligned size of the allocation.
+ * @type: out: PERF_CPU_MAP__CPUS or PERF_CPU_MAP__MASK, whichever encoding
+ *        was chosen.
+ * @max:  out: number of bits a mask encoding needs (see mask_size()).
+ *
+ * Returns the buffer, or NULL if zalloc() fails.
+ */
+void *cpu_map_data__alloc(struct perf_cpu_map *map, size_t *size, u16 *type, int *max)
+{
+       bool is_dummy = perf_cpu_map__empty(map);
+       size_t size_cpus, size_mask;
+       bool use_cpus;
+
+       /*
+        * Both array and mask data have variable size based
+        * on the number of cpus and their actual values.
+        * The size of the 'struct perf_record_cpu_map_data' is:
+        *
+        *   array = size of 'struct cpu_map_entries' +
+        *           number of cpus * sizeof(u16)
+        *
+        *   mask  = size of 'struct perf_record_record_cpu_map' +
+        *           maximum cpu bit converted to size of longs
+        *
+        * and finally + the size of 'struct perf_record_cpu_map_data'.
+        */
+       size_cpus = cpus_size(map);
+       size_mask = mask_size(map, max);
+
+       /* a dummy map always uses the array form, else the smaller one wins */
+       use_cpus = is_dummy || (size_cpus < size_mask);
+
+       *size += use_cpus ? size_cpus : size_mask;
+       *type  = use_cpus ? PERF_CPU_MAP__CPUS : PERF_CPU_MAP__MASK;
+
+       *size += sizeof(struct perf_record_cpu_map_data);
+       *size = PERF_ALIGN(*size, sizeof(u64));
+       return zalloc(*size);
+}
+
+/*
+ * Record @type in @data and fill data->data from @map in the matching
+ * encoding: an array of cpu numbers for PERF_CPU_MAP__CPUS, a bitmap of
+ * @max bits for PERF_CPU_MAP__MASK.  Any other type only gets recorded.
+ */
+void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, struct perf_cpu_map *map,
+                             u16 type, int max)
+{
+       data->type = type;
+
+       if (type == PERF_CPU_MAP__CPUS)
+               synthesize_cpus((struct cpu_map_entries *) data->data, map);
+       else if (type == PERF_CPU_MAP__MASK)
+               synthesize_mask((struct perf_record_record_cpu_map *)data->data, map, max);
+}
+
+/*
+ * Allocate and fill a PERF_RECORD_CPU_MAP event for @map.
+ * Returns NULL if the allocation fails; the caller frees the event.
+ */
+static struct perf_record_cpu_map *cpu_map_event__new(struct perf_cpu_map *map)
+{
+       struct perf_record_cpu_map *event;
+       size_t size = sizeof(*event);
+       u16 type;
+       int max;
+
+       event = cpu_map_data__alloc(map, &size, &type, &max);
+       if (event == NULL)
+               return NULL;
+
+       event->header.size = size;
+       event->header.type = PERF_RECORD_CPU_MAP;
+       event->data.type   = type;
+
+       cpu_map_data__synthesize(&event->data, map, type, max);
+
+       return event;
+}
+
+/*
+ * Build a PERF_RECORD_CPU_MAP event for @map, pass it to @process and free
+ * it again.  Returns the handler's result, or -ENOMEM if the event cannot
+ * be allocated.
+ */
+int perf_event__synthesize_cpu_map(struct perf_tool *tool,
+                                  struct perf_cpu_map *map,
+                                  perf_event__handler_t process,
+                                  struct machine *machine)
+{
+       struct perf_record_cpu_map *cpu_map_event = cpu_map_event__new(map);
+       int ret;
+
+       if (cpu_map_event == NULL)
+               return -ENOMEM;
+
+       ret = process(tool, (union perf_event *) cpu_map_event, NULL, machine);
+       free(cpu_map_event);
+
+       return ret;
+}
+
+int perf_event__synthesize_stat_config(struct perf_tool *tool,
+                                      struct perf_stat_config *config,
+                                      perf_event__handler_t process,
+                                      struct machine *machine)
+{
+       struct perf_record_stat_config *event;
+       int size, i = 0, err;
+
+       size  = sizeof(*event);
+       size += (PERF_STAT_CONFIG_TERM__MAX * sizeof(event->data[0]));
+
+       event = zalloc(size);
+       if (!event)
+               return -ENOMEM;
+
+       event->header.type = PERF_RECORD_STAT_CONFIG;
+       event->header.size = size;
+       event->nr          = PERF_STAT_CONFIG_TERM__MAX;
+
+#define ADD(__term, __val)                                     \
+       event->data[i].tag = PERF_STAT_CONFIG_TERM__##__term;   \
+       event->data[i].val = __val;                             \
+       i++;
+
+       ADD(AGGR_MODE,  config->aggr_mode)
+       ADD(INTERVAL,   config->interval)
+       ADD(SCALE,      config->scale)
+
+       WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX,
+                 "stat config terms unbalanced\n");
+#undef ADD
+
+       err = process(tool, (union perf_event *) event, NULL, machine);
+
+       free(event);
+       return err;
+}
+
+/*
+ * Emit a PERF_RECORD_STAT event, built on the stack, that carries the
+ * val/ena/run values of @count for the counter identified by @id, @cpu
+ * and @thread.  Returns the handler's result.
+ */
+int perf_event__synthesize_stat(struct perf_tool *tool,
+                               u32 cpu, u32 thread, u64 id,
+                               struct perf_counts_values *count,
+                               perf_event__handler_t process,
+                               struct machine *machine)
+{
+       struct perf_record_stat stat_event;
+
+       stat_event.header.type = PERF_RECORD_STAT;
+       stat_event.header.misc = 0;
+       stat_event.header.size = sizeof(stat_event);
+
+       stat_event.id     = id;
+       stat_event.cpu    = cpu;
+       stat_event.thread = thread;
+
+       stat_event.val    = count->val;
+       stat_event.ena    = count->ena;
+       stat_event.run    = count->run;
+
+       return process(tool, (union perf_event *) &stat_event, NULL, machine);
+}
+
+/*
+ * Emit a PERF_RECORD_STAT_ROUND event, built on the stack, carrying
+ * @evtime and @type.  Returns the handler's result.
+ */
+int perf_event__synthesize_stat_round(struct perf_tool *tool,
+                                     u64 evtime, u64 type,
+                                     perf_event__handler_t process,
+                                     struct machine *machine)
+{
+       struct perf_record_stat_round round_event;
+
+       round_event.header.type = PERF_RECORD_STAT_ROUND;
+       round_event.header.misc = 0;
+       round_event.header.size = sizeof(round_event);
+
+       round_event.type = type;
+       round_event.time = evtime;
+
+       return process(tool, (union perf_event *) &round_event, NULL, machine);
+}
+
+/*
+ * Compute the number of bytes a sample event needs for @sample, given the
+ * sample @type bits and, for PERF_SAMPLE_READ, the @read_format bits.
+ *
+ * The result starts at sizeof(struct perf_record_sample) and grows by the
+ * size of each selected field.  Pointers inside @sample (callchain,
+ * branch_stack) are only dereferenced when the matching bit is set.
+ */
+size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format)
+{
+       /* fields that always take exactly one u64 when selected */
+       static const u64 single_u64_fields[] = {
+               PERF_SAMPLE_IDENTIFIER,
+               PERF_SAMPLE_IP,
+               PERF_SAMPLE_TID,
+               PERF_SAMPLE_TIME,
+               PERF_SAMPLE_ADDR,
+               PERF_SAMPLE_ID,
+               PERF_SAMPLE_STREAM_ID,
+               PERF_SAMPLE_CPU,
+               PERF_SAMPLE_PERIOD,
+               PERF_SAMPLE_WEIGHT,
+               PERF_SAMPLE_DATA_SRC,
+               PERF_SAMPLE_TRANSACTION,
+               PERF_SAMPLE_PHYS_ADDR,
+       };
+       size_t result = sizeof(struct perf_record_sample);
+       size_t sz, k;
+
+       for (k = 0; k < sizeof(single_u64_fields) / sizeof(single_u64_fields[0]); k++) {
+               if (type & single_u64_fields[k])
+                       result += sizeof(u64);
+       }
+
+       if (type & PERF_SAMPLE_READ) {
+               /* the value itself, or the group's entry count */
+               result += sizeof(u64);
+
+               if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+                       result += sizeof(u64);
+
+               if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+                       result += sizeof(u64);
+
+               /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+               if (!(read_format & PERF_FORMAT_GROUP)) {
+                       result += sizeof(u64);
+               } else {
+                       sz = sample->read.group.nr *
+                            sizeof(struct sample_read_value);
+                       result += sz;
+               }
+       }
+
+       if (type & PERF_SAMPLE_CALLCHAIN) {
+               /* the entries plus the leading count */
+               sz = (sample->callchain->nr + 1) * sizeof(u64);
+               result += sz;
+       }
+
+       if (type & PERF_SAMPLE_RAW) {
+               /* u32 length followed by the payload */
+               result += sizeof(u32);
+               result += sample->raw_size;
+       }
+
+       if (type & PERF_SAMPLE_BRANCH_STACK) {
+               sz = sample->branch_stack->nr * sizeof(struct branch_entry);
+               sz += sizeof(u64);
+               result += sz;
+       }
+
+       if (type & PERF_SAMPLE_REGS_USER) {
+               /* abi word; registers follow only for a non-zero abi */
+               result += sizeof(u64);
+               if (sample->user_regs.abi) {
+                       sz = hweight64(sample->user_regs.mask) * sizeof(u64);
+                       result += sz;
+               }
+       }
+
+       if (type & PERF_SAMPLE_STACK_USER) {
+               /* size word; data and a second size word only if non-empty */
+               sz = sample->user_stack.size;
+               result += sizeof(u64);
+               if (sz) {
+                       result += sz;
+                       result += sizeof(u64);
+               }
+       }
+
+       if (type & PERF_SAMPLE_REGS_INTR) {
+               /* abi word; registers follow only for a non-zero abi */
+               result += sizeof(u64);
+               if (sample->intr_regs.abi) {
+                       sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
+                       result += sz;
+               }
+       }
+
+       return result;
+}
+
+/*
+ * Serialize the fields of @sample selected by @type (and by @read_format
+ * for PERF_SAMPLE_READ) into event->sample.array, one after the other in
+ * the order of the checks below.
+ *
+ * Nothing here checks how large @event is; the caller has to provide
+ * enough room (presumably sized with perf_event__sample_event_size() -
+ * confirm against callers).
+ *
+ * Always returns 0.
+ */
+int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format,
+                                 const struct perf_sample *sample)
+{
+       __u64 *array;
+       size_t sz;
+       /*
+        * used for cross-endian analysis. See git commit 65014ab3
+        * for why this goofiness is needed.
+        */
+       union u64_swap u;
+
+       /* write cursor; advanced past every field that gets emitted */
+       array = event->sample.array;
+
+       if (type & PERF_SAMPLE_IDENTIFIER) {
+               *array = sample->id;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_IP) {
+               *array = sample->ip;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_TID) {
+               /* pid and tid share one u64, pid in the first 32-bit half */
+               u.val32[0] = sample->pid;
+               u.val32[1] = sample->tid;
+               *array = u.val64;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_TIME) {
+               *array = sample->time;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_ADDR) {
+               *array = sample->addr;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_ID) {
+               *array = sample->id;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_STREAM_ID) {
+               *array = sample->stream_id;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_CPU) {
+               /* cpu in the first 32-bit half, the second half is zeroed */
+               u.val32[0] = sample->cpu;
+               u.val32[1] = 0;
+               *array = u.val64;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_PERIOD) {
+               *array = sample->period;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_READ) {
+               /* first word: entry count for a group read, else the value */
+               if (read_format & PERF_FORMAT_GROUP)
+                       *array = sample->read.group.nr;
+               else
+                       *array = sample->read.one.value;
+               array++;
+
+               if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+                       *array = sample->read.time_enabled;
+                       array++;
+               }
+
+               if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+                       *array = sample->read.time_running;
+                       array++;
+               }
+
+               /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+               if (read_format & PERF_FORMAT_GROUP) {
+                       sz = sample->read.group.nr *
+                            sizeof(struct sample_read_value);
+                       memcpy(array, sample->read.group.values, sz);
+                       array = (void *)array + sz;
+               } else {
+                       *array = sample->read.one.id;
+                       array++;
+               }
+       }
+
+       if (type & PERF_SAMPLE_CALLCHAIN) {
+               /* copies the callchain's nr word together with its entries */
+               sz = (sample->callchain->nr + 1) * sizeof(u64);
+               memcpy(array, sample->callchain, sz);
+               array = (void *)array + sz;
+       }
+
+       if (type & PERF_SAMPLE_RAW) {
+               /*
+                * The size is stored with a full u64 write, but the cursor
+                * only moves on by a u32, so the raw data copied next
+                * overwrites the second half of that word.
+                */
+               u.val32[0] = sample->raw_size;
+               *array = u.val64;
+               array = (void *)array + sizeof(u32);
+
+               memcpy(array, sample->raw_data, sample->raw_size);
+               array = (void *)array + sample->raw_size;
+       }
+
+       if (type & PERF_SAMPLE_BRANCH_STACK) {
+               /* one u64 of header plus nr branch entries */
+               sz = sample->branch_stack->nr * sizeof(struct branch_entry);
+               sz += sizeof(u64);
+               memcpy(array, sample->branch_stack, sz);
+               array = (void *)array + sz;
+       }
+
+       if (type & PERF_SAMPLE_REGS_USER) {
+               /* abi word, then one u64 per bit set in the register mask */
+               if (sample->user_regs.abi) {
+                       *array++ = sample->user_regs.abi;
+                       sz = hweight64(sample->user_regs.mask) * sizeof(u64);
+                       memcpy(array, sample->user_regs.regs, sz);
+                       array = (void *)array + sz;
+               } else {
+                       *array++ = 0;
+               }
+       }
+
+       if (type & PERF_SAMPLE_STACK_USER) {
+               /* size, and for a non-empty stack the data and the size again */
+               sz = sample->user_stack.size;
+               *array++ = sz;
+               if (sz) {
+                       memcpy(array, sample->user_stack.data, sz);
+                       array = (void *)array + sz;
+                       *array++ = sz;
+               }
+       }
+
+       if (type & PERF_SAMPLE_WEIGHT) {
+               *array = sample->weight;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_DATA_SRC) {
+               *array = sample->data_src;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_TRANSACTION) {
+               *array = sample->transaction;
+               array++;
+       }
+
+       if (type & PERF_SAMPLE_REGS_INTR) {
+               /* abi word, then one u64 per bit set in the register mask */
+               if (sample->intr_regs.abi) {
+                       *array++ = sample->intr_regs.abi;
+                       sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
+                       memcpy(array, sample->intr_regs.regs, sz);
+                       array = (void *)array + sz;
+               } else {
+                       *array++ = 0;
+               }
+       }
+
+       if (type & PERF_SAMPLE_PHYS_ADDR) {
+               *array = sample->phys_addr;
+               array++;
+       }
+
+       return 0;
+}
+
+/*
+ * Emit PERF_RECORD_ID_INDEX events listing, for every sample id of every
+ * evsel in @evlist, the idx/cpu/tid recorded for that id.
+ *
+ * One event holds at most as many entries as fit into a 16-bit record
+ * size; when it is full it is passed to @process and refilled.  The last
+ * event is sized for the entries that are left.
+ *
+ * Returns the handler's result, -ENOMEM if the event cannot be allocated,
+ * or -ENOENT if an id has no matching perf_sample_id.
+ */
+int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process,
+                                   struct evlist *evlist, struct machine *machine)
+{
+       const size_t hdr_sz = sizeof(struct perf_record_id_index);
+       const size_t entry_sz = sizeof(struct id_index_entry);
+       size_t remaining = 0, used = 0, chunk, max_nr;
+       union perf_event *ev;
+       struct evsel *evsel;
+       int err;
+
+       pr_debug2("Synthesizing id index\n");
+
+       max_nr = (UINT16_MAX - hdr_sz) / entry_sz;
+
+       evlist__for_each_entry(evlist, evsel)
+               remaining += evsel->core.ids;
+
+       chunk = remaining > max_nr ? max_nr : remaining;
+
+       ev = zalloc(hdr_sz + chunk * entry_sz);
+       if (ev == NULL)
+               return -ENOMEM;
+
+       ev->id_index.header.type = PERF_RECORD_ID_INDEX;
+       ev->id_index.header.size = hdr_sz + chunk * entry_sz;
+       ev->id_index.nr = chunk;
+
+       evlist__for_each_entry(evlist, evsel) {
+               u32 j;
+
+               for (j = 0; j < evsel->core.ids; j++) {
+                       struct perf_sample_id *sid;
+                       struct id_index_entry *e;
+
+                       /* event is full: hand it over and start refilling */
+                       if (used >= chunk) {
+                               err = process(tool, ev, NULL, machine);
+                               if (err)
+                                       goto out_free;
+                               remaining -= chunk;
+                               used = 0;
+                       }
+
+                       e = &ev->id_index.entries[used];
+                       used++;
+
+                       e->id = evsel->core.id[j];
+
+                       sid = perf_evlist__id2sid(evlist, e->id);
+                       if (sid == NULL) {
+                               err = -ENOENT;
+                               goto out_free;
+                       }
+
+                       e->idx = sid->idx;
+                       e->cpu = sid->cpu;
+                       e->tid = sid->tid;
+               }
+       }
+
+       /* final event: shrink it to the entries that are left */
+       ev->id_index.header.size = hdr_sz + remaining * entry_sz;
+       ev->id_index.nr = remaining;
+
+       err = process(tool, ev, NULL, machine);
+out_free:
+       free(ev);
+
+       return err;
+}
+
+/*
+ * Pick the synthesis routine that matches @target: the given thread map
+ * for a task target, all of /proc for a cpu target, nothing otherwise.
+ * Returns the chosen routine's result, or 0 when nothing is synthesized.
+ */
+int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
+                                 struct target *target, struct perf_thread_map *threads,
+                                 perf_event__handler_t process, bool data_mmap,
+                                 unsigned int nr_threads_synthesize)
+{
+       if (target__has_task(target))
+               return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap);
+
+       /* command specified */
+       if (!target__has_cpu(target))
+               return 0;
+
+       return perf_event__synthesize_threads(tool, process,
+                                             machine, data_mmap,
+                                             nr_threads_synthesize);
+}
+
+/*
+ * Convenience form of __machine__synthesize_threads() that passes no tool
+ * and uses perf_event__process as the event handler.
+ */
+int machine__synthesize_threads(struct machine *machine, struct target *target,
+                               struct perf_thread_map *threads, bool data_mmap,
+                               unsigned int nr_threads_synthesize)
+{
+       struct perf_tool *no_tool = NULL;
+
+       return __machine__synthesize_threads(machine, no_tool, target, threads,
+                                            perf_event__process, data_mmap,
+                                            nr_threads_synthesize);
+}
+
+/*
+ * Allocate a zeroed PERF_RECORD_EVENT_UPDATE event with room for @size
+ * bytes of payload (total rounded up to a multiple of u64) and fill in
+ * the header, @type and @id.  Returns NULL if the allocation fails.
+ */
+static struct perf_record_event_update *event_update_event__new(size_t size, u64 type, u64 id)
+{
+       struct perf_record_event_update *ev;
+
+       size = PERF_ALIGN(size + sizeof(*ev), sizeof(u64));
+
+       ev = zalloc(size);
+       if (ev == NULL)
+               return NULL;
+
+       ev->header.type = PERF_RECORD_EVENT_UPDATE;
+       ev->header.size = (u16)size;
+       ev->type        = type;
+       ev->id          = id;
+
+       return ev;
+}
+
+/*
+ * Emit an event update of type PERF_EVENT_UPDATE__UNIT carrying
+ * evsel->unit as a NUL-terminated string, tagged with the evsel's first
+ * id.  Returns the handler's result, or -ENOMEM on allocation failure.
+ */
+int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel,
+                                            perf_event__handler_t process)
+{
+       /* payload length including the terminating NUL */
+       const size_t len = strlen(evsel->unit) + 1;
+       struct perf_record_event_update *ev;
+       int ret;
+
+       ev = event_update_event__new(len, PERF_EVENT_UPDATE__UNIT, evsel->core.id[0]);
+       if (!ev)
+               return -ENOMEM;
+
+       strlcpy(ev->data, evsel->unit, len);
+
+       ret = process(tool, (union perf_event *)ev, NULL, NULL);
+       free(ev);
+
+       return ret;
+}
+
+/*
+ * Emit an event update of type PERF_EVENT_UPDATE__SCALE carrying
+ * evsel->scale, tagged with the evsel's first id.  Returns the handler's
+ * result, or -ENOMEM on allocation failure.
+ */
+int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel,
+                                             perf_event__handler_t process)
+{
+       struct perf_record_event_update_scale *payload;
+       struct perf_record_event_update *ev;
+       int ret;
+
+       ev = event_update_event__new(sizeof(*payload), PERF_EVENT_UPDATE__SCALE, evsel->core.id[0]);
+       if (!ev)
+               return -ENOMEM;
+
+       payload = (struct perf_record_event_update_scale *)ev->data;
+       payload->scale = evsel->scale;
+
+       ret = process(tool, (union perf_event *)ev, NULL, NULL);
+       free(ev);
+
+       return ret;
+}
+
+/*
+ * Emit an event update of type PERF_EVENT_UPDATE__NAME carrying
+ * evsel->name as a NUL-terminated string, tagged with the evsel's first
+ * id.  Returns the handler's result, or -ENOMEM on allocation failure.
+ */
+int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel,
+                                            perf_event__handler_t process)
+{
+       /* payload length including the terminating NUL */
+       const size_t len = strlen(evsel->name) + 1;
+       struct perf_record_event_update *ev;
+       int ret;
+
+       ev = event_update_event__new(len, PERF_EVENT_UPDATE__NAME, evsel->core.id[0]);
+       if (!ev)
+               return -ENOMEM;
+
+       strlcpy(ev->data, evsel->name, len);
+
+       ret = process(tool, (union perf_event *)ev, NULL, NULL);
+       free(ev);
+
+       return ret;
+}
+
+/*
+ * Emit an event update of type PERF_EVENT_UPDATE__CPUS carrying the
+ * evsel's own cpu map, tagged with the evsel's first id.
+ *
+ * Returns 0 without emitting anything when the evsel has no own cpu map,
+ * -ENOMEM on allocation failure, otherwise the handler's result.
+ */
+int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel,
+                                            perf_event__handler_t process)
+{
+       struct perf_cpu_map *cpus = evsel->core.own_cpus;
+       struct perf_record_event_update *ev;
+       size_t size = sizeof(*ev);
+       int max, ret;
+       u16 type;
+
+       if (cpus == NULL)
+               return 0;
+
+       ev = cpu_map_data__alloc(cpus, &size, &type, &max);
+       if (ev == NULL)
+               return -ENOMEM;
+
+       ev->header.type = PERF_RECORD_EVENT_UPDATE;
+       ev->header.size = (u16)size;
+       ev->type        = PERF_EVENT_UPDATE__CPUS;
+       ev->id          = evsel->core.id[0];
+
+       cpu_map_data__synthesize((struct perf_record_cpu_map_data *)ev->data,
+                                cpus, type, max);
+
+       ret = process(tool, (union perf_event *)ev, NULL, NULL);
+       free(ev);
+
+       return ret;
+}
+
+/*
+ * Synthesize one attr event per evsel in @evlist.  Stops at the first
+ * failure and returns its error; returns 0 when every evsel succeeded
+ * (or the list is empty).
+ */
+int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist,
+                                perf_event__handler_t process)
+{
+       struct evsel *evsel;
+       int err;
+
+       evlist__for_each_entry(evlist, evsel) {
+               err = perf_event__synthesize_attr(tool, &evsel->core.attr, evsel->core.ids,
+                                                 evsel->core.id, process);
+               if (!err)
+                       continue;
+
+               pr_debug("failed to create perf header attribute\n");
+               return err;
+       }
+
+       return 0;
+}
+
+/* True when the evsel has a unit string that is not empty. */
+static bool has_unit(struct evsel *evsel)
+{
+       if (!evsel->unit)
+               return false;
+
+       return evsel->unit[0] != '\0';
+}
+
+/* True when the evsel's scale is anything other than 1. */
+static bool has_scale(struct evsel *evsel)
+{
+       return !(evsel->scale == 1);
+}
+
+/*
+ * Synthesize the per-evsel information that is not carried by the attr
+ * event: unit, scale, own cpu map and, for pipe output only, the name.
+ *
+ * Evsels that are not marked supported are skipped.  Stops at the first
+ * step that returns a negative value, logs which one failed and returns
+ * that value; returns 0 otherwise.
+ */
+int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list,
+                                     perf_event__handler_t process, bool is_pipe)
+{
+       struct evsel *evsel;
+       int err;
+
+       /*
+        * Synthesize other events stuff not carried within
+        * attr event - unit, scale, name
+        */
+       evlist__for_each_entry(evsel_list, evsel) {
+               if (!evsel->supported)
+                       continue;
+
+               /*
+                * Synthesize unit and scale only if it's defined.
+                */
+               if (has_unit(evsel)) {
+                       err = perf_event__synthesize_event_update_unit(tool, evsel, process);
+                       if (err < 0) {
+                               pr_err("Couldn't synthesize evsel unit.\n");
+                               return err;
+                       }
+               }
+
+               if (has_scale(evsel)) {
+                       err = perf_event__synthesize_event_update_scale(tool, evsel, process);
+                       if (err < 0) {
+                               pr_err("Couldn't synthesize evsel scale.\n");
+                               return err;
+                       }
+               }
+
+               if (evsel->core.own_cpus) {
+                       err = perf_event__synthesize_event_update_cpus(tool, evsel, process);
+                       if (err < 0) {
+                               pr_err("Couldn't synthesize evsel cpus.\n");
+                               return err;
+                       }
+               }
+
+               /*
+                * Name is needed only for pipe output,
+                * perf.data carries event names.
+                */
+               if (is_pipe) {
+                       err = perf_event__synthesize_event_update_name(tool, evsel, process);
+                       if (err < 0) {
+                               pr_err("Couldn't synthesize evsel name.\n");
+                               return err;
+                       }
+               }
+       }
+       return 0;
+}
+
+/*
+ * Build a PERF_RECORD_HEADER_ATTR event holding a copy of @attr followed by
+ * the @ids sample ids in @id, and pass it to @process.
+ *
+ * Returns -ENOMEM when the event cannot be allocated, -E2BIG when the total
+ * size does not survive the conversion to the u16 header size field, and
+ * otherwise whatever @process returns.
+ */
+int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr,
+                               u32 ids, u64 *id, perf_event__handler_t process)
+{
+       const size_t ids_bytes = ids * sizeof(u64);
+       size_t total = PERF_ALIGN(sizeof(struct perf_event_attr), sizeof(u64)) +
+                      sizeof(struct perf_event_header) + ids_bytes;
+       union perf_event *event = zalloc(total);
+       int ret = -E2BIG;
+
+       if (!event)
+               return -ENOMEM;
+
+       event->attr.attr = *attr;
+       memcpy(event->attr.id, id, ids_bytes);
+
+       event->attr.header.type = PERF_RECORD_HEADER_ATTR;
+       event->attr.header.size = (u16)total;
+
+       /* Only deliver the event if the size was stored without truncation. */
+       if (event->attr.header.size == total)
+               ret = process(tool, event, NULL, NULL);
+
+       free(event);
+
+       return ret;
+}
+
+/*
+ * Synthesize a PERF_RECORD_HEADER_TRACING_DATA event and then emit the
+ * tracing data itself on @fd.
+ *
+ * Returns the u64-aligned size of the tracing data on success, or -1 when the
+ * tracing data cannot be obtained or write_padded() reports a failure.
+ */
+int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist,
+                                       perf_event__handler_t process)
+{
+       union perf_event ev;
+       struct tracing_data *tdata;
+       ssize_t size = 0, aligned_size = 0, padding;
+       struct feat_fd ff;
+
+       /*
+        * We are going to store the size of the data followed
+        * by the data contents. Since the fd descriptor is a pipe,
+        * we cannot seek back to store the size of the data once
+        * we know it. Instead we:
+        *
+        * - write the tracing data to the temp file
+        * - get/write the data size to pipe
+        * - write the tracing data from the temp file
+        *   to the pipe
+        */
+       tdata = tracing_data_get(&evlist->core.entries, fd, true);
+       if (!tdata)
+               return -1;
+
+       memset(&ev, 0, sizeof(ev));
+
+       ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
+       size = tdata->size;
+       aligned_size = PERF_ALIGN(size, sizeof(u64));
+       /* Bytes missing between the real data size and its u64-aligned size. */
+       padding = aligned_size - size;
+       ev.tracing_data.header.size = sizeof(ev.tracing_data);
+       /* The event advertises the aligned size, not the raw data size. */
+       ev.tracing_data.size = aligned_size;
+
+       /* NOTE(review): the handler's return value is not checked here. */
+       process(tool, &ev, NULL, NULL);
+
+       /*
+        * The put function will copy all the tracing data
+        * stored in temp file to the pipe.
+        */
+       tracing_data_put(tdata);
+
+       /*
+        * Presumably emits the `padding` filler bytes on @fd so the stream
+        * matches the advertised aligned size; confirm against write_padded().
+        */
+       ff = (struct feat_fd){ .fd = fd };
+       if (write_padded(&ff, NULL, 0, padding))
+               return -1;
+
+       return aligned_size;
+}
+
+/*
+ * Synthesize a PERF_RECORD_HEADER_BUILD_ID event for the dso @pos and pass it
+ * to @process. A dso that was never hit produces no event and yields 0;
+ * otherwise the handler's return value is returned.
+ */
+int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc,
+                                   perf_event__handler_t process, struct machine *machine)
+{
+       union perf_event event;
+       size_t name_size;
+
+       if (!pos->hit)
+               return 0;
+
+       /* Room for the long name plus its terminator, rounded up to NAME_ALIGN. */
+       name_size = pos->long_name_len + 1;
+       name_size = PERF_ALIGN(name_size, NAME_ALIGN);
+
+       memset(&event, 0, sizeof(event));
+
+       event.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
+       event.build_id.header.misc = misc;
+       event.build_id.header.size = sizeof(event.build_id) + name_size;
+       event.build_id.pid = machine->pid;
+
+       memcpy(&event.build_id.build_id, pos->build_id, sizeof(pos->build_id));
+       /* The terminator and alignment padding come from the memset above. */
+       memcpy(&event.build_id.filename, pos->long_name, pos->long_name_len);
+
+       return process(tool, &event, NULL, machine);
+}
+
+/*
+ * Synthesize the events describing a stat session: optionally the attrs
+ * (when @attrs is true), then the extra per-evsel attributes, the thread map,
+ * the cpu map and finally the stat config.
+ *
+ * Returns 0 on success or the negative error of the first step that failed.
+ */
+int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool,
+                                      struct evlist *evlist, perf_event__handler_t process, bool attrs)
+{
+       int err;
+
+       if (attrs) {
+               err = perf_event__synthesize_attrs(tool, evlist, process);
+               if (err < 0) {
+                       pr_err("Couldn't synthesize attrs.\n");
+                       return err;
+               }
+       }
+
+       /*
+        * perf_event__synthesize_extra_attr() reports the failing piece
+        * itself, so just propagate the error instead of dropping it.
+        */
+       err = perf_event__synthesize_extra_attr(tool, evlist, process, attrs);
+       if (err < 0)
+               return err;
+
+       err = perf_event__synthesize_thread_map2(tool, evlist->core.threads, process, NULL);
+       if (err < 0) {
+               pr_err("Couldn't synthesize thread map.\n");
+               return err;
+       }
+
+       err = perf_event__synthesize_cpu_map(tool, evlist->core.cpus, process, NULL);
+       if (err < 0) {
+               pr_err("Couldn't synthesize cpu map.\n");
+               return err;
+       }
+
+       err = perf_event__synthesize_stat_config(tool, config, process, NULL);
+       if (err < 0) {
+               pr_err("Couldn't synthesize config.\n");
+               return err;
+       }
+
+       return 0;
+}
+
+/*
+ * Default implementation, declared __weak, that synthesizes nothing and
+ * reports success (0). Presumably a stronger, architecture-specific
+ * definition replaces it where one is linked in — TODO confirm.
+ */
+int __weak perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
+                                      struct perf_tool *tool __maybe_unused,
+                                      perf_event__handler_t process __maybe_unused,
+                                      struct machine *machine __maybe_unused)
+{
+       return 0;
+}
+
+extern const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE];
+
+/*
+ * Synthesize one PERF_RECORD_HEADER_FEATURE event for every feature bit set
+ * in the session header, followed by a final event whose feat_id is
+ * HEADER_LAST_FEATURE to mark the end of the features.
+ *
+ * Features without a synthesize op, or whose write op fails or produces no
+ * payload, are skipped with a debug message. Returns -ENOMEM when the scratch
+ * buffer cannot be allocated, the first non-zero value returned by @process
+ * for a feature event, or else the result of delivering the end marker.
+ */
+int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session,
+                                   struct evlist *evlist, perf_event__handler_t process)
+{
+       struct perf_header *header = &session->header;
+       struct perf_record_header_feature *fe;
+       struct feat_fd ff;
+       size_t sz, sz_hdr;
+       int feat, ret;
+
+       sz_hdr = sizeof(fe->header);
+       sz = sizeof(union perf_event);
+       /* get a nice alignment */
+       sz = PERF_ALIGN(sz, page_size);
+
+       memset(&ff, 0, sizeof(ff));
+
+       ff.buf = malloc(sz);
+       if (!ff.buf)
+               return -ENOMEM;
+
+       ff.size = sz - sz_hdr;
+       ff.ph = &session->header;
+
+       for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
+               if (!feat_ops[feat].synthesize) {
+                       pr_debug("No record header feature for header :%d\n", feat);
+                       continue;
+               }
+
+               /* The payload is written after the space kept for the record header. */
+               ff.offset = sizeof(*fe);
+
+               ret = feat_ops[feat].write(&ff, evlist);
+               if (ret || ff.offset <= (ssize_t)sizeof(*fe)) {
+                       pr_debug("Error writing feature\n");
+                       continue;
+               }
+               /* ff.buf may have changed due to realloc in do_write() */
+               fe = ff.buf;
+               memset(fe, 0, sizeof(*fe));
+
+               fe->feat_id = feat;
+               fe->header.type = PERF_RECORD_HEADER_FEATURE;
+               fe->header.size = ff.offset;
+
+               ret = process(tool, ff.buf, NULL, NULL);
+               if (ret) {
+                       free(ff.buf);
+                       return ret;
+               }
+       }
+
+       /*
+        * Send HEADER_LAST_FEATURE mark. Clear the record header first: if no
+        * feature event was emitted above, the malloc()ed buffer still holds
+        * uninitialized bytes that would otherwise leak into the header fields
+        * not assigned below.
+        */
+       fe = ff.buf;
+       memset(fe, 0, sizeof(*fe));
+       fe->feat_id     = HEADER_LAST_FEATURE;
+       fe->header.type = PERF_RECORD_HEADER_FEATURE;
+       fe->header.size = sizeof(*fe);
+
+       ret = process(tool, ff.buf, NULL, NULL);
+
+       free(ff.buf);
+       return ret;
+}
diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
new file mode 100644 (file)
index 0000000..baead0c
--- /dev/null
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SYNTHETIC_EVENTS_H
+#define __PERF_SYNTHETIC_EVENTS_H
+
+#include <stdbool.h>
+#include <sys/types.h> // pid_t
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+struct auxtrace_record;
+struct dso;
+struct evlist;
+struct evsel;
+struct machine;
+struct perf_counts_values;
+struct perf_cpu_map;
+struct perf_event_attr;
+struct perf_event_mmap_page;
+struct perf_sample;
+struct perf_session;
+struct perf_stat_config;
+struct perf_thread_map;
+struct perf_tool;
+struct record_opts;
+struct target;
+
+union perf_event;
+
+typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *event,
+                                    struct perf_sample *sample, struct machine *machine);
+
+int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process);
+int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_cpu_map(struct perf_tool *tool, struct perf_cpu_map *cpus, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list, perf_event__handler_t process, bool is_pipe);
+int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine);
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data);
+int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_namespaces(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample);
+int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs);
+int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_stat(struct perf_tool *tool, u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool mmap_data);
+int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool mmap_data, unsigned int nr_threads_synthesize);
+int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine);
+
+int perf_tool__process_synth_event(struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process);
+
+size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format);
+
+int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
+                                 struct target *target, struct perf_thread_map *threads,
+                                 perf_event__handler_t process, bool data_mmap,
+                                 unsigned int nr_threads_synthesize);
+int machine__synthesize_threads(struct machine *machine, struct target *target,
+                               struct perf_thread_map *threads, bool data_mmap,
+                               unsigned int nr_threads_synthesize);
+
+#ifdef HAVE_AUXTRACE_SUPPORT
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, struct perf_tool *tool,
+                                        struct perf_session *session, perf_event__handler_t process);
+
+#else // HAVE_AUXTRACE_SUPPORT
+
+#include <errno.h>
+
+/*
+ * Stub used when HAVE_AUXTRACE_SUPPORT is not defined: ignores all of its
+ * arguments and always fails with -EINVAL.
+ */
+static inline int
+perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused,
+                                    struct perf_tool *tool __maybe_unused,
+                                    struct perf_session *session __maybe_unused,
+                                    perf_event__handler_t process __maybe_unused)
+{
+       return -EINVAL;
+}
+#endif // HAVE_AUXTRACE_SUPPORT
+
+#ifdef HAVE_LIBBPF_SUPPORT
+int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process,
+                                     struct machine *machine, struct record_opts *opts);
+#else // HAVE_LIBBPF_SUPPORT
+/*
+ * Stub used when HAVE_LIBBPF_SUPPORT is not defined: ignores all of its
+ * arguments, synthesizes nothing and reports success (0).
+ */
+static inline int perf_event__synthesize_bpf_events(struct perf_session *session __maybe_unused,
+                                                   perf_event__handler_t process __maybe_unused,
+                                                   struct machine *machine __maybe_unused,
+                                                   struct record_opts *opts __maybe_unused)
+{
+       return 0;
+}
+#endif // HAVE_LIBBPF_SUPPORT
+
+#endif // __PERF_SYNTHETIC_EVENTS_H
index 565f7ae..a3db13d 100644 (file)
@@ -6,8 +6,6 @@
  */
 
 #include "target.h"
-#include "util.h"
-#include "debug.h"
 
 #include <pwd.h>
 #include <stdio.h>
index 51fb574..3dce2de 100644 (file)
@@ -5,7 +5,6 @@
  * Refactored from builtin-top.c, see that files for further copyright notes.
  */
 
-#include "cpumap.h"
 #include "event.h"
 #include "evlist.h"
 #include "evsel.h"
@@ -72,7 +71,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
        }
 
        if (top->evlist->core.nr_entries == 1) {
-               struct evsel *first = perf_evlist__first(top->evlist);
+               struct evsel *first = evlist__first(top->evlist);
                ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ",
                                (uint64_t)first->core.attr.sample_period,
                                opts->freq ? "Hz" : "");
index d63d542..086e98f 100644 (file)
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2008,2009, Steven Rostedt <srostedt@redhat.com>
  */
-#include "util.h"
 #include <dirent.h>
 #include <mntent.h>
 #include <stdio.h>
@@ -19,6 +18,7 @@
 #include <linux/list.h>
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
+#include <internal/lib.h> // page_size
 
 #include "trace-event.h"
 #include <api/fs/tracing_path.h>
index b6c0db0..8593d3c 100644 (file)
@@ -15,7 +15,6 @@
 #include <unistd.h>
 #include <errno.h>
 
-#include "util.h"
 #include "trace-event.h"
 #include "debug.h"
 
index 01b9d89..b3ee651 100644 (file)
@@ -14,7 +14,6 @@
 #include <api/fs/fs.h>
 #include "trace-event.h"
 #include "machine.h"
-#include "util.h"
 
 /*
  * global trace_event object used by trace_event__tp_format
index e0c3af3..3c5a632 100644 (file)
@@ -4,13 +4,12 @@
 
 #include <linux/types.h>
 
-#include "event.h"
-
 struct perf_tsc_conversion {
        u16 time_shift;
        u32 time_mult;
        u64 time_zero;
 };
+
 struct perf_event_mmap_page;
 
 int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
@@ -20,13 +19,4 @@ u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
 u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
 u64 rdtsc(void);
 
-struct perf_event_mmap_page;
-struct perf_tool;
-struct machine;
-
-int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
-                               struct perf_tool *tool,
-                               perf_event__handler_t process,
-                               struct machine *machine);
-
-#endif
+#endif // __PERF_TSC_H
index 9ece188..15f6e46 100644 (file)
@@ -17,7 +17,6 @@
 #include "event.h"
 #include "perf_regs.h"
 #include "callchain.h"
-#include "util.h"
 
 static char *debuginfo_path;
 
index ebdbb05..1800887 100644 (file)
@@ -37,7 +37,6 @@
 #include "unwind.h"
 #include "map.h"
 #include "symbol.h"
-#include "util.h"
 #include "debug.h"
 #include "asm/bug.h"
 #include "dso.h"
index 3949a60..196438e 100644 (file)
@@ -8,7 +8,6 @@
  * Copyright (C) Linus Torvalds, 2005
  */
 #include "util.h"
-#include "debug.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <linux/compiler.h>
index 32322a2..5eda6e1 100644 (file)
@@ -2,9 +2,7 @@
 #include "util.h"
 #include "debug.h"
 #include "event.h"
-#include "namespaces.h"
 #include <api/fs/fs.h>
-#include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/utsname.h>
 #include <dirent.h>
@@ -41,8 +39,6 @@ void perf_set_multithreaded(void)
        perf_singlethreaded = false;
 }
 
-unsigned int page_size;
-
 int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH;
 int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK;
 
@@ -234,138 +230,6 @@ out:
        return list;
 }
 
-static int slow_copyfile(const char *from, const char *to, struct nsinfo *nsi)
-{
-       int err = -1;
-       char *line = NULL;
-       size_t n;
-       FILE *from_fp, *to_fp;
-       struct nscookie nsc;
-
-       nsinfo__mountns_enter(nsi, &nsc);
-       from_fp = fopen(from, "r");
-       nsinfo__mountns_exit(&nsc);
-       if (from_fp == NULL)
-               goto out;
-
-       to_fp = fopen(to, "w");
-       if (to_fp == NULL)
-               goto out_fclose_from;
-
-       while (getline(&line, &n, from_fp) > 0)
-               if (fputs(line, to_fp) == EOF)
-                       goto out_fclose_to;
-       err = 0;
-out_fclose_to:
-       fclose(to_fp);
-       free(line);
-out_fclose_from:
-       fclose(from_fp);
-out:
-       return err;
-}
-
-int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
-{
-       void *ptr;
-       loff_t pgoff;
-
-       pgoff = off_in & ~(page_size - 1);
-       off_in -= pgoff;
-
-       ptr = mmap(NULL, off_in + size, PROT_READ, MAP_PRIVATE, ifd, pgoff);
-       if (ptr == MAP_FAILED)
-               return -1;
-
-       while (size) {
-               ssize_t ret = pwrite(ofd, ptr + off_in, size, off_out);
-               if (ret < 0 && errno == EINTR)
-                       continue;
-               if (ret <= 0)
-                       break;
-
-               size -= ret;
-               off_in += ret;
-               off_out += ret;
-       }
-       munmap(ptr, off_in + size);
-
-       return size ? -1 : 0;
-}
-
-static int copyfile_mode_ns(const char *from, const char *to, mode_t mode,
-                           struct nsinfo *nsi)
-{
-       int fromfd, tofd;
-       struct stat st;
-       int err;
-       char *tmp = NULL, *ptr = NULL;
-       struct nscookie nsc;
-
-       nsinfo__mountns_enter(nsi, &nsc);
-       err = stat(from, &st);
-       nsinfo__mountns_exit(&nsc);
-       if (err)
-               goto out;
-       err = -1;
-
-       /* extra 'x' at the end is to reserve space for '.' */
-       if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) {
-               tmp = NULL;
-               goto out;
-       }
-       ptr = strrchr(tmp, '/');
-       if (!ptr)
-               goto out;
-       ptr = memmove(ptr + 1, ptr, strlen(ptr) - 1);
-       *ptr = '.';
-
-       tofd = mkstemp(tmp);
-       if (tofd < 0)
-               goto out;
-
-       if (fchmod(tofd, mode))
-               goto out_close_to;
-
-       if (st.st_size == 0) { /* /proc? do it slowly... */
-               err = slow_copyfile(from, tmp, nsi);
-               goto out_close_to;
-       }
-
-       nsinfo__mountns_enter(nsi, &nsc);
-       fromfd = open(from, O_RDONLY);
-       nsinfo__mountns_exit(&nsc);
-       if (fromfd < 0)
-               goto out_close_to;
-
-       err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size);
-
-       close(fromfd);
-out_close_to:
-       close(tofd);
-       if (!err)
-               err = link(tmp, to);
-       unlink(tmp);
-out:
-       free(tmp);
-       return err;
-}
-
-int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi)
-{
-       return copyfile_mode_ns(from, to, 0755, nsi);
-}
-
-int copyfile_mode(const char *from, const char *to, mode_t mode)
-{
-       return copyfile_mode_ns(from, to, mode, NULL);
-}
-
-int copyfile(const char *from, const char *to)
-{
-       return copyfile_mode(from, to, 0755);
-}
-
 size_t hex_width(u64 v)
 {
        size_t n = 1;
index 45a5c6f..9969b8b 100644 (file)
 #include <stddef.h>
 #include <linux/compiler.h>
 #include <sys/types.h>
-#include <internal/lib.h>
 
 /* General helper functions */
 void usage(const char *err) __noreturn;
 void die(const char *err, ...) __noreturn __printf(1, 2);
 
 struct dirent;
-struct nsinfo;
 struct strlist;
 
 int mkdir_p(char *path, mode_t mode);
@@ -26,15 +24,9 @@ int rm_rf(const char *path);
 int rm_rf_perf_data(const char *path);
 struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *));
 bool lsdir_no_dot_filter(const char *name, struct dirent *d);
-int copyfile(const char *from, const char *to);
-int copyfile_mode(const char *from, const char *to, mode_t mode);
-int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi);
-int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size);
 
 size_t hex_width(u64 v);
 
-extern unsigned int page_size;
-
 int sysctl__max_stack(void);
 
 int fetch_kernel_version(unsigned int *puint,
index e5e6599..ba4b439 100644 (file)
@@ -11,7 +11,7 @@
 
 #include "vdso.h"
 #include "dso.h"
-#include "util.h"
+#include <internal/lib.h>
 #include "map.h"
 #include "symbol.h"
 #include "machine.h"
index 59d456f..78d2297 100644 (file)
@@ -7,11 +7,9 @@
 #include <sys/mman.h>
 #include <zlib.h>
 #include <linux/compiler.h>
+#include <internal/lib.h>
 
 #include "util/compress.h"
-#include "util/util.h"
-#include "util/debug.h"
-
 
 #define CHUNK_SIZE  16384
 
index 59753b3..2a9890c 100644 (file)
@@ -38,6 +38,7 @@ static int fact_avx = 0xFF;
 static unsigned long long fact_trl;
 static int out_format_json;
 static int cmd_help;
+static int force_online_offline;
 
 /* clos related */
 static int current_clos = -1;
@@ -138,14 +139,14 @@ int out_format_is_json(void)
 int get_physical_package_id(int cpu)
 {
        return parse_int_file(
-               1, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id",
+               0, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id",
                cpu);
 }
 
 int get_physical_core_id(int cpu)
 {
        return parse_int_file(
-               1, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
+               0, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
 }
 
 int get_physical_die_id(int cpu)
@@ -165,6 +166,26 @@ int get_topo_max_cpus(void)
        return topo_max_cpus;
 }
 
+/*
+ * Bring @cpu online (@state non-zero) or take it offline (@state zero) by
+ * writing "1" or "0" to /sys/devices/system/cpu/cpu<N>/online.
+ *
+ * Exits through err() when the sysfs file cannot be opened. A failed write
+ * is reported on stderr but is not fatal.
+ */
+static void set_cpu_online_offline(int cpu, int state)
+{
+       char buffer[128];
+       ssize_t ret;
+       int fd;
+
+       snprintf(buffer, sizeof(buffer),
+                "/sys/devices/system/cpu/cpu%d/online", cpu);
+
+       fd = open(buffer, O_WRONLY);
+       if (fd < 0)
+               err(-1, "%s open failed", buffer);
+
+       if (state)
+               ret = write(fd, "1\n", 2);
+       else
+               ret = write(fd, "0\n", 2);
+
+       /* Do not let a rejected online/offline request pass silently. */
+       if (ret < 0)
+               perror(buffer);
+
+       close(fd);
+}
+
 #define MAX_PACKAGE_COUNT 8
 #define MAX_DIE_PER_PACKAGE 2
 static void for_each_online_package_in_set(void (*callback)(int, void *, void *,
@@ -402,6 +423,9 @@ void set_cpu_mask_from_punit_coremask(int cpu, unsigned long long core_mask,
                        int j;
 
                        for (j = 0; j < topo_max_cpus; ++j) {
+                               if (!CPU_ISSET_S(j, present_cpumask_size, present_cpumask))
+                                       continue;
+
                                if (cpu_map[j].pkg_id == pkg_id &&
                                    cpu_map[j].die_id == die_id &&
                                    cpu_map[j].punit_cpu_core == i) {
@@ -484,7 +508,7 @@ int isst_send_mbox_command(unsigned int cpu, unsigned char command,
                int write = 0;
                int clos_id, core_id, ret = 0;
 
-               debug_printf("CLOS %d\n", cpu);
+               debug_printf("CPU %d\n", cpu);
 
                if (parameter & BIT(MBOX_CMD_WRITE_BIT)) {
                        value = req_data;
@@ -649,8 +673,8 @@ static void exec_on_get_ctdp_cpu(int cpu, void *arg1, void *arg2, void *arg3,
        if (ret)
                perror("get_tdp_*");
        else
-               isst_display_result(cpu, outf, "perf-profile", (char *)arg3,
-                                   *(unsigned int *)arg4);
+               isst_ctdp_display_core_info(cpu, outf, arg3,
+                                           *(unsigned int *)arg4);
 }
 
 #define _get_tdp_level(desc, suffix, object, help)                                \
@@ -733,9 +757,34 @@ static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
        ret = isst_set_tdp_level(cpu, tdp_level);
        if (ret)
                perror("set_tdp_level_for_cpu");
-       else
+       else {
                isst_display_result(cpu, outf, "perf-profile", "set_tdp_level",
                                    ret);
+               if (force_online_offline) {
+                       struct isst_pkg_ctdp_level_info ctdp_level;
+                       int pkg_id = get_physical_package_id(cpu);
+                       int die_id = get_physical_die_id(cpu);
+
+                       fprintf(stderr, "Option is set to online/offline\n");
+                       ctdp_level.core_cpumask_size =
+                               alloc_cpu_set(&ctdp_level.core_cpumask);
+                       isst_get_coremask_info(cpu, tdp_level, &ctdp_level);
+                       if (ctdp_level.cpu_count) {
+                               int i, max_cpus = get_topo_max_cpus();
+                               for (i = 0; i < max_cpus; ++i) {
+                                       if (pkg_id != get_physical_package_id(i) || die_id != get_physical_die_id(i))
+                                               continue;
+                                       if (CPU_ISSET_S(i, ctdp_level.core_cpumask_size, ctdp_level.core_cpumask)) {
+                                               fprintf(stderr, "online cpu %d\n", i);
+                                               set_cpu_online_offline(i, 1);
+                                       } else {
+                                               fprintf(stderr, "offline cpu %d\n", i);
+                                               set_cpu_online_offline(i, 0);
+                                       }
+                               }
+                       }
+               }
+       }
 }
 
 static void set_tdp_level(void)
@@ -744,6 +793,8 @@ static void set_tdp_level(void)
                fprintf(stderr, "Set Config TDP level\n");
                fprintf(stderr,
                        "\t Arguments: -l|--level : Specify tdp level\n");
+               fprintf(stderr,
+                       "\t Optional Arguments: -o | online : online/offline for the tdp level\n");
                exit(0);
        }
 
@@ -1082,6 +1133,40 @@ static void dump_clos_config(void)
        isst_ctdp_display_information_end(outf);
 }
 
+/*
+ * Per-cpu callback: fetch the CLOS enable state and priority type of @cpu and
+ * display them on outf, or report the failure with perror(). The four
+ * generic callback arguments are not used.
+ */
+static void get_clos_info_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
+                                 void *arg4)
+{
+       int prio_type, enable;
+
+       if (isst_clos_get_clos_information(cpu, &enable, &prio_type)) {
+               perror("isst_clos_get_info");
+               return;
+       }
+
+       isst_clos_display_clos_information(cpu, outf, enable, prio_type);
+}
+
+/*
+ * Command handler that displays the core power (CLOS) information of every
+ * online targeted cpu between the display start/end calls. With cmd_help set
+ * it only prints its usage; without any target cpu it prints an error. Both
+ * of those paths end with exit(0).
+ */
+static void dump_clos_info(void)
+{
+       if (cmd_help) {
+               fputs("Print Intel Speed Select Technology core power information\n",
+                     stderr);
+               fputs("\tSpecify targeted cpu id with [--cpu|-c]\n", stderr);
+               exit(0);
+       }
+
+       if (!max_target_cpus) {
+               fputs("Invalid target cpu. Specify with [-c|--cpu]\n", stderr);
+               exit(0);
+       }
+
+       isst_ctdp_display_information_start(outf);
+       for_each_online_target_cpu_in_set(get_clos_info_for_cpu,
+                                         NULL, NULL, NULL, NULL);
+       isst_ctdp_display_information_end(outf);
+}
+
 static void set_clos_config_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
                                    void *arg4)
 {
@@ -1198,7 +1283,7 @@ static void get_clos_assoc_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
        if (ret)
                perror("isst_clos_get_assoc_status");
        else
-               isst_display_result(cpu, outf, "core-power", "get-assoc", clos);
+               isst_clos_display_assoc_information(cpu, outf, clos);
 }
 
 static void get_clos_assoc(void)
@@ -1208,13 +1293,17 @@ static void get_clos_assoc(void)
                fprintf(stderr, "\tSpecify targeted cpu id with [--cpu|-c]\n");
                exit(0);
        }
-       if (max_target_cpus)
-               for_each_online_target_cpu_in_set(get_clos_assoc_for_cpu, NULL,
-                                                 NULL, NULL, NULL);
-       else {
+
+       if (!max_target_cpus) {
                fprintf(stderr,
                        "Invalid target cpu. Specify with [-c|--cpu]\n");
+               exit(0);
        }
+
+       isst_ctdp_display_information_start(outf);
+       for_each_online_target_cpu_in_set(get_clos_assoc_for_cpu, NULL,
+                                         NULL, NULL, NULL);
+       isst_ctdp_display_information_end(outf);
 }
 
 static struct process_cmd_struct isst_cmds[] = {
@@ -1231,10 +1320,11 @@ static struct process_cmd_struct isst_cmds[] = {
        { "turbo-freq", "info", dump_fact_config },
        { "turbo-freq", "enable", set_fact_enable },
        { "turbo-freq", "disable", set_fact_disable },
-       { "core-power", "info", dump_clos_config },
+       { "core-power", "info", dump_clos_info },
        { "core-power", "enable", set_clos_enable },
        { "core-power", "disable", set_clos_disable },
        { "core-power", "config", set_clos_config },
+       { "core-power", "get-config", dump_clos_config },
        { "core-power", "assoc", set_clos_assoc },
        { "core-power", "get-assoc", get_clos_assoc },
        { NULL, NULL, NULL }
@@ -1316,6 +1406,7 @@ static void parse_cmd_args(int argc, int start, char **argv)
        static struct option long_options[] = {
                { "bucket", required_argument, 0, 'b' },
                { "level", required_argument, 0, 'l' },
+               { "online", required_argument, 0, 'o' },
                { "trl-type", required_argument, 0, 'r' },
                { "trl", required_argument, 0, 't' },
                { "help", no_argument, 0, 'h' },
@@ -1332,7 +1423,7 @@ static void parse_cmd_args(int argc, int start, char **argv)
        option_index = start;
 
        optind = start + 1;
-       while ((opt = getopt_long(argc, argv, "b:l:t:c:d:e:n:m:p:w:h",
+       while ((opt = getopt_long(argc, argv, "b:l:t:c:d:e:n:m:p:w:ho",
                                  long_options, &option_index)) != -1) {
                switch (opt) {
                case 'b':
@@ -1344,6 +1435,9 @@ static void parse_cmd_args(int argc, int start, char **argv)
                case 'l':
                        tdp_level = atoi(optarg);
                        break;
+               case 'o':
+                       force_online_offline = 1;
+                       break;
                case 't':
                        sscanf(optarg, "0x%llx", &fact_trl);
                        break;
@@ -1362,7 +1456,6 @@ static void parse_cmd_args(int argc, int start, char **argv)
                /* CLOS related */
                case 'c':
                        current_clos = atoi(optarg);
-                       printf("clos %d\n", current_clos);
                        break;
                case 'd':
                        clos_desired = atoi(optarg);
@@ -1433,6 +1526,7 @@ static void core_power_help(void)
        printf("\tenable\n");
        printf("\tdisable\n");
        printf("\tconfig\n");
+       printf("\tget-config\n");
        printf("\tassoc\n");
        printf("\tget-assoc\n");
 }
index 0bf341a..6dee533 100644 (file)
@@ -619,6 +619,31 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev)
        return 0;
 }
 
+int isst_clos_get_clos_information(int cpu, int *enable, int *type)
+{
+       unsigned int resp;
+       int ret;
+
+       ret = isst_send_mbox_command(cpu, CONFIG_CLOS, CLOS_PM_QOS_CONFIG, 0, 0,
+                                    &resp);
+       if (ret)
+               return ret;
+
+       debug_printf("cpu:%d CLOS_PM_QOS_CONFIG resp:%x\n", cpu, resp);
+
+       if (resp & BIT(1))
+               *enable = 1;
+       else
+               *enable = 0;
+
+       if (resp & BIT(2))
+               *type = 1;
+       else
+               *type = 0;
+
+       return 0;
+}
+
 int isst_pm_qos_config(int cpu, int enable_clos, int priority_type)
 {
        unsigned int req, resp;
index df4aa99..40346d5 100644 (file)
@@ -287,6 +287,26 @@ static void _isst_fact_display_information(int cpu, FILE *outf, int level,
        format_and_print(outf, base_level + 2, header, value);
 }
 
+void isst_ctdp_display_core_info(int cpu, FILE *outf, char *prefix,
+                                unsigned int val)
+{
+       char header[256];
+       char value[256];
+
+       snprintf(header, sizeof(header), "package-%d",
+                get_physical_package_id(cpu));
+       format_and_print(outf, 1, header, NULL);
+       snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
+       format_and_print(outf, 2, header, NULL);
+       snprintf(header, sizeof(header), "cpu-%d", cpu);
+       format_and_print(outf, 3, header, NULL);
+
+       snprintf(value, sizeof(value), "%u", val);
+       format_and_print(outf, 4, prefix, value);
+
+       format_and_print(outf, 1, NULL, NULL);
+}
+
 void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
                                   struct isst_pkg_ctdp *pkg_dev)
 {
@@ -503,6 +523,57 @@ void isst_clos_display_information(int cpu, FILE *outf, int clos,
        format_and_print(outf, 1, NULL, NULL);
 }
 
+void isst_clos_display_clos_information(int cpu, FILE *outf,
+                                       int clos_enable, int type)
+{
+       char header[256];
+       char value[256];
+
+       snprintf(header, sizeof(header), "package-%d",
+                get_physical_package_id(cpu));
+       format_and_print(outf, 1, header, NULL);
+       snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
+       format_and_print(outf, 2, header, NULL);
+       snprintf(header, sizeof(header), "cpu-%d", cpu);
+       format_and_print(outf, 3, header, NULL);
+
+       snprintf(header, sizeof(header), "core-power");
+       format_and_print(outf, 4, header, NULL);
+
+       snprintf(header, sizeof(header), "enable-status");
+       snprintf(value, sizeof(value), "%d", clos_enable);
+       format_and_print(outf, 5, header, value);
+
+       snprintf(header, sizeof(header), "priority-type");
+       snprintf(value, sizeof(value), "%d", type);
+       format_and_print(outf, 5, header, value);
+
+       format_and_print(outf, 1, NULL, NULL);
+}
+
+void isst_clos_display_assoc_information(int cpu, FILE *outf, int clos)
+{
+       char header[256];
+       char value[256];
+
+       snprintf(header, sizeof(header), "package-%d",
+                get_physical_package_id(cpu));
+       format_and_print(outf, 1, header, NULL);
+       snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
+       format_and_print(outf, 2, header, NULL);
+       snprintf(header, sizeof(header), "cpu-%d", cpu);
+       format_and_print(outf, 3, header, NULL);
+
+       snprintf(header, sizeof(header), "get-assoc");
+       format_and_print(outf, 4, header, NULL);
+
+       snprintf(header, sizeof(header), "clos");
+       snprintf(value, sizeof(value), "%d", clos);
+       format_and_print(outf, 5, header, value);
+
+       format_and_print(outf, 1, NULL, NULL);
+}
+
 void isst_display_result(int cpu, FILE *outf, char *feature, char *cmd,
                         int result)
 {
index 2f7f627..d280b27 100644 (file)
@@ -187,12 +187,16 @@ extern int isst_send_msr_command(unsigned int cpu, unsigned int command,
                                 int write, unsigned long long *req_resp);
 
 extern int isst_get_ctdp_levels(int cpu, struct isst_pkg_ctdp *pkg_dev);
+extern int isst_get_coremask_info(int cpu, int config_index,
+                          struct isst_pkg_ctdp_level_info *ctdp_level);
 extern int isst_get_process_ctdp(int cpu, int tdp_level,
                                 struct isst_pkg_ctdp *pkg_dev);
 extern void isst_get_process_ctdp_complete(int cpu,
                                           struct isst_pkg_ctdp *pkg_dev);
 extern void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
                                          struct isst_pkg_ctdp *pkg_dev);
+extern void isst_ctdp_display_core_info(int cpu, FILE *outf, char *prefix,
+                                       unsigned int val);
 extern void isst_ctdp_display_information_start(FILE *outf);
 extern void isst_ctdp_display_information_end(FILE *outf);
 extern void isst_pbf_display_information(int cpu, FILE *outf, int level,
@@ -223,10 +227,14 @@ extern int isst_clos_associate(int cpu, int clos);
 extern int isst_clos_get_assoc_status(int cpu, int *clos_id);
 extern void isst_clos_display_information(int cpu, FILE *outf, int clos,
                                          struct isst_clos_config *clos_config);
-
+extern void isst_clos_display_assoc_information(int cpu, FILE *outf, int clos);
 extern int isst_read_reg(unsigned short reg, unsigned int *val);
 extern int isst_write_reg(int reg, unsigned int val);
 
 extern void isst_display_result(int cpu, FILE *outf, char *feature, char *cmd,
                                int result);
+
+extern int isst_clos_get_clos_information(int cpu, int *enable, int *type);
+extern void isst_clos_display_clos_information(int cpu, FILE *outf,
+                                              int clos_enable, int type);
 #endif
index 2d4baf5..57bd27d 100644 (file)
@@ -18,24 +18,13 @@ ssize_t security_show(struct device *dev,
         * For the test version we need to poll the "hardware" in order
         * to get the updated status for unlock testing.
         */
-       nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
-       nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, NVDIMM_MASTER);
+       nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
 
-       switch (nvdimm->sec.state) {
-       case NVDIMM_SECURITY_DISABLED:
+       if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags))
                return sprintf(buf, "disabled\n");
-       case NVDIMM_SECURITY_UNLOCKED:
+       if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags))
                return sprintf(buf, "unlocked\n");
-       case NVDIMM_SECURITY_LOCKED:
+       if (test_bit(NVDIMM_SECURITY_LOCKED, &nvdimm->sec.flags))
                return sprintf(buf, "locked\n");
-       case NVDIMM_SECURITY_FROZEN:
-               return sprintf(buf, "frozen\n");
-       case NVDIMM_SECURITY_OVERWRITE:
-               return sprintf(buf, "overwrite\n");
-       default:
-               return -ENOTTY;
-       }
-
        return -ENOTTY;
 }
-
index cd040b5..3f55f2f 100644 (file)
@@ -132,7 +132,6 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
        if (!nfit_res)
                return devm_memremap_pages(dev, pgmap);
 
-       pgmap->dev = dev;
        if (!pgmap->ref) {
                if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
                        return ERR_PTR(-EINVAL);
index 507e6f4..bf6422a 100644 (file)
@@ -428,10 +428,9 @@ static int nd_intel_test_finish_query(struct nfit_test *t,
                        dev_dbg(dev, "%s: still verifying\n", __func__);
                        break;
                }
-
                dev_dbg(dev, "%s: transition out verify\n", __func__);
                fw->state = FW_STATE_UPDATED;
-               /* we are going to fall through if it's "done" */
+               /* fall through */
        case FW_STATE_UPDATED:
                nd_cmd->status = 0;
                /* bogus test version */
index 8059ce8..61df01c 100644 (file)
@@ -2,3 +2,5 @@ gpiogpio-event-mon
 gpiogpio-hammer
 gpioinclude/
 gpiolsgpio
+tpm2/SpaceTest.log
+tpm2/*.pyc
index 25b43a8..c3feccb 100644 (file)
@@ -126,9 +126,9 @@ endif
 # in the default INSTALL_HDR_PATH usr/include.
 khdr:
 ifeq (1,$(DEFAULT_INSTALL_HDR_PATH))
-       make --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
+       $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
 else
-       make --no-builtin-rules INSTALL_HDR_PATH=$$BUILD/usr \
+       $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$BUILD/usr \
                ARCH=$(ARCH) -C $(top_srcdir) headers_install
 endif
 
@@ -136,35 +136,35 @@ all: khdr
        @for TARGET in $(TARGETS); do           \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
                mkdir $$BUILD_TARGET  -p;       \
-               make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET;\
        done;
 
 run_tests: all
        @for TARGET in $(TARGETS); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
-               make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
        done;
 
 hotplug:
        @for TARGET in $(TARGETS_HOTPLUG); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
-               make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET;\
        done;
 
 run_hotplug: hotplug
        @for TARGET in $(TARGETS_HOTPLUG); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
-               make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\
        done;
 
 clean_hotplug:
        @for TARGET in $(TARGETS_HOTPLUG); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
-               make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
        done;
 
 run_pstore_crash:
-       make -C pstore run_crash
+       $(MAKE) -C pstore run_crash
 
 # Use $BUILD as the default install root. $BUILD points to the
 # right output location for the following cases:
@@ -184,7 +184,7 @@ ifdef INSTALL_PATH
        install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/
        @for TARGET in $(TARGETS); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
-               make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
        done;
 
        @# Ask all targets to emit their test scripts
@@ -203,7 +203,7 @@ ifdef INSTALL_PATH
                echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \
                echo "cd $$TARGET" >> $(ALL_SCRIPT); \
                echo -n "run_many" >> $(ALL_SCRIPT); \
-               make -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
+               $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
                echo "" >> $(ALL_SCRIPT);           \
                echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
        done;
@@ -216,7 +216,7 @@ endif
 clean:
        @for TARGET in $(TARGETS); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
-               make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
        done;
 
 .PHONY: khdr all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean
index 7717c0a..ac73850 100644 (file)
@@ -28,7 +28,7 @@ if [ -z "$FEATURE" ]; then
     exit_unsupported
 fi
 
-echo "Test snapshot tigger"
+echo "Test snapshot trigger"
 echo 0 > snapshot
 echo 1 > events/sched/sched_process_fork/enable
 ( echo "forked")
index fa7c24e..2ff6003 100755 (executable)
@@ -37,11 +37,20 @@ is_ima_sig_required()
        # sequentially.  As a result, a policy rule may be defined, but
        # might not necessarily be used.  This test assumes if a policy
        # rule is specified, that is the intent.
+
+       # First check for appended signature (modsig), then xattr
        if [ $ima_read_policy -eq 1 ]; then
                check_ima_policy "appraise" "func=KEXEC_KERNEL_CHECK" \
-                       "appraise_type=imasig"
+                       "appraise_type=imasig|modsig"
                ret=$?
-               [ $ret -eq 1 ] && log_info "IMA signature required";
+               if [ $ret -eq 1 ]; then
+                       log_info "IMA or appended(modsig) signature required"
+               else
+                       check_ima_policy "appraise" "func=KEXEC_KERNEL_CHECK" \
+                               "appraise_type=imasig"
+                       ret=$?
+                       [ $ret -eq 1 ] && log_info "IMA signature required";
+               fi
        fi
        return $ret
 }
@@ -84,6 +93,22 @@ check_for_imasig()
        return $ret
 }
 
+# Return 1 for appended signature (modsig) found and 0 for not found.
+check_for_modsig()
+{
+       local module_sig_string="~Module signature appended~"
+       local sig="$(tail --bytes $((${#module_sig_string} + 1)) $KERNEL_IMAGE)"
+       local ret=0
+
+       if [ "$sig" == "$module_sig_string" ]; then
+               ret=1
+               log_info "kexec kernel image modsig signed"
+       else
+               log_info "kexec kernel image not modsig signed"
+       fi
+       return $ret
+}
+
 kexec_file_load_test()
 {
        local succeed_msg="kexec_file_load succeeded"
@@ -98,7 +123,8 @@ kexec_file_load_test()
                # In secureboot mode with an architecture  specific
                # policy, make sure either an IMA or PE signature exists.
                if [ $secureboot -eq 1 ] && [ $arch_policy -eq 1 ] && \
-                       [ $ima_signed -eq 0 ] && [ $pe_signed -eq 0 ]; then
+                       [ $ima_signed -eq 0 ] && [ $pe_signed -eq 0 ] \
+                         && [ $ima_modsig -eq 0 ]; then
                        log_fail "$succeed_msg (missing sig)"
                fi
 
@@ -107,7 +133,8 @@ kexec_file_load_test()
                        log_fail "$succeed_msg (missing PE sig)"
                fi
 
-               if [ $ima_sig_required -eq 1 ] && [ $ima_signed -eq 0 ]; then
+               if [ $ima_sig_required -eq 1 ] && [ $ima_signed -eq 0 ] \
+                    && [ $ima_modsig -eq 0 ]; then
                        log_fail "$succeed_msg (missing IMA sig)"
                fi
 
@@ -204,5 +231,8 @@ pe_signed=$?
 check_for_imasig
 ima_signed=$?
 
+check_for_modsig
+ima_modsig=$?
+
 # Test loading the kernel image via kexec_file_load syscall
 kexec_file_load_test
index dc3346e..5614222 100644 (file)
@@ -19,8 +19,6 @@
 #include "kvm_util.h"
 #include "processor.h"
 
-#define DEBUG printf
-
 #define VCPU_ID                                1
 
 /* The memory slot index to track dirty pages */
@@ -249,14 +247,12 @@ static void vm_dirty_log_verify(unsigned long *bmap)
 }
 
 static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
-                               uint64_t extra_mem_pages, void *guest_code,
-                               unsigned long type)
+                               uint64_t extra_mem_pages, void *guest_code)
 {
        struct kvm_vm *vm;
        uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
 
-       vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
-                       O_RDWR, type);
+       vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
        kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
 #ifdef __x86_64__
        vm_create_irqchip(vm);
@@ -265,67 +261,35 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
        return vm;
 }
 
+#define DIRTY_MEM_BITS 30 /* 1G */
+#define PAGE_SHIFT_4K  12
+
 static void run_test(enum vm_guest_mode mode, unsigned long iterations,
                     unsigned long interval, uint64_t phys_offset)
 {
-       unsigned int guest_pa_bits, guest_page_shift;
        pthread_t vcpu_thread;
        struct kvm_vm *vm;
-       uint64_t max_gfn;
        unsigned long *bmap;
-       unsigned long type = 0;
-
-       switch (mode) {
-       case VM_MODE_P52V48_4K:
-               guest_pa_bits = 52;
-               guest_page_shift = 12;
-               break;
-       case VM_MODE_P52V48_64K:
-               guest_pa_bits = 52;
-               guest_page_shift = 16;
-               break;
-       case VM_MODE_P48V48_4K:
-               guest_pa_bits = 48;
-               guest_page_shift = 12;
-               break;
-       case VM_MODE_P48V48_64K:
-               guest_pa_bits = 48;
-               guest_page_shift = 16;
-               break;
-       case VM_MODE_P40V48_4K:
-               guest_pa_bits = 40;
-               guest_page_shift = 12;
-               break;
-       case VM_MODE_P40V48_64K:
-               guest_pa_bits = 40;
-               guest_page_shift = 16;
-               break;
-       default:
-               TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
-       }
 
-       DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode));
-
-#ifdef __x86_64__
        /*
-        * FIXME
-        * The x86_64 kvm selftests framework currently only supports a
-        * single PML4 which restricts the number of physical address
-        * bits we can change to 39.
+        * We reserve page table for 2 times of extra dirty mem which
+        * will definitely cover the original (1G+) test range.  Here
+        * we do the calculation with 4K page size which is the
+        * smallest so the page number will be enough for all archs
+        * (e.g., 64K page size guest will need even less memory for
+        * page tables).
         */
-       guest_pa_bits = 39;
-#endif
-#ifdef __aarch64__
-       if (guest_pa_bits != 40)
-               type = KVM_VM_TYPE_ARM_IPA_SIZE(guest_pa_bits);
-#endif
-       max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1;
-       guest_page_size = (1ul << guest_page_shift);
+       vm = create_vm(mode, VCPU_ID,
+                      2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K),
+                      guest_code);
+
+       guest_page_size = vm_get_page_size(vm);
        /*
         * A little more than 1G of guest page sized pages.  Cover the
         * case where the size is not aligned to 64 pages.
         */
-       guest_num_pages = (1ul << (30 - guest_page_shift)) + 16;
+       guest_num_pages = (1ul << (DIRTY_MEM_BITS -
+                                  vm_get_page_shift(vm))) + 16;
 #ifdef __s390x__
        /* Round up to multiple of 1M (segment size) */
        guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL;
@@ -335,7 +299,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
                         !!((guest_num_pages * guest_page_size) % host_page_size);
 
        if (!phys_offset) {
-               guest_test_phys_mem = (max_gfn - guest_num_pages) * guest_page_size;
+               guest_test_phys_mem = (vm_get_max_gfn(vm) -
+                                      guest_num_pages) * guest_page_size;
                guest_test_phys_mem &= ~(host_page_size - 1);
        } else {
                guest_test_phys_mem = phys_offset;
@@ -351,8 +316,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
        bmap = bitmap_alloc(host_num_pages);
        host_bmap_track = bitmap_alloc(host_num_pages);
 
-       vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code, type);
-
 #ifdef USE_CLEAR_DIRTY_LOG
        struct kvm_enable_cap cap = {};
 
@@ -482,7 +445,7 @@ int main(int argc, char *argv[])
 #endif
 
 #ifdef __x86_64__
-       vm_guest_mode_params_init(VM_MODE_P52V48_4K, true, true);
+       vm_guest_mode_params_init(VM_MODE_PXXV48_4K, true, true);
 #endif
 #ifdef __aarch64__
        vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true);
index 5463b78..29cccaf 100644 (file)
@@ -24,6 +24,12 @@ struct kvm_vm;
 typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
 typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
 
+#ifndef NDEBUG
+#define DEBUG(...) printf(__VA_ARGS__);
+#else
+#define DEBUG(...)
+#endif
+
 /* Minimum allocated guest virtual and physical addresses */
 #define KVM_UTIL_MIN_VADDR             0x2000
 
@@ -38,11 +44,14 @@ enum vm_guest_mode {
        VM_MODE_P48V48_64K,
        VM_MODE_P40V48_4K,
        VM_MODE_P40V48_64K,
+       VM_MODE_PXXV48_4K,      /* For 48bits VA but ANY bits PA */
        NUM_VM_MODES,
 };
 
-#ifdef __aarch64__
+#if defined(__aarch64__)
 #define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#elif defined(__x86_64__)
+#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
 #else
 #define VM_MODE_DEFAULT VM_MODE_P52V48_4K
 #endif
@@ -60,8 +69,7 @@ int kvm_check_cap(long cap);
 int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
 
 struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
-struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
-                         int perm, unsigned long type);
+struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
 void kvm_vm_free(struct kvm_vm *vmp);
 void kvm_vm_restart(struct kvm_vm *vmp, int perm);
 void kvm_vm_release(struct kvm_vm *vmp);
@@ -146,6 +154,10 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
 
 bool vm_is_unrestricted_guest(struct kvm_vm *vm);
 
+unsigned int vm_get_page_size(struct kvm_vm *vm);
+unsigned int vm_get_page_shift(struct kvm_vm *vm);
+unsigned int vm_get_max_gfn(struct kvm_vm *vm);
+
 struct kvm_userspace_memory_region *
 kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
                                 uint64_t end);
index 80d1974..0c17f2e 100644 (file)
@@ -325,6 +325,9 @@ uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
 void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
                  uint64_t msr_value);
 
+uint32_t kvm_get_cpuid_max(void);
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
+
 /*
  * Basic CPU control in CR0
  */
index 486400a..86036a5 100644 (file)
@@ -264,6 +264,9 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
        case VM_MODE_P52V48_4K:
                TEST_ASSERT(false, "AArch64 does not support 4K sized pages "
                                   "with 52-bit physical address ranges");
+       case VM_MODE_PXXV48_4K:
+               TEST_ASSERT(false, "AArch64 does not support 4K sized pages "
+                                  "with ANY-bit physical address ranges");
        case VM_MODE_P52V48_64K:
                tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
                tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
index 6e49bb0..80a338b 100644 (file)
@@ -8,6 +8,7 @@
 #include "test_util.h"
 #include "kvm_util.h"
 #include "kvm_util_internal.h"
+#include "processor.h"
 
 #include <assert.h>
 #include <sys/mman.h>
@@ -84,7 +85,7 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
        return ret;
 }
 
-static void vm_open(struct kvm_vm *vm, int perm, unsigned long type)
+static void vm_open(struct kvm_vm *vm, int perm)
 {
        vm->kvm_fd = open(KVM_DEV_PATH, perm);
        if (vm->kvm_fd < 0)
@@ -95,18 +96,19 @@ static void vm_open(struct kvm_vm *vm, int perm, unsigned long type)
                exit(KSFT_SKIP);
        }
 
-       vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, type);
+       vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
        TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
                "rc: %i errno: %i", vm->fd, errno);
 }
 
 const char * const vm_guest_mode_string[] = {
-       "PA-bits:52, VA-bits:48, 4K pages",
-       "PA-bits:52, VA-bits:48, 64K pages",
-       "PA-bits:48, VA-bits:48, 4K pages",
-       "PA-bits:48, VA-bits:48, 64K pages",
-       "PA-bits:40, VA-bits:48, 4K pages",
-       "PA-bits:40, VA-bits:48, 64K pages",
+       "PA-bits:52,  VA-bits:48,  4K pages",
+       "PA-bits:52,  VA-bits:48, 64K pages",
+       "PA-bits:48,  VA-bits:48,  4K pages",
+       "PA-bits:48,  VA-bits:48, 64K pages",
+       "PA-bits:40,  VA-bits:48,  4K pages",
+       "PA-bits:40,  VA-bits:48, 64K pages",
+       "PA-bits:ANY, VA-bits:48,  4K pages",
 };
 _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
               "Missing new mode strings?");
@@ -130,17 +132,17 @@ _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
  * descriptor to control the created VM is created with the permissions
  * given by perm (e.g. O_RDWR).
  */
-struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
-                         int perm, unsigned long type)
+struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 {
        struct kvm_vm *vm;
 
+       DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
        vm = calloc(1, sizeof(*vm));
        TEST_ASSERT(vm != NULL, "Insufficient Memory");
 
        vm->mode = mode;
-       vm->type = type;
-       vm_open(vm, perm, type);
+       vm->type = 0;
 
        /* Setup mode specific traits. */
        switch (vm->mode) {
@@ -186,10 +188,32 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
                vm->page_size = 0x10000;
                vm->page_shift = 16;
                break;
+       case VM_MODE_PXXV48_4K:
+#ifdef __x86_64__
+               kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
+               TEST_ASSERT(vm->va_bits == 48, "Linear address width "
+                           "(%d bits) not supported", vm->va_bits);
+               vm->pgtable_levels = 4;
+               vm->page_size = 0x1000;
+               vm->page_shift = 12;
+               DEBUG("Guest physical address width detected: %d\n",
+                     vm->pa_bits);
+#else
+               TEST_ASSERT(false, "VM_MODE_PXXV48_4K not supported on "
+                           "non-x86 platforms");
+#endif
+               break;
        default:
                TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
        }
 
+#ifdef __aarch64__
+       if (vm->pa_bits != 40)
+               vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
+#endif
+
+       vm_open(vm, perm);
+
        /* Limit to VA-bit canonical virtual addresses. */
        vm->vpages_valid = sparsebit_alloc();
        sparsebit_set_num(vm->vpages_valid,
@@ -212,7 +236,7 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
 
 struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 {
-       return _vm_create(mode, phy_pages, perm, 0);
+       return _vm_create(mode, phy_pages, perm);
 }
 
 /*
@@ -232,7 +256,7 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
 {
        struct userspace_mem_region *region;
 
-       vm_open(vmp, perm, vmp->type);
+       vm_open(vmp, perm);
        if (vmp->has_irqchip)
                vm_create_irqchip(vmp);
 
@@ -1628,3 +1652,18 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm)
 
        return val == 'Y';
 }
+
+unsigned int vm_get_page_size(struct kvm_vm *vm)
+{
+       return vm->page_size;
+}
+
+unsigned int vm_get_page_shift(struct kvm_vm *vm)
+{
+       return vm->page_shift;
+}
+
+unsigned int vm_get_max_gfn(struct kvm_vm *vm)
+{
+       return vm->max_gfn;
+}
index 0a5e487..c53dbc6 100644 (file)
@@ -228,7 +228,7 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
 
 void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
 {
-       TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
+       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
                "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
        /* If needed, create page map l4 table. */
@@ -261,7 +261,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
        uint16_t index[4];
        struct pageMapL4Entry *pml4e;
 
-       TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
+       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
                "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
        TEST_ASSERT((vaddr % vm->page_size) == 0,
@@ -547,7 +547,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
        struct pageDirectoryEntry *pde;
        struct pageTableEntry *pte;
 
-       TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
+       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
                "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
        index[0] = (gva >> 12) & 0x1ffu;
@@ -621,7 +621,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
        kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
 
        switch (vm->mode) {
-       case VM_MODE_P52V48_4K:
+       case VM_MODE_PXXV48_4K:
                sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
                sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
                sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
@@ -1157,3 +1157,25 @@ bool is_intel_cpu(void)
        chunk = (const uint32_t *)("GenuineIntel");
        return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
 }
+
+uint32_t kvm_get_cpuid_max(void)
+{
+       return kvm_get_supported_cpuid_entry(0x80000000)->eax;
+}
+
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
+{
+       struct kvm_cpuid_entry2 *entry;
+       bool pae;
+
+       /* SDM 4.1.4 */
+       if (kvm_get_cpuid_max() < 0x80000008) {
+               pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
+               *pa_bits = pae ? 36 : 32;
+               *va_bits = 32;
+       } else {
+               entry = kvm_get_supported_cpuid_entry(0x80000008);
+               *pa_bits = entry->eax & 0xff;
+               *va_bits = (entry->eax >> 8) & 0xff;
+       }
+}
index 4bfc9a9..da4d89a 100644 (file)
@@ -32,7 +32,7 @@ void ucall(uint64_t cmd, int nargs, ...)
        va_end(va);
 
        asm volatile("in %[port], %%al"
-               : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax");
+               : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory");
 }
 
 uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
index ee59831..443a2b5 100644 (file)
@@ -26,6 +26,25 @@ static void guest_code(void)
 {
 }
 
+/*
+ * Report whether SMT can be active, based on the contents of
+ * /sys/devices/system/cpu/smt/control.
+ *
+ * Returns 0 when the file starts with "forceoff" or "notsupported", and 1
+ * otherwise (including when the file cannot be opened or read).
+ */
+static int smt_possible(void)
+{
+       /*
+        * Zero-filled, and the last byte is never written, so a short read
+        * leaves NUL bytes behind the data and the comparisons below never
+        * look at bytes that were not read from the file.
+        */
+       char buf[16] = { 0 };
+       int res = 1;
+       FILE *f;
+
+       f = fopen("/sys/devices/system/cpu/smt/control", "r");
+       if (!f)
+               return res;
+
+       if (fread(buf, 1, sizeof(buf) - 1, f) > 0) {
+               if (!strncmp(buf, "forceoff", 8) ||
+                   !strncmp(buf, "notsupported", 12))
+                       res = 0;
+       }
+       fclose(f);
+
+       return res;
+}
+
 static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
                          int evmcs_enabled)
 {
@@ -59,6 +78,14 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
                TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
                            !entry->padding[2], "padding should be zero");
 
+               if (entry->function == 0x40000004) {
+                       int nononarchcs = !!(entry->eax & (1UL << 18));
+
+                       TEST_ASSERT(nononarchcs == !smt_possible(),
+                                   "NoNonArchitecturalCoreSharing bit"
+                                   " doesn't reflect SMT setting");
+               }
+
                /*
                 * If needed for debug:
                 * fprintf(stdout,
index 0dd7700..ad23100 100644 (file)
@@ -1 +1,3 @@
+CONFIG_LIVEPATCH=y
+CONFIG_DYNAMIC_DEBUG=y
 CONFIG_TEST_LIVEPATCH=m
index 020c44f..f2f7ec0 100644 (file)
@@ -1 +1,2 @@
-membarrier_test
+membarrier_test_multi_thread
+membarrier_test_single_thread
index 97e3bdf..34d1c81 100644 (file)
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0-only
 CFLAGS += -g -I../../../../usr/include/
+LDLIBS += -lpthread
 
-TEST_GEN_PROGS := membarrier_test
+TEST_GEN_PROGS := membarrier_test_single_thread \
+               membarrier_test_multi_thread
 
 include ../lib.mk
-
@@ -1,10 +1,11 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 #define _GNU_SOURCE
 #include <linux/membarrier.h>
 #include <syscall.h>
 #include <stdio.h>
 #include <errno.h>
 #include <string.h>
+#include <pthread.h>
 
 #include "../kselftest.h"
 
@@ -223,7 +224,7 @@ static int test_membarrier_global_expedited_success(void)
        return 0;
 }
 
-static int test_membarrier(void)
+static int test_membarrier_fail(void)
 {
        int status;
 
@@ -233,10 +234,27 @@ static int test_membarrier(void)
        status = test_membarrier_flags_fail();
        if (status)
                return status;
-       status = test_membarrier_global_success();
+       status = test_membarrier_private_expedited_fail();
        if (status)
                return status;
-       status = test_membarrier_private_expedited_fail();
+       status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0);
+       if (status < 0) {
+               ksft_test_result_fail("sys_membarrier() failed\n");
+               return status;
+       }
+       if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
+               status = test_membarrier_private_expedited_sync_core_fail();
+               if (status)
+                       return status;
+       }
+       return 0;
+}
+
+static int test_membarrier_success(void)
+{
+       int status;
+
+       status = test_membarrier_global_success();
        if (status)
                return status;
        status = test_membarrier_register_private_expedited_success();
@@ -251,9 +269,6 @@ static int test_membarrier(void)
                return status;
        }
        if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
-               status = test_membarrier_private_expedited_sync_core_fail();
-               if (status)
-                       return status;
                status = test_membarrier_register_private_expedited_sync_core_success();
                if (status)
                        return status;
@@ -300,14 +315,3 @@ static int test_membarrier_query(void)
        ksft_test_result_pass("sys_membarrier available\n");
        return 0;
 }
-
-int main(int argc, char **argv)
-{
-       ksft_print_header();
-       ksft_set_plan(13);
-
-       test_membarrier_query();
-       test_membarrier();
-
-       return ksft_exit_pass();
-}
diff --git a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
new file mode 100644 (file)
index 0000000..ac5613e
--- /dev/null
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/membarrier.h>
+#include <syscall.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <pthread.h>
+
+#include "membarrier_test_impl.h"
+
+static int thread_ready, thread_quit;
+static pthread_mutex_t test_membarrier_thread_mutex =
+       PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t test_membarrier_thread_cond =
+       PTHREAD_COND_INITIALIZER;
+
+/*
+ * Thread body: set thread_ready, then block until thread_quit is set.
+ *
+ * Both flags are accessed under test_membarrier_thread_mutex and signalled
+ * through test_membarrier_thread_cond. The argument is unused and the
+ * return value is always NULL.
+ */
+void *test_membarrier_thread(void *arg)
+{
+       /* Announce that this thread is running. */
+       pthread_mutex_lock(&test_membarrier_thread_mutex);
+       thread_ready = 1;
+       pthread_cond_broadcast(&test_membarrier_thread_cond);
+       pthread_mutex_unlock(&test_membarrier_thread_mutex);
+
+       /* Sleep on the condition variable until thread_quit becomes set. */
+       pthread_mutex_lock(&test_membarrier_thread_mutex);
+       while (!thread_quit)
+               pthread_cond_wait(&test_membarrier_thread_cond,
+                                 &test_membarrier_thread_mutex);
+       pthread_mutex_unlock(&test_membarrier_thread_mutex);
+
+       return NULL;
+}
+
+/*
+ * Run the failure and success membarrier test groups while a second thread
+ * of this process is alive.
+ *
+ * Bails out of the whole test run if the helper thread cannot be created;
+ * otherwise always returns 0.
+ */
+static int test_mt_membarrier(void)
+{
+       pthread_t test_thread;
+       int ret;
+
+       /*
+        * pthread_create() returns an error number directly. Without this
+        * check a failed create would leave us waiting forever for
+        * thread_ready and later joining an uninitialised pthread_t.
+        */
+       ret = pthread_create(&test_thread, NULL,
+                            test_membarrier_thread, NULL);
+       if (ret)
+               ksft_exit_fail_msg("pthread_create failed: %s\n",
+                                  strerror(ret));
+
+       /* Wait until the helper thread reports that it is running. */
+       pthread_mutex_lock(&test_membarrier_thread_mutex);
+       while (!thread_ready)
+               pthread_cond_wait(&test_membarrier_thread_cond,
+                                 &test_membarrier_thread_mutex);
+       pthread_mutex_unlock(&test_membarrier_thread_mutex);
+
+       test_membarrier_fail();
+
+       test_membarrier_success();
+
+       /* Release the helper thread and wait for it to finish. */
+       pthread_mutex_lock(&test_membarrier_thread_mutex);
+       thread_quit = 1;
+       pthread_cond_broadcast(&test_membarrier_thread_cond);
+       pthread_mutex_unlock(&test_membarrier_thread_mutex);
+
+       pthread_join(test_thread, NULL);
+
+       return 0;
+}
+
+/* Entry point of the multi-threaded membarrier selftest. */
+int main(int argc, char **argv)
+{
+       /* Command-line arguments are not consulted. */
+       (void)argc;
+       (void)argv;
+
+       ksft_print_header();
+       ksft_set_plan(13);
+
+       test_membarrier_query();
+
+       /* Multi-threaded */
+       test_mt_membarrier();
+
+       return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
new file mode 100644 (file)
index 0000000..c1c9639
--- /dev/null
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/membarrier.h>
+#include <syscall.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <pthread.h>
+
+#include "membarrier_test_impl.h"
+
+/* Entry point of the single-threaded membarrier selftest. */
+int main(int argc, char **argv)
+{
+       /* Command-line arguments are not consulted. */
+       (void)argc;
+       (void)argv;
+
+       ksft_print_header();
+       ksft_set_plan(13);
+
+       /* Query first, then the failure group, then the success group. */
+       test_membarrier_query();
+       test_membarrier_fail();
+       test_membarrier_success();
+
+       return ksft_exit_pass();
+}
index 6ef7f16..7f8b5c8 100644 (file)
@@ -199,6 +199,11 @@ struct seccomp_notif_sizes {
 };
 #endif
 
+/*
+ * Fallback values, used only when PTRACE_EVENTMSG_SYSCALL_ENTRY has not
+ * already been defined. The guard tests the ENTRY macro alone and then
+ * defines both ENTRY and EXIT.
+ */
+#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
+#define PTRACE_EVENTMSG_SYSCALL_ENTRY  1
+#define PTRACE_EVENTMSG_SYSCALL_EXIT   2
+#endif
+
 #ifndef seccomp
 int seccomp(unsigned int op, unsigned int flags, void *args)
 {
index 9dd8484..1a5db1e 100644 (file)
@@ -2,3 +2,4 @@
 include ../lib.mk
 
 TEST_PROGS := test_smoke.sh test_space.sh
+TEST_PROGS_EXTENDED := tpm2.py tpm2_tests.py
index c2333c7..afff120 100644 (file)
@@ -19,7 +19,7 @@
 
 int fd;
 const char v = 'V';
-static const char sopts[] = "bdehp:t:Tn:NL";
+static const char sopts[] = "bdehp:t:Tn:NLf:";
 static const struct option lopts[] = {
        {"bootstatus",          no_argument, NULL, 'b'},
        {"disable",             no_argument, NULL, 'd'},
@@ -31,6 +31,7 @@ static const struct option lopts[] = {
        {"pretimeout",    required_argument, NULL, 'n'},
        {"getpretimeout",       no_argument, NULL, 'N'},
        {"gettimeleft",         no_argument, NULL, 'L'},
+       {"file",          required_argument, NULL, 'f'},
        {NULL,                  no_argument, NULL, 0x0}
 };
 
@@ -69,16 +70,19 @@ static void term(int sig)
 static void usage(char *progname)
 {
        printf("Usage: %s [options]\n", progname);
-       printf(" -b, --bootstatus    Get last boot status (Watchdog/POR)\n");
-       printf(" -d, --disable       Turn off the watchdog timer\n");
-       printf(" -e, --enable        Turn on the watchdog timer\n");
-       printf(" -h, --help          Print the help message\n");
-       printf(" -p, --pingrate=P    Set ping rate to P seconds (default %d)\n", DEFAULT_PING_RATE);
-       printf(" -t, --timeout=T     Set timeout to T seconds\n");
-       printf(" -T, --gettimeout    Get the timeout\n");
-       printf(" -n, --pretimeout=T  Set the pretimeout to T seconds\n");
-       printf(" -N, --getpretimeout Get the pretimeout\n");
-       printf(" -L, --gettimeleft   Get the time left until timer expires\n");
+       printf(" -f, --file\t\tOpen watchdog device file\n");
+       printf("\t\t\tDefault is /dev/watchdog\n");
+       printf(" -b, --bootstatus\tGet last boot status (Watchdog/POR)\n");
+       printf(" -d, --disable\t\tTurn off the watchdog timer\n");
+       printf(" -e, --enable\t\tTurn on the watchdog timer\n");
+       printf(" -h, --help\t\tPrint the help message\n");
+       printf(" -p, --pingrate=P\tSet ping rate to P seconds (default %d)\n",
+              DEFAULT_PING_RATE);
+       printf(" -t, --timeout=T\tSet timeout to T seconds\n");
+       printf(" -T, --gettimeout\tGet the timeout\n");
+       printf(" -n, --pretimeout=T\tSet the pretimeout to T seconds\n");
+       printf(" -N, --getpretimeout\tGet the pretimeout\n");
+       printf(" -L, --gettimeleft\tGet the time left until timer expires\n");
        printf("\n");
        printf("Parameters are parsed left-to-right in real-time.\n");
        printf("Example: %s -d -t 10 -p 5 -e\n", progname);
@@ -92,14 +96,20 @@ int main(int argc, char *argv[])
        int ret;
        int c;
        int oneshot = 0;
+       char *file = "/dev/watchdog";
 
        setbuf(stdout, NULL);
 
-       fd = open("/dev/watchdog", O_WRONLY);
+       while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) {
+               if (c == 'f')
+                       file = optarg;
+       }
+
+       fd = open(file, O_WRONLY);
 
        if (fd == -1) {
                if (errno == ENOENT)
-                       printf("Watchdog device not enabled.\n");
+                       printf("Watchdog device (%s) not found.\n", file);
                else if (errno == EACCES)
                        printf("Run watchdog as root.\n");
                else
@@ -108,6 +118,8 @@ int main(int argc, char *argv[])
                exit(-1);
        }
 
+       optind = 0;
+
        while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) {
                switch (c) {
                case 'b':
@@ -190,6 +202,9 @@ int main(int argc, char *argv[])
                        else
                                printf("WDIOC_GETTIMELEFT error '%s'\n", strerror(errno));
                        break;
+               case 'f':
+                       /* Handled above */
+                       break;
 
                default:
                        usage(argv[0]);
index 6a89eb0..e6f7cb2 100644 (file)
@@ -11,6 +11,9 @@ datafile_y = initramfs_data.cpio$(suffix_y)
 datafile_d_y = .$(datafile_y).d
 AFLAGS_initramfs_data.o += -DINITRAMFS_IMAGE="usr/$(datafile_y)"
 
+# clean rules do not have CONFIG_INITRAMFS_COMPRESSION.  So clean up after all
+# possible compression formats.
+clean-files += initramfs_data.cpio*
 
 # Generate builtin.o based on initramfs_data.o
 obj-$(CONFIG_BLK_DEV_INITRD) := initramfs_data.o